Compare commits
109 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 7efd91d4b4 | |||
| 0aa1269e56 | |||
| 3c29834354 | |||
| 0eb85906b0 | |||
| ff9b0528a2 | |||
| 8feaa7cd1b | |||
| 57a2b97ae8 | |||
| bd9afb027a | |||
| 6051fba9dc | |||
| 2acc8783d1 | |||
| acdcb167fb | |||
| 51f4c9827f | |||
| 2e78a2b6b2 | |||
| 5a1c599412 | |||
| 0f6eabb890 | |||
| eb93f88e1d | |||
| 3ccda2aa05 | |||
| 983bbe2d40 | |||
| 379b2273d9 | |||
| 7db2703b33 | |||
| 7c59e1a871 | |||
| 6fdbf2f2d7 | |||
| 0a679cb7ad | |||
| 41b4d69167 | |||
| 3f343cf7cf | |||
| 4ae5b58cb1 | |||
| 2258a181f0 | |||
| 11b2942f16 | |||
| b08cbc7a79 | |||
| c95c6bdb7c | |||
| bd929ea514 | |||
| 6a20e187dd | |||
| 9ff21437a0 | |||
| 44a0cbe525 | |||
| 2af0848f3c | |||
| 7baf370d3d | |||
| eeda18a9b7 | |||
| 3a9598337f | |||
| 98418afd5d | |||
| 42ff785771 | |||
| 04c489b587 | |||
| 0bb460b070 | |||
| 3504bd401b | |||
| 50d97edbe1 | |||
| e26c4f0e34 | |||
| 24f139e16a | |||
| ef5eaf8d87 | |||
| bf196a3fc0 | |||
| f593c367be | |||
| 470389e6a3 | |||
| 18d5ba8676 | |||
| 8b79acb8de | |||
| 0086fd894d | |||
| 5e67b38437 | |||
| 1df35a93b2 | |||
| 9599271180 | |||
| a5e4a86ebe | |||
| d42b6a2edd | |||
| d001814e3f | |||
| 9d147f7fde | |||
| 692ae6dd07 | |||
| b61ac8964b | |||
| a1ff6b45ea | |||
| 4a0c02b7dc | |||
| 83859b4da0 | |||
| 67c8f837fc | |||
| c7d023937c | |||
| 78d1e252fa | |||
| d0821b0573 | |||
| a0d8dd7ba3 | |||
| e020f46bec | |||
| a884f6d5d8 | |||
| b848ce2c79 | |||
| 1dfcda4e3c | |||
| 1cc0bdd5f3 | |||
| 07046096d9 | |||
| 97b9b3d6a6 | |||
| 165b2e481a | |||
| 327b57da91 | |||
| 64e6165686 | |||
| b5333abc30 | |||
| 255ba5bf26 | |||
| 8f5fee3e3e | |||
| b6ca3c28dc | |||
| 882278520b | |||
| 9bf6e1cd6e | |||
| 9a885fba31 | |||
| aa47812edf | |||
| c8ff70fe03 | |||
| f5af6520d0 | |||
| 1e445b2547 | |||
| f28f07e98e | |||
| 7c4dd7d660 | |||
| e91be4d7dc | |||
| 60d1edc38a | |||
| 3e01de0b09 | |||
| f7e86577bc | |||
| 2e75460066 | |||
| 82a0ed1afb | |||
| 071bdb5a3f | |||
| bc9518f660 | |||
| ce089169d5 | |||
| e3c0084140 | |||
| 5651a73331 | |||
| 81d925f2a5 | |||
| ec02d905c9 | |||
| b7bdf32d4e | |||
| d72985b7ce | |||
| 5a26938aa5 |
@@ -53,6 +53,9 @@ jobs:
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Build skills index (if not already present)
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
@@ -36,6 +36,9 @@ jobs:
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Lint docs diagrams
|
||||
run: npm run lint:diagrams
|
||||
working-directory: website
|
||||
|
||||
@@ -5,78 +5,61 @@ Instructions for AI coding assistants and developers working on the hermes-agent
|
||||
## Development Environment
|
||||
|
||||
```bash
|
||||
source venv/bin/activate # ALWAYS activate before running Python
|
||||
# Prefer .venv; fall back to venv if that's what your checkout has.
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
```
|
||||
|
||||
`scripts/run_tests.sh` probes `.venv` first, then `venv`, then
|
||||
`$HOME/.hermes/hermes-agent/venv` (for worktrees that share a venv with the
|
||||
main checkout).
|
||||
|
||||
## Project Structure
|
||||
|
||||
File counts shift constantly — don't treat the tree below as exhaustive.
|
||||
The canonical source is the filesystem. The notes call out the load-bearing
|
||||
entry points you'll actually edit.
|
||||
|
||||
```
|
||||
hermes-agent/
|
||||
├── run_agent.py # AIAgent class — core conversation loop
|
||||
├── run_agent.py # AIAgent class — core conversation loop (~12k LOC)
|
||||
├── model_tools.py # Tool orchestration, discover_builtin_tools(), handle_function_call()
|
||||
├── toolsets.py # Toolset definitions, _HERMES_CORE_TOOLS list
|
||||
├── cli.py # HermesCLI class — interactive CLI orchestrator
|
||||
├── cli.py # HermesCLI class — interactive CLI orchestrator (~11k LOC)
|
||||
├── hermes_state.py # SessionDB — SQLite session store (FTS5 search)
|
||||
├── agent/ # Agent internals
|
||||
│ ├── prompt_builder.py # System prompt assembly
|
||||
│ ├── context_compressor.py # Auto context compression
|
||||
│ ├── prompt_caching.py # Anthropic prompt caching
|
||||
│ ├── auxiliary_client.py # Auxiliary LLM client (vision, summarization)
|
||||
│ ├── model_metadata.py # Model context lengths, token estimation
|
||||
│ ├── models_dev.py # models.dev registry integration (provider-aware context)
|
||||
│ ├── display.py # KawaiiSpinner, tool preview formatting
|
||||
│ ├── skill_commands.py # Skill slash commands (shared CLI/gateway)
|
||||
│ └── trajectory.py # Trajectory saving helpers
|
||||
├── hermes_cli/ # CLI subcommands and setup
|
||||
│ ├── main.py # Entry point — all `hermes` subcommands
|
||||
│ ├── config.py # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
|
||||
│ ├── commands.py # Slash command definitions + SlashCommandCompleter
|
||||
│ ├── callbacks.py # Terminal callbacks (clarify, sudo, approval)
|
||||
│ ├── setup.py # Interactive setup wizard
|
||||
│ ├── skin_engine.py # Skin/theme engine — CLI visual customization
|
||||
│ ├── skills_config.py # `hermes skills` — enable/disable skills per platform
|
||||
│ ├── tools_config.py # `hermes tools` — enable/disable tools per platform
|
||||
│ ├── skills_hub.py # `/skills` slash command (search, browse, install)
|
||||
│ ├── models.py # Model catalog, provider model lists
|
||||
│ ├── model_switch.py # Shared /model switch pipeline (CLI + gateway)
|
||||
│ └── auth.py # Provider credential resolution
|
||||
├── tools/ # Tool implementations (one file per tool)
|
||||
│ ├── registry.py # Central tool registry (schemas, handlers, dispatch)
|
||||
│ ├── approval.py # Dangerous command detection
|
||||
│ ├── terminal_tool.py # Terminal orchestration
|
||||
│ ├── process_registry.py # Background process management
|
||||
│ ├── file_tools.py # File read/write/search/patch
|
||||
│ ├── web_tools.py # Web search/extract (Parallel + Firecrawl)
|
||||
│ ├── browser_tool.py # Browserbase browser automation
|
||||
│ ├── code_execution_tool.py # execute_code sandbox
|
||||
│ ├── delegate_tool.py # Subagent delegation
|
||||
│ ├── mcp_tool.py # MCP client (~1050 lines)
|
||||
├── hermes_constants.py # get_hermes_home(), display_hermes_home() — profile-aware paths
|
||||
├── hermes_logging.py # setup_logging() — agent.log / errors.log / gateway.log (profile-aware)
|
||||
├── batch_runner.py # Parallel batch processing
|
||||
├── agent/ # Agent internals (provider adapters, memory, caching, compression, etc.)
|
||||
├── hermes_cli/ # CLI subcommands, setup wizard, plugins loader, skin engine
|
||||
├── tools/ # Tool implementations — auto-discovered via tools/registry.py
|
||||
│ └── environments/ # Terminal backends (local, docker, ssh, modal, daytona, singularity)
|
||||
├── gateway/ # Messaging platform gateway
|
||||
│ ├── run.py # Main loop, slash commands, message dispatch
|
||||
│ ├── session.py # SessionStore — conversation persistence
|
||||
│ └── platforms/ # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
|
||||
├── gateway/ # Messaging gateway — run.py + session.py + platforms/
|
||||
│ ├── platforms/ # Adapter per platform (telegram, discord, slack, whatsapp,
|
||||
│ │ # homeassistant, signal, matrix, mattermost, email, sms,
|
||||
│ │ # dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
|
||||
│ │ # webhook, api_server, ...). See ADDING_A_PLATFORM.md.
|
||||
│ └── builtin_hooks/ # Always-registered gateway hooks (boot-md, ...)
|
||||
├── plugins/ # Plugin system (see "Plugins" section below)
|
||||
│ ├── memory/ # Memory-provider plugins (honcho, mem0, supermemory, ...)
|
||||
│ ├── context_engine/ # Context-engine plugins
|
||||
│ └── <others>/ # Dashboard, image-gen, disk-cleanup, examples, ...
|
||||
├── optional-skills/ # Heavier/niche skills shipped but NOT active by default
|
||||
├── skills/ # Built-in skills bundled with the repo
|
||||
├── ui-tui/ # Ink (React) terminal UI — `hermes --tui`
|
||||
│ ├── src/entry.tsx # TTY gate + render()
|
||||
│ ├── src/app.tsx # Main state machine and UI
|
||||
│ ├── src/gatewayClient.ts # Child process + JSON-RPC bridge
|
||||
│ ├── src/app/ # Decomposed app logic (event handler, slash handler, stores, hooks)
|
||||
│ ├── src/components/ # Ink components (branding, markdown, prompts, pickers, etc.)
|
||||
│ ├── src/hooks/ # useCompletion, useInputHistory, useQueue, useVirtualHistory
|
||||
│ └── src/lib/ # Pure helpers (history, osc52, text, rpc, messages)
|
||||
│ └── src/ # entry.tsx, app.tsx, gatewayClient.ts + app/components/hooks/lib
|
||||
├── tui_gateway/ # Python JSON-RPC backend for the TUI
|
||||
│ ├── entry.py # stdio entrypoint
|
||||
│ ├── server.py # RPC handlers and session logic
|
||||
│ ├── render.py # Optional rich/ANSI bridge
|
||||
│ └── slash_worker.py # Persistent HermesCLI subprocess for slash commands
|
||||
├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration)
|
||||
├── cron/ # Scheduler (jobs.py, scheduler.py)
|
||||
├── cron/ # Scheduler — jobs.py, scheduler.py
|
||||
├── environments/ # RL training environments (Atropos)
|
||||
├── tests/ # Pytest suite (~3000 tests)
|
||||
└── batch_runner.py # Parallel batch processing
|
||||
├── scripts/ # run_tests.sh, release.py, auxiliary scripts
|
||||
├── website/ # Docusaurus docs site
|
||||
└── tests/ # Pytest suite (~15k tests across ~700 files as of Apr 2026)
|
||||
```
|
||||
|
||||
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)
|
||||
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (secrets only — API keys, tokens, passwords).
|
||||
**Logs:** `~/.hermes/logs/` — `agent.log` (INFO+), `errors.log` (WARNING+),
|
||||
`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
|
||||
Browse with `hermes logs [--follow] [--level ...] [--session ...]`.
|
||||
|
||||
## File Dependency Chain
|
||||
|
||||
@@ -94,20 +77,30 @@ run_agent.py, cli.py, batch_runner.py, environments/
|
||||
|
||||
## AIAgent Class (run_agent.py)
|
||||
|
||||
The real `AIAgent.__init__` takes ~60 parameters (credentials, routing, callbacks,
|
||||
session context, budget, credential pool, etc.). The signature below is the
|
||||
minimum subset you'll usually touch — read `run_agent.py` for the full list.
|
||||
|
||||
```python
|
||||
class AIAgent:
|
||||
def __init__(self,
|
||||
model: str = "anthropic/claude-opus-4.6",
|
||||
max_iterations: int = 90,
|
||||
base_url: str = None,
|
||||
api_key: str = None,
|
||||
provider: str = None,
|
||||
api_mode: str = None, # "chat_completions" | "codex_responses" | ...
|
||||
model: str = "", # empty → resolved from config/provider later
|
||||
max_iterations: int = 90, # tool-calling iterations (shared with subagents)
|
||||
enabled_toolsets: list = None,
|
||||
disabled_toolsets: list = None,
|
||||
quiet_mode: bool = False,
|
||||
save_trajectories: bool = False,
|
||||
platform: str = None, # "cli", "telegram", etc.
|
||||
platform: str = None, # "cli", "telegram", etc.
|
||||
session_id: str = None,
|
||||
skip_context_files: bool = False,
|
||||
skip_memory: bool = False,
|
||||
# ... plus provider, api_mode, callbacks, routing params
|
||||
credential_pool=None,
|
||||
# ... plus callbacks, thread/user/chat IDs, iteration_budget, fallback_model,
|
||||
# checkpoints config, prefill_messages, service_tier, reasoning_config, etc.
|
||||
): ...
|
||||
|
||||
def chat(self, message: str) -> str:
|
||||
@@ -120,10 +113,13 @@ class AIAgent:
|
||||
|
||||
### Agent Loop
|
||||
|
||||
The core loop is inside `run_conversation()` — entirely synchronous:
|
||||
The core loop is inside `run_conversation()` — entirely synchronous, with
|
||||
interrupt checks, budget tracking, and a one-turn grace call:
|
||||
|
||||
```python
|
||||
while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
|
||||
while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) \
|
||||
or self._budget_grace_call:
|
||||
if self._interrupt_requested: break
|
||||
response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas)
|
||||
if response.tool_calls:
|
||||
for tool_call in response.tool_calls:
|
||||
@@ -134,7 +130,8 @@ while api_call_count < self.max_iterations and self.iteration_budget.remaining >
|
||||
return response.content
|
||||
```
|
||||
|
||||
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`.
|
||||
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`.
|
||||
Reasoning content is stored in `assistant_msg["reasoning"]`.
|
||||
|
||||
---
|
||||
|
||||
@@ -280,7 +277,7 @@ The registry handles schema collection, dispatch, availability checking, and err
|
||||
|
||||
**State files**: If a tool stores persistent state (caches, logs, checkpoints), use `get_hermes_home()` for the base directory — never `Path.home() / ".hermes"`. This ensures each profile gets its own state.
|
||||
|
||||
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern.
|
||||
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `tools/todo_tool.py` for the pattern.
|
||||
|
||||
---
|
||||
|
||||
@@ -288,9 +285,13 @@ The registry handles schema collection, dispatch, availability checking, and err
|
||||
|
||||
### config.yaml options:
|
||||
1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
|
||||
2. Bump `_config_version` (currently 5) to trigger migration for existing users
|
||||
2. Bump `_config_version` (check the current value at the top of `DEFAULT_CONFIG`)
|
||||
ONLY if you need to actively migrate/transform existing user config
|
||||
(renaming keys, changing structure). Adding a new key to an existing
|
||||
section is handled automatically by the deep-merge and does NOT require
|
||||
a version bump.
|
||||
|
||||
### .env variables:
|
||||
### .env variables (SECRETS ONLY — API keys, tokens, passwords):
|
||||
1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
|
||||
```python
|
||||
"NEW_API_KEY": {
|
||||
@@ -302,13 +303,29 @@ The registry handles schema collection, dispatch, availability checking, and err
|
||||
},
|
||||
```
|
||||
|
||||
### Config loaders (two separate systems):
|
||||
Non-secret settings (timeouts, thresholds, feature flags, paths, display
|
||||
preferences) belong in `config.yaml`, not `.env`. If internal code needs an
|
||||
env var mirror for backward compatibility, bridge it from `config.yaml` to
|
||||
the env var in code (see `gateway_timeout`, `terminal.cwd` → `TERMINAL_CWD`).
|
||||
|
||||
### Config loaders (three paths — know which one you're in):
|
||||
|
||||
| Loader | Used by | Location |
|
||||
|--------|---------|----------|
|
||||
| `load_cli_config()` | CLI mode | `cli.py` |
|
||||
| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` |
|
||||
| Direct YAML load | Gateway | `gateway/run.py` |
|
||||
| `load_cli_config()` | CLI mode | `cli.py` — merges CLI-specific defaults + user YAML |
|
||||
| `load_config()` | `hermes tools`, `hermes setup`, most CLI subcommands | `hermes_cli/config.py` — merges `DEFAULT_CONFIG` + user YAML |
|
||||
| Direct YAML load | Gateway runtime | `gateway/run.py` + `gateway/config.py` — reads user YAML raw |
|
||||
|
||||
If you add a new key and the CLI sees it but the gateway doesn't (or vice
|
||||
versa), you're on the wrong loader. Check `DEFAULT_CONFIG` coverage.
|
||||
|
||||
### Working directory:
|
||||
- **CLI** — uses the process's current directory (`os.getcwd()`).
|
||||
- **Messaging** — uses `terminal.cwd` from `config.yaml`. The gateway bridges this
|
||||
to the `TERMINAL_CWD` env var for child tools. **`MESSAGING_CWD` has been
|
||||
removed** — the config loader prints a deprecation warning if it's set in
|
||||
`.env`. Same for `TERMINAL_CWD` in `.env`; the canonical setting is
|
||||
`terminal.cwd` in `config.yaml`.
|
||||
|
||||
---
|
||||
|
||||
@@ -401,7 +418,95 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml.
|
||||
|
||||
---
|
||||
|
||||
## Plugins
|
||||
|
||||
Hermes has two plugin surfaces. Both live under `plugins/` in the repo so
|
||||
repo-shipped plugins can be discovered alongside user-installed ones in
|
||||
`~/.hermes/plugins/` and pip-installed entry points.
|
||||
|
||||
### General plugins (`hermes_cli/plugins.py` + `plugins/<name>/`)
|
||||
|
||||
`PluginManager` discovers plugins from the repo's `plugins/<name>/` directories, `~/.hermes/plugins/`, `./.hermes/plugins/`,
|
||||
and pip entry points. Each plugin exposes a `register(ctx)` function that
|
||||
can:
|
||||
|
||||
- Register Python-callback lifecycle hooks:
|
||||
`pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`,
|
||||
`on_session_start`, `on_session_end`
|
||||
- Register new tools via `ctx.register_tool(...)`
|
||||
- Register CLI subcommands via `ctx.register_cli_command(...)` — the
|
||||
plugin's argparse tree is wired into `hermes` at startup so
|
||||
`hermes <pluginname> <subcmd>` works with no change to `main.py`
|
||||
|
||||
Hooks are invoked from `model_tools.py` (pre/post tool) and `run_agent.py`
|
||||
(lifecycle). **Discovery timing pitfall:** `discover_plugins()` only runs
|
||||
as a side effect of importing `model_tools.py`. Code paths that read plugin
|
||||
state without importing `model_tools.py` first must call `discover_plugins()`
|
||||
explicitly (it's idempotent).
|
||||
|
||||
### Memory-provider plugins (`plugins/memory/<name>/`)
|
||||
|
||||
Separate discovery system for pluggable memory backends. Current built-in
|
||||
providers include **honcho, mem0, supermemory, byterover, hindsight,
|
||||
holographic, openviking, retaindb**.
|
||||
|
||||
Each provider implements the `MemoryProvider` ABC (see `agent/memory_provider.py`)
|
||||
and is orchestrated by `agent/memory_manager.py`. Lifecycle hooks include
|
||||
`sync_turn(turn_messages)`, `prefetch(query)`, `shutdown()`, and optional
|
||||
`post_setup(hermes_home, config)` for setup-wizard integration.
|
||||
|
||||
**CLI commands via `plugins/memory/<name>/cli.py`:** if a memory plugin
|
||||
defines `register_cli(subparser)`, `discover_plugin_cli_commands()` finds
|
||||
it at argparse setup time and wires it into `hermes <plugin>`. The
|
||||
framework only exposes CLI commands for the **currently active** memory
|
||||
provider (read from `memory.provider` in config.yaml), so disabled
|
||||
providers don't clutter `hermes --help`.
|
||||
|
||||
**Rule (Teknium, May 2026):** plugins MUST NOT modify core files
|
||||
(`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.).
|
||||
If a plugin needs a capability the framework doesn't expose, expand the
|
||||
generic plugin surface (new hook, new ctx method) — never hardcode
|
||||
plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
|
||||
honcho argparse from `main.py` for exactly this reason.
|
||||
|
||||
### Dashboard / context-engine / image-gen plugin directories
|
||||
|
||||
`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
|
||||
etc. follow the same pattern (ABC + orchestrator + per-plugin directory).
|
||||
Context engines plug into `agent/context_engine.py`; image-gen providers
|
||||
into `agent/image_gen_provider.py`.
|
||||
|
||||
---
|
||||
|
||||
## Skills
|
||||
|
||||
Two parallel surfaces:
|
||||
|
||||
- **`skills/`** — built-in skills shipped and loadable by default.
|
||||
Organized by category directories (e.g. `skills/github/`, `skills/mlops/`).
|
||||
- **`optional-skills/`** — heavier or niche skills shipped with the repo but
|
||||
NOT active by default. Installed explicitly via
|
||||
`hermes skills install official/<category>/<skill>`. Adapter lives in
|
||||
`tools/skills_hub.py` (`OptionalSkillSource`). Categories include
|
||||
`autonomous-ai-agents`, `blockchain`, `communication`, `creative`,
|
||||
`devops`, `email`, `health`, `mcp`, `migration`, `mlops`, `productivity`,
|
||||
`research`, `security`, `web-development`.
|
||||
|
||||
When reviewing skill PRs, check which directory they target — heavy-dep or
|
||||
niche skills belong in `optional-skills/`.
|
||||
|
||||
### SKILL.md frontmatter
|
||||
|
||||
Standard fields: `name`, `description`, `version`, `platforms`
|
||||
(OS-gating list: `[macos]`, `[linux, macos]`, ...),
|
||||
`metadata.hermes.tags`, `metadata.hermes.category`,
|
||||
`metadata.hermes.config` (config.yaml settings the skill needs — stored
|
||||
under `skills.config.<key>`, prompted during setup, injected at load time).
|
||||
|
||||
---
|
||||
|
||||
## Important Policies
|
||||
|
||||
### Prompt Caching Must Not Break
|
||||
|
||||
Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:**
|
||||
@@ -411,9 +516,10 @@ Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT i
|
||||
|
||||
Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression.
|
||||
|
||||
### Working Directory Behavior
|
||||
- **CLI**: Uses current directory (`.` → `os.getcwd()`)
|
||||
- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory)
|
||||
Slash commands that mutate system-prompt state (skills, tools, memory, etc.)
|
||||
must be **cache-aware**: default to deferred invalidation (change takes
|
||||
effect next session), with an opt-in `--now` flag for immediate
|
||||
invalidation. See `/skills install --now` for the canonical pattern.
|
||||
|
||||
### Background Process Notifications (Gateway)
|
||||
|
||||
@@ -435,7 +541,7 @@ Hermes supports **profiles** — multiple fully isolated instances, each with it
|
||||
`HERMES_HOME` directory (config, API keys, memory, sessions, skills, gateway, etc.).
|
||||
|
||||
The core mechanism: `_apply_profile_override()` in `hermes_cli/main.py` sets
|
||||
`HERMES_HOME` before any module imports. All 119+ references to `get_hermes_home()`
|
||||
`HERMES_HOME` before any module imports. All `get_hermes_home()` references
|
||||
automatically scope to the active profile.
|
||||
|
||||
### Rules for profile-safe code
|
||||
@@ -492,8 +598,12 @@ Use `get_hermes_home()` from `hermes_constants` for code paths. Use `display_her
|
||||
for user-facing print/log messages. Hardcoding `~/.hermes` breaks profiles — each profile
|
||||
has its own `HERMES_HOME` directory. This was the source of 5 bugs fixed in PR #3575.
|
||||
|
||||
### DO NOT use `simple_term_menu` for interactive menus
|
||||
Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern.
|
||||
### DO NOT introduce new `simple_term_menu` usage
|
||||
Existing call sites in `hermes_cli/main.py` remain for legacy fallback only;
|
||||
the preferred UI is curses (stdlib) because `simple_term_menu` has
|
||||
ghost-duplication rendering bugs in tmux/iTerm2 with arrow keys. New
|
||||
interactive menus must use `hermes_cli/curses_ui.py` — see
|
||||
`hermes_cli/tools_config.py` for the canonical pattern.
|
||||
|
||||
### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
|
||||
The escape sequence leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding instead: `f"\r{line}{' ' * pad}"`.
|
||||
@@ -504,6 +614,30 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p
|
||||
### DO NOT hardcode cross-tool references in schema descriptions
|
||||
Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.
|
||||
|
||||
### The gateway has TWO message guards — both must bypass approval/control commands
|
||||
When an agent is running, messages pass through two sequential guards:
|
||||
(1) **base adapter** (`gateway/platforms/base.py`) queues messages in
|
||||
`_pending_messages` when `session_key in self._active_sessions`, and
|
||||
(2) **gateway runner** (`gateway/run.py`) intercepts `/stop`, `/new`,
|
||||
`/queue`, `/status`, `/approve`, `/deny` before they reach
|
||||
`running_agent.interrupt()`. Any new command that must reach the runner
|
||||
while the agent is blocked (e.g. approval prompts) MUST bypass BOTH
|
||||
guards and be dispatched inline, not via `_process_message_background()`
|
||||
(which races session lifecycle).
|
||||
|
||||
### Squash merges from stale branches silently revert recent fixes
|
||||
Before squash-merging a PR, ensure the branch is up to date with `main`
|
||||
(`git fetch origin main && git reset --hard origin/main` in the worktree,
|
||||
then re-apply the PR's commits). A stale branch's version of an unrelated
|
||||
file will silently overwrite recent fixes on main when squashed. Verify
|
||||
with `git diff HEAD~1..HEAD` after merging — unexpected deletions are a
|
||||
red flag.
|
||||
|
||||
### Don't wire in dead code without E2E validation
|
||||
Unused code that was never shipped was dead for a reason. Before wiring an
|
||||
unused module into a live code path, E2E test the real resolution chain
|
||||
with actual imports (not mocks) against a temp `HERMES_HOME`.
|
||||
|
||||
### Tests must not write to `~/.hermes/`
|
||||
The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.
|
||||
|
||||
@@ -559,7 +693,7 @@ If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
|
||||
pytest directly), at minimum activate the venv and pass `-n 4`:
|
||||
|
||||
```bash
|
||||
source venv/bin/activate
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
python -m pytest tests/ -q -n 4
|
||||
```
|
||||
|
||||
|
||||
+3
-3
@@ -9,7 +9,7 @@ Thank you for contributing to Hermes Agent! This guide covers everything you nee
|
||||
We value contributions in this order:
|
||||
|
||||
1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
|
||||
2. **Cross-platform compatibility** — Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere.
|
||||
2. **Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. We want Hermes to work everywhere.
|
||||
3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
|
||||
4. **Performance and robustness** — retry logic, error handling, graceful degradation.
|
||||
5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
|
||||
@@ -515,7 +515,7 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl
|
||||
|
||||
## Cross-Platform Compatibility
|
||||
|
||||
Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS:
|
||||
Hermes runs on Linux, macOS, and WSL2 on Windows. When writing code that touches the OS:
|
||||
|
||||
### Critical rules
|
||||
|
||||
@@ -597,7 +597,7 @@ refactor/description # Code restructuring
|
||||
|
||||
1. **Run tests**: `pytest tests/ -v`
|
||||
2. **Test manually**: Run `hermes` and exercise the code path you changed
|
||||
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS
|
||||
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2
|
||||
4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.
|
||||
|
||||
### PR description
|
||||
|
||||
@@ -76,7 +76,7 @@ Hermes has two entry points: start the terminal UI with `hermes`, or run the gat
|
||||
| Set a personality | `/personality [name]` | `/personality [name]` |
|
||||
| Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` |
|
||||
| Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` |
|
||||
| Browse skills | `/skills` or `/<skill-name>` | `/skills` or `/<skill-name>` |
|
||||
| Browse skills | `/skills` or `/<skill-name>` | `/<skill-name>` |
|
||||
| Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message |
|
||||
| Platform-specific status | `/platforms` | `/status`, `/sethome` |
|
||||
|
||||
@@ -157,14 +157,10 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv venv venv --python 3.11
|
||||
source venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
python -m pytest tests/ -q
|
||||
scripts/run_tests.sh
|
||||
```
|
||||
|
||||
> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
|
||||
> ```bash
|
||||
> git submodule update --init tinker-atropos
|
||||
> uv pip install -e "./tinker-atropos"
|
||||
> ```
|
||||
> **RL Training (optional):** The RL/Atropos integration (`environments/`) ships via the `atroposlib` and `tinker` dependencies pulled in by `.[all,dev]` — no submodule setup required.
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -0,0 +1,453 @@
|
||||
# Hermes Agent v0.11.0 (v2026.4.23)
|
||||
|
||||
**Release Date:** April 23, 2026
|
||||
**Since v0.9.0:** 1,556 commits · 761 merged PRs · 1,314 files changed · 224,174 insertions · 29 community contributors (290 including co-authors)
|
||||
|
||||
> The Interface release — a full React/Ink rewrite of the interactive CLI, a pluggable transport architecture underneath every provider, native AWS Bedrock support, five new inference paths, a 17th messaging platform (QQBot), a dramatically expanded plugin surface, and GPT-5.5 via Codex OAuth.
|
||||
|
||||
This release also folds in all the highlights deferred from v0.10.0 (which shipped only the Nous Tool Gateway) — so it covers roughly two weeks of work across the whole stack.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **New Ink-based TUI** — `hermes --tui` is now a full React/Ink rewrite of the interactive CLI, with a Python JSON-RPC backend (`tui_gateway`). Sticky composer, live streaming with OSC-52 clipboard support, stable picker keys, status bar with per-turn stopwatch and git branch, `/clear` confirm, light-theme preset, and a subagent spawn observability overlay. ~310 commits to `ui-tui/` + `tui_gateway/`. (@OutThisLife + Teknium)
|
||||
|
||||
- **Transport ABC + Native AWS Bedrock** — Format conversion and HTTP transport were extracted from `run_agent.py` into a pluggable `agent/transports/` layer. `AnthropicTransport`, `ChatCompletionsTransport`, `ResponsesApiTransport`, and `BedrockTransport` each own their own format conversion and API shape. Native AWS Bedrock support via the Converse API ships on top of the new abstraction. ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549), [#13347](https://github.com/NousResearch/hermes-agent/pull/13347), [#13366](https://github.com/NousResearch/hermes-agent/pull/13366), [#13430](https://github.com/NousResearch/hermes-agent/pull/13430), [#13805](https://github.com/NousResearch/hermes-agent/pull/13805), [#13814](https://github.com/NousResearch/hermes-agent/pull/13814) — @kshitijk4poor + Teknium)
|
||||
|
||||
- **Five new inference paths** — Native NVIDIA NIM ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774)), Arcee AI ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276)), Step Plan ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893)), Google Gemini CLI OAuth ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270)), and Vercel ai-gateway with pricing + dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223) — @jerilynzheng). Plus Gemini routed through the native AI Studio API for better performance ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674)).
|
||||
|
||||
- **GPT-5.5 over Codex OAuth** — OpenAI's new GPT-5.5 reasoning model is now available through your ChatGPT Codex OAuth, with live model discovery wired into the model picker so new OpenAI releases show up without catalog updates. ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
|
||||
|
||||
- **QQBot — 17th supported platform** — Native QQBot adapter via QQ Official API v2, with a QR scan-to-configure setup wizard, streaming cursor, emoji reactions, and DM/group policy gating at parity with WeCom/Weixin. ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
|
||||
|
||||
- **Plugin surface expanded** — Plugins can now register slash commands (`register_command`), dispatch tools directly (`dispatch_tool`), block tool execution from hooks (`pre_tool_call` can veto), rewrite tool results (`transform_tool_result`), transform terminal output (`transform_terminal_output`), ship image_gen backends, and add custom dashboard tabs. A bundled disk-cleanup plugin ships as an opt-in reference implementation. ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377), [#10626](https://github.com/NousResearch/hermes-agent/pull/10626), [#10763](https://github.com/NousResearch/hermes-agent/pull/10763), [#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#12929](https://github.com/NousResearch/hermes-agent/pull/12929), [#12944](https://github.com/NousResearch/hermes-agent/pull/12944), [#12972](https://github.com/NousResearch/hermes-agent/pull/12972), [#13799](https://github.com/NousResearch/hermes-agent/pull/13799), [#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
|
||||
|
||||
- **`/steer` — mid-run agent nudges** — `/steer <prompt>` injects a note that the running agent sees after its next tool call, without interrupting the turn or breaking prompt cache. For when you want to course-correct an agent in-flight. ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
|
||||
|
||||
- **Shell hooks** — Wire any shell script as a Hermes lifecycle hook (pre_tool_call, post_tool_call, on_session_start, etc.) without writing a Python plugin. ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
|
||||
|
||||
- **Webhook direct-delivery mode** — Webhook subscriptions can now forward payloads straight to a platform chat without going through the agent — zero-LLM push notifications for alerting, uptime checks, and event streams. ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
|
||||
|
||||
- **Smarter delegation** — Subagents can now be given an explicit `orchestrator` role that lets them spawn their own workers, with configurable `max_spawn_depth` (default flat). Concurrent sibling subagents share filesystem state through a file-coordination layer so they don't clobber each other's edits. ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691), [#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
|
||||
- **Auxiliary models — configurable UI + main-model-first** — `hermes model` has a dedicated "Configure auxiliary models" screen for per-task overrides (compression, vision, session_search, title_generation). `auto` routing now defaults to the main model for side tasks across all users (previously aggregator users were silently routed to a cheap provider-side default). ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891), [#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
|
||||
|
||||
- **Dashboard plugin system + live theme switching** — The web dashboard is now extensible. Third-party plugins can add custom tabs, widgets, and views without forking. Paired with a live-switching theme system — themes now control colors, fonts, layout, and density — so users can hot-swap the dashboard look without a reload. Same theming discipline the CLI has, now on the web. ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#10687](https://github.com/NousResearch/hermes-agent/pull/10687), [#14725](https://github.com/NousResearch/hermes-agent/pull/14725))
|
||||
|
||||
- **Dashboard polish** — i18n (English + Chinese), react-router sidebar layout, mobile-responsive, Vercel deployment, real per-session API call tracking, and one-click update + gateway restart buttons. ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), [#9370](https://github.com/NousResearch/hermes-agent/pull/9370), [#9453](https://github.com/NousResearch/hermes-agent/pull/9453), [#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#13526](https://github.com/NousResearch/hermes-agent/pull/13526), [#14004](https://github.com/NousResearch/hermes-agent/pull/14004) — @austinpickett + @DeployFaith + Teknium)
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Core Agent & Architecture
|
||||
|
||||
### Transport Layer (NEW)
|
||||
- **Transport ABC** abstracts format conversion and HTTP transport from `run_agent.py` into `agent/transports/` ([#13347](https://github.com/NousResearch/hermes-agent/pull/13347))
|
||||
- **AnthropicTransport** — Anthropic Messages API path ([#13366](https://github.com/NousResearch/hermes-agent/pull/13366), @kshitijk4poor)
|
||||
- **ChatCompletionsTransport** — default path for OpenAI-compatible providers ([#13805](https://github.com/NousResearch/hermes-agent/pull/13805))
|
||||
- **ResponsesApiTransport** — OpenAI Responses API + Codex build_kwargs wiring ([#13430](https://github.com/NousResearch/hermes-agent/pull/13430), @kshitijk4poor)
|
||||
- **BedrockTransport** — AWS Bedrock Converse API transport ([#13814](https://github.com/NousResearch/hermes-agent/pull/13814))
|
||||
|
||||
### Provider & Model Support
|
||||
- **Native AWS Bedrock provider** via Converse API ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549))
|
||||
- **NVIDIA NIM native provider** (salvage of #11703) ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774))
|
||||
- **Arcee AI direct provider** ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276))
|
||||
- **Step Plan provider** (salvage #6005) ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893), @kshitijk4poor)
|
||||
- **Google Gemini CLI OAuth** inference provider ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270))
|
||||
- **Vercel ai-gateway** with pricing, attribution, and dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223), @jerilynzheng)
|
||||
- **GPT-5.5 over Codex OAuth** with live model discovery in the picker ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
|
||||
- **Gemini routed through native AI Studio API** ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674))
|
||||
- **xAI Grok upgraded to Responses API** ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
|
||||
- **Ollama improvements** — Cloud provider support, GLM continuation, `think=false` control, surrogate sanitization, `/v1` hint ([#10782](https://github.com/NousResearch/hermes-agent/pull/10782))
|
||||
- **Kimi K2.6** across OpenRouter, Nous Portal, native Kimi, and HuggingFace ([#13148](https://github.com/NousResearch/hermes-agent/pull/13148), [#13152](https://github.com/NousResearch/hermes-agent/pull/13152), [#13169](https://github.com/NousResearch/hermes-agent/pull/13169))
|
||||
- **Kimi K2.5** promoted to first position in all model suggestion lists ([#11745](https://github.com/NousResearch/hermes-agent/pull/11745), @kshitijk4poor)
|
||||
- **Xiaomi MiMo v2.5-pro + v2.5** on OpenRouter, Nous Portal, and native ([#14184](https://github.com/NousResearch/hermes-agent/pull/14184), [#14635](https://github.com/NousResearch/hermes-agent/pull/14635), @kshitijk4poor)
|
||||
- **GLM-5V-Turbo** for coding plan ([#9907](https://github.com/NousResearch/hermes-agent/pull/9907))
|
||||
- **Claude Opus 4.7** in Nous Portal catalog ([#11398](https://github.com/NousResearch/hermes-agent/pull/11398))
|
||||
- **OpenRouter elephant-alpha** in curated lists ([#9378](https://github.com/NousResearch/hermes-agent/pull/9378))
|
||||
- **OpenCode-Go** — Kimi K2.6 and Qwen3.5/3.6 Plus in curated catalog ([#13429](https://github.com/NousResearch/hermes-agent/pull/13429))
|
||||
- **minimax/minimax-m2.5:free** in OpenRouter catalog ([#13836](https://github.com/NousResearch/hermes-agent/pull/13836))
|
||||
- **`/model` merges models.dev entries** for lesser-loved providers ([#14221](https://github.com/NousResearch/hermes-agent/pull/14221))
|
||||
- **Per-provider + per-model `request_timeout_seconds`** config ([#12652](https://github.com/NousResearch/hermes-agent/pull/12652))
|
||||
- **Configurable API retry count** via `agent.api_max_retries` ([#14730](https://github.com/NousResearch/hermes-agent/pull/14730))
|
||||
- **ctx_size context length key** for Lemonade server (salvage #8536) ([#14215](https://github.com/NousResearch/hermes-agent/pull/14215))
|
||||
- **Custom provider display name prompt** ([#9420](https://github.com/NousResearch/hermes-agent/pull/9420))
|
||||
- **Recommendation badges** on tool provider selection ([#9929](https://github.com/NousResearch/hermes-agent/pull/9929))
|
||||
- Fix: correct GPT-5 family context lengths in fallback defaults ([#9309](https://github.com/NousResearch/hermes-agent/pull/9309))
|
||||
- Fix: clamp `minimal` reasoning effort to `low` on Responses API ([#9429](https://github.com/NousResearch/hermes-agent/pull/9429))
|
||||
- Fix: strip reasoning item IDs from Responses API input when `store=False` ([#10217](https://github.com/NousResearch/hermes-agent/pull/10217))
|
||||
- Fix: OpenViking correct account default + commit session on `/new` and compress ([#10463](https://github.com/NousResearch/hermes-agent/pull/10463))
|
||||
- Fix: Kimi `/coding` thinking block survival + empty reasoning_content + block ordering (multiple PRs)
|
||||
- Fix: don't send Anthropic thinking to api.kimi.com/coding ([#13826](https://github.com/NousResearch/hermes-agent/pull/13826))
|
||||
- Fix: send `max_tokens`, `reasoning_effort`, and `thinking` for Kimi/Moonshot
|
||||
- Fix: stream reasoning content through OpenAI-compatible providers that emit it
|
||||
|
||||
### Agent Loop & Conversation
|
||||
- **`/steer <prompt>`** — mid-run agent nudges after next tool call ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
|
||||
- **Orchestrator role + configurable spawn depth** for `delegate_task` (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
|
||||
- **Cross-agent file state coordination** for concurrent subagents ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
- **Compressor smart collapse, dedup, anti-thrashing**, template upgrade, hardening ([#10088](https://github.com/NousResearch/hermes-agent/pull/10088))
|
||||
- **Compression summaries respect the conversation's language** ([#12556](https://github.com/NousResearch/hermes-agent/pull/12556))
|
||||
- **Compression model falls back to main model** on permanent 503/404 ([#10093](https://github.com/NousResearch/hermes-agent/pull/10093))
|
||||
- **Auto-continue interrupted agent work** after gateway restart ([#9934](https://github.com/NousResearch/hermes-agent/pull/9934))
|
||||
- **Activity heartbeats** prevent false gateway inactivity timeouts ([#10501](https://github.com/NousResearch/hermes-agent/pull/10501))
|
||||
- **Auxiliary models UI** — dedicated screen for per-task overrides ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891))
|
||||
- **Auxiliary auto routing defaults to main model** for all users ([#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
|
||||
- **PLATFORM_HINTS for Matrix, Mattermost, Feishu** ([#14428](https://github.com/NousResearch/hermes-agent/pull/14428), @alt-glitch)
|
||||
- Fix: reset retry counters after compression; stop poisoning conversation history ([#10055](https://github.com/NousResearch/hermes-agent/pull/10055))
|
||||
- Fix: break compression-exhaustion infinite loop and auto-reset session ([#10063](https://github.com/NousResearch/hermes-agent/pull/10063))
|
||||
- Fix: stale agent timeout, uv venv detection, empty response after tools ([#10065](https://github.com/NousResearch/hermes-agent/pull/10065))
|
||||
- Fix: prevent premature loop exit when weak models return empty after substantive tool calls ([#10472](https://github.com/NousResearch/hermes-agent/pull/10472))
|
||||
- Fix: preserve pre-start terminal interrupts ([#10504](https://github.com/NousResearch/hermes-agent/pull/10504))
|
||||
- Fix: improve interrupt responsiveness during concurrent tool execution ([#10935](https://github.com/NousResearch/hermes-agent/pull/10935))
|
||||
- Fix: word-wrap spinner, interruptible agent join, and delegate_task interrupt ([#10940](https://github.com/NousResearch/hermes-agent/pull/10940))
|
||||
- Fix: `/stop` no longer resets the session ([#9224](https://github.com/NousResearch/hermes-agent/pull/9224))
|
||||
- Fix: honor interrupts during MCP tool waits ([#9382](https://github.com/NousResearch/hermes-agent/pull/9382), @helix4u)
|
||||
- Fix: break stuck session resume loops after repeated restarts ([#9941](https://github.com/NousResearch/hermes-agent/pull/9941))
|
||||
- Fix: empty response nudge crash + placeholder leak to cron targets ([#11021](https://github.com/NousResearch/hermes-agent/pull/11021))
|
||||
- Fix: streaming cursor sanitization to prevent message truncation (multiple PRs)
|
||||
- Fix: resolve `context_length` for plugin context engines ([#9238](https://github.com/NousResearch/hermes-agent/pull/9238))
|
||||
|
||||
### Session & Memory
|
||||
- **Auto-prune old sessions + VACUUM state.db** at startup ([#13861](https://github.com/NousResearch/hermes-agent/pull/13861))
|
||||
- **Honcho overhaul** — context injection, 5-tool surface, cost safety, session isolation ([#10619](https://github.com/NousResearch/hermes-agent/pull/10619))
|
||||
- **Hindsight richer session-scoped retain metadata** (salvage of #6290) ([#13987](https://github.com/NousResearch/hermes-agent/pull/13987))
|
||||
- Fix: deduplicate memory provider tools to prevent 400 on strict providers ([#10511](https://github.com/NousResearch/hermes-agent/pull/10511))
|
||||
- Fix: discover user-installed memory providers from `$HERMES_HOME/plugins/` ([#10529](https://github.com/NousResearch/hermes-agent/pull/10529))
|
||||
- Fix: add `on_memory_write` bridge to sequential tool execution path ([#10507](https://github.com/NousResearch/hermes-agent/pull/10507))
|
||||
- Fix: preserve `session_id` across `previous_response_id` chains in `/v1/responses` ([#10059](https://github.com/NousResearch/hermes-agent/pull/10059))
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ New Ink-based TUI
|
||||
|
||||
A full React/Ink rewrite of the interactive CLI — invoked via `hermes --tui` or `HERMES_TUI=1`. Shipped across ~310 commits to `ui-tui/` and `tui_gateway/`.
|
||||
|
||||
### TUI Foundations
|
||||
- New TUI based on Ink + Python JSON-RPC backend
|
||||
- Prettier + ESLint + vitest tooling for `ui-tui/`
|
||||
- Entry split between `src/entry.tsx` (TTY gate) and `src/app.tsx` (state machine)
|
||||
- Persistent `_SlashWorker` subprocess for slash command dispatch
|
||||
|
||||
### UX & Features
|
||||
- **Stable picker keys, /clear confirm, light-theme preset** ([#12312](https://github.com/NousResearch/hermes-agent/pull/12312), @OutThisLife)
|
||||
- **Git branch in status bar** cwd label ([#12305](https://github.com/NousResearch/hermes-agent/pull/12305), @OutThisLife)
|
||||
- **Per-turn elapsed stopwatch in FaceTicker + done-in sys line** ([#13105](https://github.com/NousResearch/hermes-agent/pull/13105), @OutThisLife)
|
||||
- **Subagent spawn observability overlay** ([#14045](https://github.com/NousResearch/hermes-agent/pull/14045), @OutThisLife)
|
||||
- **Per-prompt elapsed stopwatch in status bar** ([#12948](https://github.com/NousResearch/hermes-agent/pull/12948))
|
||||
- Sticky composer that freezes during scroll
|
||||
- OSC-52 clipboard support for copy across SSH sessions
|
||||
- Virtualized history rendering for performance
|
||||
- Slash command autocomplete via `complete.slash` RPC
|
||||
- Path autocomplete via `complete.path` RPC
|
||||
- Dozens of resize/ghosting/sticky-prompt fixes landed through the week
|
||||
|
||||
### Structural Refactors
|
||||
- Decomposed `app.tsx` into `app/event-handler`, `app/slash-handler`, `app/stores`, `app/hooks` ([#14640](https://github.com/NousResearch/hermes-agent/pull/14640) and surrounding)
|
||||
- Component split: `branding.tsx`, `markdown.tsx`, `prompts.tsx`, `sessionPicker.tsx`, `messageLine.tsx`, `thinking.tsx`, `maskedPrompt.tsx`
|
||||
- Hook split: `useCompletion`, `useInputHistory`, `useQueue`, `useVirtualHistory`
|
||||
|
||||
---
|
||||
|
||||
## 📱 Messaging Platforms (Gateway)
|
||||
|
||||
### New Platforms
|
||||
- **QQBot (17th platform)** — QQ Official API v2 adapter with QR setup, streaming, package split ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
|
||||
|
||||
### Telegram
|
||||
- **Dedicated `TELEGRAM_PROXY` env var + config.yaml proxy support** (closes #9414, #6530, #9074, #7786) ([#10681](https://github.com/NousResearch/hermes-agent/pull/10681))
|
||||
- **`ignored_threads` config** for Telegram groups ([#9530](https://github.com/NousResearch/hermes-agent/pull/9530))
|
||||
- **Config option to disable link previews** (closes #8728) ([#10610](https://github.com/NousResearch/hermes-agent/pull/10610))
|
||||
- **Auto-wrap markdown tables** in code blocks ([#11794](https://github.com/NousResearch/hermes-agent/pull/11794))
|
||||
- Fix: prevent duplicate replies when stream task is cancelled ([#9319](https://github.com/NousResearch/hermes-agent/pull/9319))
|
||||
- Fix: prevent streaming cursor (▉) from appearing as standalone messages ([#9538](https://github.com/NousResearch/hermes-agent/pull/9538))
|
||||
- Fix: retry transient tool sends + cold-boot budget ([#10947](https://github.com/NousResearch/hermes-agent/pull/10947))
|
||||
- Fix: Markdown special char escaping in `send_exec_approval`
|
||||
- Fix: parentheses in URLs during MarkdownV2 link conversion
|
||||
- Fix: Unicode dash normalization in model switch (closes iOS smart-punctuation issue)
|
||||
- Many platform hint / streaming / session-key fixes
|
||||
|
||||
### Discord
|
||||
- **Forum channel support** (salvage of #10145 + media + polish) ([#11920](https://github.com/NousResearch/hermes-agent/pull/11920))
|
||||
- **`DISCORD_ALLOWED_ROLES`** for role-based access control ([#11608](https://github.com/NousResearch/hermes-agent/pull/11608))
|
||||
- **Config option to disable slash commands** (salvage #13130) ([#14315](https://github.com/NousResearch/hermes-agent/pull/14315))
|
||||
- **Native `send_animation`** for inline GIF playback ([#10283](https://github.com/NousResearch/hermes-agent/pull/10283))
|
||||
- **`send_message` Discord media attachments** ([#10246](https://github.com/NousResearch/hermes-agent/pull/10246))
|
||||
- **`/skill` command group** with category subcommands ([#9909](https://github.com/NousResearch/hermes-agent/pull/9909))
|
||||
- **Extract reply text from message references** ([#9781](https://github.com/NousResearch/hermes-agent/pull/9781))
|
||||
|
||||
### Feishu
|
||||
- **Intelligent reply on document comments** with 3-tier access control ([#11898](https://github.com/NousResearch/hermes-agent/pull/11898))
|
||||
- **Show processing state via reactions** on user messages ([#12927](https://github.com/NousResearch/hermes-agent/pull/12927))
|
||||
- **Preserve @mention context for agent consumption** (salvage #13874) ([#14167](https://github.com/NousResearch/hermes-agent/pull/14167))
|
||||
|
||||
### DingTalk
|
||||
- **`require_mention` + `allowed_users` gating** (parity with Slack/Telegram/Discord) ([#11564](https://github.com/NousResearch/hermes-agent/pull/11564))
|
||||
- **QR-code device-flow authorization** for setup wizard ([#11574](https://github.com/NousResearch/hermes-agent/pull/11574))
|
||||
- **AI Cards streaming, emoji reactions, and media handling** (salvage of #10985) ([#11910](https://github.com/NousResearch/hermes-agent/pull/11910))
|
||||
|
||||
### WhatsApp
|
||||
- **`send_voice`** — native audio message delivery ([#13002](https://github.com/NousResearch/hermes-agent/pull/13002))
|
||||
- **`dm_policy` and `group_policy`** parity with WeCom/Weixin/QQ adapters ([#13151](https://github.com/NousResearch/hermes-agent/pull/13151))
|
||||
|
||||
### WeCom / Weixin
|
||||
- **WeCom QR-scan bot creation + interactive setup wizard** (salvage #13923) ([#13961](https://github.com/NousResearch/hermes-agent/pull/13961))
|
||||
|
||||
### Signal
|
||||
- **Media delivery support** via `send_message` ([#13178](https://github.com/NousResearch/hermes-agent/pull/13178))
|
||||
|
||||
### Slack
|
||||
- **Per-thread sessions for DMs by default** ([#10987](https://github.com/NousResearch/hermes-agent/pull/10987))
|
||||
|
||||
### BlueBubbles (iMessage)
|
||||
- Group chat session separation, webhook registration & auth fixes ([#9806](https://github.com/NousResearch/hermes-agent/pull/9806))
|
||||
|
||||
### Gateway Core
|
||||
- **Gateway proxy mode** — forward messages to a remote API server ([#9787](https://github.com/NousResearch/hermes-agent/pull/9787))
|
||||
- **Per-channel ephemeral prompts** (Discord, Telegram, Slack, Mattermost) ([#10564](https://github.com/NousResearch/hermes-agent/pull/10564))
|
||||
- **Surface plugin slash commands** natively on all platforms + decision-capable command hook ([#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
|
||||
- **Support document/archive extensions in MEDIA: tag extraction** (salvage #8255) ([#14307](https://github.com/NousResearch/hermes-agent/pull/14307))
|
||||
- **Recognize `.pdf` in MEDIA: tag extraction** ([#13683](https://github.com/NousResearch/hermes-agent/pull/13683))
|
||||
- **`--all` flag for `gateway start` and `restart`** ([#10043](https://github.com/NousResearch/hermes-agent/pull/10043))
|
||||
- **Notify active sessions on gateway shutdown** + update health check ([#9850](https://github.com/NousResearch/hermes-agent/pull/9850))
|
||||
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
|
||||
- Fix: suppress duplicate replies on interrupt and streaming flood control ([#10235](https://github.com/NousResearch/hermes-agent/pull/10235))
|
||||
- Fix: close temporary agents after one-off tasks ([#11028](https://github.com/NousResearch/hermes-agent/pull/11028), @kshitijk4poor)
|
||||
- Fix: send a busy-session ack when a user messages during an active agent run ([#10068](https://github.com/NousResearch/hermes-agent/pull/10068))
|
||||
- Fix: route watch-pattern notifications to the originating session ([#10460](https://github.com/NousResearch/hermes-agent/pull/10460))
|
||||
- Fix: preserve notify context in executor threads ([#10921](https://github.com/NousResearch/hermes-agent/pull/10921), @kshitijk4poor)
|
||||
- Fix: avoid duplicate replies after interrupted long tasks ([#11018](https://github.com/NousResearch/hermes-agent/pull/11018))
|
||||
- Fix: unlink stale PID + lock files on cleanup
|
||||
- Fix: force-unlink stale PID file after `--replace` takeover
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Tool System
|
||||
|
||||
### Plugin Surface (major expansion)
|
||||
- **`register_command()`** — plugins can now add slash commands ([#10626](https://github.com/NousResearch/hermes-agent/pull/10626))
|
||||
- **`dispatch_tool()`** — plugins can invoke tools from their code ([#10763](https://github.com/NousResearch/hermes-agent/pull/10763))
|
||||
- **`pre_tool_call` blocking** — plugins can veto tool execution ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377))
|
||||
- **`transform_tool_result`** — plugins rewrite tool results generically ([#12972](https://github.com/NousResearch/hermes-agent/pull/12972))
|
||||
- **`transform_terminal_output`** — plugins rewrite terminal tool output ([#12929](https://github.com/NousResearch/hermes-agent/pull/12929))
|
||||
- **Namespaced skill registration** for plugin skill bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
|
||||
- **Opt-in-by-default + bundled disk-cleanup plugin** (salvage #12212) ([#12944](https://github.com/NousResearch/hermes-agent/pull/12944))
|
||||
- **Pluggable `image_gen` backends + OpenAI provider** ([#13799](https://github.com/NousResearch/hermes-agent/pull/13799))
|
||||
- **`openai-codex` image_gen plugin** (gpt-image-2 via Codex OAuth) ([#14317](https://github.com/NousResearch/hermes-agent/pull/14317))
|
||||
- **Shell hooks** — wire shell scripts as hook callbacks ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
|
||||
|
||||
### Browser
|
||||
- **`browser_cdp` raw DevTools Protocol passthrough** ([#12369](https://github.com/NousResearch/hermes-agent/pull/12369))
|
||||
- Camofox hardening + connection stability across the release window
|
||||
|
||||
### Execute Code
|
||||
- **Project/strict execution modes** (default: project) ([#11971](https://github.com/NousResearch/hermes-agent/pull/11971))
|
||||
|
||||
### Image Generation
|
||||
- **Multi-model FAL support** with picker in `hermes tools` ([#11265](https://github.com/NousResearch/hermes-agent/pull/11265))
|
||||
- **Recraft V3 → V4 Pro, Nano Banana → Pro upgrades** ([#11406](https://github.com/NousResearch/hermes-agent/pull/11406))
|
||||
- **GPT Image 2** in FAL catalog ([#13677](https://github.com/NousResearch/hermes-agent/pull/13677))
|
||||
- **xAI image generation provider** (grok-imagine-image) ([#14765](https://github.com/NousResearch/hermes-agent/pull/14765))
|
||||
|
||||
### TTS / STT / Voice
|
||||
- **Google Gemini TTS provider** ([#11229](https://github.com/NousResearch/hermes-agent/pull/11229))
|
||||
- **xAI Grok STT provider** ([#14473](https://github.com/NousResearch/hermes-agent/pull/14473))
|
||||
- **xAI TTS** (shipped with Responses API upgrade) ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
|
||||
- **KittenTTS local provider** (salvage of #2109) ([#13395](https://github.com/NousResearch/hermes-agent/pull/13395))
|
||||
- **CLI record beep toggle** ([#13247](https://github.com/NousResearch/hermes-agent/pull/13247), @helix4u)
|
||||
|
||||
### Webhook / Cron
|
||||
- **Webhook direct-delivery mode** — zero-LLM push notifications ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
|
||||
- **Cron `wakeAgent` gate** — scripts can skip the agent entirely ([#12373](https://github.com/NousResearch/hermes-agent/pull/12373))
|
||||
- **Cron per-job `enabled_toolsets`** — cap token overhead + cost per job ([#14767](https://github.com/NousResearch/hermes-agent/pull/14767))
|
||||
|
||||
### Delegate
|
||||
- **Orchestrator role** + configurable spawn depth (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
|
||||
- **Cross-agent file state coordination** ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
|
||||
### File / Patch
|
||||
- **`patch` — "did you mean?" feedback** when patch fails to match ([#13435](https://github.com/NousResearch/hermes-agent/pull/13435))
|
||||
|
||||
### API Server
|
||||
- **Stream `/v1/responses` SSE tool events** (salvage #9779) ([#10049](https://github.com/NousResearch/hermes-agent/pull/10049))
|
||||
- **Inline image inputs** on `/v1/chat/completions` and `/v1/responses` ([#12969](https://github.com/NousResearch/hermes-agent/pull/12969))
|
||||
|
||||
### Docker / Podman
|
||||
- **Entry-level Podman support** — `find_docker()` + rootless entrypoint ([#10066](https://github.com/NousResearch/hermes-agent/pull/10066))
|
||||
- **Add docker-cli to Docker image** (salvage #10096) ([#14232](https://github.com/NousResearch/hermes-agent/pull/14232))
|
||||
- **File-sync back to host on teardown** (salvage of #8189 + hardening) ([#11291](https://github.com/NousResearch/hermes-agent/pull/11291))
|
||||
|
||||
### MCP
|
||||
- 12 MCP improvements across the release window (status, timeout handling, tool-call forwarding, etc.)
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Skills Ecosystem
|
||||
|
||||
### Skill System
|
||||
- **Namespaced skill registration** for plugin bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
|
||||
- **`hermes skills reset`** to un-stick bundled skills ([#11468](https://github.com/NousResearch/hermes-agent/pull/11468))
|
||||
- **Skills guard opt-in** — `config.skills.guard_agent_created` (default off) ([#14557](https://github.com/NousResearch/hermes-agent/pull/14557))
|
||||
- **Bundled skill scripts runnable out of the box** ([#13384](https://github.com/NousResearch/hermes-agent/pull/13384))
|
||||
- **`xitter` replaced with `xurl`** — the official X API CLI ([#12303](https://github.com/NousResearch/hermes-agent/pull/12303))
|
||||
- **MiniMax-AI/cli as default skill tap** (salvage #7501) ([#14493](https://github.com/NousResearch/hermes-agent/pull/14493))
|
||||
- **Fuzzy `@` file completions + mtime sorting** ([#9467](https://github.com/NousResearch/hermes-agent/pull/9467))
|
||||
|
||||
### New Skills
|
||||
- **concept-diagrams** (salvage of #11045, @v1k22) ([#11363](https://github.com/NousResearch/hermes-agent/pull/11363))
|
||||
- **architecture-diagram** (Cocoon AI port) ([#9906](https://github.com/NousResearch/hermes-agent/pull/9906))
|
||||
- **pixel-art** with hardware palettes and video animation ([#12663](https://github.com/NousResearch/hermes-agent/pull/12663), [#12725](https://github.com/NousResearch/hermes-agent/pull/12725))
|
||||
- **baoyu-comic** ([#13257](https://github.com/NousResearch/hermes-agent/pull/13257), @JimLiu)
|
||||
- **baoyu-infographic** — 21 layouts × 21 styles (salvage #9901) ([#12254](https://github.com/NousResearch/hermes-agent/pull/12254))
|
||||
- **page-agent** — embed Alibaba's in-page GUI agent in your webapp ([#13976](https://github.com/NousResearch/hermes-agent/pull/13976))
|
||||
- **fitness-nutrition** optional skill + optional env var support ([#9355](https://github.com/NousResearch/hermes-agent/pull/9355))
|
||||
- **drug-discovery** — ChEMBL, PubChem, OpenFDA, ADMET ([#9443](https://github.com/NousResearch/hermes-agent/pull/9443))
|
||||
- **touchdesigner-mcp** (salvage of #10081) ([#12298](https://github.com/NousResearch/hermes-agent/pull/12298))
|
||||
- **adversarial-ux-test** optional skill (salvage of #2494, @omnissiah-comelse) ([#13425](https://github.com/NousResearch/hermes-agent/pull/13425))
|
||||
- **maps** — added `guest_house`, `camp_site`, and dual-key bakery lookup ([#13398](https://github.com/NousResearch/hermes-agent/pull/13398))
|
||||
- **llm-wiki** — port provenance markers, source hashing, and quality signals ([#13700](https://github.com/NousResearch/hermes-agent/pull/13700))
|
||||
|
||||
---
|
||||
|
||||
## 📊 Web Dashboard
|
||||
|
||||
- **i18n (English + Chinese) language switcher** ([#9453](https://github.com/NousResearch/hermes-agent/pull/9453))
|
||||
- **Live-switching theme system** ([#10687](https://github.com/NousResearch/hermes-agent/pull/10687))
|
||||
- **Dashboard plugin system** — extend the web UI with custom tabs ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951))
|
||||
- **react-router, sidebar layout, sticky header, dropdown component** ([#9370](https://github.com/NousResearch/hermes-agent/pull/9370), @austinpickett)
|
||||
- **Responsive for mobile** ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), @DeployFaith)
|
||||
- **Vercel deployment** ([#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#11061](https://github.com/NousResearch/hermes-agent/pull/11061), @austinpickett)
|
||||
- **Context window config support** ([#9357](https://github.com/NousResearch/hermes-agent/pull/9357))
|
||||
- **HTTP health probe for cross-container gateway detection** ([#9894](https://github.com/NousResearch/hermes-agent/pull/9894))
|
||||
- **Update + restart gateway buttons** ([#13526](https://github.com/NousResearch/hermes-agent/pull/13526), @austinpickett)
|
||||
- **Real API call count per session** (salvage #10140) ([#14004](https://github.com/NousResearch/hermes-agent/pull/14004))
|
||||
|
||||
---
|
||||
|
||||
## 🖱️ CLI & User Experience
|
||||
|
||||
- **Dynamic shell completion for bash, zsh, and fish** ([#9785](https://github.com/NousResearch/hermes-agent/pull/9785))
|
||||
- **Light-mode skins + skin-aware completion menus** ([#9461](https://github.com/NousResearch/hermes-agent/pull/9461))
|
||||
- **Numbered keyboard shortcuts** on approval and clarify prompts ([#13416](https://github.com/NousResearch/hermes-agent/pull/13416))
|
||||
- **Markdown stripping, compact multiline previews, external editor** ([#12934](https://github.com/NousResearch/hermes-agent/pull/12934))
|
||||
- **`--ignore-user-config` and `--ignore-rules` flags** (port codex#18646) ([#14277](https://github.com/NousResearch/hermes-agent/pull/14277))
|
||||
- **Account limits section in `/usage`** ([#13428](https://github.com/NousResearch/hermes-agent/pull/13428))
|
||||
- **Doctor: Command Installation check** for `hermes` bin symlink ([#10112](https://github.com/NousResearch/hermes-agent/pull/10112))
|
||||
- **ESC cancels secret/sudo prompts**, clearer skip messaging ([#9902](https://github.com/NousResearch/hermes-agent/pull/9902))
|
||||
- Fix: agent-facing text uses `display_hermes_home()` instead of hardcoded `~/.hermes` ([#10285](https://github.com/NousResearch/hermes-agent/pull/10285))
|
||||
- Fix: enforce `config.yaml` as sole CWD source + deprecate `.env` CWD vars + add `hermes memory reset` ([#11029](https://github.com/NousResearch/hermes-agent/pull/11029))
|
||||
|
||||
---
|
||||
|
||||
## 🔒 Security & Reliability
|
||||
|
||||
- **Global toggle to allow private/internal URL resolution** ([#14166](https://github.com/NousResearch/hermes-agent/pull/14166))
|
||||
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
|
||||
- **Telegram callback authorization** on update prompts ([#10536](https://github.com/NousResearch/hermes-agent/pull/10536))
|
||||
- **SECURITY.md** added ([#10532](https://github.com/NousResearch/hermes-agent/pull/10532), @I3eg1nner)
|
||||
- **Warn about legacy hermes.service units** during `hermes update` ([#11918](https://github.com/NousResearch/hermes-agent/pull/11918))
|
||||
- **Complete ASCII-locale UnicodeEncodeError recovery** for `api_messages`/`reasoning_content` (closes #6843) ([#10537](https://github.com/NousResearch/hermes-agent/pull/10537))
|
||||
- **Prevent stale `os.environ` leak** after `clear_session_vars` ([#10527](https://github.com/NousResearch/hermes-agent/pull/10527))
|
||||
- **Prevent agent hang when backgrounding processes** via terminal tool ([#10584](https://github.com/NousResearch/hermes-agent/pull/10584))
|
||||
- Many smaller session-resume, interrupt, streaming, and memory-race fixes throughout the window
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Notable Bug Fixes
|
||||
|
||||
The `fix:` category in this window covers 482 PRs. Highlights:
|
||||
|
||||
- Streaming cursor artifacts filtered from Matrix, Telegram, WhatsApp, Discord (multiple PRs)
|
||||
- `<think>` and `<thought>` blocks filtered from gateway stream consumers ([#9408](https://github.com/NousResearch/hermes-agent/pull/9408))
|
||||
- Gateway display.streaming root-config override regression ([#9799](https://github.com/NousResearch/hermes-agent/pull/9799))
|
||||
- Context `session_search` coerces limit to int (prevents TypeError) ([#10522](https://github.com/NousResearch/hermes-agent/pull/10522))
|
||||
- Memory tool stays available when `fcntl` is unavailable (Windows) ([#9783](https://github.com/NousResearch/hermes-agent/pull/9783))
|
||||
- Trajectory compressor credentials load from `HERMES_HOME/.env` ([#9632](https://github.com/NousResearch/hermes-agent/pull/9632), @Dusk1e)
|
||||
- `@_context_completions` no longer crashes on `@` mention ([#9683](https://github.com/NousResearch/hermes-agent/pull/9683), @kshitijk4poor)
|
||||
- Group session `user_id` no longer treated as `thread_id` in shutdown notifications ([#10546](https://github.com/NousResearch/hermes-agent/pull/10546))
|
||||
- Telegram `platform_hint` — markdown is supported (closes #8261) ([#10612](https://github.com/NousResearch/hermes-agent/pull/10612))
|
||||
- Doctor checks for Kimi China credentials fixed
|
||||
- Streaming: don't suppress final response when commentary message is sent ([#10540](https://github.com/NousResearch/hermes-agent/pull/10540))
|
||||
- Rapid Telegram follow-ups no longer get cut off
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing & CI
|
||||
|
||||
- **Contributor attribution CI check** on PRs ([#9376](https://github.com/NousResearch/hermes-agent/pull/9376))
|
||||
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
|
||||
- Test count stabilized post-Transport refactor; CI matrix held green through the transport rollout
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
- Atropos + wandb links in user guide
|
||||
- ACP / VS Code / Zed / JetBrains integration docs refresh
|
||||
- Webhook subscription docs updated for direct-delivery mode
|
||||
- Plugin author guide expanded for new hooks (`register_command`, `dispatch_tool`, `transform_tool_result`)
|
||||
- Transport layer developer guide added
|
||||
- Website: removed Discussions link from README
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
### Core
|
||||
- **@teknium1** (Teknium)
|
||||
|
||||
### Top Community Contributors (by merged PR count)
|
||||
- **@kshitijk4poor** — 49 PRs · Transport refactor (AnthropicTransport, ResponsesApiTransport), Step Plan provider, Xiaomi MiMo v2.5 support, numerous gateway fixes, promoted Kimi K2.5, @ mention crash fix
|
||||
- **@OutThisLife** (Brooklyn) — 31 PRs · TUI polish, git branch in status bar, per-turn stopwatch, stable picker keys, `/clear` confirm, light-theme preset, subagent spawn observability overlay
|
||||
- **@helix4u** — 11 PRs · Voice CLI record beep, MCP tool interrupt handling, assorted stability fixes
|
||||
- **@austinpickett** — 8 PRs · Dashboard react-router + sidebar + sticky header + dropdown, Vercel deployment, update + restart buttons
|
||||
- **@alt-glitch** — 8 PRs · PLATFORM_HINTS for Matrix/Mattermost/Feishu, Matrix fixes
|
||||
- **@ethernet8023** — 3 PRs
|
||||
- **@benbarclay** — 3 PRs
|
||||
- **@Aslaaen** — 2 PRs
|
||||
|
||||
### Also contributing
|
||||
@jerilynzheng (ai-gateway pricing), @JimLiu (baoyu-comic skill), @Dusk1e (trajectory compressor credentials), @DeployFaith (mobile-responsive dashboard), @LeonSGP43, @v1k22 (concept-diagrams), @omnissiah-comelse (adversarial-ux-test), @coekfung (Telegram MarkdownV2 expandable blockquotes), @liftaris (TUI provider resolution), @arihantsethia (skill analytics dashboard), @topcheer + @xing8star (QQBot foundation), @kovyrin, @I3eg1nner (SECURITY.md), @PeterBerthelsen, @lengxii, @priveperfumes, @sjz-ks, @cuyua9, @Disaster-Terminator, @leozeli, @LehaoLin, @trevthefoolish, @loongfay, @MrNiceRicee, @WideLee, @bluefishs, @malaiwah, @bobashopcashier, @dsocolobsky, @iamagenius00, @IAvecilla, @aniruddhaadak80, @Es1la, @asheriif, @walli, @jquesnelle (original Tool Gateway work).
|
||||
|
||||
### All Contributors (alphabetical)
|
||||
|
||||
@0xyg3n, @10ishq, @A-afflatus, @Abnertheforeman, @admin28980, @adybag14-cyber, @akhater, @alexzhu0,
|
||||
@AllardQuek, @alt-glitch, @aniruddhaadak80, @anna-oake, @anniesurla, @anthhub, @areu01or00, @arihantsethia,
|
||||
@arthurbr11, @asheriif, @Aslaaen, @Asunfly, @austinpickett, @AviArora02-commits, @AxDSan, @azhengbot, @Bartok9,
|
||||
@benbarclay, @bennytimz, @bernylinville, @bingo906, @binhnt92, @bkadish, @bluefishs, @bobashopcashier,
|
||||
@brantzh6, @BrennerSpear, @brianclemens, @briandevans, @brooklynnicholson, @bugkill3r, @buray, @burtenshaw,
|
||||
@cdanis, @cgarwood82, @ChimingLiu, @chongweiliu, @christopherwoodall, @coekfung, @cola-runner, @corazzione,
|
||||
@counterposition, @cresslank, @cuyua9, @cypres0099, @danieldoderlein, @davetist, @davidvv, @DeployFaith,
|
||||
@Dev-Mriganka, @devorun, @dieutx, @Disaster-Terminator, @dodo-reach, @draix, @DrStrangerUJN, @dsocolobsky,
|
||||
@Dusk1e, @dyxushuai, @elkimek, @elmatadorgh, @emozilla, @entropidelic, @erosika, @Es1la, @etcircle,
|
||||
@etherman-os, @ethernet8023, @fancydirty, @farion1231, @fatinghenji, @Fatty911, @fengtianyu88, @Feranmi10,
|
||||
@flobo3, @francip, @fuleinist, @g-guthrie, @GenKoKo, @gianfrancopiana, @gnanam1990, @GuyCui, @haileymarshall,
|
||||
@haimu0x, @handsdiff, @hansnow, @hedgeho9X, @helix4u, @hengm3467, @HenkDz, @heykb, @hharry11, @HiddenPuppy,
|
||||
@honghua, @houko, @houziershi, @hsy5571616, @huangke19, @hxp-plus, @Hypn0sis, @I3eg1nner, @iacker,
|
||||
@iamagenius00, @IAvecilla, @iborazzi, @Ifkellx, @ifrederico, @imink, @isaachuangGMICLOUD, @ismell0992-afk,
|
||||
@j0sephz, @Jaaneek, @jackjin1997, @JackTheGit, @jaffarkeikei, @jerilynzheng, @JiaDe-Wu, @Jiawen-lee, @JimLiu,
|
||||
@jinzheng8115, @jneeee, @jplew, @jquesnelle, @Julientalbot, @Junass1, @jvcl, @kagura-agent, @keifergu,
|
||||
@kevinskysunny, @keyuyuan, @konsisumer, @kovyrin, @kshitijk4poor, @leeyang1990, @LehaoLin, @lengxii,
|
||||
@LeonSGP43, @leozeli, @li0near, @liftaris, @Lind3ey, @Linux2010, @liujinkun2025, @LLQWQ, @Llugaes, @lmoncany,
|
||||
@longsizhuo, @lrawnsley, @Lubrsy706, @lumenradley, @luyao618, @lvnilesh, @LVT382009, @m0n5t3r, @Magaav,
|
||||
@MagicRay1217, @malaiwah, @manuelschipper, @Marvae, @MassiveMassimo, @mavrickdeveloper, @maxchernin, @memosr,
|
||||
@meng93, @mengjian-github, @MestreY0d4-Uninter, @Mibayy, @MikeFac, @mikewaters, @milkoor, @minorgod,
|
||||
@MrNiceRicee, @ms-alan, @mvanhorn, @n-WN, @N0nb0at, @Nan93, @NIDNASSER-Abdelmajid, @nish3451, @niyoh120,
|
||||
@nocoo, @nosleepcassette, @NousResearch, @ogzerber, @omnissiah-comelse, @Only-Code-A, @opriz, @OwenYWT, @pedh,
|
||||
@pefontana, @PeterBerthelsen, @phpoh, @pinion05, @plgonzalezrx8, @pradeep7127, @priveperfumes,
|
||||
@projectadmin-dev, @PStarH, @rnijhara, @Roy-oss1, @roytian1217, @RucchiZ, @Ruzzgar, @RyanLee-Dev, @Salt-555,
|
||||
@Sanjays2402, @sgaofen, @sharziki, @shenuu, @shin4, @SHL0MS, @shushuzn, @sicnuyudidi, @simon-gtcl,
|
||||
@simon-marcus, @sirEven, @Sisyphus, @sjz-ks, @snreynolds, @Societus, @Somme4096, @sontianye, @sprmn24,
|
||||
@StefanIsMe, @stephenschoettler, @Swift42, @taeng0204, @taeuk178, @tannerfokkens-maker, @TaroballzChen,
|
||||
@ten-ltw, @teyrebaz33, @Tianworld, @topcheer, @Tranquil-Flow, @trevthefoolish, @TroyMitchell911, @UNLINEARITY,
|
||||
@v1k22, @vivganes, @vominh1919, @vrinek, @VTRiot, @WadydX, @walli, @wenhao7, @WhiteWorld, @WideLee, @wujhsu,
|
||||
@WuTianyi123, @Wysie, @xandersbell, @xiaoqiang243, @xiayh0107, @xinpengdr, @Xowiek, @ycbai, @yeyitech, @ygd58,
|
||||
@youngDoo, @yudaiyan, @Yukipukii1, @yule975, @yyq4193, @yzx9, @ZaynJarvis, @zhang9w0v5, @zhanggttry,
|
||||
@zhangxicen, @zhongyueming1121, @zhouxiaoya12, @zons-zhaozhy
|
||||
|
||||
Also: @maelrx, Marco Rutsch, @MaxsolcuCrypto, @Mind-Dragon, Paul Bergeron, @say8hi, @whitehatjr1001.
|
||||
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.4.13...v2026.4.23](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.23)
|
||||
@@ -357,7 +357,7 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
|
||||
return _COMMON_BETAS
|
||||
|
||||
|
||||
def build_anthropic_client(api_key: str, base_url: str = None, timeout: Optional[float] = None):
|
||||
def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
|
||||
"""Create an Anthropic client, auto-detecting setup-tokens vs API keys.
|
||||
|
||||
If *timeout* is provided it overrides the default 900s read timeout. The
|
||||
|
||||
+20
-12
@@ -41,13 +41,10 @@ import threading
|
||||
import time
|
||||
from pathlib import Path # noqa: F401 — used by test mocks
|
||||
from types import SimpleNamespace
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from agent.gemini_native_adapter import GeminiNativeClient
|
||||
|
||||
from agent.credential_pool import load_pool
|
||||
from hermes_cli.config import get_hermes_home
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
@@ -154,7 +151,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||
# differs from their main chat model, map it here. The vision auto-detect
|
||||
# "exotic provider" branch checks this before falling back to the main model.
|
||||
_PROVIDER_VISION_MODELS: Dict[str, str] = {
|
||||
"xiaomi": "mimo-v2-omni",
|
||||
"xiaomi": "mimo-v2.5",
|
||||
"zai": "glm-5v-turbo",
|
||||
}
|
||||
|
||||
@@ -813,11 +810,7 @@ def _read_codex_access_token() -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
# TODO(refactor): This function has messy types and duplicated logic (pool vs direct creds).
|
||||
# Ideal fix: (1) define an AuxiliaryClient Protocol both OpenAI/GeminiNativeClient satisfy,
|
||||
# (2) return a NamedTuple or dataclass instead of raw tuple, (3) extract the repeated
|
||||
# Gemini/Kimi/Copilot client-building into a helper.
|
||||
def _resolve_api_key_provider() -> Tuple[Optional[Union[OpenAI, "GeminiNativeClient"]], Optional[str]]:
|
||||
def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Try each API-key provider in PROVIDER_REGISTRY order.
|
||||
|
||||
Returns (client, model) for the first provider with usable runtime
|
||||
@@ -923,6 +916,19 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
|
||||
|
||||
|
||||
def _describe_openrouter_unavailable() -> str:
    """Explain, for log output, why no OpenRouter credentials were usable.

    The checks run in this order and the first match wins:

    1. A credential pool is present but no entry was selected.
    2. A credential pool entry was selected but yields no runtime API key.
    3. The ``OPENROUTER_API_KEY`` environment variable is unset or blank.
    4. None of the above: a generic "no usable credentials" reason.

    Returns:
        A human-readable reason string; this function never raises on its
        own and never returns an empty string.
    """
    has_pool, selected = _select_pool_entry("openrouter")

    if has_pool and selected is None:
        return "OpenRouter credential pool has no usable entries (credentials may be exhausted)"
    if has_pool and not _pool_runtime_api_key(selected):
        return "OpenRouter credential pool entry is missing a runtime API key"

    # Whitespace-only values count as "not set".
    env_key = str(os.getenv("OPENROUTER_API_KEY") or "").strip()
    if env_key:
        return "no usable OpenRouter credentials found"
    return "OPENROUTER_API_KEY not set"
|
||||
|
||||
|
||||
def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
# Check cross-session rate limit guard before attempting Nous —
|
||||
# if another session already recorded a 429, skip Nous entirely
|
||||
@@ -1634,8 +1640,10 @@ def resolve_provider_client(
|
||||
if provider == "openrouter":
|
||||
client, default = _try_openrouter()
|
||||
if client is None:
|
||||
logger.warning("resolve_provider_client: openrouter requested "
|
||||
"but OPENROUTER_API_KEY not set")
|
||||
logger.warning(
|
||||
"resolve_provider_client: openrouter requested but %s",
|
||||
_describe_openrouter_unavailable(),
|
||||
)
|
||||
return None, None
|
||||
final_model = _normalize_resolved_model(model or default, provider)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
|
||||
@@ -29,7 +29,6 @@ from hermes_cli.auth import (
|
||||
_save_auth_store,
|
||||
_save_provider_state,
|
||||
read_credential_pool,
|
||||
read_provider_credentials,
|
||||
write_credential_pool,
|
||||
)
|
||||
|
||||
@@ -322,7 +321,7 @@ def get_custom_provider_pool_key(base_url: str) -> Optional[str]:
|
||||
|
||||
def list_custom_pool_providers() -> List[str]:
|
||||
"""Return all 'custom:*' pool keys that have entries in auth.json."""
|
||||
pool_data = read_credential_pool()
|
||||
pool_data = read_credential_pool(None)
|
||||
return sorted(
|
||||
key for key in pool_data
|
||||
if key.startswith(CUSTOM_POOL_PREFIX)
|
||||
@@ -876,20 +875,6 @@ class CredentialPool:
|
||||
self._current_id = None
|
||||
return removed
|
||||
|
||||
def remove_entry(self, entry_id: str) -> Optional[PooledCredential]:
    """Remove the pool entry whose ``id`` equals *entry_id*.

    After removal the remaining entries are re-numbered so that each
    entry's ``priority`` equals its position in the list, the pool is
    persisted, and the current-entry pointer is cleared if it referred to
    the removed entry.

    Returns:
        The removed entry, or ``None`` when no entry has that id (in which
        case nothing is modified or persisted).
    """
    position = next(
        (i for i, candidate in enumerate(self._entries) if candidate.id == entry_id),
        None,
    )
    if position is None:
        return None

    removed = self._entries.pop(position)

    # Close the gap left by the removed entry: priority == list index.
    renumbered = []
    for rank, survivor in enumerate(self._entries):
        renumbered.append(replace(survivor, priority=rank))
    self._entries = renumbered

    self._persist()
    if self._current_id == removed.id:
        self._current_id = None
    return removed
|
||||
|
||||
def resolve_target(self, target: Any) -> Tuple[Optional[int], Optional[PooledCredential], Optional[str]]:
|
||||
raw = str(target or "").strip()
|
||||
if not raw:
|
||||
@@ -1340,7 +1325,7 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
|
||||
|
||||
def load_pool(provider: str) -> CredentialPool:
|
||||
provider = (provider or "").strip().lower()
|
||||
raw_entries = read_provider_credentials(provider)
|
||||
raw_entries = read_credential_pool(provider)
|
||||
entries = [PooledCredential.from_dict(provider, payload) for payload in raw_entries]
|
||||
|
||||
if provider.startswith(CUSTOM_POOL_PREFIX):
|
||||
|
||||
@@ -729,7 +729,6 @@ class KawaiiSpinner:
|
||||
time.sleep(0.1)
|
||||
continue
|
||||
frame = self.spinner_frames[self.frame_idx % len(self.spinner_frames)]
|
||||
assert self.start_time is not None # start() sets it before thread starts
|
||||
elapsed = time.time() - self.start_time
|
||||
if wings:
|
||||
left, right = wings[self.frame_idx % len(wings)]
|
||||
|
||||
@@ -45,6 +45,7 @@ class FailoverReason(enum.Enum):
|
||||
|
||||
# Model
|
||||
model_not_found = "model_not_found" # 404 or invalid model — fallback to different model
|
||||
provider_policy_blocked = "provider_policy_blocked" # Aggregator (e.g. OpenRouter) blocked the only endpoint due to account data/privacy policy
|
||||
|
||||
# Request format
|
||||
format_error = "format_error" # 400 bad request — abort or strip + retry
|
||||
@@ -194,6 +195,29 @@ _MODEL_NOT_FOUND_PATTERNS = [
|
||||
"unsupported model",
|
||||
]
|
||||
|
||||
# OpenRouter aggregator policy-block patterns.
|
||||
#
|
||||
# When a user's OpenRouter account privacy setting (or a per-request
|
||||
# `provider.data_collection: deny` preference) excludes the only endpoint
|
||||
# serving a model, OpenRouter returns 404 with a *specific* message that is
|
||||
# distinct from "model not found":
|
||||
#
|
||||
# "No endpoints available matching your guardrail restrictions and
|
||||
# data policy. Configure: https://openrouter.ai/settings/privacy"
|
||||
#
|
||||
# We classify this as `provider_policy_blocked` rather than
|
||||
# `model_not_found` because:
|
||||
# - The model *exists* — model_not_found is misleading in logs
|
||||
# - Provider fallback won't help: the account-level setting applies to
|
||||
# every call on the same OpenRouter account
|
||||
# - The error body already contains the fix URL, so the user gets
|
||||
# actionable guidance without us rewriting the message
|
||||
_PROVIDER_POLICY_BLOCKED_PATTERNS = [
|
||||
"no endpoints available matching your guardrail",
|
||||
"no endpoints available matching your data policy",
|
||||
"no endpoints found matching your data policy",
|
||||
]
|
||||
|
||||
# Auth patterns (non-status-code signals)
|
||||
_AUTH_PATTERNS = [
|
||||
"invalid api key",
|
||||
@@ -523,6 +547,17 @@ def _classify_by_status(
|
||||
return _classify_402(error_msg, result_fn)
|
||||
|
||||
if status_code == 404:
|
||||
# OpenRouter policy-block 404 — distinct from "model not found".
|
||||
# The model exists; the user's account privacy setting excludes the
|
||||
# only endpoint serving it. Falling back to another provider won't
|
||||
# help (same account setting applies). The error body already
|
||||
# contains the fix URL, so just surface it.
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
@@ -640,6 +675,12 @@ def _classify_400(
|
||||
)
|
||||
|
||||
# Some providers (e.g. OpenRouter) return model-not-found as 400 instead of 404; check the policy-block patterns first so they are not mislabeled as a missing model.
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
@@ -812,6 +853,15 @@ def _classify_by_message(
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Provider policy-block (aggregator-side guardrail) — check before
|
||||
# model_not_found so we don't mis-label as a missing model.
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
|
||||
# Model not found patterns
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
|
||||
+128
-6
@@ -123,6 +123,10 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"claude": 200000,
|
||||
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
|
||||
# Source: https://developers.openai.com/api/docs/models
|
||||
# GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
|
||||
# can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
|
||||
# Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
|
||||
"gpt-5.5": 400000,
|
||||
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
|
||||
@@ -183,12 +187,12 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"moonshotai/Kimi-K2.6": 262144,
|
||||
"moonshotai/Kimi-K2-Thinking": 262144,
|
||||
"MiniMaxAI/MiniMax-M2.5": 204800,
|
||||
"XiaomiMiMo/MiMo-V2-Flash": 256000,
|
||||
"mimo-v2-pro": 1000000,
|
||||
"mimo-v2-omni": 256000,
|
||||
"mimo-v2-flash": 256000,
|
||||
"mimo-v2.5-pro": 1000000,
|
||||
"mimo-v2.5": 1000000,
|
||||
"XiaomiMiMo/MiMo-V2-Flash": 262144,
|
||||
"mimo-v2-pro": 1048576,
|
||||
"mimo-v2.5-pro": 1048576,
|
||||
"mimo-v2.5": 1048576,
|
||||
"mimo-v2-omni": 262144,
|
||||
"mimo-v2-flash": 262144,
|
||||
"zai-org/GLM-5": 202752,
|
||||
}
|
||||
|
||||
@@ -1002,6 +1006,115 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
|
||||
return None
|
||||
|
||||
|
||||
# Known ChatGPT Codex OAuth context windows (observed via live
|
||||
# chatgpt.com/backend-api/codex/models probe, Apr 2026). These are the
|
||||
# `context_window` values, which are what Codex actually enforces — the
|
||||
# direct OpenAI API has larger limits for the same slugs, but Codex OAuth
|
||||
# caps lower (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex).
|
||||
#
|
||||
# Used as a fallback when the live probe fails (no token, network error).
|
||||
# Key order here does not matter: the resolver sorts keys longest-first so substring match picks the most specific entry.
|
||||
_CODEX_OAUTH_CONTEXT_FALLBACK: Dict[str, int] = {
|
||||
"gpt-5.1-codex-max": 272_000,
|
||||
"gpt-5.1-codex-mini": 272_000,
|
||||
"gpt-5.3-codex": 272_000,
|
||||
"gpt-5.2-codex": 272_000,
|
||||
"gpt-5.4-mini": 272_000,
|
||||
"gpt-5.5": 272_000,
|
||||
"gpt-5.4": 272_000,
|
||||
"gpt-5.2": 272_000,
|
||||
"gpt-5": 272_000,
|
||||
}
|
||||
|
||||
|
||||
_codex_oauth_context_cache: Dict[str, int] = {}
|
||||
_codex_oauth_context_cache_time: float = 0.0
|
||||
_CODEX_OAUTH_CONTEXT_CACHE_TTL = 3600 # 1 hour
|
||||
|
||||
|
||||
def _fetch_codex_oauth_context_lengths(access_token: str) -> Dict[str, int]:
    """Probe the ChatGPT Codex /models endpoint for per-slug context windows.

    Codex OAuth imposes its own context limits, which can differ from the
    direct OpenAI API for the same slug. The ``context_window`` field of each
    model entry in the response is taken as the limit for that slug.

    A non-empty result is cached module-wide for
    ``_CODEX_OAUTH_CONTEXT_CACHE_TTL`` seconds. The cache is not keyed by
    token. Failures and empty results are never cached, so the next call
    probes again.

    Args:
        access_token: Bearer token sent in the ``Authorization`` header.

    Returns:
        A ``{slug: context_window}`` dict. Empty when the response is not
        HTTP 200, when the request or JSON decoding raises, when the payload
        does not have a list under ``"models"``, or when no entry carries a
        usable slug and positive integer window. Never raises.
    """
    global _codex_oauth_context_cache, _codex_oauth_context_cache_time
    now = time.time()
    if (
        _codex_oauth_context_cache
        and now - _codex_oauth_context_cache_time < _CODEX_OAUTH_CONTEXT_CACHE_TTL
    ):
        return _codex_oauth_context_cache

    # Best-effort probe: any transport/decoding problem degrades to "no live
    # data" so callers can fall back to hardcoded defaults.
    try:
        resp = requests.get(
            "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0",
            headers={"Authorization": f"Bearer {access_token}"},
            timeout=10,
        )
        if resp.status_code != 200:
            logger.debug(
                "Codex /models probe returned HTTP %s; falling back to hardcoded defaults",
                resp.status_code,
            )
            return {}
        data = resp.json()
    except Exception as exc:
        logger.debug("Codex /models probe failed: %s", exc)
        return {}

    # Validate the container before iterating: a payload such as
    # {"models": null} would otherwise raise TypeError outside the try block.
    entries = data.get("models") if isinstance(data, dict) else None
    if not isinstance(entries, list):
        return {}

    result: Dict[str, int] = {}
    for item in entries:
        if not isinstance(item, dict):
            continue
        slug = item.get("slug")
        ctx = item.get("context_window")
        if not isinstance(slug, str):
            continue
        # bool is a subclass of int, so reject it explicitly — a JSON `true`
        # must not be recorded as a context window of 1.
        if isinstance(ctx, bool) or not isinstance(ctx, int) or ctx <= 0:
            continue
        slug = slug.strip()
        if slug:  # skip whitespace-only slugs instead of storing them under ""
            result[slug] = ctx

    if result:
        _codex_oauth_context_cache = result
        _codex_oauth_context_cache_time = now
    return result
|
||||
|
||||
|
||||
def _resolve_codex_oauth_context_length(
    model: str, access_token: str = ""
) -> Optional[int]:
    """Return the context window Codex OAuth applies to *model*, if known.

    Resolution order:

    1. With a non-empty *access_token*, the live per-slug map from
       ``_fetch_codex_oauth_context_lengths`` — exact slug match first, then
       a case-insensitive match.
    2. ``_CODEX_OAUTH_CONTEXT_FALLBACK`` — the longest key that occurs as a
       substring of the lower-cased model name.

    Args:
        model: Model name; any provider prefix is removed with
            ``_strip_provider_prefix`` before matching.
        access_token: Bearer token for the live probe. When empty, only the
            hardcoded fallback table is consulted.

    Returns:
        The context length, or ``None`` when the stripped model name is empty
        or nothing matches.
    """
    bare = _strip_provider_prefix(model).strip()
    if not bare:
        return None
    lowered = bare.lower()

    if access_token:
        probed = _fetch_codex_oauth_context_lengths(access_token)
        if bare in probed:
            return probed[bare]
        # Tolerate casing drift between the configured name and the live slug.
        for live_slug, live_ctx in probed.items():
            if live_slug.lower() == lowered:
                return live_ctx

    # Hardcoded defaults: try the most specific (longest) key first so that
    # e.g. "gpt-5.4-mini" wins over "gpt-5.4" and "gpt-5".
    by_specificity = sorted(_CODEX_OAUTH_CONTEXT_FALLBACK, key=len, reverse=True)
    matched = next((key for key in by_specificity if key in lowered), None)
    if matched is None:
        return None
    return _CODEX_OAUTH_CONTEXT_FALLBACK[matched]
|
||||
|
||||
|
||||
def _resolve_nous_context_length(model: str) -> Optional[int]:
|
||||
"""Resolve Nous Portal model context length via OpenRouter metadata.
|
||||
|
||||
@@ -1146,6 +1259,15 @@ def get_model_context_length(
|
||||
ctx = _resolve_nous_context_length(model)
|
||||
if ctx:
|
||||
return ctx
|
||||
if effective_provider == "openai-codex":
|
||||
# Codex OAuth enforces lower context limits than the direct OpenAI
|
||||
# API for the same slug (e.g. gpt-5.5 is 1.05M on the API but 272K
|
||||
# on Codex). Authoritative source is Codex's own /models endpoint.
|
||||
codex_ctx = _resolve_codex_oauth_context_length(model, access_token=api_key or "")
|
||||
if codex_ctx:
|
||||
if base_url:
|
||||
save_context_length(model, base_url, codex_ctx)
|
||||
return codex_ctx
|
||||
if effective_provider:
|
||||
from agent.models_dev import lookup_models_dev_context
|
||||
ctx = lookup_models_dev_context(effective_provider, model)
|
||||
|
||||
@@ -0,0 +1,190 @@
|
||||
"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset.
|
||||
|
||||
Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI
|
||||
tool calling. Requests that violate it fail with HTTP 400:
|
||||
|
||||
tools.function.parameters is not a valid moonshot flavored json schema,
|
||||
details: <...>
|
||||
|
||||
Known rejection modes documented at
|
||||
https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102
|
||||
and MoonshotAI/kimi-cli#1595:
|
||||
|
||||
1. Every property schema must carry a ``type``. Standard JSON Schema allows
|
||||
type to be omitted (the value is then unconstrained); Moonshot refuses.
|
||||
2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
|
||||
the parent. Presence of both causes "type should be defined in anyOf
|
||||
items instead of the parent schema".
|
||||
|
||||
The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
|
||||
handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
|
||||
applies at MCP registration time for all providers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Keys whose values are maps of name → schema (not schemas themselves).
|
||||
# When we recurse, we walk the values of these maps as schemas, but we do
|
||||
# NOT apply the missing-type repair to the map itself.
|
||||
_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"})
|
||||
|
||||
# Keys whose values are lists of schemas.
|
||||
_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"})
|
||||
|
||||
# Keys whose values are a single nested schema.
|
||||
_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"})
|
||||
|
||||
|
||||
def _repair_schema(node: Any, is_schema: bool = True) -> Any:
    """Recursively apply Moonshot repairs to a schema node.

    Args:
        node: A JSON Schema fragment. Dicts are rebuilt key by key, lists are
            treated as lists of schemas, and any other value (boolean
            schemas, scalars) is returned unchanged.
        is_schema: ``True`` means ``node`` is a JSON Schema node and gets the
            missing-type + anyOf-parent repairs applied. ``False`` means it
            is a container map (e.g. the value of ``properties``), so only
            its nested values are walked and no ``type`` is added to it.

    Returns:
        The repaired structure. Dicts and schema lists are rebuilt as new
        containers; values that are passed through (descriptions, enum
        lists, ...) are the same objects as in the input.
    """
    if isinstance(node, list):
        # Reached for lists of schemas (anyOf/oneOf/allOf/prefixItems and the
        # array form of ``items``), so every element is itself a schema.
        return [_repair_schema(item, is_schema=True) for item in node]
    if not isinstance(node, dict):
        return node

    # Walk the dict, deciding per key whether the value is a map of schemas,
    # a list of schemas, a single nested schema, or something to copy as-is.
    repaired: Dict[str, Any] = {}
    for key, value in node.items():
        if key in _SCHEMA_MAP_KEYS and isinstance(value, dict):
            # Map of name → schema. The map itself is not a schema (it has no
            # type of its own), but each of its values is.
            repaired[key] = {
                sub_key: _repair_schema(sub_val, is_schema=True)
                for sub_key, sub_val in value.items()
            }
        elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
            repaired[key] = _repair_schema(value, is_schema=True)
        elif key in _SCHEMA_NODE_KEYS and isinstance(value, dict):
            # items / contains / not / additionalProperties / propertyNames
            # holding a single nested schema.
            repaired[key] = _repair_schema(value, is_schema=True)
        elif key == "items" and isinstance(value, list):
            # Older JSON Schema drafts allow ``items`` to be a list of
            # positional schemas (tuple validation). Repair each element so
            # nested schemas are not skipped.
            repaired[key] = _repair_schema(value, is_schema=True)
        else:
            # Scalars (description, title, format, enum values, etc.) and
            # non-dict node values such as ``additionalProperties: false``
            # pass through untouched.
            repaired[key] = value

    if not is_schema:
        return repaired

    # Rule 2: when anyOf is present, type belongs only on the children.
    if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
        repaired.pop("type", None)
        return repaired

    # Rule 1: schemas without a type need one. $ref nodes are exempt — their
    # type comes from the referenced definition.
    if "$ref" in repaired:
        return repaired
    return _fill_missing_type(repaired)
|
||||
|
||||
|
||||
def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Infer a reasonable ``type`` if this schema node has none."""
|
||||
if "type" in node and node["type"] not in (None, ""):
|
||||
return node
|
||||
|
||||
# Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
|
||||
# → type of first enum value, else fall back to ``string`` (safest scalar).
|
||||
if "properties" in node or "required" in node or "additionalProperties" in node:
|
||||
inferred = "object"
|
||||
elif "items" in node or "prefixItems" in node:
|
||||
inferred = "array"
|
||||
elif "enum" in node and isinstance(node["enum"], list) and node["enum"]:
|
||||
sample = node["enum"][0]
|
||||
if isinstance(sample, bool):
|
||||
inferred = "boolean"
|
||||
elif isinstance(sample, int):
|
||||
inferred = "integer"
|
||||
elif isinstance(sample, float):
|
||||
inferred = "number"
|
||||
else:
|
||||
inferred = "string"
|
||||
else:
|
||||
inferred = "string"
|
||||
|
||||
return {**node, "type": inferred}
|
||||
|
||||
|
||||
def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]:
    """Normalize tool parameters to a Moonshot-compatible object schema.

    The input is deep-copied before the missing-type and anyOf-parent
    repairs run, so the caller's schema is never mutated. Anything that is
    not a dict is replaced by an empty object schema.

    Returns:
        A dict whose top-level ``type`` is ``"object"`` and which always has
        a ``properties`` key.
    """
    if not isinstance(parameters, dict):
        return {"type": "object", "properties": {}}

    schema = _repair_schema(copy.deepcopy(parameters), is_schema=True)
    if not isinstance(schema, dict):
        return {"type": "object", "properties": {}}

    # The top level of a tool's parameters must be an object schema.
    if schema.get("type") != "object":
        schema["type"] = "object"
    schema.setdefault("properties", {})
    return schema
|
||||
|
||||
|
||||
def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters.

    Args:
        tools: OpenAI-style tool definitions. Entries that are not dicts, or
            whose ``function`` value is not a dict, are passed through as-is.

    Returns:
        ``tools`` itself when it is empty or when no tool needed a repair.
        Otherwise a new list in which each repaired tool is a shallow copy
        carrying the sanitized ``parameters``; tools that were already
        compliant are reused unchanged. Input tools are never mutated.
    """
    if not tools:
        return tools

    sanitized: List[Dict[str, Any]] = []
    any_change = False
    for tool in tools:
        fn = tool.get("function") if isinstance(tool, dict) else None
        if not isinstance(fn, dict):
            sanitized.append(tool)
            continue

        params = fn.get("parameters")
        repaired = sanitize_moonshot_tool_parameters(params)
        # Compare by value: the sanitizer always returns a fresh object, so
        # an identity check would report a change for every tool.
        if repaired == params:
            sanitized.append(tool)
            continue

        any_change = True
        sanitized.append({**tool, "function": {**fn, "parameters": repaired}})

    return sanitized if any_change else tools
|
||||
|
||||
|
||||
def is_moonshot_model(model: str | None) -> bool:
    """Return True for any Kimi / Moonshot model slug, with or without a prefix.

    Bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) and aggregator-
    prefixed slugs (``nous/moonshotai/kimi-k2.6``, ``openrouter/moonshotai/...``)
    both match. Matching on the model name rather than the base URL covers
    aggregators (Nous, OpenRouter, ...) that route to Moonshot's inference
    from their own endpoint instead of ``api.moonshot.ai``.

    The comparison ignores case and surrounding whitespace. ``None`` and the
    empty string are never Moonshot models.
    """
    if not model:
        return False

    slug = model.strip().lower()
    # Final path segment, so aggregator prefixes do not hide the model name.
    last_segment = slug.rpartition("/")[2]
    named_kimi = last_segment == "kimi" or last_segment.startswith("kimi-")
    # Vendor-prefixed forms commonly used on aggregators.
    vendor_hint = "moonshot" in slug or "/kimi" in slug or slug.startswith("kimi")
    return named_kimi or vendor_hint
|
||||
@@ -345,7 +345,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
_skill_commands = {}
|
||||
try:
|
||||
from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
|
||||
from agent.skill_utils import get_external_skills_dirs
|
||||
from agent.skill_utils import get_external_skills_dirs, iter_skill_index_files
|
||||
disabled = _get_disabled_skill_names()
|
||||
seen_names: set = set()
|
||||
|
||||
@@ -356,7 +356,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
dirs_to_scan.extend(get_external_skills_dirs())
|
||||
|
||||
for scan_dir in dirs_to_scan:
|
||||
for skill_md in scan_dir.rglob("SKILL.md"):
|
||||
for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
|
||||
if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
|
||||
continue
|
||||
try:
|
||||
|
||||
@@ -455,8 +455,7 @@ def parse_qualified_name(name: str) -> Tuple[Optional[str], str]:
|
||||
"""
|
||||
if ":" not in name:
|
||||
return None, name
|
||||
ns, bare = name.split(":", 1)
|
||||
return ns, bare
|
||||
return tuple(name.split(":", 1)) # type: ignore[return-value]
|
||||
|
||||
|
||||
def is_valid_namespace(candidate: Optional[str]) -> bool:
|
||||
|
||||
@@ -12,6 +12,7 @@ reasoning configuration, temperature handling, and extra_body assembly.
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
|
||||
from agent.prompt_builder import DEVELOPER_ROLE_MODELS
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
@@ -172,6 +173,11 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
|
||||
# Tools
|
||||
if tools:
|
||||
# Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting
|
||||
# tool parameters here keeps aggregator routes (Nous, OpenRouter,
|
||||
# etc.) compatible, in addition to direct moonshot.ai endpoints.
|
||||
if is_moonshot_model(model):
|
||||
tools = sanitize_moonshot_tools(tools)
|
||||
api_kwargs["tools"] = tools
|
||||
|
||||
# max_tokens resolution — priority: ephemeral > user > provider default
|
||||
|
||||
@@ -61,6 +61,20 @@ class ToolCall:
|
||||
"""Codex response_item_id from provider_data."""
|
||||
return (self.provider_data or {}).get("response_item_id")
|
||||
|
||||
@property
def extra_content(self) -> Optional[Dict[str, Any]]:
    """Gemini ``extra_content`` (thought_signature) stored in provider_data.

    Gemini 3 thinking models attach ``extra_content`` carrying a
    ``thought_signature`` to each tool call, and that signature has to be
    replayed on subsequent API calls — otherwise the API rejects the request
    with HTTP 400. The chat_completions transport keeps it under
    ``provider_data["extra_content"]``; exposing it as a property lets
    ``_build_assistant_message`` read ``getattr(tc, "extra_content")``
    uniformly.

    Returns ``None`` when ``provider_data`` is empty or has no such key.
    """
    data = self.provider_data
    if not data:
        return None
    return data.get("extra_content")
|
||||
|
||||
|
||||
@dataclass
|
||||
class Usage:
|
||||
|
||||
@@ -20,13 +20,9 @@ Usage:
|
||||
python batch_runner.py --dataset_file=data.jsonl --batch_size=10 --run_name=my_run --distribution=image_gen
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
@@ -1130,7 +1126,7 @@ def main(
|
||||
num_workers: int = 4,
|
||||
resume: bool = False,
|
||||
verbose: bool = False,
|
||||
show_distributions: bool = False,
|
||||
list_distributions: bool = False,
|
||||
ephemeral_system_prompt: str = None,
|
||||
log_prefix_chars: int = 100,
|
||||
providers_allowed: str = None,
|
||||
@@ -1158,7 +1154,7 @@ def main(
|
||||
num_workers (int): Number of parallel worker processes (default: 4)
|
||||
resume (bool): Resume from checkpoint if run was interrupted (default: False)
|
||||
verbose (bool): Enable verbose logging (default: False)
|
||||
show_distributions (bool): List available toolset distributions and exit
|
||||
list_distributions (bool): List available toolset distributions and exit
|
||||
ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
|
||||
log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses (default: 100)
|
||||
providers_allowed (str): Comma-separated list of OpenRouter providers to allow (e.g. "anthropic,openai")
|
||||
@@ -1190,10 +1186,10 @@ def main(
|
||||
--prefill_messages_file=configs/prefill_opus.json
|
||||
|
||||
# List available distributions
|
||||
python batch_runner.py --show_distributions
|
||||
python batch_runner.py --list_distributions
|
||||
"""
|
||||
# Handle list distributions
|
||||
if show_distributions:
|
||||
if list_distributions:
|
||||
from toolset_distributions import print_distribution_info
|
||||
|
||||
print("📊 Available Toolset Distributions")
|
||||
@@ -507,6 +507,13 @@ agent:
|
||||
# finish, then interrupts anything still running after this timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
# restart_drain_timeout: 60
|
||||
|
||||
# Max app-level retry attempts for API errors (connection drops, provider
|
||||
# timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this
|
||||
# to 1 if you use fallback providers and want fast failover on flaky
|
||||
# primaries (default 3). The OpenAI SDK does its own low-level retries
|
||||
# underneath this wrapper — this is the Hermes-level loop.
|
||||
# api_max_retries: 3
|
||||
|
||||
# Enable verbose logging
|
||||
verbose: false
|
||||
|
||||
@@ -30,7 +30,7 @@ from urllib.parse import unquote, urlparse
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional, TypedDict
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -84,34 +84,6 @@ _project_env = Path(__file__).parent / '.env'
|
||||
load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)
|
||||
|
||||
|
||||
class _ModelPickerState(TypedDict, total=False):
|
||||
stage: str
|
||||
providers: List[Dict[str, Any]]
|
||||
selected: int
|
||||
current_model: str
|
||||
current_provider: str
|
||||
user_provs: Optional[Dict[str, Any]]
|
||||
custom_provs: Optional[Dict[str, Any]]
|
||||
provider_data: Dict[str, Any]
|
||||
model_list: List[str]
|
||||
|
||||
|
||||
class _ApprovalState(TypedDict, total=False):
|
||||
command: str
|
||||
description: str
|
||||
choices: List[str]
|
||||
selected: int
|
||||
response_queue: "queue.Queue[str]"
|
||||
show_full: bool
|
||||
|
||||
|
||||
class _ClarifyState(TypedDict, total=False):
|
||||
question: str
|
||||
choices: List[str]
|
||||
selected: int
|
||||
response_queue: "queue.Queue[str]"
|
||||
|
||||
|
||||
_REASONING_TAGS = (
|
||||
"REASONING_SCRATCHPAD",
|
||||
"think",
|
||||
@@ -1756,7 +1728,7 @@ def _parse_skills_argument(skills: str | list[str] | tuple[str, ...] | None) ->
|
||||
return parsed
|
||||
|
||||
|
||||
def save_config_value(key_path: str, value: Any) -> bool:
|
||||
def save_config_value(key_path: str, value: any) -> bool:
|
||||
"""
|
||||
Save a value to the active config file at the specified key path.
|
||||
|
||||
@@ -2093,16 +2065,16 @@ class HermesCLI:
|
||||
self._interrupt_queue = queue.Queue()
|
||||
self._should_exit = False
|
||||
self._last_ctrl_c_time = 0
|
||||
self._clarify_state: Optional[_ClarifyState] = None
|
||||
self._clarify_state = None
|
||||
self._clarify_freetext = False
|
||||
self._clarify_deadline = 0
|
||||
self._sudo_state = None
|
||||
self._sudo_deadline = 0
|
||||
self._modal_input_snapshot = None
|
||||
self._approval_state: Optional[_ApprovalState] = None
|
||||
self._approval_state = None
|
||||
self._approval_deadline = 0
|
||||
self._approval_lock = threading.Lock()
|
||||
self._model_picker_state: Optional[_ModelPickerState] = None
|
||||
self._model_picker_state = None
|
||||
self._secret_state = None
|
||||
self._secret_deadline = 0
|
||||
self._spinner_text: str = "" # thinking spinner text for TUI
|
||||
@@ -6713,6 +6685,13 @@ class HermesCLI:
|
||||
print(f" ⚠ Port {_port} is not reachable at {cdp_url}")
|
||||
|
||||
os.environ["BROWSER_CDP_URL"] = cdp_url
|
||||
# Eagerly start the CDP supervisor so pending_dialogs + frame_tree
|
||||
# show up in the next browser_snapshot. No-op if already started.
|
||||
try:
|
||||
from tools.browser_tool import _ensure_cdp_supervisor # type: ignore[import-not-found]
|
||||
_ensure_cdp_supervisor("default")
|
||||
except Exception:
|
||||
pass
|
||||
print()
|
||||
print("🌐 Browser connected to live Chrome via CDP")
|
||||
print(f" Endpoint: {cdp_url}")
|
||||
@@ -6734,7 +6713,8 @@ class HermesCLI:
|
||||
if current:
|
||||
os.environ.pop("BROWSER_CDP_URL", None)
|
||||
try:
|
||||
from tools.browser_tool import cleanup_all_browsers
|
||||
from tools.browser_tool import cleanup_all_browsers, _stop_cdp_supervisor
|
||||
_stop_cdp_supervisor("default")
|
||||
cleanup_all_browsers()
|
||||
except Exception:
|
||||
pass
|
||||
@@ -7184,7 +7164,7 @@ class HermesCLI:
|
||||
logging.getLogger(noisy).setLevel(logging.WARNING)
|
||||
else:
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
for quiet_logger in ('tools', 'run_agent', 'scripts.trajectory_compressor', 'cron', 'hermes_cli'):
|
||||
for quiet_logger in ('tools', 'run_agent', 'trajectory_compressor', 'cron', 'hermes_cli'):
|
||||
logging.getLogger(quiet_logger).setLevel(logging.ERROR)
|
||||
|
||||
def _show_insights(self, command: str = "/insights"):
|
||||
|
||||
@@ -384,6 +384,7 @@ def create_job(
|
||||
provider: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
script: Optional[str] = None,
|
||||
enabled_toolsets: Optional[List[str]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Create a new cron job.
|
||||
@@ -403,6 +404,9 @@ def create_job(
|
||||
script: Optional path to a Python script whose stdout is injected into the
|
||||
prompt each run. The script runs before the agent turn, and its output
|
||||
is prepended as context. Useful for data collection / change detection.
|
||||
enabled_toolsets: Optional list of toolset names to restrict the agent to.
|
||||
When set, only tools from these toolsets are loaded, reducing
|
||||
token overhead. When omitted, all default tools are loaded.
|
||||
|
||||
Returns:
|
||||
The created job dict
|
||||
@@ -433,6 +437,8 @@ def create_job(
|
||||
normalized_base_url = normalized_base_url or None
|
||||
normalized_script = str(script).strip() if isinstance(script, str) else None
|
||||
normalized_script = normalized_script or None
|
||||
normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None
|
||||
normalized_toolsets = normalized_toolsets or None
|
||||
|
||||
label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
|
||||
job = {
|
||||
@@ -464,6 +470,7 @@ def create_job(
|
||||
# Delivery configuration
|
||||
"deliver": deliver,
|
||||
"origin": origin, # Tracks where job was created for "origin" delivery
|
||||
"enabled_toolsets": normalized_toolsets,
|
||||
}
|
||||
|
||||
jobs = load_jobs()
|
||||
|
||||
+34
-3
@@ -40,6 +40,37 @@ from hermes_time import now as _hermes_now
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None:
|
||||
"""Resolve the toolset list for a cron job.
|
||||
|
||||
Precedence:
|
||||
1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update).
|
||||
Keeps the agent's job-scoped toolset override intact — #6130.
|
||||
2. Per-platform ``hermes tools`` config for the ``cron`` platform.
|
||||
Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``)
|
||||
so users can gate cron toolsets globally without recreating every job.
|
||||
3. ``None`` on any lookup failure — AIAgent loads the full default set
|
||||
(legacy behavior before this change, preserved as the safety net).
|
||||
|
||||
_DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by
|
||||
``_get_platform_tools`` for unconfigured platforms, so fresh installs
|
||||
get cron WITHOUT ``moa`` by default (issue reported by Norbert —
|
||||
surprise $4.63 run).
|
||||
"""
|
||||
per_job = job.get("enabled_toolsets")
|
||||
if per_job:
|
||||
return per_job
|
||||
try:
|
||||
from hermes_cli.tools_config import _get_platform_tools # lazy: avoid heavy import at cron module load
|
||||
return sorted(_get_platform_tools(cfg or {}, "cron"))
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Cron toolset resolution failed, falling back to full default toolset: %s",
|
||||
exc,
|
||||
)
|
||||
return None
|
||||
|
||||
# Valid delivery platforms — used to validate user-supplied platform names
|
||||
# in cron delivery targets, preventing env var enumeration via crafted names.
|
||||
_KNOWN_DELIVERY_PLATFORMS = frozenset({
|
||||
@@ -439,9 +470,8 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
delivery_errors.append(msg)
|
||||
continue
|
||||
|
||||
error = result.get("error") if result else None
|
||||
if error:
|
||||
msg = f"delivery error: {error}"
|
||||
if result and result.get("error"):
|
||||
msg = f"delivery error: {result['error']}"
|
||||
logger.error("Job '%s': %s", job["id"], msg)
|
||||
delivery_errors.append(msg)
|
||||
continue
|
||||
@@ -887,6 +917,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
providers_ignored=pr.get("ignore"),
|
||||
providers_order=pr.get("order"),
|
||||
provider_sort=pr.get("sort"),
|
||||
enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
|
||||
disabled_toolsets=["cronjob", "messaging", "clarify"],
|
||||
quiet_mode=True,
|
||||
skip_context_files=True, # Don't inject SOUL.md/AGENTS.md from scheduler cwd
|
||||
|
||||
@@ -29,7 +29,7 @@ echo "📝 Logging to: $LOG_FILE"
|
||||
# Point to the example dataset in this directory
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
|
||||
python scripts/batch_runner.py \
|
||||
python batch_runner.py \
|
||||
--dataset_file="$SCRIPT_DIR/example_browser_tasks.jsonl" \
|
||||
--batch_size=5 \
|
||||
--run_name="browser_tasks_example" \
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
# Generates tool-calling trajectories for multi-step web research tasks.
|
||||
#
|
||||
# Usage:
|
||||
# python scripts/batch_runner.py \
|
||||
# python batch_runner.py \
|
||||
# --config datagen-config-examples/web_research.yaml \
|
||||
# --run_name web_research_v1
|
||||
|
||||
|
||||
@@ -18,10 +18,7 @@ import logging
|
||||
import os
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Set, TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from tools.budget_config import BudgetConfig
|
||||
from typing import Any, Dict, List, Optional, Set
|
||||
|
||||
from model_tools import handle_function_call
|
||||
from tools.terminal_tool import get_active_env
|
||||
|
||||
@@ -32,7 +32,14 @@ import sqlite3
|
||||
import time
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional
|
||||
from aiohttp import web
|
||||
|
||||
try:
|
||||
from aiohttp import web
|
||||
AIOHTTP_AVAILABLE = True
|
||||
except ImportError:
|
||||
AIOHTTP_AVAILABLE = False
|
||||
web = None # type: ignore[assignment]
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
@@ -263,6 +270,12 @@ def _multimodal_validation_error(exc: ValueError, *, param: str) -> "web.Respons
|
||||
status=400,
|
||||
)
|
||||
|
||||
|
||||
def check_api_server_requirements() -> bool:
    """Report whether the API server's dependencies are available.

    Returns the module-level ``AIOHTTP_AVAILABLE`` flag, which is set by the
    guarded ``aiohttp`` import at the top of this module.
    """
    return AIOHTTP_AVAILABLE
|
||||
|
||||
|
||||
class ResponseStore:
|
||||
"""
|
||||
SQLite-backed LRU store for Responses API state.
|
||||
@@ -378,26 +391,30 @@ _CORS_HEADERS = {
|
||||
}
|
||||
|
||||
|
||||
@web.middleware
|
||||
async def cors_middleware(request, handler):
|
||||
"""Add CORS headers for explicitly allowed origins; handle OPTIONS preflight."""
|
||||
adapter = request.app.get("api_server_adapter")
|
||||
origin = request.headers.get("Origin", "")
|
||||
cors_headers = None
|
||||
if adapter is not None:
|
||||
if not adapter._origin_allowed(origin):
|
||||
return web.Response(status=403)
|
||||
cors_headers = adapter._cors_headers_for_origin(origin)
|
||||
if AIOHTTP_AVAILABLE:
|
||||
@web.middleware
|
||||
async def cors_middleware(request, handler):
|
||||
"""Add CORS headers for explicitly allowed origins; handle OPTIONS preflight."""
|
||||
adapter = request.app.get("api_server_adapter")
|
||||
origin = request.headers.get("Origin", "")
|
||||
cors_headers = None
|
||||
if adapter is not None:
|
||||
if not adapter._origin_allowed(origin):
|
||||
return web.Response(status=403)
|
||||
cors_headers = adapter._cors_headers_for_origin(origin)
|
||||
|
||||
if request.method == "OPTIONS":
|
||||
if cors_headers is None:
|
||||
return web.Response(status=403)
|
||||
return web.Response(status=200, headers=cors_headers)
|
||||
if request.method == "OPTIONS":
|
||||
if cors_headers is None:
|
||||
return web.Response(status=403)
|
||||
return web.Response(status=200, headers=cors_headers)
|
||||
|
||||
response = await handler(request)
|
||||
if cors_headers is not None:
|
||||
response.headers.update(cors_headers)
|
||||
return response
|
||||
else:
|
||||
cors_middleware = None # type: ignore[assignment]
|
||||
|
||||
response = await handler(request)
|
||||
if cors_headers is not None:
|
||||
response.headers.update(cors_headers)
|
||||
return response
|
||||
|
||||
def _openai_error(message: str, err_type: str = "invalid_request_error", param: str = None, code: str = None) -> Dict[str, Any]:
|
||||
"""OpenAI-style error envelope."""
|
||||
@@ -411,18 +428,21 @@ def _openai_error(message: str, err_type: str = "invalid_request_error", param:
|
||||
}
|
||||
|
||||
|
||||
@web.middleware
|
||||
async def body_limit_middleware(request, handler):
|
||||
"""Reject overly large request bodies early based on Content-Length."""
|
||||
if request.method in ("POST", "PUT", "PATCH"):
|
||||
cl = request.headers.get("Content-Length")
|
||||
if cl is not None:
|
||||
try:
|
||||
if int(cl) > MAX_REQUEST_BYTES:
|
||||
return web.json_response(_openai_error("Request body too large.", code="body_too_large"), status=413)
|
||||
except ValueError:
|
||||
return web.json_response(_openai_error("Invalid Content-Length header.", code="invalid_content_length"), status=400)
|
||||
return await handler(request)
|
||||
if AIOHTTP_AVAILABLE:
|
||||
@web.middleware
|
||||
async def body_limit_middleware(request, handler):
|
||||
"""Reject overly large request bodies early based on Content-Length."""
|
||||
if request.method in ("POST", "PUT", "PATCH"):
|
||||
cl = request.headers.get("Content-Length")
|
||||
if cl is not None:
|
||||
try:
|
||||
if int(cl) > MAX_REQUEST_BYTES:
|
||||
return web.json_response(_openai_error("Request body too large.", code="body_too_large"), status=413)
|
||||
except ValueError:
|
||||
return web.json_response(_openai_error("Invalid Content-Length header.", code="invalid_content_length"), status=400)
|
||||
return await handler(request)
|
||||
else:
|
||||
body_limit_middleware = None # type: ignore[assignment]
|
||||
|
||||
_SECURITY_HEADERS = {
|
||||
"X-Content-Type-Options": "nosniff",
|
||||
@@ -430,13 +450,16 @@ _SECURITY_HEADERS = {
|
||||
}
|
||||
|
||||
|
||||
@web.middleware
|
||||
async def security_headers_middleware(request, handler):
|
||||
"""Add security headers to all responses (including errors)."""
|
||||
response = await handler(request)
|
||||
for k, v in _SECURITY_HEADERS.items():
|
||||
response.headers.setdefault(k, v)
|
||||
return response
|
||||
if AIOHTTP_AVAILABLE:
|
||||
@web.middleware
|
||||
async def security_headers_middleware(request, handler):
|
||||
"""Add security headers to all responses (including errors)."""
|
||||
response = await handler(request)
|
||||
for k, v in _SECURITY_HEADERS.items():
|
||||
response.headers.setdefault(k, v)
|
||||
return response
|
||||
else:
|
||||
security_headers_middleware = None # type: ignore[assignment]
|
||||
|
||||
|
||||
class _IdempotencyCache:
|
||||
@@ -781,7 +804,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
],
|
||||
})
|
||||
|
||||
async def _handle_chat_completions(self, request: "web.Request") -> "web.StreamResponse":
|
||||
async def _handle_chat_completions(self, request: "web.Request") -> "web.Response":
|
||||
"""POST /v1/chat/completions — OpenAI Chat Completions format."""
|
||||
auth_err = self._check_auth(request)
|
||||
if auth_err:
|
||||
@@ -1565,7 +1588,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
return response
|
||||
|
||||
async def _handle_responses(self, request: "web.Request") -> "web.StreamResponse":
|
||||
async def _handle_responses(self, request: "web.Request") -> "web.Response":
|
||||
"""POST /v1/responses — OpenAI Responses API format."""
|
||||
auth_err = self._check_auth(request)
|
||||
if auth_err:
|
||||
@@ -2459,6 +2482,10 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Start the aiohttp web server."""
|
||||
if not AIOHTTP_AVAILABLE:
|
||||
logger.warning("[%s] aiohttp not installed", self.name)
|
||||
return False
|
||||
|
||||
try:
|
||||
mws = [mw for mw in (cors_middleware, body_limit_middleware, security_headers_middleware) if mw is not None]
|
||||
self._app = web.Application(middlewares=mws)
|
||||
|
||||
+291
-40
@@ -187,14 +187,16 @@ def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
|
||||
if proxy_url.lower().startswith("socks"):
|
||||
try:
|
||||
from aiohttp_socks import ProxyConnector
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"aiohttp-socks is required for SOCKS proxy support. "
|
||||
"Install with: pip install hermes-agent[messaging]"
|
||||
) from None
|
||||
|
||||
connector = ProxyConnector.from_url(proxy_url, rdns=True)
|
||||
return {"connector": connector}
|
||||
connector = ProxyConnector.from_url(proxy_url, rdns=True)
|
||||
return {"connector": connector}
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"aiohttp_socks not installed — SOCKS proxy %s ignored. "
|
||||
"Run: pip install aiohttp-socks",
|
||||
proxy_url,
|
||||
)
|
||||
return {}
|
||||
return {"proxy": proxy_url}
|
||||
|
||||
|
||||
@@ -218,14 +220,16 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
|
||||
if proxy_url.lower().startswith("socks"):
|
||||
try:
|
||||
from aiohttp_socks import ProxyConnector
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"aiohttp-socks is required for SOCKS proxy support. "
|
||||
"Install with: pip install hermes-agent[messaging]"
|
||||
) from None
|
||||
|
||||
connector = ProxyConnector.from_url(proxy_url, rdns=True)
|
||||
return {"connector": connector}, {}
|
||||
connector = ProxyConnector.from_url(proxy_url, rdns=True)
|
||||
return {"connector": connector}, {}
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"aiohttp_socks not installed — SOCKS proxy %s ignored. "
|
||||
"Run: pip install aiohttp-socks",
|
||||
proxy_url,
|
||||
)
|
||||
return {}, {}
|
||||
return {}, {"proxy": proxy_url}
|
||||
|
||||
|
||||
@@ -424,7 +428,6 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
|
||||
await asyncio.sleep(wait)
|
||||
continue
|
||||
raise
|
||||
raise AssertionError("unreachable: retry loop exhausted")
|
||||
|
||||
|
||||
def cleanup_image_cache(max_age_hours: int = 24) -> int:
|
||||
@@ -539,7 +542,6 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
|
||||
await asyncio.sleep(wait)
|
||||
continue
|
||||
raise
|
||||
raise AssertionError("unreachable: retry loop exhausted")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -898,10 +900,16 @@ class BasePlatformAdapter(ABC):
|
||||
self._fatal_error_retryable = True
|
||||
self._fatal_error_handler: Optional[Callable[["BasePlatformAdapter"], Awaitable[None] | None]] = None
|
||||
|
||||
# Track active message handlers per session for interrupt support
|
||||
# Key: session_key (e.g., chat_id), Value: (event, asyncio.Event for interrupt)
|
||||
# Track active message handlers per session for interrupt support.
|
||||
# _active_sessions stores the per-session interrupt Event; _session_tasks
|
||||
# maps session → the specific Task currently processing it so that
|
||||
# session-terminating commands (/stop, /new, /reset) can cancel the
|
||||
# right task and release the adapter-level guard deterministically.
|
||||
# Without the owner-task map, an old task's finally block could delete
|
||||
# a newer task's guard, leaving stale busy state.
|
||||
self._active_sessions: Dict[str, asyncio.Event] = {}
|
||||
self._pending_messages: Dict[str, MessageEvent] = {}
|
||||
self._session_tasks: Dict[str, asyncio.Task] = {}
|
||||
# Background message-processing tasks spawned by handle_message().
|
||||
# Gateway shutdown cancels these so an old gateway instance doesn't keep
|
||||
# working on a task after --replace or manual restarts.
|
||||
@@ -1678,6 +1686,222 @@ class BasePlatformAdapter(ABC):
|
||||
return f"{existing_text}\n\n{new_text}".strip()
|
||||
return existing_text
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Session task + guard ownership helpers
|
||||
# ------------------------------------------------------------------
|
||||
# These were introduced together with the _session_tasks owner map to
|
||||
# make session lifecycle reconciliation deterministic across (a) the
|
||||
# normal completion path, (b) the /stop, /new, and /reset bypass commands,
|
||||
# and (c) stale-lock self-heal on the next inbound message.
|
||||
|
||||
def _release_session_guard(
    self,
    session_key: str,
    *,
    guard: Optional[asyncio.Event] = None,
) -> None:
    """Drop the ``_active_sessions`` entry for ``session_key``.

    Args:
        session_key: Key of the session whose guard should be removed.
        guard: Optional ownership check. When given, the entry is removed
            only if it is this exact Event object (identity comparison), so
            a caller holding an older guard cannot clear a replacement that
            was installed in the meantime. When omitted, whatever guard is
            installed is removed.

    Does nothing when no guard is installed for the session.
    """
    installed = self._active_sessions.get(session_key)
    # Either the caller did not ask for an ownership check, or it owns the
    # guard that is currently installed.
    caller_may_release = guard is None or installed is guard
    if installed is not None and caller_may_release:
        del self._active_sessions[session_key]
|
||||
|
||||
def _session_task_is_stale(self, session_key: str) -> bool:
    """Report whether the recorded owner task for ``session_key`` has finished.

    Returns False when ``_session_tasks`` has no entry for the session: a
    guard without a recorded owner is deliberately not classified as stale
    (guards can be installed by paths that never register a task). Returns
    False as well when the recorded object exposes no usable ``done``
    callable. Otherwise returns the truthiness of ``task.done()``.
    """
    owner = self._session_tasks.get(session_key)
    if owner is None:
        return False
    # Looked up defensively: the recorded object may be a stand-in that does
    # not implement the Task interface.
    is_done = getattr(owner, "done", None)
    if not is_done:
        return False
    return bool(is_done())
|
||||
|
||||
def _heal_stale_session_lock(self, session_key: str) -> bool:
    """Remove a session lock whose owner task has already exited.

    The lock is cleared only when ``session_key`` is present in
    ``_active_sessions`` and ``_session_task_is_stale`` reports the owner
    task as finished. Healing logs a warning and drops the session's
    entries from ``_active_sessions``, ``_pending_messages`` and
    ``_session_tasks``.

    Returns:
        True when a stale lock was cleared; False when there is no lock or
        the lock is not considered stale (the normal busy case).
    """
    has_lock = session_key in self._active_sessions
    # Short-circuit keeps the staleness probe from running for unlocked
    # sessions.
    if not (has_lock and self._session_task_is_stale(session_key)):
        return False

    logger.warning(
        "[%s] Healing stale session lock for %s (owner task is done/absent)",
        self.name,
        session_key,
    )
    for registry in (
        self._active_sessions,
        self._pending_messages,
        self._session_tasks,
    ):
        registry.pop(session_key, None)
    return True
|
||||
|
||||
def _start_session_processing(
    self,
    event: MessageEvent,
    session_key: str,
    *,
    interrupt_event: Optional[asyncio.Event] = None,
) -> bool:
    """Install the session guard and spawn the background processing task.

    The guard is stored in ``_active_sessions`` synchronously, before the
    task is created. The created task is recorded in ``_session_tasks`` as
    the session's owner and tracked in ``_background_tasks``.

    Args:
        event: Message to hand to ``_process_message_background``.
        session_key: Session the guard and owner task are registered under.
        interrupt_event: Event to install as the guard; a fresh
            ``asyncio.Event`` is created when omitted.

    Returns:
        True when the task was spawned and registered. False when the
        object returned by ``create_task`` cannot be added to
        ``_background_tasks`` (an unhashable stand-in); in that case the
        owner entry and the guard are rolled back.

    Raises:
        Exception: Whatever task creation raises (for example
            ``RuntimeError`` when no event loop is running). The guard is
            released first so the session is not left locked.
    """
    guard = interrupt_event or asyncio.Event()
    self._active_sessions[session_key] = guard

    try:
        task = asyncio.create_task(self._process_message_background(event, session_key))
    except Exception:
        # Nothing is going to process this session, so a guard left behind
        # here would keep the session marked busy with no task to clear it.
        self._release_session_guard(session_key, guard=guard)
        raise

    self._session_tasks[session_key] = task
    try:
        self._background_tasks.add(task)
    except TypeError:
        # Tests stub create_task() with lightweight sentinels that are not
        # hashable and do not support lifecycle callbacks.
        self._session_tasks.pop(session_key, None)
        self._release_session_guard(session_key, guard=guard)
        return False

    if hasattr(task, "add_done_callback"):
        # Drop bookkeeping references once the task finishes.
        task.add_done_callback(self._background_tasks.discard)
        task.add_done_callback(self._expected_cancelled_tasks.discard)
    return True
|
||||
|
||||
async def cancel_session_processing(
    self,
    session_key: str,
    *,
    release_guard: bool = True,
    discard_pending: bool = True,
) -> None:
    """Cancel the in-flight processing task of one session and tidy up.

    The owner task is removed from ``_session_tasks``. If it is still
    running it is marked as an expected cancellation, cancelled, and
    awaited until it has unwound; a ``CancelledError`` from that wait is
    swallowed and any other exception is logged at debug level.

    Args:
        session_key: Session whose processing should be cancelled.
        release_guard: When True, release the session's guard afterwards.
            Pass False to keep the guard installed so a reset-like command
            can finish before follow-up messages start a new task.
        discard_pending: When True, drop any queued message for the session.
    """
    owner = self._session_tasks.pop(session_key, None)
    still_running = owner is not None and not owner.done()

    if still_running:
        logger.debug(
            "[%s] Cancelling active processing for session %s",
            self.name,
            session_key,
        )
        # Registered before cancel() so the cancellation is already known
        # to be intentional when the task unwinds.
        self._expected_cancelled_tasks.add(owner)
        owner.cancel()
        try:
            await owner
        except asyncio.CancelledError:
            pass
        except Exception:
            logger.debug(
                "[%s] Session cancellation raised while unwinding %s",
                self.name,
                session_key,
                exc_info=True,
            )

    if discard_pending:
        self._pending_messages.pop(session_key, None)
    if release_guard:
        self._release_session_guard(session_key)
|
||||
|
||||
async def _drain_pending_after_session_command(
    self,
    session_key: str,
    command_guard: asyncio.Event,
) -> None:
    """Release a command's guard and resume any queued follow-up message.

    The queued message (if any) is taken out of ``_pending_messages``
    first, then the guard is released only if it is still
    ``command_guard``. When a message was queued, a new processing task is
    started for it via ``_start_session_processing``.

    Args:
        session_key: Session the command ran against.
        command_guard: The guard the command installed for its duration.
    """
    queued_event = self._pending_messages.pop(session_key, None)
    self._release_session_guard(session_key, guard=command_guard)
    if queued_event is not None:
        self._start_session_processing(queued_event, session_key)
|
||||
|
||||
async def _dispatch_active_session_command(
    self,
    event: MessageEvent,
    session_key: str,
    cmd: str,
) -> None:
    """Run a reset-like command against a session that is currently busy.

    Sequence, in order:
      1. Replace the session's guard with a fresh command-scoped Event so
         the session stays marked active while the command runs.
      2. Await ``_message_handler`` for the command.
      3. Cancel the previous processing task via
         ``cancel_session_processing`` while keeping the guard and any
         queued message in place.
      4. Send the handler's response, if it produced one.
      5. Release the command guard and drain the queued follow-up via
         ``_drain_pending_after_session_command``.

    If steps 2-4 raise, the guard state is repaired before re-raising: the
    previous guard is put back when the session still has an owner task
    and a previous guard existed; otherwise the command guard is released.

    Args:
        event: The incoming command message.
        session_key: Session the command targets.
        cmd: Command name without the leading slash (used for logging).
    """
    logger.debug(
        "[%s] Command '/%s' bypassing active-session guard for %s",
        self.name,
        cmd,
        session_key,
    )

    previous_guard = self._active_sessions.get(session_key)
    command_guard = asyncio.Event()
    self._active_sessions[session_key] = command_guard
    if event.source.thread_id:
        thread_meta = {"thread_id": event.source.thread_id}
    else:
        thread_meta = None

    try:
        response = await self._message_handler(event)
        # The old task is cancelled only after the handler has finished
        # with the command, which keeps the ordering deterministic.
        await self.cancel_session_processing(
            session_key,
            release_guard=False,
            discard_pending=False,
        )
        if response:
            await self._send_with_retry(
                chat_id=event.source.chat_id,
                content=response,
                reply_to=event.message_id,
                metadata=thread_meta,
            )
    except Exception:
        # Only repair the guard if nobody else replaced it in the meantime.
        if self._active_sessions.get(session_key) is command_guard:
            owner_still_recorded = session_key in self._session_tasks
            if owner_still_recorded and previous_guard is not None:
                self._active_sessions[session_key] = previous_guard
            else:
                self._release_session_guard(session_key, guard=command_guard)
        raise

    await self._drain_pending_after_session_command(session_key, command_guard)
|
||||
|
||||
async def handle_message(self, event: MessageEvent) -> None:
|
||||
"""
|
||||
Process an incoming message.
|
||||
@@ -1694,7 +1918,15 @@ class BasePlatformAdapter(ABC):
|
||||
group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
|
||||
thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
|
||||
)
|
||||
|
||||
|
||||
# On-entry self-heal: if the adapter still has an _active_sessions
|
||||
# entry for this key but the owner task has already exited (done or
|
||||
# cancelled), the lock is stale. Clear it and fall through to
|
||||
# normal dispatch so the user isn't trapped behind a dead guard —
|
||||
# this is the split-brain tail described in issue #11016.
|
||||
if session_key in self._active_sessions:
|
||||
self._heal_stale_session_lock(session_key)
|
||||
|
||||
# Check if there's already an active handler for this session
|
||||
if session_key in self._active_sessions:
|
||||
# Certain commands must bypass the active-session guard and be
|
||||
@@ -1711,6 +1943,23 @@ class BasePlatformAdapter(ABC):
|
||||
from hermes_cli.commands import should_bypass_active_session
|
||||
|
||||
if should_bypass_active_session(cmd):
|
||||
# /stop, /new, /reset must cancel the in-flight adapter task
|
||||
# and preserve ordering of queued follow-ups. Route those
|
||||
# through the dedicated handoff path that serializes
|
||||
# cancellation + runner response + pending drain.
|
||||
if cmd in ("stop", "new", "reset"):
|
||||
try:
|
||||
await self._dispatch_active_session_command(event, session_key, cmd)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"[%s] Command '/%s' dispatch failed: %s",
|
||||
self.name, cmd, e, exc_info=True,
|
||||
)
|
||||
return
|
||||
|
||||
# Other bypass commands (/approve, /deny, /status,
|
||||
# /background, /restart) just need direct dispatch — they
|
||||
# don't cancel the running task.
|
||||
logger.debug(
|
||||
"[%s] Command '/%s' bypassing active-session guard for %s",
|
||||
self.name, cmd, session_key,
|
||||
@@ -1756,19 +2005,9 @@ class BasePlatformAdapter(ABC):
|
||||
# starts would also pass the _active_sessions check and spawn a
|
||||
# duplicate task. (grammY sequentialize / aiogram EventIsolation
|
||||
# pattern — set the guard synchronously, not inside the task.)
|
||||
self._active_sessions[session_key] = asyncio.Event()
|
||||
|
||||
# Spawn background task to process this message
|
||||
task = asyncio.create_task(self._process_message_background(event, session_key))
|
||||
try:
|
||||
self._background_tasks.add(task)
|
||||
except TypeError:
|
||||
# Some tests stub create_task() with lightweight sentinels that are not
|
||||
# hashable and do not support lifecycle callbacks.
|
||||
return
|
||||
if hasattr(task, "add_done_callback"):
|
||||
task.add_done_callback(self._background_tasks.discard)
|
||||
task.add_done_callback(self._expected_cancelled_tasks.discard)
|
||||
# _start_session_processing installs the guard AND the owner-task
|
||||
# mapping atomically so stale-lock detection works.
|
||||
self._start_session_processing(event, session_key)
|
||||
|
||||
@staticmethod
|
||||
def _get_human_delay() -> float:
|
||||
@@ -1829,11 +2068,8 @@ class BasePlatformAdapter(ABC):
|
||||
try:
|
||||
await self._run_processing_hook("on_processing_start", event)
|
||||
|
||||
handler = self._message_handler
|
||||
if handler is None:
|
||||
return
|
||||
|
||||
response = await handler(event)
|
||||
# Call the handler (this can take a while with tool calls)
|
||||
response = await self._message_handler(event)
|
||||
|
||||
# Send response if any. A None/empty response is normal when
|
||||
# streaming already delivered the text (already_sent=True) or
|
||||
@@ -2131,6 +2367,9 @@ class BasePlatformAdapter(ABC):
|
||||
drain_task = asyncio.create_task(
|
||||
self._process_message_background(late_pending, session_key)
|
||||
)
|
||||
# Hand ownership of the session to the drain task so stale-lock
|
||||
# detection keeps working while it runs.
|
||||
self._session_tasks[session_key] = drain_task
|
||||
try:
|
||||
self._background_tasks.add(drain_task)
|
||||
drain_task.add_done_callback(self._background_tasks.discard)
|
||||
@@ -2140,9 +2379,14 @@ class BasePlatformAdapter(ABC):
|
||||
# Leave _active_sessions[session_key] populated — the drain
|
||||
# task's own lifecycle will clean it up.
|
||||
else:
|
||||
# Clean up session tracking
|
||||
if session_key in self._active_sessions:
|
||||
del self._active_sessions[session_key]
|
||||
# Clean up session tracking. Guard-match both deletes so a
|
||||
# reset-like command that already swapped in its own
|
||||
# command_guard (and cancelled us) can't be accidentally
|
||||
# cleared by our unwind. The command owns the session now.
|
||||
current_task = asyncio.current_task()
|
||||
if current_task is not None and self._session_tasks.get(session_key) is current_task:
|
||||
del self._session_tasks[session_key]
|
||||
self._release_session_guard(session_key, guard=interrupt_event)
|
||||
|
||||
async def cancel_background_tasks(self) -> None:
|
||||
"""Cancel any in-flight background message-processing tasks.
|
||||
@@ -2172,6 +2416,7 @@ class BasePlatformAdapter(ABC):
|
||||
# will be in self._background_tasks now. Re-check.
|
||||
self._background_tasks.clear()
|
||||
self._expected_cancelled_tasks.clear()
|
||||
self._session_tasks.clear()
|
||||
self._pending_messages.clear()
|
||||
self._active_sessions.clear()
|
||||
|
||||
@@ -2195,6 +2440,9 @@ class BasePlatformAdapter(ABC):
|
||||
user_id_alt: Optional[str] = None,
|
||||
chat_id_alt: Optional[str] = None,
|
||||
is_bot: bool = False,
|
||||
guild_id: Optional[str] = None,
|
||||
parent_chat_id: Optional[str] = None,
|
||||
message_id: Optional[str] = None,
|
||||
) -> SessionSource:
|
||||
"""Helper to build a SessionSource for this platform."""
|
||||
# Normalize empty topic to None
|
||||
@@ -2212,6 +2460,9 @@ class BasePlatformAdapter(ABC):
|
||||
user_id_alt=user_id_alt,
|
||||
chat_id_alt=chat_id_alt,
|
||||
is_bot=is_bot,
|
||||
guild_id=str(guild_id) if guild_id else None,
|
||||
parent_chat_id=str(parent_chat_id) if parent_chat_id else None,
|
||||
message_id=str(message_id) if message_id else None,
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
|
||||
@@ -14,7 +14,7 @@ import logging
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
from urllib.parse import quote
|
||||
|
||||
@@ -377,7 +377,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
payload = {
|
||||
"addresses": [address],
|
||||
"message": message,
|
||||
"tempGuid": f"temp-{datetime.now(timezone.utc).timestamp()}",
|
||||
"tempGuid": f"temp-{datetime.utcnow().timestamp()}",
|
||||
}
|
||||
try:
|
||||
res = await self._api_post("/api/v1/chat/new", payload)
|
||||
@@ -417,7 +417,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
)
|
||||
payload: Dict[str, Any] = {
|
||||
"chatGuid": guid,
|
||||
"tempGuid": f"temp-{datetime.now(timezone.utc).timestamp()}",
|
||||
"tempGuid": f"temp-{datetime.utcnow().timestamp()}",
|
||||
"message": chunk,
|
||||
}
|
||||
if reply_to and self._private_api_enabled and self._helper_connected:
|
||||
|
||||
+209
-19
@@ -23,6 +23,7 @@ from typing import Callable, Dict, Optional, Any
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
|
||||
_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}
|
||||
|
||||
try:
|
||||
import discord
|
||||
@@ -802,8 +803,27 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if not self._client:
|
||||
return
|
||||
try:
|
||||
synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
|
||||
logger.info("[%s] Synced %d slash command(s)", self.name, len(synced))
|
||||
sync_policy = self._get_discord_command_sync_policy()
|
||||
if sync_policy == "off":
|
||||
logger.info("[%s] Skipping Discord slash command sync (policy=off)", self.name)
|
||||
return
|
||||
|
||||
if sync_policy == "bulk":
|
||||
synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
|
||||
logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
|
||||
return
|
||||
|
||||
summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
|
||||
logger.info(
|
||||
"[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
|
||||
self.name,
|
||||
summary["total"],
|
||||
summary["unchanged"],
|
||||
summary["updated"],
|
||||
summary["recreated"],
|
||||
summary["created"],
|
||||
summary["deleted"],
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning("[%s] Slash command sync timed out after 30s", self.name)
|
||||
except asyncio.CancelledError:
|
||||
@@ -811,6 +831,183 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
except Exception as e: # pragma: no cover - defensive logging
|
||||
logger.warning("[%s] Slash command sync failed: %s", self.name, e, exc_info=True)
|
||||
|
||||
def _get_discord_command_sync_policy(self) -> str:
    """Resolve the slash-command sync policy from the environment.

    Reads ``DISCORD_COMMAND_SYNC_POLICY`` (default ``"safe"``), trims
    whitespace and lowercases it. A value found in
    ``_DISCORD_COMMAND_SYNC_POLICIES`` is returned as-is. Any other
    non-empty value is reported with a warning; both that case and an
    empty value fall back to ``"safe"``.
    """
    configured = os.getenv("DISCORD_COMMAND_SYNC_POLICY", "safe")
    policy = str(configured or "").strip().lower()
    if policy in _DISCORD_COMMAND_SYNC_POLICIES:
        return policy
    if policy:
        logger.warning(
            "[%s] Invalid DISCORD_COMMAND_SYNC_POLICY=%r; falling back to 'safe'",
            self.name,
            policy,
        )
    return "safe"
|
||||
|
||||
def _canonicalize_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
    """Project a command payload onto the fields used for comparison.

    Missing or falsy values are replaced by fixed defaults (type ``1``,
    empty name/description, ``dm_permission`` True, ``nsfw`` False) and
    values are coerced to stable types so that two payloads describing
    the same command compare equal. ``contexts`` and
    ``integration_types`` become sorted integer lists, or None when
    absent or empty. Non-dict entries in ``options`` are dropped; the rest
    are canonicalized with ``_canonicalize_app_command_option``.
    """
    canonical: Dict[str, Any] = {}
    canonical["type"] = int(payload.get("type", 1) or 1)
    canonical["name"] = str(payload.get("name", "") or "")
    canonical["description"] = str(payload.get("description", "") or "")
    canonical["default_member_permissions"] = self._normalize_permissions(
        payload.get("default_member_permissions")
    )
    canonical["dm_permission"] = bool(payload.get("dm_permission", True))
    canonical["nsfw"] = bool(payload.get("nsfw", False))

    # Order-insensitive comparison: sort the integer lists, and collapse
    # "absent" and "empty" to the same None marker.
    for field in ("contexts", "integration_types"):
        raw_values = payload.get(field)
        if raw_values:
            canonical[field] = sorted(int(entry) for entry in raw_values)
        else:
            canonical[field] = None

    raw_options = payload.get("options", []) or []
    canonical["options"] = [
        self._canonicalize_app_command_option(option)
        for option in raw_options
        if isinstance(option, dict)
    ]
    return canonical
|
||||
|
||||
@staticmethod
def _normalize_permissions(value: Any) -> Optional[str]:
    """Return ``value`` as a string, keeping None as None.

    Discord emits default_member_permissions as str server-side but
    discord.py sets it as int locally; converting both to str-or-None
    makes the comparison between the two stable.
    """
    return None if value is None else str(value)
|
||||
|
||||
def _existing_command_to_payload(self, command: Any) -> Dict[str, Any]:
    """Turn a fetched AppCommand into a dict ready for canonicalization.

    Starts from a copy of ``command.to_dict()`` and overlays three fields
    read from the object's attributes, each only when the attribute is
    present and not None:

    * ``nsfw`` from ``command.nsfw``.
    * ``dm_permission`` as the negation of ``command.guild_only``.
    * ``default_member_permissions`` from the attribute's ``.value`` when
      it has one, otherwise the attribute itself.

    discord.py's AppCommand.to_dict() does NOT include these fields, so
    without the overlay the canonicalizer would see defaults instead of
    the real server-side values and a command using non-default
    permissions would diff on every startup.
    """
    result = dict(command.to_dict())

    nsfw_flag = getattr(command, "nsfw", None)
    if nsfw_flag is not None:
        result["nsfw"] = bool(nsfw_flag)

    guild_only_flag = getattr(command, "guild_only", None)
    if guild_only_flag is not None:
        result["dm_permission"] = not bool(guild_only_flag)

    permissions = getattr(command, "default_member_permissions", None)
    if permissions is not None:
        # Unwrap a permissions object to its raw value when it exposes one.
        result["default_member_permissions"] = getattr(permissions, "value", permissions)

    return result
|
||||
|
||||
def _canonicalize_app_command_option(self, payload: Dict[str, Any]) -> Dict[str, Any]:
    """Project one command option onto the fields used for comparison.

    Scalar fields get fixed defaults and type coercion (type ``0``, empty
    strings, False flags). ``choices`` keeps only dict entries, reduced to
    ``name`` and ``value``. The four min/max bounds are copied through
    unchanged (None when absent). Nested ``options`` are canonicalized
    recursively, skipping non-dict entries.
    """
    canonical: Dict[str, Any] = {}
    canonical["type"] = int(payload.get("type", 0) or 0)
    canonical["name"] = str(payload.get("name", "") or "")
    canonical["description"] = str(payload.get("description", "") or "")
    canonical["required"] = bool(payload.get("required", False))
    canonical["autocomplete"] = bool(payload.get("autocomplete", False))

    normalized_choices = []
    for choice in payload.get("choices", []) or []:
        if not isinstance(choice, dict):
            continue
        normalized_choices.append(
            {
                "name": str(choice.get("name", "") or ""),
                "value": choice.get("value"),
            }
        )
    canonical["choices"] = normalized_choices

    canonical["channel_types"] = list(payload.get("channel_types", []) or [])
    for bound in ("min_value", "max_value", "min_length", "max_length"):
        canonical[bound] = payload.get(bound)

    # Sub-commands and groups nest their own options; recurse into them.
    canonical["options"] = [
        self._canonicalize_app_command_option(child)
        for child in payload.get("options", []) or []
        if isinstance(child, dict)
    ]
    return canonical
|
||||
|
||||
def _patchable_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
    """Return the canonical ``name``, ``description`` and ``options`` of a payload.

    These are the fields supported by discord.py's edit_global_command
    route; the caller compares this subset to decide whether an in-place
    edit can carry a change.
    """
    canonical = self._canonicalize_app_command_payload(payload)
    return {field: canonical[field] for field in ("name", "description", "options")}
|
||||
|
||||
async def _safe_sync_slash_commands(self) -> Dict[str, int]:
    """Reconcile global slash commands, touching only those that differ.

    Locally registered commands (from the client's command tree) are
    matched against the commands fetched from Discord by
    ``(type, lowercased name)``:

    * not present remotely -> created via ``upsert_global_command``;
    * canonical payloads equal -> left alone;
    * canonical payloads differ but the patchable subset (name,
      description, options) is equal -> deleted and created again;
    * otherwise -> edited in place via ``edit_global_command``.

    Remote commands with no local counterpart are deleted.

    Returns:
        Counts keyed ``total``, ``unchanged``, ``updated``, ``recreated``,
        ``created`` and ``deleted``; ``total`` is the number of local
        command payloads. All zeros when there is no client.

    Raises:
        RuntimeError: If neither ``application_id`` nor ``user.id`` is
            available on the client.
    """
    tally = {
        "total": 0,
        "unchanged": 0,
        "updated": 0,
        "recreated": 0,
        "created": 0,
        "deleted": 0,
    }
    if not self._client:
        return tally

    tree = self._client.tree
    app_id = getattr(self._client, "application_id", None) or getattr(
        getattr(self._client, "user", None), "id", None
    )
    if not app_id:
        raise RuntimeError("Discord application ID is unavailable for slash command sync")

    desired_payloads = [command.to_dict(tree) for command in tree.get_commands()]
    desired_by_key = {}
    for payload in desired_payloads:
        kind = int(payload.get("type", 1) or 1)
        desired_by_key[(kind, str(payload.get("name", "") or "").lower())] = payload

    existing_commands = await tree.fetch_commands()
    remaining_remote = {}
    for command in existing_commands:
        # ``type`` may be an enum-like object (use its ``.value``) or a
        # plain value; missing or falsy falls back to 1.
        kind = int(
            getattr(getattr(command, "type", None), "value", getattr(command, "type", 1)) or 1
        )
        remaining_remote[(kind, str(command.name or "").lower())] = command

    http = self._client.http

    for key, desired in desired_by_key.items():
        # Popping marks the remote command as accounted for; whatever is
        # left afterwards has no local counterpart.
        remote = remaining_remote.pop(key, None)
        if remote is None:
            await http.upsert_global_command(app_id, desired)
            tally["created"] += 1
            continue

        remote_payload = self._existing_command_to_payload(remote)
        if self._canonicalize_app_command_payload(
            remote_payload
        ) == self._canonicalize_app_command_payload(desired):
            tally["unchanged"] += 1
            continue

        if self._patchable_app_command_payload(
            remote_payload
        ) == self._patchable_app_command_payload(desired):
            # The difference lies outside the patchable subset, so the
            # command is replaced rather than edited.
            await http.delete_global_command(app_id, remote.id)
            await http.upsert_global_command(app_id, desired)
            tally["recreated"] += 1
        else:
            await http.edit_global_command(app_id, remote.id, desired)
            tally["updated"] += 1

    for orphan in remaining_remote.values():
        await http.delete_global_command(app_id, orphan.id)
        tally["deleted"] += 1

    tally["total"] = len(desired_payloads)
    return tally
|
||||
|
||||
async def _add_reaction(self, message: Any, emoji: str) -> bool:
|
||||
"""Add an emoji reaction to a Discord message."""
|
||||
if not message or not hasattr(message, "add_reaction"):
|
||||
@@ -1196,16 +1393,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
try:
|
||||
import base64
|
||||
|
||||
try:
|
||||
from mutagen.oggopus import OggOpus
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"mutagen is required for Discord voice messages. "
|
||||
"Install with: pip install hermes-agent[messaging]"
|
||||
) from None
|
||||
|
||||
duration_secs = 5.0
|
||||
try:
|
||||
from mutagen.oggopus import OggOpus
|
||||
info = OggOpus(audio_path)
|
||||
duration_secs = info.info.length
|
||||
except Exception:
|
||||
@@ -1898,7 +2088,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
# Fetch full member list (requires members intent)
|
||||
try:
|
||||
members = guild.members
|
||||
if guild.member_count is not None and len(members) < guild.member_count:
|
||||
if len(members) < guild.member_count:
|
||||
members = [m async for m in guild.fetch_members(limit=None)]
|
||||
except Exception as e:
|
||||
logger.warning("Failed to fetch members for guild %s: %s", guild.name, e)
|
||||
@@ -2511,7 +2701,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if isinstance(skills, str):
|
||||
return [skills]
|
||||
if isinstance(skills, list) and skills:
|
||||
return list(dict.fromkeys(skills)) # ty: ignore[invalid-return-type] # dedup, preserve order
|
||||
return list(dict.fromkeys(skills)) # dedup, preserve order
|
||||
return None
|
||||
|
||||
def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
|
||||
@@ -3047,7 +3237,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
|
||||
# Skip the mention check if the message is in a thread where
|
||||
# the bot has previously participated (auto-created or replied in).
|
||||
in_bot_thread = is_thread and thread_id is not None and thread_id in self._threads
|
||||
in_bot_thread = is_thread and thread_id in self._threads
|
||||
|
||||
if require_mention and not is_free_channel and not in_bot_thread:
|
||||
if self._client.user not in message.mentions and not mention_prefix:
|
||||
@@ -3066,6 +3256,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
|
||||
thread = await self._auto_create_thread(message)
|
||||
if thread:
|
||||
parent_channel_id = str(message.channel.id)
|
||||
is_thread = True
|
||||
thread_id = str(thread.id)
|
||||
auto_threaded_channel = thread
|
||||
@@ -3125,6 +3316,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
thread_id=thread_id,
|
||||
chat_topic=chat_topic,
|
||||
is_bot=getattr(message.author, "bot", False),
|
||||
guild_id=str(message.guild.id) if message.guild else None,
|
||||
parent_chat_id=parent_channel_id,
|
||||
message_id=str(message.id),
|
||||
)
|
||||
|
||||
# Build media URLs -- download image attachments to local cache so the
|
||||
@@ -3640,9 +3834,7 @@ if DISCORD_AVAILABLE:
|
||||
)
|
||||
return
|
||||
|
||||
if interaction.data is None:
|
||||
return
|
||||
provider_slug = interaction.data["values"][0] # ty: ignore[invalid-key]
|
||||
provider_slug = interaction.data["values"][0]
|
||||
self._selected_provider = provider_slug
|
||||
provider = next(
|
||||
(p for p in self.providers if p["slug"] == provider_slug), None
|
||||
@@ -3676,10 +3868,8 @@ if DISCORD_AVAILABLE:
|
||||
)
|
||||
return
|
||||
|
||||
if interaction.data is None:
|
||||
return
|
||||
self.resolved = True
|
||||
model_id = interaction.data["values"][0] # ty: ignore[invalid-key]
|
||||
model_id = interaction.data["values"][0]
|
||||
|
||||
try:
|
||||
result_text = await self.on_model_selected(
|
||||
|
||||
@@ -532,7 +532,6 @@ class EmailAdapter(BasePlatformAdapter):
|
||||
image_url: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send an image URL as part of an email body."""
|
||||
text = caption or ""
|
||||
|
||||
@@ -2170,8 +2170,8 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
ul_match = re.match(r"^[\s]*[-*+]\s+(.+)$", line)
|
||||
if ul_match:
|
||||
items = []
|
||||
while i < len(lines) and (m := re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i])):
|
||||
items.append(m.group(1))
|
||||
while i < len(lines) and re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i]):
|
||||
items.append(re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i]).group(1))
|
||||
i += 1
|
||||
li = "".join(f"<li>{item}</li>" for item in items)
|
||||
out_lines.append(f"<ul>{li}</ul>")
|
||||
@@ -2181,8 +2181,8 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
ol_match = re.match(r"^[\s]*\d+[.)]\s+(.+)$", line)
|
||||
if ol_match:
|
||||
items = []
|
||||
while i < len(lines) and (m := re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i])):
|
||||
items.append(m.group(1))
|
||||
while i < len(lines) and re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i]):
|
||||
items.append(re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i]).group(1))
|
||||
i += 1
|
||||
li = "".join(f"<li>{item}</li>" for item in items)
|
||||
out_lines.append(f"<ol>{li}</ol>")
|
||||
|
||||
@@ -1842,7 +1842,6 @@ class QQAdapter(BasePlatformAdapter):
|
||||
await asyncio.sleep(1.5 * (attempt + 1))
|
||||
else:
|
||||
raise
|
||||
raise AssertionError("unreachable: retry loop exhausted")
|
||||
|
||||
# Maximum time (seconds) to wait for reconnection before giving up on send.
|
||||
_RECONNECT_WAIT_SECONDS = 15.0
|
||||
|
||||
@@ -1690,7 +1690,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
await asyncio.sleep(1.5 * (attempt + 1))
|
||||
continue
|
||||
raise
|
||||
raise AssertionError("unreachable: retry loop exhausted")
|
||||
|
||||
async def _download_slack_file_bytes(self, url: str, team_id: str = "") -> bytes:
|
||||
"""Download a Slack file and return raw bytes, with retry."""
|
||||
@@ -1716,7 +1715,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
await asyncio.sleep(1.5 * (attempt + 1))
|
||||
continue
|
||||
raise
|
||||
raise AssertionError("unreachable: retry loop exhausted")
|
||||
|
||||
# ── Channel mention gating ─────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -25,10 +25,7 @@ import hmac
|
||||
import logging
|
||||
import os
|
||||
import urllib.parse
|
||||
from typing import Any, Dict, Optional, TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import aiohttp
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
|
||||
@@ -2820,8 +2820,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
)
|
||||
|
||||
sticker = msg.sticker
|
||||
if sticker is None:
|
||||
return
|
||||
emoji = sticker.emoji or ""
|
||||
set_name = sticker.set_name or ""
|
||||
|
||||
|
||||
@@ -151,7 +151,7 @@ def _resolve_system_dns() -> set[str]:
|
||||
"""Return the IPv4 addresses that the OS resolver gives for api.telegram.org."""
|
||||
try:
|
||||
results = socket.getaddrinfo(_TELEGRAM_API_HOST, 443, socket.AF_INET)
|
||||
return {str(addr[4][0]) for addr in results}
|
||||
return {addr[4][0] for addr in results}
|
||||
except Exception:
|
||||
return set()
|
||||
|
||||
|
||||
@@ -703,8 +703,7 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
elif isinstance(appmsg.get("image"), dict):
|
||||
refs.append(("image", appmsg["image"]))
|
||||
|
||||
raw_quote = body.get("quote")
|
||||
quote = raw_quote if isinstance(raw_quote, dict) else {}
|
||||
quote = body.get("quote") if isinstance(body.get("quote"), dict) else {}
|
||||
quote_type = str(quote.get("msgtype") or "").lower()
|
||||
if quote_type == "image" and isinstance(quote.get("image"), dict):
|
||||
refs.append(("image", quote["image"]))
|
||||
|
||||
@@ -25,10 +25,7 @@ import subprocess
|
||||
|
||||
_IS_WINDOWS = platform.system() == "Windows"
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, Any, TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import aiohttp
|
||||
from typing import Dict, Optional, Any
|
||||
|
||||
from hermes_constants import get_hermes_dir
|
||||
|
||||
|
||||
+146
-67
@@ -1551,27 +1551,23 @@ class GatewayRunner:
|
||||
)
|
||||
return True
|
||||
|
||||
# --- Normal busy case (agent actively running a task) ---
|
||||
# The user sent a message while the agent is working. Interrupt the
|
||||
# agent immediately so it stops the current tool-calling loop and
|
||||
# processes the new message. The pending message is stored in the
|
||||
# adapter so the base adapter picks it up once the interrupted run
|
||||
# returns. A brief ack tells the user what's happening (debounced
|
||||
# to avoid spam when they fire multiple messages quickly).
|
||||
|
||||
# Normal busy case (agent actively running a task)
|
||||
adapter = self.adapters.get(event.source.platform)
|
||||
if not adapter:
|
||||
return False # let default path handle it
|
||||
|
||||
# Store the message so it's processed as the next turn after the
|
||||
# interrupt causes the current run to exit.
|
||||
# current run finishes (or is interrupted).
|
||||
from gateway.platforms.base import merge_pending_message_event
|
||||
merge_pending_message_event(adapter._pending_messages, session_key, event)
|
||||
|
||||
# Interrupt the running agent — this aborts in-flight tool calls and
|
||||
# causes the agent loop to exit at the next check point.
|
||||
is_queue_mode = self._busy_input_mode == "queue"
|
||||
|
||||
# If not in queue mode, interrupt the running agent immediately.
|
||||
# This aborts in-flight tool calls and causes the agent loop to exit
|
||||
# at the next check point.
|
||||
running_agent = self._running_agents.get(session_key)
|
||||
if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
|
||||
if not is_queue_mode and running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
|
||||
try:
|
||||
running_agent.interrupt(event.text)
|
||||
except Exception:
|
||||
@@ -1583,7 +1579,7 @@ class GatewayRunner:
|
||||
now = time.time()
|
||||
last_ack = self._busy_ack_ts.get(session_key, 0)
|
||||
if now - last_ack < _BUSY_ACK_COOLDOWN:
|
||||
return True # interrupt sent, ack already delivered recently
|
||||
return True # interrupt sent (if not queue), ack already delivered recently
|
||||
|
||||
self._busy_ack_ts[session_key] = now
|
||||
|
||||
@@ -1608,10 +1604,16 @@ class GatewayRunner:
|
||||
pass
|
||||
|
||||
status_detail = f" ({', '.join(status_parts)})" if status_parts else ""
|
||||
message = (
|
||||
f"⚡ Interrupting current task{status_detail}. "
|
||||
f"I'll respond to your message shortly."
|
||||
)
|
||||
if is_queue_mode:
|
||||
message = (
|
||||
f"⏳ Queued for the next turn{status_detail}. "
|
||||
f"I'll respond once the current task finishes."
|
||||
)
|
||||
else:
|
||||
message = (
|
||||
f"⚡ Interrupting current task{status_detail}. "
|
||||
f"I'll respond to your message shortly."
|
||||
)
|
||||
|
||||
thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
|
||||
try:
|
||||
@@ -2560,6 +2562,40 @@ class GatewayRunner:
|
||||
return
|
||||
|
||||
async def _stop_impl() -> None:
|
||||
def _kill_tool_subprocesses(phase: str) -> None:
|
||||
"""Kill tool subprocesses + tear down terminal envs + browsers.
|
||||
|
||||
Called twice in the shutdown path: once eagerly after a
|
||||
drain timeout forces agent interrupt (so we reclaim bash/
|
||||
sleep children before systemd TimeoutStopSec escalates to
|
||||
SIGKILL on the cgroup — #8202), and once as a final
|
||||
catch-all at the end of _stop_impl() for the graceful
|
||||
path or anything respawned mid-teardown.
|
||||
|
||||
All steps are best-effort; exceptions are swallowed so
|
||||
one subsystem's failure doesn't block the rest.
|
||||
"""
|
||||
try:
|
||||
from tools.process_registry import process_registry
|
||||
_killed = process_registry.kill_all()
|
||||
if _killed:
|
||||
logger.info(
|
||||
"Shutdown (%s): killed %d tool subprocess(es)",
|
||||
phase, _killed,
|
||||
)
|
||||
except Exception as _e:
|
||||
logger.debug("process_registry.kill_all (%s) error: %s", phase, _e)
|
||||
try:
|
||||
from tools.terminal_tool import cleanup_all_environments
|
||||
cleanup_all_environments()
|
||||
except Exception as _e:
|
||||
logger.debug("cleanup_all_environments (%s) error: %s", phase, _e)
|
||||
try:
|
||||
from tools.browser_tool import cleanup_all_browsers
|
||||
cleanup_all_browsers()
|
||||
except Exception as _e:
|
||||
logger.debug("cleanup_all_browsers (%s) error: %s", phase, _e)
|
||||
|
||||
logger.info(
|
||||
"Stopping gateway%s...",
|
||||
" for restart" if self._restart_requested else "",
|
||||
@@ -2621,6 +2657,16 @@ class GatewayRunner:
|
||||
self._update_runtime_status("draining")
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
# Kill lingering tool subprocesses NOW, before we spend more
|
||||
# budget on adapter disconnect / session DB close. Under
|
||||
# systemd (TimeoutStopSec bounded by drain_timeout+headroom),
|
||||
# deferring this to the end of stop() risks systemd escalating
|
||||
# to SIGKILL on the cgroup first — at which point bash/sleep
|
||||
# children left behind by an interrupted terminal tool get
|
||||
# killed by systemd instead of us (issue #8202). The final
|
||||
# catch-all cleanup below still runs for the graceful path.
|
||||
_kill_tool_subprocesses("post-interrupt")
|
||||
|
||||
if self._restart_requested and self._restart_detached:
|
||||
try:
|
||||
await self._launch_detached_restart_command()
|
||||
@@ -2656,22 +2702,13 @@ class GatewayRunner:
|
||||
self._shutdown_event.set()
|
||||
|
||||
# Global cleanup: kill any remaining tool subprocesses not tied
|
||||
# to a specific agent (catch-all for zombie prevention).
|
||||
try:
|
||||
from tools.process_registry import process_registry
|
||||
process_registry.kill_all()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
from tools.terminal_tool import cleanup_all_environments
|
||||
cleanup_all_environments()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
from tools.browser_tool import cleanup_all_browsers
|
||||
cleanup_all_browsers()
|
||||
except Exception:
|
||||
pass
|
||||
# to a specific agent (catch-all for zombie prevention). On the
|
||||
# drain-timeout path we already did this earlier after agent
|
||||
# interrupt — this second call catches (a) the graceful path
|
||||
# where drain succeeded without interrupt, and (b) anything
|
||||
# that got respawned between the earlier call and adapter
|
||||
# disconnect (defense in depth; safe to call repeatedly).
|
||||
_kill_tool_subprocesses("final-cleanup")
|
||||
|
||||
# Close SQLite session DBs so the WAL write lock is released.
|
||||
# Without this, --replace and similar restart flows leave the
|
||||
@@ -2859,12 +2896,10 @@ class GatewayRunner:
|
||||
return MatrixAdapter(config)
|
||||
|
||||
elif platform == Platform.API_SERVER:
|
||||
try:
|
||||
import aiohttp # noqa: F401
|
||||
except ImportError:
|
||||
from gateway.platforms.api_server import APIServerAdapter, check_api_server_requirements
|
||||
if not check_api_server_requirements():
|
||||
logger.warning("API Server: aiohttp not installed")
|
||||
return None
|
||||
from gateway.platforms.api_server import APIServerAdapter
|
||||
return APIServerAdapter(config)
|
||||
|
||||
elif platform == Platform.WEBHOOK:
|
||||
@@ -4431,10 +4466,9 @@ class GatewayRunner:
|
||||
# is speaking, without needing a separate tool call.
|
||||
# -----------------------------------------------------------------
|
||||
if source.platform == Platform.DISCORD:
|
||||
from gateway.platforms.discord import DiscordAdapter
|
||||
adapter = self.adapters.get(Platform.DISCORD)
|
||||
guild_id = self._get_guild_id(event)
|
||||
if guild_id and isinstance(adapter, DiscordAdapter):
|
||||
if guild_id and adapter and hasattr(adapter, "get_voice_channel_context"):
|
||||
vc_context = adapter.get_voice_channel_context(guild_id)
|
||||
if vc_context:
|
||||
context_prompt += f"\n\n{vc_context}"
|
||||
@@ -5877,7 +5911,7 @@ class GatewayRunner:
|
||||
available = "`none`, " + ", ".join(f"`{n}`" for n in personalities)
|
||||
return f"Unknown personality: `{args}`\n\nAvailable: {available}"
|
||||
|
||||
async def _handle_retry_command(self, event: MessageEvent) -> Optional[str]:
|
||||
async def _handle_retry_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /retry command - re-send the last user message."""
|
||||
source = event.source
|
||||
session_entry = self.session_store.get_or_create_session(source)
|
||||
@@ -6027,10 +6061,9 @@ class GatewayRunner:
|
||||
"all": "TTS (voice reply to all messages)",
|
||||
}
|
||||
# Append voice channel info if connected
|
||||
from gateway.platforms.discord import DiscordAdapter
|
||||
adapter = self.adapters.get(event.source.platform)
|
||||
guild_id = self._get_guild_id(event)
|
||||
if guild_id and isinstance(adapter, DiscordAdapter):
|
||||
if guild_id and hasattr(adapter, "get_voice_channel_info"):
|
||||
info = adapter.get_voice_channel_info(guild_id)
|
||||
if info:
|
||||
lines = [
|
||||
@@ -6061,9 +6094,8 @@ class GatewayRunner:
|
||||
|
||||
async def _handle_voice_channel_join(self, event: MessageEvent) -> str:
|
||||
"""Join the user's current Discord voice channel."""
|
||||
from gateway.platforms.discord import DiscordAdapter
|
||||
adapter = self.adapters.get(event.source.platform)
|
||||
if not isinstance(adapter, DiscordAdapter):
|
||||
if not hasattr(adapter, "join_voice_channel"):
|
||||
return "Voice channels are not supported on this platform."
|
||||
|
||||
guild_id = self._get_guild_id(event)
|
||||
@@ -6078,8 +6110,10 @@ class GatewayRunner:
|
||||
|
||||
# Wire callbacks BEFORE join so voice input arriving immediately
|
||||
# after connection is not lost.
|
||||
adapter._voice_input_callback = self._handle_voice_channel_input
|
||||
adapter._on_voice_disconnect = self._handle_voice_timeout_cleanup
|
||||
if hasattr(adapter, "_voice_input_callback"):
|
||||
adapter._voice_input_callback = self._handle_voice_channel_input
|
||||
if hasattr(adapter, "_on_voice_disconnect"):
|
||||
adapter._on_voice_disconnect = self._handle_voice_timeout_cleanup
|
||||
|
||||
try:
|
||||
success = await adapter.join_voice_channel(voice_channel)
|
||||
@@ -6096,7 +6130,8 @@ class GatewayRunner:
|
||||
|
||||
if success:
|
||||
adapter._voice_text_channels[guild_id] = int(event.source.chat_id)
|
||||
adapter._voice_sources[guild_id] = event.source.to_dict()
|
||||
if hasattr(adapter, "_voice_sources"):
|
||||
adapter._voice_sources[guild_id] = event.source.to_dict()
|
||||
self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "all"
|
||||
self._save_voice_modes()
|
||||
self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False)
|
||||
@@ -6110,14 +6145,13 @@ class GatewayRunner:
|
||||
|
||||
async def _handle_voice_channel_leave(self, event: MessageEvent) -> str:
|
||||
"""Leave the Discord voice channel."""
|
||||
from gateway.platforms.discord import DiscordAdapter
|
||||
adapter = self.adapters.get(event.source.platform)
|
||||
guild_id = self._get_guild_id(event)
|
||||
|
||||
if not guild_id or not isinstance(adapter, DiscordAdapter):
|
||||
if not guild_id or not hasattr(adapter, "leave_voice_channel"):
|
||||
return "Not in a voice channel."
|
||||
|
||||
if not adapter.is_in_voice_channel(guild_id):
|
||||
if not hasattr(adapter, "is_in_voice_channel") or not adapter.is_in_voice_channel(guild_id):
|
||||
return "Not in a voice channel."
|
||||
|
||||
try:
|
||||
@@ -6128,7 +6162,8 @@ class GatewayRunner:
|
||||
self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "off"
|
||||
self._save_voice_modes()
|
||||
self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=True)
|
||||
adapter._voice_input_callback = None
|
||||
if hasattr(adapter, "_voice_input_callback"):
|
||||
adapter._voice_input_callback = None
|
||||
return "Left voice channel."
|
||||
|
||||
def _handle_voice_timeout_cleanup(self, chat_id: str) -> None:
|
||||
@@ -6288,13 +6323,13 @@ class GatewayRunner:
|
||||
adapter = self.adapters.get(event.source.platform)
|
||||
|
||||
# If connected to a voice channel, play there instead of sending a file
|
||||
from gateway.platforms.discord import DiscordAdapter
|
||||
guild_id = self._get_guild_id(event)
|
||||
if (guild_id
|
||||
and isinstance(adapter, DiscordAdapter)
|
||||
and hasattr(adapter, "play_in_voice_channel")
|
||||
and hasattr(adapter, "is_in_voice_channel")
|
||||
and adapter.is_in_voice_channel(guild_id)):
|
||||
await adapter.play_in_voice_channel(guild_id, actual_path)
|
||||
elif adapter:
|
||||
elif adapter and hasattr(adapter, "send_voice"):
|
||||
send_kwargs: Dict[str, Any] = {
|
||||
"chat_id": event.source.chat_id,
|
||||
"audio_path": actual_path,
|
||||
@@ -8667,7 +8702,12 @@ class GatewayRunner:
|
||||
override = self._session_model_overrides.get(session_key)
|
||||
return override is not None and override.get("model") == agent_model
|
||||
|
||||
def _release_running_agent_state(self, session_key: str) -> None:
|
||||
def _release_running_agent_state(
|
||||
self,
|
||||
session_key: str,
|
||||
*,
|
||||
run_generation: Optional[int] = None,
|
||||
) -> bool:
|
||||
"""Pop ALL per-running-agent state entries for ``session_key``.
|
||||
|
||||
Replaces ad-hoc ``del self._running_agents[key]`` calls scattered
|
||||
@@ -8683,13 +8723,25 @@ class GatewayRunner:
|
||||
across turns (``_session_model_overrides``, ``_voice_mode``,
|
||||
``_pending_approvals``, ``_update_prompt_pending``) is NOT
|
||||
touched here — those have their own lifecycles.
|
||||
|
||||
When ``run_generation`` is provided, only clear the slot if that
|
||||
generation is still current for the session. This prevents an
|
||||
older async run whose generation was bumped by /stop or /new from
|
||||
clobbering a newer run's state during its own unwind. Returns
|
||||
True when the slot was cleared, False when an ownership guard
|
||||
blocked it.
|
||||
"""
|
||||
if not session_key:
|
||||
return
|
||||
return False
|
||||
if run_generation is not None and not self._is_session_run_current(
|
||||
session_key, run_generation
|
||||
):
|
||||
return False
|
||||
self._running_agents.pop(session_key, None)
|
||||
self._running_agents_ts.pop(session_key, None)
|
||||
if hasattr(self, "_busy_ack_ts"):
|
||||
self._busy_ack_ts.pop(session_key, None)
|
||||
return True
|
||||
|
||||
def _clear_session_boundary_security_state(self, session_key: str) -> None:
|
||||
"""Clear approval state that must not survive a real conversation switch."""
|
||||
@@ -10251,10 +10303,24 @@ class GatewayRunner:
|
||||
# Wait for agent to be created
|
||||
while agent_holder[0] is None:
|
||||
await asyncio.sleep(0.05)
|
||||
if session_key:
|
||||
self._running_agents[session_key] = agent_holder[0]
|
||||
if self._draining:
|
||||
self._update_runtime_status("draining")
|
||||
if not session_key:
|
||||
return
|
||||
# Only promote the sentinel to the real agent if this run is still
|
||||
# current. If /stop or /new bumped the generation while we were
|
||||
# spinning up, leave the newer run's slot alone — we'll be
|
||||
# discarded by the stale-result check in _handle_message_with_agent.
|
||||
if run_generation is not None and not self._is_session_run_current(
|
||||
session_key, run_generation
|
||||
):
|
||||
logger.info(
|
||||
"Skipping stale agent promotion for %s — generation %s is no longer current",
|
||||
(session_key or "")[:20],
|
||||
run_generation,
|
||||
)
|
||||
return
|
||||
self._running_agents[session_key] = agent_holder[0]
|
||||
if self._draining:
|
||||
self._update_runtime_status("draining")
|
||||
|
||||
tracking_task = asyncio.create_task(track_agent())
|
||||
|
||||
@@ -10309,9 +10375,9 @@ class GatewayRunner:
|
||||
# Periodic "still working" notifications for long-running tasks.
|
||||
# Fires every N seconds so the user knows the agent hasn't died.
|
||||
# Config: agent.gateway_notify_interval in config.yaml, or
|
||||
# HERMES_AGENT_NOTIFY_INTERVAL env var. Default 600s (10 min).
|
||||
# HERMES_AGENT_NOTIFY_INTERVAL env var. Default 180s (3 min).
|
||||
# 0 = disable notifications.
|
||||
_NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 600))
|
||||
_NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 180))
|
||||
_NOTIFY_INTERVAL = _NOTIFY_INTERVAL_RAW if _NOTIFY_INTERVAL_RAW > 0 else None
|
||||
_notify_start = time.time()
|
||||
|
||||
@@ -10490,7 +10556,6 @@ class GatewayRunner:
|
||||
if _timed_out_agent and hasattr(_timed_out_agent, "interrupt"):
|
||||
_timed_out_agent.interrupt(_INTERRUPT_REASON_TIMEOUT)
|
||||
|
||||
assert _agent_timeout is not None # narrowed by _idle_secs >= _agent_timeout above
|
||||
_timeout_mins = int(_agent_timeout // 60) or 1
|
||||
|
||||
# Construct a user-facing message with diagnostic context.
|
||||
@@ -10609,7 +10674,7 @@ class GatewayRunner:
|
||||
pending = None
|
||||
|
||||
if pending_event or pending:
|
||||
logger.debug("Processing pending message: '%s...'", (pending or "")[:40])
|
||||
logger.debug("Processing pending message: '%s...'", pending[:40])
|
||||
|
||||
# Clear the adapter's interrupt event so the next _run_agent call
|
||||
# doesn't immediately re-trigger the interrupt before the new agent
|
||||
@@ -10628,6 +10693,8 @@ class GatewayRunner:
|
||||
adapter = self.adapters.get(source.platform)
|
||||
if adapter and pending_event:
|
||||
merge_pending_message_event(adapter._pending_messages, session_key, pending_event)
|
||||
elif adapter and hasattr(adapter, 'queue_message'):
|
||||
adapter.queue_message(session_key, pending)
|
||||
return result_holder[0] or {"final_response": response, "messages": history}
|
||||
|
||||
was_interrupted = result.get("interrupted")
|
||||
@@ -10709,7 +10776,7 @@ class GatewayRunner:
|
||||
history=updated_history,
|
||||
)
|
||||
if next_message is None:
|
||||
return result # ty: ignore[invalid-return-type]
|
||||
return result
|
||||
next_message_id = getattr(pending_event, "message_id", None)
|
||||
next_channel_prompt = getattr(pending_event, "channel_prompt", None)
|
||||
|
||||
@@ -10759,7 +10826,14 @@ class GatewayRunner:
|
||||
# Clean up tracking
|
||||
tracking_task.cancel()
|
||||
if session_key:
|
||||
self._release_running_agent_state(session_key)
|
||||
# Only release the slot if this run's generation still owns
|
||||
# it. A /stop or /new that bumped the generation while we
|
||||
# were unwinding has already installed its own state; this
|
||||
# guard prevents an old run from clobbering it on the way
|
||||
# out.
|
||||
self._release_running_agent_state(
|
||||
session_key, run_generation=run_generation
|
||||
)
|
||||
if self._draining:
|
||||
self._update_runtime_status("draining")
|
||||
|
||||
@@ -10882,6 +10956,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
||||
from gateway.status import (
|
||||
acquire_gateway_runtime_lock,
|
||||
get_running_pid,
|
||||
get_process_start_time,
|
||||
release_gateway_runtime_lock,
|
||||
remove_pid_file,
|
||||
terminate_pid,
|
||||
@@ -10889,6 +10964,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
||||
existing_pid = get_running_pid()
|
||||
if existing_pid is not None and existing_pid != os.getpid():
|
||||
if replace:
|
||||
existing_start_time = get_process_start_time(existing_pid)
|
||||
logger.info(
|
||||
"Replacing existing gateway instance (PID %d) with --replace.",
|
||||
existing_pid,
|
||||
@@ -10957,7 +11033,10 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
||||
# leaving stale lock files that block the new gateway from starting.
|
||||
try:
|
||||
from gateway.status import release_all_scoped_locks
|
||||
_released = release_all_scoped_locks()
|
||||
_released = release_all_scoped_locks(
|
||||
owner_pid=existing_pid,
|
||||
owner_start_time=existing_start_time,
|
||||
)
|
||||
if _released:
|
||||
logger.info("Released %d stale scoped lock(s) from old gateway.", _released)
|
||||
except Exception:
|
||||
|
||||
+41
-9
@@ -83,6 +83,9 @@ class SessionSource:
|
||||
user_id_alt: Optional[str] = None # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
|
||||
chat_id_alt: Optional[str] = None # Signal group internal ID
|
||||
is_bot: bool = False # True when the message author is a bot/webhook (Discord)
|
||||
guild_id: Optional[str] = None # Discord guild / Slack workspace / Matrix server scope
|
||||
parent_chat_id: Optional[str] = None # Parent channel when chat_id refers to a thread
|
||||
message_id: Optional[str] = None # ID of the triggering message (for pin/reply/react)
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
@@ -120,8 +123,14 @@ class SessionSource:
|
||||
d["user_id_alt"] = self.user_id_alt
|
||||
if self.chat_id_alt:
|
||||
d["chat_id_alt"] = self.chat_id_alt
|
||||
if self.guild_id:
|
||||
d["guild_id"] = self.guild_id
|
||||
if self.parent_chat_id:
|
||||
d["parent_chat_id"] = self.parent_chat_id
|
||||
if self.message_id:
|
||||
d["message_id"] = self.message_id
|
||||
return d
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
|
||||
return cls(
|
||||
@@ -135,6 +144,9 @@ class SessionSource:
|
||||
chat_topic=data.get("chat_topic"),
|
||||
user_id_alt=data.get("user_id_alt"),
|
||||
chat_id_alt=data.get("chat_id_alt"),
|
||||
guild_id=data.get("guild_id"),
|
||||
parent_chat_id=data.get("parent_chat_id"),
|
||||
message_id=data.get("message_id"),
|
||||
)
|
||||
|
||||
|
||||
@@ -273,14 +285,34 @@ def build_session_context_prompt(
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
elif context.source.platform == Platform.DISCORD:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"**Platform notes:** You are running inside Discord. "
|
||||
"You do NOT have access to Discord-specific APIs — you cannot search "
|
||||
"channel history, pin messages, manage roles, or list server members. "
|
||||
"Do not promise to perform these actions. If the user asks, explain "
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
# The discord tool self-gates on DISCORD_BOT_TOKEN at registry
|
||||
# check time. Match that condition so the prompt stays honest:
|
||||
# with a token the agent has fetch_messages/search_members/
|
||||
# create_thread (and optionally discord_admin) and should know
|
||||
# the IDs it can call them with; without one it really is
|
||||
# limited to reading/replying via the gateway.
|
||||
if (os.environ.get("DISCORD_BOT_TOKEN") or "").strip():
|
||||
src = context.source
|
||||
id_lines = ["", "**Discord IDs (for the `discord` / `discord_admin` tools):**"]
|
||||
if src.guild_id:
|
||||
id_lines.append(f" - Guild: `{src.guild_id}`")
|
||||
if src.thread_id and src.parent_chat_id:
|
||||
id_lines.append(f" - Parent channel: `{src.parent_chat_id}`")
|
||||
id_lines.append(f" - Thread: `{src.thread_id}` (use as `channel_id` for fetch_messages etc.)")
|
||||
else:
|
||||
id_lines.append(f" - Channel: `{src.chat_id}`")
|
||||
if src.message_id:
|
||||
id_lines.append(f" - Triggering message: `{src.message_id}`")
|
||||
lines.extend(id_lines)
|
||||
else:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"**Platform notes:** You are running inside Discord. "
|
||||
"You do NOT have access to Discord-specific APIs — you cannot search "
|
||||
"channel history, pin messages, manage roles, or list server members. "
|
||||
"Do not promise to perform these actions. If the user asks, explain "
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
|
||||
# Connected platforms
|
||||
platforms_list = ["local (files on this machine)"]
|
||||
|
||||
+34
-3
@@ -113,6 +113,11 @@ def _get_process_start_time(pid: int) -> Optional[int]:
|
||||
return None
|
||||
|
||||
|
||||
def get_process_start_time(pid: int) -> Optional[int]:
|
||||
"""Public wrapper for retrieving a process start time when available."""
|
||||
return _get_process_start_time(pid)
|
||||
|
||||
|
||||
def _read_process_cmdline(pid: int) -> Optional[str]:
|
||||
"""Return the process command line as a space-separated string."""
|
||||
cmdline_path = Path(f"/proc/{pid}/cmdline")
|
||||
@@ -562,17 +567,43 @@ def release_scoped_lock(scope: str, identity: str) -> None:
|
||||
pass
|
||||
|
||||
|
||||
def release_all_scoped_locks() -> int:
|
||||
"""Remove all scoped lock files in the lock directory.
|
||||
def release_all_scoped_locks(
|
||||
*,
|
||||
owner_pid: Optional[int] = None,
|
||||
owner_start_time: Optional[int] = None,
|
||||
) -> int:
|
||||
"""Remove scoped lock files in the lock directory.
|
||||
|
||||
Called during --replace to clean up stale locks left by stopped/killed
|
||||
gateway processes that did not release their locks gracefully.
|
||||
gateway processes that did not release their locks gracefully. When an
|
||||
``owner_pid`` is provided, only lock records belonging to that gateway
|
||||
process are removed. ``owner_start_time`` further narrows the match to
|
||||
protect against PID reuse.
|
||||
|
||||
When no owner is provided, preserves the legacy behavior and removes every
|
||||
scoped lock file in the directory.
|
||||
|
||||
Returns the number of lock files removed.
|
||||
"""
|
||||
lock_dir = _get_lock_dir()
|
||||
removed = 0
|
||||
if lock_dir.exists():
|
||||
for lock_file in lock_dir.glob("*.lock"):
|
||||
if owner_pid is not None:
|
||||
record = _read_json_file(lock_file)
|
||||
if not isinstance(record, dict):
|
||||
continue
|
||||
try:
|
||||
record_pid = int(record.get("pid"))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if record_pid != owner_pid:
|
||||
continue
|
||||
if (
|
||||
owner_start_time is not None
|
||||
and record.get("start_time") != owner_start_time
|
||||
):
|
||||
continue
|
||||
try:
|
||||
lock_file.unlink(missing_ok=True)
|
||||
removed += 1
|
||||
|
||||
@@ -11,5 +11,5 @@ Provides subcommands for:
|
||||
- hermes cron - Manage cron jobs
|
||||
"""
|
||||
|
||||
__version__ = "0.10.0"
|
||||
__release_date__ = "2026.4.16"
|
||||
__version__ = "0.11.0"
|
||||
__release_date__ = "2026.4.23"
|
||||
|
||||
+25
-11
@@ -619,7 +619,25 @@ def _oauth_trace(event: str, *, sequence_id: Optional[str] = None, **fields: Any
|
||||
# =============================================================================
|
||||
|
||||
def _auth_file_path() -> Path:
|
||||
return get_hermes_home() / "auth.json"
|
||||
path = get_hermes_home() / "auth.json"
|
||||
# Seat belt: if pytest is running and HERMES_HOME resolves to the real
|
||||
# user's auth store, refuse rather than silently corrupt it. This catches
|
||||
# tests that forgot to monkeypatch HERMES_HOME, tests invoked without the
|
||||
# hermetic conftest, or sandbox escapes via threads/subprocesses. In
|
||||
# production (no PYTEST_CURRENT_TEST) this is a single dict lookup.
|
||||
if os.environ.get("PYTEST_CURRENT_TEST"):
|
||||
real_home_auth = (Path.home() / ".hermes" / "auth.json").resolve(strict=False)
|
||||
try:
|
||||
resolved = path.resolve(strict=False)
|
||||
except Exception:
|
||||
resolved = path
|
||||
if resolved == real_home_auth:
|
||||
raise RuntimeError(
|
||||
f"Refusing to touch real user auth store during test run: {path}. "
|
||||
"Set HERMES_HOME to a tmp_path in your test fixture, or run "
|
||||
"via scripts/run_tests.sh for hermetic CI-parity env."
|
||||
)
|
||||
return path
|
||||
|
||||
|
||||
def _auth_lock_path() -> Path:
|
||||
@@ -768,20 +786,16 @@ def _save_provider_state(auth_store: Dict[str, Any], provider_id: str, state: Di
|
||||
auth_store["active_provider"] = provider_id
|
||||
|
||||
|
||||
def read_credential_pool() -> Dict[str, Any]:
|
||||
"""Return the entire persisted credential pool."""
|
||||
def read_credential_pool(provider_id: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""Return the persisted credential pool, or one provider slice."""
|
||||
auth_store = _load_auth_store()
|
||||
pool = auth_store.get("credential_pool")
|
||||
if not isinstance(pool, dict):
|
||||
pool = {}
|
||||
return dict(pool)
|
||||
|
||||
|
||||
def read_provider_credentials(provider_id: str) -> List[Dict[str, Any]]:
|
||||
"""Return credential entries for a single provider."""
|
||||
pool = read_credential_pool()
|
||||
entries = pool.get(provider_id)
|
||||
return list(entries) if isinstance(entries, list) else []
|
||||
if provider_id is None:
|
||||
return dict(pool)
|
||||
provider_entries = pool.get(provider_id)
|
||||
return list(provider_entries) if isinstance(provider_entries, list) else []
|
||||
|
||||
|
||||
def write_credential_pool(provider_id: str, entries: List[Dict[str, Any]]) -> Path:
|
||||
|
||||
+54
-1
@@ -238,6 +238,52 @@ def get_git_banner_state(repo_dir: Optional[Path] = None) -> Optional[dict]:
|
||||
return {"upstream": upstream, "local": local, "ahead": max(ahead, 0)}
|
||||
|
||||
|
||||
_RELEASE_URL_BASE = "https://github.com/NousResearch/hermes-agent/releases/tag"
|
||||
_latest_release_cache: Optional[tuple] = None # (tag, url) once resolved
|
||||
|
||||
|
||||
def get_latest_release_tag(repo_dir: Optional[Path] = None) -> Optional[tuple]:
|
||||
"""Return ``(tag, release_url)`` for the latest git tag, or None.
|
||||
|
||||
Local-only — runs ``git describe --tags --abbrev=0`` against the
|
||||
Hermes checkout. Cached per-process. Release URL always points at the
|
||||
canonical NousResearch/hermes-agent repo (forks don't get a link).
|
||||
"""
|
||||
global _latest_release_cache
|
||||
if _latest_release_cache is not None:
|
||||
return _latest_release_cache or None
|
||||
|
||||
repo_dir = repo_dir or _resolve_repo_dir()
|
||||
if repo_dir is None:
|
||||
_latest_release_cache = () # falsy sentinel — skip future lookups
|
||||
return None
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "describe", "--tags", "--abbrev=0"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=3,
|
||||
cwd=str(repo_dir),
|
||||
)
|
||||
except Exception:
|
||||
_latest_release_cache = ()
|
||||
return None
|
||||
|
||||
if result.returncode != 0:
|
||||
_latest_release_cache = ()
|
||||
return None
|
||||
|
||||
tag = (result.stdout or "").strip()
|
||||
if not tag:
|
||||
_latest_release_cache = ()
|
||||
return None
|
||||
|
||||
url = f"{_RELEASE_URL_BASE}/{tag}"
|
||||
_latest_release_cache = (tag, url)
|
||||
return _latest_release_cache
|
||||
|
||||
|
||||
def format_banner_version_label() -> str:
|
||||
"""Return the version label shown in the startup banner title."""
|
||||
base = f"Hermes Agent v{VERSION} ({RELEASE_DATE})"
|
||||
@@ -519,9 +565,16 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
|
||||
agent_name = _skin_branding("agent_name", "Hermes Agent")
|
||||
title_color = _skin_color("banner_title", "#FFD700")
|
||||
border_color = _skin_color("banner_border", "#CD7F32")
|
||||
version_label = format_banner_version_label()
|
||||
release_info = get_latest_release_tag()
|
||||
if release_info:
|
||||
_tag, _url = release_info
|
||||
title_markup = f"[bold {title_color}][link={_url}]{version_label}[/link][/]"
|
||||
else:
|
||||
title_markup = f"[bold {title_color}]{version_label}[/]"
|
||||
outer_panel = Panel(
|
||||
layout_table,
|
||||
title=f"[bold {title_color}]{format_banner_version_label()}[/]",
|
||||
title=title_markup,
|
||||
border_style=border_color,
|
||||
padding=(0, 2),
|
||||
)
|
||||
|
||||
@@ -276,7 +276,7 @@ def _get_ps_exe() -> str | None:
|
||||
global _ps_exe
|
||||
if _ps_exe is False:
|
||||
_ps_exe = _find_powershell()
|
||||
return _ps_exe if isinstance(_ps_exe, str) else None
|
||||
return _ps_exe
|
||||
|
||||
|
||||
def _windows_has_image() -> bool:
|
||||
@@ -387,8 +387,6 @@ def _wayland_save(dest: Path) -> bool:
|
||||
|
||||
except FileNotFoundError:
|
||||
logger.debug("wl-paste not installed — Wayland clipboard unavailable")
|
||||
except ImportError:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.debug("wl-paste clipboard extraction failed: %s", e)
|
||||
dest.unlink(missing_ok=True)
|
||||
@@ -397,17 +395,14 @@ def _wayland_save(dest: Path) -> bool:
|
||||
|
||||
def _convert_to_png(path: Path) -> bool:
|
||||
"""Convert an image file to PNG in-place (requires Pillow or ImageMagick)."""
|
||||
# Try Pillow first (likely installed in the venv)
|
||||
try:
|
||||
from PIL import Image
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Pillow is required for clipboard image conversion. "
|
||||
"Install with: pip install hermes-agent[cli]"
|
||||
) from None
|
||||
try:
|
||||
img = Image.open(path)
|
||||
img.save(path, "PNG")
|
||||
return True
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.debug("Pillow BMP→PNG conversion failed: %s", e)
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ import os
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_CODEX_MODELS: List[str] = [
|
||||
"gpt-5.5",
|
||||
"gpt-5.4-mini",
|
||||
"gpt-5.4",
|
||||
"gpt-5.3-codex",
|
||||
@@ -21,6 +22,7 @@ DEFAULT_CODEX_MODELS: List[str] = [
|
||||
]
|
||||
|
||||
_FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [
|
||||
("gpt-5.5", ("gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex")),
|
||||
("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")),
|
||||
("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")),
|
||||
("gpt-5.3-codex", ("gpt-5.2-codex",)),
|
||||
|
||||
+75
-15
@@ -361,6 +361,15 @@ DEFAULT_CONFIG = {
|
||||
# to finish, then interrupts any remaining runs after the timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
"restart_drain_timeout": 60,
|
||||
# Max app-level retry attempts for API errors (connection drops,
|
||||
# provider timeouts, 5xx, etc.) before the agent surfaces the
|
||||
# failure. The OpenAI SDK already does its own low-level retries
|
||||
# (max_retries=2 default) for transient network errors; this is
|
||||
# the Hermes-level retry loop that wraps the whole call. Lower
|
||||
# this to 1 if you use fallback providers and want fast failover
|
||||
# on flaky primaries; raise it if you prefer to tolerate longer
|
||||
# provider hiccups on a single provider.
|
||||
"api_max_retries": 3,
|
||||
"service_tier": "",
|
||||
# Tool-use enforcement: injects system prompt guidance that tells the
|
||||
# model to actually call tools instead of describing intended actions.
|
||||
@@ -375,7 +384,11 @@ DEFAULT_CONFIG = {
|
||||
# Periodic "still working" notification interval (seconds).
|
||||
# Sends a status message every N seconds so the user knows the
|
||||
# agent hasn't died during long tasks. 0 = disable notifications.
|
||||
"gateway_notify_interval": 600,
|
||||
# Lower values mean faster feedback on slow tasks but more chat
|
||||
# noise; 180s is a compromise that catches spinning weak-model runs
|
||||
# (60+ tool iterations with tiny output) before users assume the
|
||||
# bot is dead and /restart.
|
||||
"gateway_notify_interval": 180,
|
||||
},
|
||||
|
||||
"terminal": {
|
||||
@@ -394,17 +407,23 @@ DEFAULT_CONFIG = {
|
||||
# (bash doesn't source bashrc in non-interactive login mode) or
|
||||
# zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``.
|
||||
# Paths support ``~`` / ``${VAR}``. Missing files are silently
|
||||
# skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the
|
||||
# skipped. When empty, Hermes auto-sources ``~/.profile``,
|
||||
# ``~/.bash_profile``, and ``~/.bashrc`` (in that order) if the
|
||||
# snapshot shell is bash (this is the ``auto_source_bashrc``
|
||||
# behaviour — disable with that key if you want strict login-only
|
||||
# semantics).
|
||||
"shell_init_files": [],
|
||||
# When true (default), Hermes sources ``~/.bashrc`` in the login
|
||||
# shell used to build the environment snapshot. This captures
|
||||
# PATH additions, shell functions, and aliases defined in the
|
||||
# user's bashrc — which a plain ``bash -l -c`` would otherwise
|
||||
# miss because bash skips bashrc in non-interactive login mode.
|
||||
# Turn this off if you have a bashrc that misbehaves when sourced
|
||||
# When true (default), Hermes sources the user's shell rc files
|
||||
# (``~/.profile``, ``~/.bash_profile``, ``~/.bashrc``) in the
|
||||
# login shell used to build the environment snapshot. This
|
||||
# captures PATH additions, shell functions, and aliases — which a
|
||||
# plain ``bash -l -c`` would otherwise miss because bash skips
|
||||
# bashrc in non-interactive login mode, and because a default
|
||||
# Debian/Ubuntu ``~/.bashrc`` short-circuits on non-interactive
|
||||
# sources. ``~/.profile`` and ``~/.bash_profile`` are tried first
|
||||
# because ``n`` / ``nvm`` / ``asdf`` installers typically write
|
||||
# their PATH exports there without an interactivity guard. Turn
|
||||
# this off if your rc files misbehave when sourced
|
||||
# non-interactively (e.g. one that hard-exits on TTY checks).
|
||||
"auto_source_bashrc": True,
|
||||
"docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
|
||||
@@ -447,6 +466,12 @@ DEFAULT_CONFIG = {
|
||||
"record_sessions": False, # Auto-record browser sessions as WebM videos
|
||||
"allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
|
||||
"cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
|
||||
# CDP supervisor — dialog + frame detection via a persistent WebSocket.
|
||||
# Active only when a CDP-capable backend is attached (Browserbase or
|
||||
# local Chrome via /browser connect). See
|
||||
# website/docs/developer-guide/browser-supervisor.md.
|
||||
"dialog_policy": "must_respond", # must_respond | auto_dismiss | auto_accept
|
||||
"dialog_timeout_s": 300, # Safety auto-dismiss after N seconds under must_respond
|
||||
"camofox": {
|
||||
# When true, Hermes sends a stable profile-scoped userId to Camofox
|
||||
# so the server maps it to a persistent Firefox profile automatically.
|
||||
@@ -467,7 +492,27 @@ DEFAULT_CONFIG = {
|
||||
# exceed this are rejected with guidance to use offset+limit.
|
||||
# 100K chars ≈ 25–35K tokens across typical tokenisers.
|
||||
"file_read_max_chars": 100_000,
|
||||
|
||||
|
||||
# Tool-output truncation thresholds. When terminal output or a
|
||||
# single read_file page exceeds these limits, Hermes truncates the
|
||||
# payload sent to the model (keeping head + tail for terminal,
|
||||
# enforcing pagination for read_file). Tuning these trades context
|
||||
# footprint against how much raw output the model can see in one
|
||||
# shot. Ported from anomalyco/opencode PR #23770.
|
||||
#
|
||||
# - max_bytes: terminal_tool output cap, in chars
|
||||
# (default 50_000 ≈ 12-15K tokens).
|
||||
# - max_lines: read_file pagination cap — the maximum `limit`
|
||||
# a single read_file call can request before
|
||||
# being clamped (default 2000).
|
||||
# - max_line_length: per-line cap applied when read_file emits a
|
||||
# line-numbered view (default 2000 chars).
|
||||
"tool_output": {
|
||||
"max_bytes": 50_000,
|
||||
"max_lines": 2000,
|
||||
"max_line_length": 2000,
|
||||
},
|
||||
|
||||
"compression": {
|
||||
"enabled": True,
|
||||
"threshold": 0.50, # compress when context usage exceeds this ratio
|
||||
@@ -720,6 +765,10 @@ DEFAULT_CONFIG = {
|
||||
"inherit_mcp_toolsets": True,
|
||||
"max_iterations": 50, # per-subagent iteration cap (each subagent gets its own budget,
|
||||
# independent of the parent's max_iterations)
|
||||
"child_timeout_seconds": 600, # wall-clock timeout for each child agent (floor 30s,
|
||||
# no ceiling). High-reasoning models on large tasks
|
||||
# (e.g. gpt-5.5 xhigh, opus-4.6) need generous budgets;
|
||||
# raise if children time out before producing output.
|
||||
"reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium",
|
||||
# "low", "minimal", "none" (empty = inherit parent's level)
|
||||
"max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling
|
||||
@@ -754,6 +803,17 @@ DEFAULT_CONFIG = {
|
||||
"inline_shell": False,
|
||||
# Timeout (seconds) for each !`cmd` snippet when inline_shell is on.
|
||||
"inline_shell_timeout": 10,
|
||||
# Run the keyword/pattern security scanner on skills the agent
|
||||
# writes via skill_manage (create/edit/patch). Off by default
|
||||
# because the agent can already execute the same code paths via
|
||||
# terminal() with no gate, so the scan adds friction (blocks
|
||||
# skills that mention risky keywords in prose) without meaningful
|
||||
# security. Turn on if you want the belt-and-suspenders — a
|
||||
# dangerous verdict will then surface as a tool error to the
|
||||
# agent, which can retry with the flagged content removed.
|
||||
# External hub installs (trusted/community sources) are always
|
||||
# scanned regardless of this setting.
|
||||
"guard_agent_created": False,
|
||||
},
|
||||
|
||||
# Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
|
||||
@@ -773,7 +833,7 @@ DEFAULT_CONFIG = {
|
||||
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
|
||||
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing
|
||||
"channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads)
|
||||
# discord_server tool: restrict which actions the agent may call.
|
||||
# discord / discord_admin tools: restrict which actions the agent may call.
|
||||
# Default (empty) = all actions allowed (subject to bot privileged intents).
|
||||
# Accepts comma-separated string ("list_guilds,list_channels,fetch_messages")
|
||||
# or YAML list. Unknown names are dropped with a warning at load time.
|
||||
@@ -1274,7 +1334,7 @@ OPTIONAL_ENV_VARS = {
|
||||
"advanced": True,
|
||||
},
|
||||
"XIAOMI_API_KEY": {
|
||||
"description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
|
||||
"description": "Xiaomi MiMo API key for MiMo models (mimo-v2.5-pro, mimo-v2.5, mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
|
||||
"prompt": "Xiaomi MiMo API Key",
|
||||
"url": "https://platform.xiaomimimo.com",
|
||||
"password": True,
|
||||
@@ -1904,7 +1964,7 @@ def get_missing_config_fields() -> List[Dict[str, Any]]:
|
||||
config = load_config()
|
||||
missing = []
|
||||
|
||||
def _check(defaults: Dict[str, Any], current: Dict[str, Any], prefix: str = ""):
|
||||
def _check(defaults: dict, current: dict, prefix: str = ""):
|
||||
for key, default_value in defaults.items():
|
||||
if key.startswith('_'):
|
||||
continue
|
||||
@@ -2146,8 +2206,8 @@ def check_config_version() -> Tuple[int, int]:
|
||||
Returns (current_version, latest_version).
|
||||
"""
|
||||
config = load_config()
|
||||
current = int(config.get("_config_version", 0))
|
||||
latest = int(DEFAULT_CONFIG.get("_config_version", 1))
|
||||
current = config.get("_config_version", 0)
|
||||
latest = DEFAULT_CONFIG.get("_config_version", 1)
|
||||
return current, latest
|
||||
|
||||
|
||||
@@ -2867,7 +2927,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
|
||||
return results
|
||||
|
||||
|
||||
def _deep_merge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
|
||||
def _deep_merge(base: dict, override: dict) -> dict:
|
||||
"""Recursively merge *override* into *base*, preserving nested defaults.
|
||||
|
||||
Keys in *override* take precedence. If both values are dicts the merge
|
||||
|
||||
@@ -18,7 +18,7 @@ import os
|
||||
import sys
|
||||
import time
|
||||
import logging
|
||||
from typing import Any, Callable, Optional, Tuple
|
||||
from typing import Optional, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
@@ -108,7 +108,7 @@ def wait_for_registration_success(
|
||||
device_code: str,
|
||||
interval: int = 3,
|
||||
expires_in: int = 7200,
|
||||
on_waiting: Optional[Callable[..., Any]] = None,
|
||||
on_waiting: Optional[callable] = None,
|
||||
) -> Tuple[str, str]:
|
||||
"""Block until the registration succeeds or times out.
|
||||
|
||||
|
||||
+62
-7
@@ -175,6 +175,60 @@ def _request_gateway_self_restart(pid: int) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool:
    """Ask a gateway process to restart via SIGUSR1 and wait for it to exit.

    SIGUSR1 is wired in gateway/run.py to ``request_restart(via_service=True)``
    which drains in-flight agent runs (up to ``agent.restart_drain_timeout``
    seconds), then exits with code 75. Both systemd (``Restart=on-failure``
    + ``RestartForceExitStatus=75``) and launchd (``KeepAlive.SuccessfulExit
    = false``) relaunch the process after the graceful exit.

    This is the drain-aware alternative to ``systemctl restart`` / ``SIGTERM``,
    which SIGKILL in-flight agents after a short timeout.

    Args:
        pid: Gateway process PID (systemd MainPID, launchd PID, or bare
            process PID).
        drain_timeout: Seconds to wait for the process to exit after the
            signal is sent; values below 1.0 are raised to 1.0. Should be
            slightly larger than the gateway's
            ``agent.restart_drain_timeout`` so the drain loop can finish.

    Returns:
        True if the process was already gone, or was signalled and
        disappeared before the deadline. False if the platform has no
        SIGUSR1, *pid* is not positive, the signal could not be delivered,
        or the process was still present at the deadline (caller should
        fall back to a harder restart path).
    """
    usr1 = getattr(signal, "SIGUSR1", None)
    if usr1 is None or pid <= 0:
        return False

    try:
        os.kill(pid, usr1)
    except ProcessLookupError:
        # Already gone — nothing to drain.
        return True
    except OSError:
        # Includes PermissionError: we are not allowed to signal it.
        return False

    import time as _time

    def _has_exited() -> bool:
        """Probe liveness with signal 0; True only once the PID is gone."""
        try:
            os.kill(pid, 0)
        except ProcessLookupError:
            return True
        except PermissionError:
            # Process still exists but we can't signal it. Treat as alive
            # so the caller falls back.
            return False
        return False

    give_up_at = _time.monotonic() + max(drain_timeout, 1.0)
    while _time.monotonic() < give_up_at:
        if _has_exited():
            return True
        _time.sleep(0.5)

    # Drain didn't finish in time.
    return False
|
||||
|
||||
|
||||
def _append_unique_pid(pids: list[int], pid: int | None, exclude_pids: set[int]) -> None:
|
||||
if pid is None or pid <= 0:
|
||||
return
|
||||
@@ -1469,7 +1523,14 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
|
||||
path_entries.append(resolved_node_dir)
|
||||
|
||||
common_bin_paths = ["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"]
|
||||
restart_timeout = max(60, int(_get_restart_drain_timeout() or 0))
|
||||
# systemd's TimeoutStopSec must exceed the gateway's drain_timeout so
|
||||
# there's budget left for post-interrupt cleanup (tool subprocess kill,
|
||||
# adapter disconnect, session DB close) before systemd escalates to
|
||||
# SIGKILL on the cgroup — otherwise bash/sleep tool-call children left
|
||||
# by a force-interrupted agent get reaped by systemd instead of us
|
||||
# (#8202). 30s of headroom covers the worst case we've observed.
|
||||
_drain_timeout = int(_get_restart_drain_timeout() or 0)
|
||||
restart_timeout = max(60, _drain_timeout) + 30
|
||||
|
||||
if system:
|
||||
username, group_name, home_dir = _system_service_identity(run_as_user)
|
||||
@@ -3047,12 +3108,6 @@ def _setup_wecom():
|
||||
print_success("💬 WeCom configured!")
|
||||
|
||||
|
||||
def _setup_wecom_callback():
    """Run the standard platform setup for the WeCom Callback (self-built app).

    Picks the first ``_PLATFORMS`` entry whose ``"key"`` is
    ``"wecom_callback"`` and passes it to ``_setup_standard_platform``.
    Raises ``StopIteration`` if no such entry exists.
    """
    candidates = (entry for entry in _PLATFORMS if entry["key"] == "wecom_callback")
    _setup_standard_platform(next(candidates))
|
||||
|
||||
|
||||
def _is_service_installed() -> bool:
|
||||
"""Check if the gateway is installed as a system service."""
|
||||
if supports_systemd_services():
|
||||
|
||||
+144
-47
@@ -3984,7 +3984,18 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||
pass
|
||||
|
||||
if mdev_models:
|
||||
model_list = mdev_models
|
||||
# Merge models.dev with curated list so newly added models
|
||||
# (not yet in models.dev) still appear in the picker.
|
||||
if curated:
|
||||
seen = {m.lower() for m in mdev_models}
|
||||
merged = list(mdev_models)
|
||||
for m in curated:
|
||||
if m.lower() not in seen:
|
||||
merged.append(m)
|
||||
seen.add(m.lower())
|
||||
model_list = merged
|
||||
else:
|
||||
model_list = mdev_models
|
||||
print(f" Found {len(model_list)} model(s) from models.dev registry")
|
||||
elif curated and len(curated) >= 8:
|
||||
# Curated list is substantial — use it directly, skip live probe
|
||||
@@ -5853,12 +5864,15 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
# Write exit code *before* the gateway restart attempt.
|
||||
# When running as ``hermes update --gateway`` (spawned by the gateway's
|
||||
# /update command), this process lives inside the gateway's systemd
|
||||
# cgroup. ``systemctl restart hermes-gateway`` kills everything in the
|
||||
# cgroup (KillMode=mixed → SIGKILL to remaining processes), including
|
||||
# us and the wrapping bash shell. The shell never reaches its
|
||||
# ``printf $status > .update_exit_code`` epilogue, so the exit-code
|
||||
# marker file is never created. The new gateway's update watcher then
|
||||
# polls for 30 minutes and sends a spurious timeout message.
|
||||
# cgroup. A graceful SIGUSR1 restart keeps the drain loop alive long
|
||||
# enough for the exit-code marker to be written below, but the
|
||||
# fallback ``systemctl restart`` path (see below) kills everything in
|
||||
# the cgroup (KillMode=mixed → SIGKILL to remaining processes),
|
||||
# including us and the wrapping bash shell. The shell never reaches
|
||||
# its ``printf $status > .update_exit_code`` epilogue, so the
|
||||
# exit-code marker file would never be created. The new gateway's
|
||||
# update watcher would then poll for 30 minutes and send a spurious
|
||||
# timeout message.
|
||||
#
|
||||
# Writing the marker here — after git pull + pip install succeed but
|
||||
# before we attempt the restart — ensures the new gateway sees it
|
||||
@@ -5880,9 +5894,37 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
_ensure_user_systemd_env,
|
||||
find_gateway_pids,
|
||||
_get_service_pids,
|
||||
_graceful_restart_via_sigusr1,
|
||||
)
|
||||
import signal as _signal
|
||||
|
||||
# Drain budget for graceful SIGUSR1 restarts. The gateway drains
|
||||
# for up to ``agent.restart_drain_timeout`` (default 60s) before
|
||||
# exiting with code 75; we wait slightly longer so the drain
|
||||
# completes before we fall back to a hard restart. On older
|
||||
# systemd units without SIGUSR1 wiring this wait just times out
|
||||
# and we fall back to ``systemctl restart`` (the old behaviour).
|
||||
try:
|
||||
from hermes_constants import (
|
||||
DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT as _DEFAULT_DRAIN,
|
||||
)
|
||||
except Exception:
|
||||
_DEFAULT_DRAIN = 60.0
|
||||
_cfg_drain = None
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
_cfg_agent = (load_config().get("agent") or {})
|
||||
_cfg_drain = _cfg_agent.get("restart_drain_timeout")
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
_drain_budget = float(_cfg_drain) if _cfg_drain is not None else float(_DEFAULT_DRAIN)
|
||||
except (TypeError, ValueError):
|
||||
_drain_budget = float(_DEFAULT_DRAIN)
|
||||
# Add a 15s margin so the drain loop + final exit finish before
|
||||
# we escalate to ``systemctl restart`` / SIGTERM.
|
||||
_drain_budget = max(_drain_budget, 30.0) + 15.0
|
||||
|
||||
restarted_services = []
|
||||
killed_pids = set()
|
||||
|
||||
@@ -5929,59 +5971,114 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if check.stdout.strip() == "active":
|
||||
restart = subprocess.run(
|
||||
scope_cmd + ["restart", svc_name],
|
||||
if check.stdout.strip() != "active":
|
||||
continue
|
||||
|
||||
# Prefer a graceful SIGUSR1 restart so in-flight
|
||||
# agent runs drain instead of being SIGKILLed.
|
||||
# The gateway's SIGUSR1 handler calls
|
||||
# request_restart(via_service=True) → drain →
|
||||
# exit(75); systemd's Restart=on-failure (and
|
||||
# RestartForceExitStatus=75) respawns the unit.
|
||||
_main_pid = 0
|
||||
try:
|
||||
_show = subprocess.run(
|
||||
scope_cmd + [
|
||||
"show", svc_name,
|
||||
"--property=MainPID", "--value",
|
||||
],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
_main_pid = int((_show.stdout or "").strip() or 0)
|
||||
except (ValueError, subprocess.TimeoutExpired, FileNotFoundError):
|
||||
_main_pid = 0
|
||||
|
||||
_graceful_ok = False
|
||||
if _main_pid > 0:
|
||||
print(
|
||||
f" → {svc_name}: draining (up to {int(_drain_budget)}s)..."
|
||||
)
|
||||
_graceful_ok = _graceful_restart_via_sigusr1(
|
||||
_main_pid, drain_timeout=_drain_budget,
|
||||
)
|
||||
|
||||
if _graceful_ok:
|
||||
# Gateway exited 75; systemd should relaunch
|
||||
# via Restart=on-failure. Verify the new
|
||||
# process came up.
|
||||
_time.sleep(3)
|
||||
verify = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
if verify.stdout.strip() == "active":
|
||||
restarted_services.append(svc_name)
|
||||
continue
|
||||
# Process exited but wasn't respawned (older
|
||||
# unit without Restart=on-failure or
|
||||
# RestartForceExitStatus=75). Fall through
|
||||
# to systemctl start/restart.
|
||||
print(
|
||||
f" ⚠ {svc_name} drained but didn't relaunch — forcing restart"
|
||||
)
|
||||
|
||||
# Fallback: blunt systemctl restart. This is
|
||||
# what the old code always did; we get here only
|
||||
# when the graceful path failed (unit missing
|
||||
# SIGUSR1 wiring, drain exceeded the budget,
|
||||
# restart-policy mismatch).
|
||||
restart = subprocess.run(
|
||||
scope_cmd + ["restart", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=15,
|
||||
)
|
||||
if restart.returncode == 0:
|
||||
# Verify the service actually survived the
|
||||
# restart. systemctl restart returns 0 even
|
||||
# if the new process crashes immediately.
|
||||
_time.sleep(3)
|
||||
verify = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=15,
|
||||
timeout=5,
|
||||
)
|
||||
if restart.returncode == 0:
|
||||
# Verify the service actually survived the
|
||||
# restart. systemctl restart returns 0 even
|
||||
# if the new process crashes immediately.
|
||||
if verify.stdout.strip() == "active":
|
||||
restarted_services.append(svc_name)
|
||||
else:
|
||||
# Retry once — transient startup failures
|
||||
# (stale module cache, import race) often
|
||||
# resolve on the second attempt.
|
||||
print(
|
||||
f" ⚠ {svc_name} died after restart, retrying..."
|
||||
)
|
||||
retry = subprocess.run(
|
||||
scope_cmd + ["restart", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=15,
|
||||
)
|
||||
_time.sleep(3)
|
||||
verify = subprocess.run(
|
||||
verify2 = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if verify.stdout.strip() == "active":
|
||||
if verify2.stdout.strip() == "active":
|
||||
restarted_services.append(svc_name)
|
||||
print(f" ✓ {svc_name} recovered on retry")
|
||||
else:
|
||||
# Retry once — transient startup failures
|
||||
# (stale module cache, import race) often
|
||||
# resolve on the second attempt.
|
||||
print(
|
||||
f" ⚠ {svc_name} died after restart, retrying..."
|
||||
f" ✗ {svc_name} failed to stay running after restart.\n"
|
||||
f" Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n"
|
||||
f" Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}"
|
||||
)
|
||||
retry = subprocess.run(
|
||||
scope_cmd + ["restart", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=15,
|
||||
)
|
||||
_time.sleep(3)
|
||||
verify2 = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if verify2.stdout.strip() == "active":
|
||||
restarted_services.append(svc_name)
|
||||
print(f" ✓ {svc_name} recovered on retry")
|
||||
else:
|
||||
print(
|
||||
f" ✗ {svc_name} failed to stay running after restart.\n"
|
||||
f" Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n"
|
||||
f" Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}"
|
||||
)
|
||||
else:
|
||||
print(
|
||||
f" ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}"
|
||||
)
|
||||
else:
|
||||
print(
|
||||
f" ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}"
|
||||
)
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
pass
|
||||
|
||||
|
||||
+142
-19
@@ -304,6 +304,113 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
|
||||
# Alias resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _model_sort_key(model_id: str, prefix: str) -> tuple:
    """Sort key for model version preference.

    Parses whatever follows the family *prefix* in *model_id* into a run of
    version numbers and a trailing suffix, and returns a flat tuple that
    sorts higher versions first. The suffix is the tiebreaker: ``pro``,
    ``max``, ``plus`` and ``turbo`` rank 0 (preferred), everything else
    (including no suffix) ranks 1, and the suffix text itself breaks any
    remaining tie alphabetically.

    A ``v``/``V`` is treated as a version marker only when a digit follows
    it, so suffixes such as ``vision`` keep their leading letter.

    Examples (with prefix ``"mimo"``)::

        mimo-v2.5-pro  → (-2.5, 0, 'pro')   # highest version wins
        mimo-v2.5      → (-2.5, 1, '')      # no suffix = lower than pro
        mimo-v2-pro    → (-2.0, 0, 'pro')
        mimo-v2-omni   → (-2.0, 1, 'omni')
        mimo-v2-flash  → (-2.0, 1, 'flash')

    Args:
        model_id: Full model identifier; assumed to start with *prefix*
            (only ``len(prefix)`` characters are dropped, no check is made).
        prefix: Family prefix, e.g. ``"mimo"`` or ``"vendor/family"``.

    Returns:
        ``(-v1, -v2, ..., suffix_rank, suffix)`` — one negated float per
        version component, then the suffix rank and lower-cased suffix.

    NOTE(review): keys are flat and of variable length, so two keys with a
    different number of version components can line a suffix string up
    against a number (e.g. ``(-2.0, 0, 'pro')`` vs ``(-2.0, -0.0, 0, 'pro')``)
    and raise ``TypeError`` when compared. Only reachable when an extra
    version component is exactly 0; left as-is to keep key values stable.
    """
    # Strip the prefix (and optional "/" separator for aggregator slugs).
    rest = model_id[len(prefix):]
    if rest.startswith("/"):
        rest = rest[1:]
    rest = rest.lstrip("-").strip()

    # "v2.5-pro" → versions [2.5], suffix "pro"
    # "omni"     → versions [],    suffix "omni"
    # States: seeking (before/between tokens) → in_version → in_suffix.
    nums: list[float] = []
    num_buf = ""
    suffix_buf = ""
    state = "seeking"

    def _flush_number() -> None:
        """Move the buffered number (if any) into ``nums`` and clear it."""
        nonlocal num_buf
        if not num_buf:
            return
        try:
            # Trailing dot is tolerated: "5." → 5.0
            nums.append(float(num_buf.rstrip(".")))
        except ValueError:
            pass  # e.g. a lone "." — not a number, drop it
        num_buf = ""

    for idx, ch in enumerate(rest):
        if state == "in_suffix":
            # Once the suffix starts, everything else belongs to it.
            suffix_buf += ch
        elif state == "in_version":
            if ch.isdigit():
                num_buf += ch
            elif ch == ".":
                if "." in num_buf:
                    # Second dot — finish this number, start a new component.
                    _flush_number()
                else:
                    num_buf += ch
            elif ch in "-_":
                _flush_number()
                state = "seeking"
            else:
                _flush_number()
                state = "in_suffix"
                suffix_buf += ch
        elif ch.isdigit():
            state = "in_version"
            num_buf = ch
        elif ch in "vV" and rest[idx + 1:idx + 2].isdigit():
            # "v" directly before a digit is a version marker; otherwise it
            # is the first letter of a suffix ("vision", "vl", ...).
            state = "in_version"
        elif ch not in "-_.":
            state = "in_suffix"
            suffix_buf += ch
        # else: separator before any content — skip it

    _flush_number()

    suffix = suffix_buf.lower().strip("-_.").strip()

    # Suffix quality ranking: pro/max/plus/turbo > everything else.
    # Lower number = preferred.
    suffix_rank = 0 if suffix in ("pro", "max", "plus", "turbo") else 1

    # Negate versions so higher → sorts first.
    return tuple(-n for n in nums) + (suffix_rank, suffix)
|
||||
|
||||
|
||||
def resolve_alias(
|
||||
raw_input: str,
|
||||
current_provider: str,
|
||||
@@ -311,9 +418,9 @@ def resolve_alias(
|
||||
"""Resolve a short alias against the current provider's catalog.
|
||||
|
||||
Looks up *raw_input* in :data:`MODEL_ALIASES`, then searches the
|
||||
current provider's models.dev catalog for the first model whose ID
|
||||
starts with ``vendor/family`` (or just ``family`` for non-aggregator
|
||||
providers).
|
||||
current provider's models.dev catalog for the model whose ID starts
|
||||
with ``vendor/family`` (or just ``family`` for non-aggregator
|
||||
providers) and has the **highest version**.
|
||||
|
||||
Returns:
|
||||
``(provider, resolved_model_id, alias_name)`` if a match is
|
||||
@@ -341,28 +448,44 @@ def resolve_alias(
|
||||
|
||||
vendor, family = identity
|
||||
|
||||
# Search the provider's catalog from models.dev
|
||||
# Build catalog from models.dev, then merge in static _PROVIDER_MODELS
|
||||
# entries that models.dev may be missing (e.g. newly added models not
|
||||
# yet synced to the registry).
|
||||
catalog = list_provider_models(current_provider)
|
||||
if not catalog:
|
||||
return None
|
||||
try:
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
static = _PROVIDER_MODELS.get(current_provider, [])
|
||||
if static:
|
||||
seen = {m.lower() for m in catalog}
|
||||
for m in static:
|
||||
if m.lower() not in seen:
|
||||
catalog.append(m)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# For aggregators, models are vendor/model-name format
|
||||
aggregator = is_aggregator(current_provider)
|
||||
|
||||
for model_id in catalog:
|
||||
mid_lower = model_id.lower()
|
||||
if aggregator:
|
||||
# Match vendor/family prefix -- e.g. "anthropic/claude-sonnet"
|
||||
prefix = f"{vendor}/{family}".lower()
|
||||
if mid_lower.startswith(prefix):
|
||||
return (current_provider, model_id, key)
|
||||
else:
|
||||
# Non-aggregator: bare names -- e.g. "claude-sonnet-4-6"
|
||||
family_lower = family.lower()
|
||||
if mid_lower.startswith(family_lower):
|
||||
return (current_provider, model_id, key)
|
||||
if aggregator:
|
||||
prefix = f"{vendor}/{family}".lower()
|
||||
matches = [
|
||||
mid for mid in catalog
|
||||
if mid.lower().startswith(prefix)
|
||||
]
|
||||
else:
|
||||
family_lower = family.lower()
|
||||
matches = [
|
||||
mid for mid in catalog
|
||||
if mid.lower().startswith(family_lower)
|
||||
]
|
||||
|
||||
return None
|
||||
if not matches:
|
||||
return None
|
||||
|
||||
# Sort by version descending — prefer the latest/highest version
|
||||
prefix_for_sort = f"{vendor}/{family}" if aggregator else family
|
||||
matches.sort(key=lambda m: _model_sort_key(m, prefix_for_sort))
|
||||
return (current_provider, matches[0], key)
|
||||
|
||||
|
||||
def get_authenticated_provider_slugs(
|
||||
|
||||
+24
-2
@@ -33,6 +33,8 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
|
||||
# (model_id, display description shown in menus)
|
||||
OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("moonshotai/kimi-k2.6", "recommended"),
|
||||
("deepseek/deepseek-v4-pro", ""),
|
||||
("deepseek/deepseek-v4-flash", ""),
|
||||
("anthropic/claude-opus-4.7", ""),
|
||||
("anthropic/claude-opus-4.6", ""),
|
||||
("anthropic/claude-sonnet-4.6", ""),
|
||||
@@ -109,6 +111,8 @@ def _codex_curated_models() -> list[str]:
|
||||
_PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"nous": [
|
||||
"moonshotai/kimi-k2.6",
|
||||
"deepseek/deepseek-v4-pro",
|
||||
"deepseek/deepseek-v4-flash",
|
||||
"xiaomi/mimo-v2.5-pro",
|
||||
"xiaomi/mimo-v2.5",
|
||||
"anthropic/claude-opus-4.7",
|
||||
@@ -246,10 +250,14 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"claude-haiku-4-5-20251001",
|
||||
],
|
||||
"deepseek": [
|
||||
"deepseek-v4-pro",
|
||||
"deepseek-v4-flash",
|
||||
"deepseek-chat",
|
||||
"deepseek-reasoner",
|
||||
],
|
||||
"xiaomi": [
|
||||
"mimo-v2.5-pro",
|
||||
"mimo-v2.5",
|
||||
"mimo-v2-pro",
|
||||
"mimo-v2-omni",
|
||||
"mimo-v2-flash",
|
||||
@@ -301,6 +309,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"kimi-k2.5",
|
||||
"glm-5.1",
|
||||
"glm-5",
|
||||
"mimo-v2.5-pro",
|
||||
"mimo-v2.5",
|
||||
"mimo-v2-pro",
|
||||
"mimo-v2-omni",
|
||||
"minimax-m2.7",
|
||||
@@ -692,7 +702,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
|
||||
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
|
||||
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
|
||||
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
|
||||
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
|
||||
ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
|
||||
ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
|
||||
ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
|
||||
@@ -1674,7 +1684,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
|
||||
if normalized == "openai-codex":
|
||||
from hermes_cli.codex_models import get_codex_model_ids
|
||||
|
||||
return get_codex_model_ids()
|
||||
# Pass the live OAuth access token so the picker matches whatever
|
||||
# ChatGPT lists for this account right now (new models appear without
|
||||
# a Hermes release). Falls back to the hardcoded catalog if no token
|
||||
# or the endpoint is unreachable.
|
||||
access_token = None
|
||||
try:
|
||||
from hermes_cli.auth import resolve_codex_runtime_credentials
|
||||
|
||||
creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
|
||||
access_token = creds.get("api_key")
|
||||
except Exception:
|
||||
access_token = None
|
||||
return get_codex_model_ids(access_token=access_token)
|
||||
if normalized in {"copilot", "copilot-acp"}:
|
||||
try:
|
||||
live = _fetch_github_models(_resolve_copilot_catalog_api_key())
|
||||
|
||||
@@ -38,6 +38,7 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
|
||||
("qqbot", PlatformInfo(label="💬 QQBot", default_toolset="hermes-qqbot")),
|
||||
("webhook", PlatformInfo(label="🔗 Webhook", default_toolset="hermes-webhook")),
|
||||
("api_server", PlatformInfo(label="🌐 API Server", default_toolset="hermes-api-server")),
|
||||
("cron", PlatformInfo(label="⏰ Cron", default_toolset="hermes-cron")),
|
||||
])
|
||||
|
||||
|
||||
|
||||
+1
-1
@@ -103,7 +103,7 @@ _DEFAULT_PROVIDER_MODELS = {
|
||||
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
|
||||
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
|
||||
"opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
|
||||
"opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"],
|
||||
"opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2.5-pro", "mimo-v2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", "qwen3.6-plus", "qwen3.5-plus"],
|
||||
"huggingface": [
|
||||
"Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
|
||||
"Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
|
||||
|
||||
@@ -289,6 +289,7 @@ TIPS = [
|
||||
"When a provider returns HTTP 402 (payment required), the auxiliary client auto-falls back to the next one.",
|
||||
"agent.tool_use_enforcement steers models that describe actions instead of calling tools — auto for GPT/Codex.",
|
||||
"agent.restart_drain_timeout (default 60s) lets running agents finish before a gateway restart takes effect.",
|
||||
"agent.api_max_retries (default 3) controls how many times the agent retries a failed API call before surfacing the error — lower it for fast fallback.",
|
||||
"The gateway caches AIAgent instances per session — destroying this cache breaks Anthropic prompt caching.",
|
||||
"Any website can expose skills via /.well-known/skills/index.json — the skills hub discovers them automatically.",
|
||||
"The skills audit log at ~/.hermes/skills/.hub/audit.log tracks every install and removal operation.",
|
||||
|
||||
+48
-20
@@ -13,7 +13,7 @@ import json as _json
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, TypedDict
|
||||
from typing import Dict, List, Optional, Set
|
||||
|
||||
|
||||
from hermes_cli.config import (
|
||||
@@ -67,12 +67,13 @@ CONFIGURABLE_TOOLSETS = [
|
||||
("messaging", "📨 Cross-Platform Messaging", "send_message"),
|
||||
("rl", "🧪 RL Training", "Tinker-Atropos training tools"),
|
||||
("homeassistant", "🏠 Home Assistant", "smart home device control"),
|
||||
("discord_admin", "🛡️ Discord Server Admin", "list channels/roles, pin, assign roles"),
|
||||
]
|
||||
|
||||
# Toolsets that are OFF by default for new installs.
|
||||
# They're still in _HERMES_CORE_TOOLS (available at runtime if enabled),
|
||||
# but the setup checklist won't pre-select them for first-time users.
|
||||
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl"}
|
||||
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "discord_admin"}
|
||||
|
||||
|
||||
def _get_effective_configurable_toolsets():
|
||||
@@ -549,7 +550,7 @@ def _get_platform_tools(
|
||||
include_default_mcp_servers: bool = True,
|
||||
) -> Set[str]:
|
||||
"""Resolve which individual toolset names are enabled for a platform."""
|
||||
from toolsets import resolve_toolset
|
||||
from toolsets import resolve_toolset, TOOLSETS
|
||||
|
||||
platform_toolsets = config.get("platform_toolsets") or {}
|
||||
toolset_names = platform_toolsets.get(platform)
|
||||
@@ -563,6 +564,8 @@ def _get_platform_tools(
|
||||
toolset_names = [str(ts) for ts in toolset_names]
|
||||
|
||||
configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
|
||||
plugin_ts_keys = _get_plugin_toolset_keys()
|
||||
platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
|
||||
|
||||
# If the saved list contains any configurable keys directly, the user
|
||||
# has explicitly configured this platform — use direct membership.
|
||||
@@ -585,16 +588,46 @@ def _get_platform_tools(
|
||||
ts_tools = set(resolve_toolset(ts_key))
|
||||
if ts_tools and ts_tools.issubset(all_tool_names):
|
||||
enabled_toolsets.add(ts_key)
|
||||
|
||||
default_off = set(_DEFAULT_OFF_TOOLSETS)
|
||||
if platform in default_off:
|
||||
default_off.remove(platform)
|
||||
enabled_toolsets -= default_off
|
||||
|
||||
# Recover non-configurable platform toolsets (e.g. discord, feishu_doc,
|
||||
# feishu_drive). These are part of the platform's default composite but
|
||||
# absent from CONFIGURABLE_TOOLSETS, so they can't appear in the TUI
|
||||
# checklist or in a user-saved config. Must run in BOTH branches —
|
||||
# otherwise saving via `hermes tools` (which flips has_explicit_config
|
||||
# to True) silently drops them.
|
||||
platform_tool_universe = set(resolve_toolset(PLATFORMS[platform]["default_toolset"]))
|
||||
configurable_tool_universe = set()
|
||||
for ck in configurable_keys:
|
||||
configurable_tool_universe.update(resolve_toolset(ck))
|
||||
claimed = set()
|
||||
for ts_key in enabled_toolsets:
|
||||
claimed.update(resolve_toolset(ts_key))
|
||||
skip = configurable_keys | plugin_ts_keys | platform_default_keys
|
||||
skip |= {k for k in TOOLSETS if k.startswith("hermes-")}
|
||||
skip |= set(_DEFAULT_OFF_TOOLSETS) - {platform}
|
||||
for ts_key, ts_def in TOOLSETS.items():
|
||||
if ts_key in skip:
|
||||
continue
|
||||
if ts_def.get("includes"):
|
||||
continue
|
||||
ts_tools = set(resolve_toolset(ts_key))
|
||||
if not ts_tools or not ts_tools.issubset(platform_tool_universe):
|
||||
continue
|
||||
if ts_tools.issubset(configurable_tool_universe):
|
||||
continue
|
||||
if not ts_tools.issubset(claimed):
|
||||
enabled_toolsets.add(ts_key)
|
||||
claimed.update(ts_tools)
|
||||
|
||||
# Plugin toolsets: enabled by default unless explicitly disabled.
|
||||
# A plugin toolset is "known" for a platform once `hermes tools`
|
||||
# has been saved for that platform (tracked via known_plugin_toolsets).
|
||||
# Unknown plugins default to enabled; known-but-absent = disabled.
|
||||
plugin_ts_keys = _get_plugin_toolset_keys()
|
||||
if plugin_ts_keys:
|
||||
known_map = config.get("known_plugin_toolsets", {})
|
||||
known_for_platform = set(known_map.get(platform, []))
|
||||
@@ -609,7 +642,6 @@ def _get_platform_tools(
|
||||
|
||||
# Preserve any explicit non-configurable toolset entries (for example,
|
||||
# custom toolsets or MCP server names saved in platform_toolsets).
|
||||
platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
|
||||
explicit_passthrough = {
|
||||
ts
|
||||
for ts in toolset_names
|
||||
@@ -669,6 +701,7 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
|
||||
existing_toolsets = config.get("platform_toolsets", {}).get(platform, [])
|
||||
if not isinstance(existing_toolsets, list):
|
||||
existing_toolsets = []
|
||||
existing_toolsets = [str(ts) for ts in existing_toolsets]
|
||||
|
||||
# Preserve any entries that are NOT configurable toolsets and NOT platform
|
||||
# defaults (i.e. only MCP server names should be preserved)
|
||||
@@ -676,6 +709,8 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
|
||||
entry for entry in existing_toolsets
|
||||
if entry not in configurable_keys and entry not in platform_default_keys
|
||||
}
|
||||
if "no_mcp" not in enabled_toolset_keys:
|
||||
preserved_entries.discard("no_mcp")
|
||||
|
||||
# Merge preserved entries with new enabled toolsets
|
||||
config["platform_toolsets"][platform] = sorted(enabled_toolset_keys | preserved_entries)
|
||||
@@ -748,7 +783,7 @@ def _estimate_tool_tokens() -> Dict[str, int]:
|
||||
OpenAI-format tool schema. Triggers tool discovery on first call,
|
||||
then caches the result for the rest of the process.
|
||||
|
||||
Returns an empty dict when the registry is unavailable.
|
||||
Returns an empty dict when tiktoken or the registry is unavailable.
|
||||
"""
|
||||
global _tool_token_cache
|
||||
if _tool_token_cache is not None:
|
||||
@@ -756,12 +791,11 @@ def _estimate_tool_tokens() -> Dict[str, int]:
|
||||
|
||||
try:
|
||||
import tiktoken
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"tiktoken is required for tool token estimation. "
|
||||
"Install with: pip install hermes-agent[cli]"
|
||||
) from None
|
||||
enc = tiktoken.get_encoding("cl100k_base")
|
||||
enc = tiktoken.get_encoding("cl100k_base")
|
||||
except Exception:
|
||||
logger.debug("tiktoken unavailable; skipping tool token estimation")
|
||||
_tool_token_cache = {}
|
||||
return _tool_token_cache
|
||||
|
||||
try:
|
||||
# Trigger full tool discovery (imports all tool modules).
|
||||
@@ -1099,19 +1133,13 @@ def _detect_active_provider_index(providers: list, config: dict) -> int:
|
||||
# right catalog at picker time.
|
||||
|
||||
|
||||
class _ImagegenBackend(TypedDict):
|
||||
display: str
|
||||
config_key: str
|
||||
catalog_fn: Callable[[], Tuple[Dict[str, Dict[str, Any]], str]]
|
||||
|
||||
|
||||
def _fal_model_catalog() -> Tuple[Dict[str, Dict[str, Any]], str]:
|
||||
def _fal_model_catalog():
|
||||
"""Lazy-load the FAL model catalog from the tool module."""
|
||||
from tools.image_generation_tool import FAL_MODELS, DEFAULT_MODEL
|
||||
return FAL_MODELS, DEFAULT_MODEL
|
||||
|
||||
|
||||
IMAGEGEN_BACKENDS: Dict[str, _ImagegenBackend] = {
|
||||
IMAGEGEN_BACKENDS = {
|
||||
"fal": {
|
||||
"display": "FAL.ai",
|
||||
"config_key": "image_gen",
|
||||
|
||||
@@ -0,0 +1,548 @@
|
||||
"""Process-wide voice recording + TTS API for the TUI gateway.
|
||||
|
||||
Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool``
|
||||
(text-to-speech) behind idempotent, stateful entry points that the gateway's
|
||||
``voice.record``, ``voice.toggle``, and ``voice.tts`` JSON-RPC handlers can
|
||||
call from a dedicated thread. The gateway imports this module lazily so that
|
||||
missing optional audio deps (sounddevice, faster-whisper, numpy) surface as
|
||||
an ``ImportError`` at call time, not at startup.
|
||||
|
||||
Two usage modes are exposed:
|
||||
|
||||
* **Push-to-talk** (``start_recording`` / ``stop_and_transcribe``) — single
|
||||
manually-bounded capture used when the caller drives the start/stop pair
|
||||
explicitly.
|
||||
* **Continuous (VAD)** (``start_continuous`` / ``stop_continuous``) — mirrors
|
||||
the classic CLI voice mode: recording auto-stops on silence, transcribes,
|
||||
hands the result to a callback, and then auto-restarts for the next turn.
|
||||
Three consecutive no-speech cycles stop the loop and fire
|
||||
``on_silent_limit`` so the UI can turn the mode off.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
from tools.voice_mode import (
|
||||
create_audio_recorder,
|
||||
is_whisper_hallucination,
|
||||
play_audio_file,
|
||||
transcribe_recording,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _debug(msg: str) -> None:
    """Write a ``[voice]``-prefixed trace line to stderr when debugging is on.

    Tracing is opt-in: nothing is printed unless the ``HERMES_VOICE_DEBUG``
    environment variable (surrounding whitespace ignored) is exactly ``"1"``.
    The line is written to ``sys.stderr`` and flushed immediately.

    ``BrokenPipeError`` / ``OSError`` raised by the write are deliberately
    ignored: this helper is called from background threads (for example the
    recorder's silence callback), and a dead stderr pipe must never take the
    voice loop down with it.
    """
    debug_on = os.environ.get("HERMES_VOICE_DEBUG", "").strip() == "1"
    if debug_on:
        try:
            print(f"[voice] {msg}", file=sys.stderr, flush=True)
        except (BrokenPipeError, OSError):
            # Best-effort diagnostics only — a closed stderr is not an error.
            pass
|
||||
|
||||
|
||||
def _beeps_enabled() -> bool:
    """Return whether audible record/stop cues should be played.

    Reads ``voice.beep_enabled`` from the Hermes config (imported lazily).
    The answer defaults to ``True`` whenever the setting cannot be
    determined: the config import or load fails, the ``voice`` section is
    not a dict, or the key is absent.
    """
    enabled = True
    try:
        from hermes_cli.config import load_config

        voice_section = load_config().get("voice", {})
        if isinstance(voice_section, dict):
            enabled = bool(voice_section.get("beep_enabled", True))
    except Exception:
        # Any failure while reading config falls back to the default.
        enabled = True
    return enabled
|
||||
|
||||
|
||||
def _play_beep(frequency: int, count: int = 1) -> None:
    """Play an audible cue, unless beeps are disabled in config.

    Callers in this module use a single 880 Hz beep when a capture starts
    and a double 660 Hz beep when one stops.

    Args:
        frequency: Tone passed through to ``tools.voice_mode.play_beep``.
        count: Number of beeps to play (default 1).

    The cue is strictly best-effort: any exception from importing or
    calling ``play_beep`` is reported through ``_debug`` and otherwise
    ignored, so an unavailable speaker never breaks the voice loop.
    """
    if _beeps_enabled():
        try:
            from tools.voice_mode import play_beep

            play_beep(frequency=frequency, count=count)
        except Exception as exc:
            _debug(f"beep {frequency}Hz failed: {exc}")
|
||||
|
||||
# ── Push-to-talk state ───────────────────────────────────────────────
# _recorder holds the recorder of the in-flight manual capture (None when
# idle); every read/write happens under _recorder_lock.
_recorder_lock = threading.Lock()
_recorder = None

# ── Continuous (VAD) state ───────────────────────────────────────────
# All of the names below are read and written under _continuous_lock.
_continuous_lock = threading.Lock()
_continuous_active = False
_continuous_recorder: Any = None
_continuous_on_transcript: Optional[Callable[[str], None]] = None
_continuous_on_status: Optional[Callable[[str], None]] = None
_continuous_on_silent_limit: Optional[Callable[[], None]] = None
# Consecutive cycles that produced no usable transcript; the loop halts
# itself once this reaches _CONTINUOUS_NO_SPEECH_LIMIT.
_continuous_no_speech_count = 0
_CONTINUOUS_NO_SPEECH_LIMIT = 3

# ── TTS-vs-STT feedback guard ────────────────────────────────────────
# When TTS plays the agent reply over the speakers, the live microphone
# picks it up and transcribes the agent's own voice as user input — an
# infinite loop. This Event is *set* while nothing is playing and is
# cleared by speak_text for the duration of playback.
# _continuous_on_silence waits on it before re-arming the recorder, and
# speak_text cancels any live capture before it starts playback so the
# tail of the previous utterance doesn't leak into the mic.
_tts_playing = threading.Event()
_tts_playing.set()  # initially "not playing"
|
||||
|
||||
|
||||
# ── Push-to-talk API ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
def start_recording() -> None:
    """Open a push-to-talk capture on a freshly created recorder.

    Idempotent: if a recorder is already stored and reports
    ``is_recording``, the call returns without doing anything. Otherwise a
    new recorder is created, started, and only then published in the
    module-level ``_recorder`` slot — so a recorder whose ``start()`` raised
    is never stored. The whole sequence runs under ``_recorder_lock``.
    """
    global _recorder

    with _recorder_lock:
        capture_in_progress = _recorder is not None and getattr(
            _recorder, "is_recording", False
        )
        if not capture_in_progress:
            fresh = create_audio_recorder()
            fresh.start()
            _recorder = fresh
|
||||
|
||||
|
||||
def stop_and_transcribe() -> Optional[str]:
    """Finish the push-to-talk capture and return what was said.

    The stored recorder is detached from the module state under
    ``_recorder_lock``; stopping and transcription then run outside the lock.

    Returns:
        The stripped transcript, or ``None`` when there was no active
        recording, ``stop()`` produced no WAV path, transcription raised or
        reported failure, the transcript was empty, or it was flagged by
        ``is_whisper_hallucination``.

    The temporary WAV file is removed (best-effort) whether or not
    transcription succeeded.
    """
    global _recorder

    with _recorder_lock:
        active, _recorder = _recorder, None

    wav_path = active.stop() if active is not None else None
    if not wav_path:
        return None

    try:
        outcome = transcribe_recording(wav_path)
    except Exception as exc:
        logger.warning("voice transcription failed: %s", exc)
        return None
    finally:
        # Always clean up the capture file; failure to delete is harmless.
        try:
            if os.path.isfile(wav_path):
                os.unlink(wav_path)
        except Exception:
            pass

    # transcribe_recording returns {"success": bool, "transcript": str, ...}.
    if not outcome.get("success"):
        return None

    spoken = (outcome.get("transcript") or "").strip()
    if spoken and not is_whisper_hallucination(spoken):
        return spoken
    return None
|
||||
|
||||
|
||||
# ── Continuous (VAD) API ─────────────────────────────────────────────
|
||||
|
||||
|
||||
def start_continuous(
    on_transcript: Callable[[str], None],
    on_status: Optional[Callable[[str], None]] = None,
    on_silent_limit: Optional[Callable[[], None]] = None,
    silence_threshold: int = 200,
    silence_duration: float = 3.0,
) -> None:
    """Start a VAD-driven continuous recording loop.

    The recorder is started with ``_continuous_on_silence`` as its
    silence callback; that callback transcribes each turn, delivers it via
    ``on_transcript`` and re-arms the recorder. After
    ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive cycles without a usable
    transcript the loop stops itself and calls ``on_silent_limit``.

    Idempotent — calling while the loop is already active is a no-op.

    Args:
        on_transcript: Called with each successfully transcribed utterance.
        on_status: Optional; called with ``"listening"`` here, and with
            ``"transcribing"`` / ``"idle"`` by the rest of the loop.
        on_silent_limit: Optional; called when the loop halts itself after
            too many silent cycles.
        silence_threshold: Stored on the recorder as ``_silence_threshold``
            (presumably an RMS level — confirm against the recorder).
        silence_duration: Stored on the recorder as ``_silence_duration``
            (seconds, per the debug trace below).

    Raises:
        Exception: Whatever ``create_audio_recorder()`` or the recorder's
            ``start()`` raises. In both cases the loop is left *inactive*
            so a later call can retry.
    """
    global _continuous_active, _continuous_recorder
    global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
    global _continuous_no_speech_count

    with _continuous_lock:
        if _continuous_active:
            _debug("start_continuous: already active — no-op")
            return

        # Obtain and configure the recorder BEFORE committing any loop state.
        # If recorder creation raises (e.g. optional audio deps are missing)
        # the loop must not be left flagged as active — otherwise every later
        # start_continuous() call would silently no-op.
        rec = _continuous_recorder
        if rec is None:
            rec = create_audio_recorder()
        rec._silence_threshold = silence_threshold
        rec._silence_duration = silence_duration
        _continuous_recorder = rec

        _continuous_active = True
        _continuous_on_transcript = on_transcript
        _continuous_on_status = on_status
        _continuous_on_silent_limit = on_silent_limit
        _continuous_no_speech_count = 0

    _debug(
        f"start_continuous: begin (threshold={silence_threshold}, duration={silence_duration}s)"
    )

    # CLI parity: single 880 Hz beep *before* opening the stream — placing
    # the beep after stream.start() on macOS triggers a CoreAudio conflict.
    _play_beep(frequency=880, count=1)

    try:
        rec.start(on_silence_stop=_continuous_on_silence)
    except Exception as e:
        logger.error("failed to start continuous recording: %s", e)
        _debug(f"start_continuous: rec.start raised {type(e).__name__}: {e}")
        with _continuous_lock:
            _continuous_active = False
        raise

    if on_status:
        try:
            on_status("listening")
        except Exception:
            # A misbehaving UI callback must not abort a running capture.
            pass
|
||||
|
||||
|
||||
def stop_continuous() -> None:
    """Shut down the continuous loop and cancel its recorder.

    Does nothing when the loop is not active. Otherwise, under
    ``_continuous_lock``, the active flag is cleared, the registered
    callbacks are dropped and the silent-cycle counter is reset. Outside
    the lock the recorder is cancelled, a double 660 Hz beep is played,
    and the previously registered status callback (if any) receives
    ``"idle"``.

    A transcription already in flight is not interrupted; the silence
    callback re-checks ``_continuous_active`` afterwards and discards the
    result.
    """
    global _continuous_active, _continuous_on_transcript
    global _continuous_on_status, _continuous_on_silent_limit
    global _continuous_recorder, _continuous_no_speech_count

    with _continuous_lock:
        was_active = _continuous_active
        if was_active:
            recorder = _continuous_recorder
            status_cb = _continuous_on_status
            _continuous_active = False
            _continuous_on_transcript = None
            _continuous_on_status = None
            _continuous_on_silent_limit = None
            _continuous_no_speech_count = 0

    if not was_active:
        return

    if recorder is not None:
        try:
            # cancel() rather than stop(): buffered frames are thrown away
            # because a half-captured turn should not be transcribed.
            recorder.cancel()
        except Exception as exc:
            logger.warning("failed to cancel recorder: %s", exc)

    # Audible "recording stopped" cue — the same 660 Hz × 2 the
    # silence-auto-stop path plays.
    _play_beep(frequency=660, count=2)

    if status_cb:
        try:
            status_cb("idle")
        except Exception:
            pass
|
||||
|
||||
|
||||
def is_continuous_active() -> bool:
    """Report whether the continuous voice loop is flagged as running.

    The flag is read while holding ``_continuous_lock``.
    """
    with _continuous_lock:
        running = _continuous_active
    return running
|
||||
|
||||
|
||||
def _continuous_on_silence() -> None:
    """Recorder silence callback: finish one turn and re-arm the loop.

    Passed to the recorder as ``on_silence_stop`` (the original notes it
    runs on a daemon thread). One invocation performs, in order:

    1. Snapshot the recorder and callbacks under ``_continuous_lock``;
       bail out if the loop is no longer active.
    2. Report ``"transcribing"``, stop the capture, play the stop cue.
    3. Transcribe the WAV (if any) and delete it; a transcript only counts
       when transcription succeeded, is non-empty, and is not flagged by
       ``is_whisper_hallucination``.
    4. Re-check the active flag (the user may have stopped the loop during
       transcription) and update the consecutive-silence counter.
    5. Deliver the transcript via ``on_transcript``.
    6. If the silence limit was reached: deactivate, call
       ``on_silent_limit``, cancel the recorder, report ``"idle"``.
    7. Otherwise wait for in-flight TTS playback, then restart the
       recorder and report ``"listening"``.

    If the restart itself fails the loop is deactivated and ``"idle"`` is
    reported — there is no caller to re-raise to from this callback, and
    without the status update the UI would be left showing
    ``"transcribing"`` forever.

    Exceptions raised by user callbacks are swallowed (``on_transcript``
    failures are logged) so a faulty callback cannot kill the loop.
    """
    global _continuous_active, _continuous_no_speech_count

    _debug("_continuous_on_silence: fired")

    with _continuous_lock:
        if not _continuous_active:
            _debug("_continuous_on_silence: loop inactive — abort")
            return
        rec = _continuous_recorder
        on_transcript = _continuous_on_transcript
        on_status = _continuous_on_status
        on_silent_limit = _continuous_on_silent_limit

    if rec is None:
        _debug("_continuous_on_silence: no recorder — abort")
        return

    def _notify(state: str) -> None:
        # Best-effort status update; a broken UI callback is ignored.
        if on_status:
            try:
                on_status(state)
            except Exception:
                pass

    _notify("transcribing")

    wav_path = rec.stop()
    # Peak RMS is the critical diagnostic when stop() returns None despite
    # the VAD firing — it shows whether the mic was simply too quiet.
    peak_rms = getattr(rec, "_peak_rms", -1)
    _debug(
        f"_continuous_on_silence: rec.stop -> {wav_path!r} (peak_rms={peak_rms})"
    )

    # CLI parity: double 660 Hz beep after the stream stops (safe from the
    # CoreAudio conflict that blocks pre-start beeps).
    _play_beep(frequency=660, count=2)

    transcript: Optional[str] = None

    if wav_path:
        try:
            result = transcribe_recording(wav_path)
            # transcribe_recording returns {"success": bool, "transcript": str,
            # "error": str?} — NOT {"text": str}.
            success = bool(result.get("success"))
            text = (result.get("transcript") or "").strip()
            err = result.get("error")
            _debug(
                f"_continuous_on_silence: transcribe -> success={success} "
                f"text={text!r} err={err!r}"
            )
            if success and text and not is_whisper_hallucination(text):
                transcript = text
        except Exception as e:
            logger.warning("continuous transcription failed: %s", e)
            _debug(f"_continuous_on_silence: transcribe raised {type(e).__name__}: {e}")
        finally:
            # The capture file is temporary; removal is best-effort.
            try:
                if os.path.isfile(wav_path):
                    os.unlink(wav_path)
            except Exception:
                pass

    with _continuous_lock:
        if not _continuous_active:
            # User stopped us while we were transcribing — discard.
            _debug("_continuous_on_silence: stopped during transcribe — no restart")
            return
        if transcript:
            _continuous_no_speech_count = 0
        else:
            _continuous_no_speech_count += 1
        should_halt = _continuous_no_speech_count >= _CONTINUOUS_NO_SPEECH_LIMIT
        no_speech = _continuous_no_speech_count

    if transcript and on_transcript:
        try:
            on_transcript(transcript)
        except Exception as e:
            logger.warning("on_transcript callback raised: %s", e)

    if should_halt:
        _debug(f"_continuous_on_silence: {no_speech} silent cycles — halting")
        with _continuous_lock:
            _continuous_active = False
            _continuous_no_speech_count = 0
        if on_silent_limit:
            try:
                on_silent_limit()
            except Exception:
                pass
        try:
            rec.cancel()
        except Exception:
            pass
        _notify("idle")
        return

    # CLI parity: wait for any in-flight TTS to finish before re-arming the
    # mic, then leave a small gap to avoid catching the tail of the speaker
    # output. Without this the voice loop becomes a feedback loop — the
    # agent's spoken reply lands back in the mic and gets re-submitted.
    if not _tts_playing.is_set():
        _debug("_continuous_on_silence: waiting for TTS to finish")
        _tts_playing.wait(timeout=60)
        import time as _time

        _time.sleep(0.3)

    # User may have stopped the loop during the wait.
    with _continuous_lock:
        if not _continuous_active:
            _debug("_continuous_on_silence: stopped while waiting for TTS")
            return

    # Restart for the next turn.
    _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
    _play_beep(frequency=880, count=1)
    try:
        rec.start(on_silence_stop=_continuous_on_silence)
    except Exception as e:
        logger.error("failed to restart continuous recording: %s", e)
        _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
        with _continuous_lock:
            _continuous_active = False
        # The loop is now dead; tell the UI instead of leaving it stuck on
        # the "transcribing" status emitted earlier in this cycle.
        _notify("idle")
        return

    _notify("listening")
|
||||
|
||||
|
||||
# ── TTS API ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def speak_text(text: str) -> None:
    """Synthesize ``text`` with the configured TTS provider and play it.

    Mirrors cli.py:_voice_speak_response — same markdown strip pipeline,
    same 4000-char cap, same explicit mp3 output path, same MP3-over-OGG
    playback choice (afplay misbehaves on OGG), and cleanup of both
    extensions. Keeping these in sync means a voice-mode TTS session in
    the TUI sounds identical to one in the classic CLI.

    While playback is in flight the module-level _tts_playing Event is
    cleared so the continuous-recording loop knows to wait before
    re-arming the mic (otherwise the agent's spoken reply feedback-loops
    through the microphone and the agent ends up replying to itself).

    Blank input is a no-op. Synthesis/playback failures are logged and
    swallowed; this function never raises for them. Temporary audio files
    are removed on every exit path, and a recorder that was paused for
    playback is always re-armed (if the continuous loop is still active).
    """
    if not text or not text.strip():
        return

    import re
    import tempfile
    import time

    # Cancel any live capture before we open the speakers — otherwise the
    # last ~200ms of the user's turn tail + the first syllables of our TTS
    # both end up in the next recording window.
    paused_recording = False
    with _continuous_lock:
        if (
            _continuous_active
            and _continuous_recorder is not None
            and getattr(_continuous_recorder, "is_recording", False)
        ):
            try:
                _continuous_recorder.cancel()
                paused_recording = True
            except Exception as e:
                logger.warning("failed to pause recorder for TTS: %s", e)

    _tts_playing.clear()
    _debug(f"speak_text: TTS begin (paused_recording={paused_recording})")

    # Files to delete once we are done, whatever happens in between.
    temp_audio_paths = ()

    try:
        from tools.tts_tool import text_to_speech_tool

        tts_text = text[:4000] if len(text) > 4000 else text
        tts_text = re.sub(r'```[\s\S]*?```', ' ', tts_text)  # fenced code blocks
        tts_text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', tts_text)  # [text](url) → text
        tts_text = re.sub(r'https?://\S+', '', tts_text)  # bare URLs
        tts_text = re.sub(r'\*\*(.+?)\*\*', r'\1', tts_text)  # bold
        tts_text = re.sub(r'\*(.+?)\*', r'\1', tts_text)  # italic
        tts_text = re.sub(r'`(.+?)`', r'\1', tts_text)  # inline code
        tts_text = re.sub(r'^#+\s*', '', tts_text, flags=re.MULTILINE)  # headers
        tts_text = re.sub(r'^\s*[-*]\s+', '', tts_text, flags=re.MULTILINE)  # list bullets
        tts_text = re.sub(r'---+', '', tts_text)  # horizontal rules
        tts_text = re.sub(r'\n{3,}', '\n\n', tts_text)  # excess newlines
        tts_text = tts_text.strip()
        if not tts_text:
            # Nothing speakable left; the finally block still re-arms the mic.
            return

        # MP3 output path, pre-chosen so we can play the MP3 directly even
        # when text_to_speech_tool auto-converts to OGG for messaging
        # platforms. afplay's OGG support is flaky, MP3 always works.
        os.makedirs(os.path.join(tempfile.gettempdir(), "hermes_voice"), exist_ok=True)
        mp3_path = os.path.join(
            tempfile.gettempdir(),
            "hermes_voice",
            f"tts_{time.strftime('%Y%m%d_%H%M%S')}.mp3",
        )
        temp_audio_paths = (mp3_path, mp3_path.rsplit(".", 1)[0] + ".ogg")

        _debug(f"speak_text: synthesizing {len(tts_text)} chars -> {mp3_path}")
        text_to_speech_tool(text=tts_text, output_path=mp3_path)

        if os.path.isfile(mp3_path) and os.path.getsize(mp3_path) > 0:
            _debug(f"speak_text: playing {mp3_path} ({os.path.getsize(mp3_path)} bytes)")
            play_audio_file(mp3_path)
        else:
            _debug(f"speak_text: TTS tool produced no audio at {mp3_path}")
    except Exception as e:
        logger.warning("Voice TTS playback failed: %s", e)
        _debug(f"speak_text raised {type(e).__name__}: {e}")
    finally:
        # Remove each temp file independently so a failure on one (or a
        # file that was never created) does not leave the other behind.
        for leftover in temp_audio_paths:
            try:
                os.unlink(leftover)
            except OSError:
                pass

        _tts_playing.set()
        _debug("speak_text: TTS done")

        # Re-arm the mic so the user can answer without pressing Ctrl+B.
        # Small delay lets the OS flush speaker output and afplay fully
        # release the audio device before sounddevice re-opens the input.
        if paused_recording:
            time.sleep(0.3)
            with _continuous_lock:
                if _continuous_active and _continuous_recorder is not None:
                    try:
                        _continuous_recorder.start(
                            on_silence_stop=_continuous_on_silence
                        )
                        _debug("speak_text: recording resumed after TTS")
                    except Exception as e:
                        logger.warning(
                            "failed to resume recorder after TTS: %s", e
                        )
|
||||
+292
-21
@@ -71,6 +71,7 @@ app = FastAPI(title="Hermes Agent", version=__version__)
|
||||
# Injected into the SPA HTML so only the legitimate web UI can use it.
|
||||
# ---------------------------------------------------------------------------
|
||||
_SESSION_TOKEN = secrets.token_urlsafe(32)
|
||||
_SESSION_HEADER_NAME = "X-Hermes-Session-Token"
|
||||
|
||||
# Simple rate limiter for the reveal endpoint
|
||||
_reveal_timestamps: List[float] = []
|
||||
@@ -104,14 +105,29 @@ _PUBLIC_API_PATHS: frozenset = frozenset({
|
||||
})
|
||||
|
||||
|
||||
def _require_token(request: Request) -> None:
|
||||
"""Validate the ephemeral session token. Raises 401 on mismatch.
|
||||
def _has_valid_session_token(request: Request) -> bool:
|
||||
"""True if the request carries a valid dashboard session token.
|
||||
|
||||
Uses ``hmac.compare_digest`` to prevent timing side-channels.
|
||||
The dedicated session header avoids collisions with reverse proxies that
|
||||
already use ``Authorization`` (for example Caddy ``basic_auth``). We still
|
||||
accept the legacy Bearer path for backward compatibility with older
|
||||
dashboard bundles.
|
||||
"""
|
||||
session_header = request.headers.get(_SESSION_HEADER_NAME, "")
|
||||
if session_header and hmac.compare_digest(
|
||||
session_header.encode(),
|
||||
_SESSION_TOKEN.encode(),
|
||||
):
|
||||
return True
|
||||
|
||||
auth = request.headers.get("authorization", "")
|
||||
expected = f"Bearer {_SESSION_TOKEN}"
|
||||
if not hmac.compare_digest(auth.encode(), expected.encode()):
|
||||
return hmac.compare_digest(auth.encode(), expected.encode())
|
||||
|
||||
|
||||
def _require_token(request: Request) -> None:
    """Reject the request with HTTP 401 unless it carries a valid session token.

    Delegates the actual check to ``_has_valid_session_token``; returns
    ``None`` when the token is accepted.
    """
    if _has_valid_session_token(request):
        return
    raise HTTPException(status_code=401, detail="Unauthorized")
|
||||
|
||||
|
||||
@@ -205,9 +221,7 @@ async def auth_middleware(request: Request, call_next):
|
||||
"""Require the session token on all /api/ routes except the public list."""
|
||||
path = request.url.path
|
||||
if path.startswith("/api/") and path not in _PUBLIC_API_PATHS and not path.startswith("/api/plugins/"):
|
||||
auth = request.headers.get("authorization", "")
|
||||
expected = f"Bearer {_SESSION_TOKEN}"
|
||||
if not hmac.compare_digest(auth.encode(), expected.encode()):
|
||||
if not _has_valid_session_token(request):
|
||||
return JSONResponse(
|
||||
status_code=401,
|
||||
content={"detail": "Unauthorized"},
|
||||
@@ -417,7 +431,14 @@ class EnvVarReveal(BaseModel):
|
||||
|
||||
|
||||
_GATEWAY_HEALTH_URL = os.getenv("GATEWAY_HEALTH_URL")
|
||||
_GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3"))
|
||||
try:
|
||||
_GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3"))
|
||||
except (ValueError, TypeError):
|
||||
_log.warning(
|
||||
"Invalid GATEWAY_HEALTH_TIMEOUT value %r — using default 3.0s",
|
||||
os.getenv("GATEWAY_HEALTH_TIMEOUT"),
|
||||
)
|
||||
_GATEWAY_HEALTH_TIMEOUT = 3.0
|
||||
|
||||
|
||||
def _probe_gateway_health() -> tuple[bool, dict | None]:
|
||||
@@ -2304,8 +2325,227 @@ _BUILTIN_DASHBOARD_THEMES = [
|
||||
]
|
||||
|
||||
|
||||
def _parse_theme_layer(value: Any, default_hex: str, default_alpha: float = 1.0) -> Optional[Dict[str, Any]]:
|
||||
"""Normalise a theme layer spec from YAML into `{hex, alpha}` form.
|
||||
|
||||
Accepts shorthand (a bare hex string) or full dict form. Returns
|
||||
``None`` on garbage input so the caller can fall back to a built-in
|
||||
default rather than blowing up.
|
||||
"""
|
||||
if value is None:
|
||||
return {"hex": default_hex, "alpha": default_alpha}
|
||||
if isinstance(value, str):
|
||||
return {"hex": value, "alpha": default_alpha}
|
||||
if isinstance(value, dict):
|
||||
hex_val = value.get("hex", default_hex)
|
||||
alpha_val = value.get("alpha", default_alpha)
|
||||
if not isinstance(hex_val, str):
|
||||
return None
|
||||
try:
|
||||
alpha_f = float(alpha_val)
|
||||
except (TypeError, ValueError):
|
||||
alpha_f = default_alpha
|
||||
return {"hex": hex_val, "alpha": max(0.0, min(1.0, alpha_f))}
|
||||
return None
|
||||
|
||||
|
||||
_THEME_DEFAULT_TYPOGRAPHY: Dict[str, str] = {
|
||||
"fontSans": 'system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif',
|
||||
"fontMono": 'ui-monospace, "SF Mono", "Cascadia Mono", Menlo, Consolas, monospace',
|
||||
"baseSize": "15px",
|
||||
"lineHeight": "1.55",
|
||||
"letterSpacing": "0",
|
||||
}
|
||||
|
||||
_THEME_DEFAULT_LAYOUT: Dict[str, str] = {
|
||||
"radius": "0.5rem",
|
||||
"density": "comfortable",
|
||||
}
|
||||
|
||||
_THEME_OVERRIDE_KEYS = {
|
||||
"card", "cardForeground", "popover", "popoverForeground",
|
||||
"primary", "primaryForeground", "secondary", "secondaryForeground",
|
||||
"muted", "mutedForeground", "accent", "accentForeground",
|
||||
"destructive", "destructiveForeground", "success", "warning",
|
||||
"border", "input", "ring",
|
||||
}
|
||||
|
||||
# Well-known named asset slots themes can populate. Any other keys under
|
||||
# ``assets.custom`` are exposed as ``--theme-asset-custom-<key>`` CSS vars
|
||||
# for plugin/shell use.
|
||||
_THEME_NAMED_ASSET_KEYS = {"bg", "hero", "logo", "crest", "sidebar", "header"}
|
||||
|
||||
# Component-style buckets themes can override. The value under each bucket
|
||||
# is a mapping from camelCase property name to CSS string; each pair emits
|
||||
# ``--component-<bucket>-<kebab-property>`` on :root. The frontend's shell
|
||||
# components (Card, App header, Backdrop, etc.) consume these vars so themes
|
||||
# can restyle chrome (clip-path, border-image, segmented progress, etc.)
|
||||
# without shipping their own CSS.
|
||||
_THEME_COMPONENT_BUCKETS = {
|
||||
"card", "header", "footer", "sidebar", "tab",
|
||||
"progress", "badge", "backdrop", "page",
|
||||
}
|
||||
|
||||
_THEME_LAYOUT_VARIANTS = {"standard", "cockpit", "tiled"}
|
||||
|
||||
# Cap on customCSS length so a malformed/oversized theme YAML can't blow up
|
||||
# the response payload or the <style> tag. 32 KiB is plenty for every
|
||||
# practical reskin (the Strike Freedom demo is ~2 KiB).
|
||||
_THEME_CUSTOM_CSS_MAX = 32 * 1024
|
||||
|
||||
|
||||
def _normalise_theme_definition(data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
"""Normalise a user theme YAML into the wire format `ThemeProvider`
|
||||
expects. Returns ``None`` if the theme is unusable.
|
||||
|
||||
Accepts both the full schema (palette/typography/layout) and a loose
|
||||
form with bare hex strings, so hand-written YAMLs stay friendly.
|
||||
"""
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
name = data.get("name")
|
||||
if not isinstance(name, str) or not name.strip():
|
||||
return None
|
||||
|
||||
# Palette
|
||||
palette_src = data.get("palette", {}) if isinstance(data.get("palette"), dict) else {}
|
||||
# Allow top-level `colors.background` as a shorthand too.
|
||||
colors_src = data.get("colors", {}) if isinstance(data.get("colors"), dict) else {}
|
||||
|
||||
def _layer(key: str, default_hex: str, default_alpha: float = 1.0) -> Dict[str, Any]:
|
||||
spec = palette_src.get(key, colors_src.get(key))
|
||||
parsed = _parse_theme_layer(spec, default_hex, default_alpha)
|
||||
return parsed if parsed is not None else {"hex": default_hex, "alpha": default_alpha}
|
||||
|
||||
palette = {
|
||||
"background": _layer("background", "#041c1c", 1.0),
|
||||
"midground": _layer("midground", "#ffe6cb", 1.0),
|
||||
"foreground": _layer("foreground", "#ffffff", 0.0),
|
||||
"warmGlow": palette_src.get("warmGlow") or data.get("warmGlow") or "rgba(255, 189, 56, 0.35)",
|
||||
"noiseOpacity": 1.0,
|
||||
}
|
||||
raw_noise = palette_src.get("noiseOpacity", data.get("noiseOpacity"))
|
||||
try:
|
||||
palette["noiseOpacity"] = float(raw_noise) if raw_noise is not None else 1.0
|
||||
except (TypeError, ValueError):
|
||||
palette["noiseOpacity"] = 1.0
|
||||
|
||||
# Typography
|
||||
typo_src = data.get("typography", {}) if isinstance(data.get("typography"), dict) else {}
|
||||
typography = dict(_THEME_DEFAULT_TYPOGRAPHY)
|
||||
for key in ("fontSans", "fontMono", "fontDisplay", "fontUrl", "baseSize", "lineHeight", "letterSpacing"):
|
||||
val = typo_src.get(key)
|
||||
if isinstance(val, str) and val.strip():
|
||||
typography[key] = val
|
||||
|
||||
# Layout
|
||||
layout_src = data.get("layout", {}) if isinstance(data.get("layout"), dict) else {}
|
||||
layout = dict(_THEME_DEFAULT_LAYOUT)
|
||||
radius = layout_src.get("radius")
|
||||
if isinstance(radius, str) and radius.strip():
|
||||
layout["radius"] = radius
|
||||
density = layout_src.get("density")
|
||||
if isinstance(density, str) and density in ("compact", "comfortable", "spacious"):
|
||||
layout["density"] = density
|
||||
|
||||
# Color overrides — keep only valid keys with string values.
|
||||
overrides_src = data.get("colorOverrides", {})
|
||||
color_overrides: Dict[str, str] = {}
|
||||
if isinstance(overrides_src, dict):
|
||||
for key, val in overrides_src.items():
|
||||
if key in _THEME_OVERRIDE_KEYS and isinstance(val, str) and val.strip():
|
||||
color_overrides[key] = val
|
||||
|
||||
# Assets — named slots + arbitrary user-defined keys. Values must be
|
||||
# strings (URLs or CSS ``url(...)``/``linear-gradient(...)`` expressions).
|
||||
# We don't fetch remote assets here; the frontend just injects them as
|
||||
# CSS vars. Empty values are dropped so a theme can explicitly clear a
|
||||
# slot by setting ``hero: ""``.
|
||||
assets_out: Dict[str, Any] = {}
|
||||
assets_src = data.get("assets", {}) if isinstance(data.get("assets"), dict) else {}
|
||||
for key in _THEME_NAMED_ASSET_KEYS:
|
||||
val = assets_src.get(key)
|
||||
if isinstance(val, str) and val.strip():
|
||||
assets_out[key] = val
|
||||
custom_assets_src = assets_src.get("custom")
|
||||
if isinstance(custom_assets_src, dict):
|
||||
custom_assets: Dict[str, str] = {}
|
||||
for key, val in custom_assets_src.items():
|
||||
if (
|
||||
isinstance(key, str)
|
||||
and key.replace("-", "").replace("_", "").isalnum()
|
||||
and isinstance(val, str)
|
||||
and val.strip()
|
||||
):
|
||||
custom_assets[key] = val
|
||||
if custom_assets:
|
||||
assets_out["custom"] = custom_assets
|
||||
|
||||
# Custom CSS — raw CSS text the frontend injects as a scoped <style>
|
||||
# tag on theme apply. Clipped to _THEME_CUSTOM_CSS_MAX to keep the
|
||||
# payload bounded. We intentionally do NOT parse/sanitise the CSS
|
||||
# here — the dashboard is localhost-only and themes are user-authored
|
||||
# YAML in ~/.hermes/, same trust level as the config file itself.
|
||||
custom_css_val = data.get("customCSS")
|
||||
custom_css: Optional[str] = None
|
||||
if isinstance(custom_css_val, str) and custom_css_val.strip():
|
||||
custom_css = custom_css_val[:_THEME_CUSTOM_CSS_MAX]
|
||||
|
||||
# Component style overrides — per-bucket dicts of camelCase CSS
|
||||
# property -> CSS string. The frontend converts these into CSS vars
|
||||
# that shell components (Card, App header, Backdrop) consume.
|
||||
component_styles_src = data.get("componentStyles", {})
|
||||
component_styles: Dict[str, Dict[str, str]] = {}
|
||||
if isinstance(component_styles_src, dict):
|
||||
for bucket, props in component_styles_src.items():
|
||||
if bucket not in _THEME_COMPONENT_BUCKETS or not isinstance(props, dict):
|
||||
continue
|
||||
clean: Dict[str, str] = {}
|
||||
for prop, value in props.items():
|
||||
if (
|
||||
isinstance(prop, str)
|
||||
and prop.replace("-", "").replace("_", "").isalnum()
|
||||
and isinstance(value, (str, int, float))
|
||||
and str(value).strip()
|
||||
):
|
||||
clean[prop] = str(value)
|
||||
if clean:
|
||||
component_styles[bucket] = clean
|
||||
|
||||
layout_variant_src = data.get("layoutVariant")
|
||||
layout_variant = (
|
||||
layout_variant_src
|
||||
if isinstance(layout_variant_src, str) and layout_variant_src in _THEME_LAYOUT_VARIANTS
|
||||
else "standard"
|
||||
)
|
||||
|
||||
result: Dict[str, Any] = {
|
||||
"name": name,
|
||||
"label": data.get("label") or name,
|
||||
"description": data.get("description", ""),
|
||||
"palette": palette,
|
||||
"typography": typography,
|
||||
"layout": layout,
|
||||
"layoutVariant": layout_variant,
|
||||
}
|
||||
if color_overrides:
|
||||
result["colorOverrides"] = color_overrides
|
||||
if assets_out:
|
||||
result["assets"] = assets_out
|
||||
if custom_css is not None:
|
||||
result["customCSS"] = custom_css
|
||||
if component_styles:
|
||||
result["componentStyles"] = component_styles
|
||||
return result
|
||||
|
||||
|
||||
def _discover_user_themes() -> list:
|
||||
"""Scan ~/.hermes/dashboard-themes/*.yaml for user-created themes."""
|
||||
"""Scan ~/.hermes/dashboard-themes/*.yaml for user-created themes.
|
||||
|
||||
Returns a list of fully-normalised theme definitions ready to ship
|
||||
to the frontend, so the client can apply them without a secondary
|
||||
round-trip or a built-in stub.
|
||||
"""
|
||||
themes_dir = get_hermes_home() / "dashboard-themes"
|
||||
if not themes_dir.is_dir():
|
||||
return []
|
||||
@@ -2313,33 +2553,42 @@ def _discover_user_themes() -> list:
|
||||
for f in sorted(themes_dir.glob("*.yaml")):
|
||||
try:
|
||||
data = yaml.safe_load(f.read_text(encoding="utf-8"))
|
||||
if isinstance(data, dict) and data.get("name"):
|
||||
result.append({
|
||||
"name": data["name"],
|
||||
"label": data.get("label", data["name"]),
|
||||
"description": data.get("description", ""),
|
||||
})
|
||||
except Exception:
|
||||
continue
|
||||
normalised = _normalise_theme_definition(data)
|
||||
if normalised is not None:
|
||||
result.append(normalised)
|
||||
return result
|
||||
|
||||
|
||||
@app.get("/api/dashboard/themes")
|
||||
async def get_dashboard_themes():
|
||||
"""Return available themes and the currently active one."""
|
||||
"""Return available themes and the currently active one.
|
||||
|
||||
Built-in entries ship name/label/description only (the frontend owns
|
||||
their full definitions in `web/src/themes/presets.ts`). User themes
|
||||
from `~/.hermes/dashboard-themes/*.yaml` ship with their full
|
||||
normalised definition under `definition`, so the client can apply
|
||||
them without a stub.
|
||||
"""
|
||||
config = load_config()
|
||||
active = config.get("dashboard", {}).get("theme", "default")
|
||||
user_themes = _discover_user_themes()
|
||||
# Merge built-in + user, user themes override built-in by name.
|
||||
seen = set()
|
||||
themes = []
|
||||
for t in _BUILTIN_DASHBOARD_THEMES:
|
||||
seen.add(t["name"])
|
||||
themes.append(t)
|
||||
for t in user_themes:
|
||||
if t["name"] not in seen:
|
||||
themes.append(t)
|
||||
seen.add(t["name"])
|
||||
if t["name"] in seen:
|
||||
continue
|
||||
themes.append({
|
||||
"name": t["name"],
|
||||
"label": t["label"],
|
||||
"description": t["description"],
|
||||
"definition": t,
|
||||
})
|
||||
seen.add(t["name"])
|
||||
return {"themes": themes, "active": active}
|
||||
|
||||
|
||||
@@ -2396,13 +2645,35 @@ def _discover_dashboard_plugins() -> list:
|
||||
if name in seen_names:
|
||||
continue
|
||||
seen_names.add(name)
|
||||
# Tab options: ``path`` + ``position`` for a new tab, optional
|
||||
# ``override`` to replace a built-in route, and ``hidden`` to
|
||||
# register the plugin component/slots without adding a tab
|
||||
# (useful for slot-only plugins like a header-crest injector).
|
||||
raw_tab = data.get("tab", {}) if isinstance(data.get("tab"), dict) else {}
|
||||
tab_info = {
|
||||
"path": raw_tab.get("path", f"/{name}"),
|
||||
"position": raw_tab.get("position", "end"),
|
||||
}
|
||||
override_path = raw_tab.get("override")
|
||||
if isinstance(override_path, str) and override_path.startswith("/"):
|
||||
tab_info["override"] = override_path
|
||||
if bool(raw_tab.get("hidden")):
|
||||
tab_info["hidden"] = True
|
||||
# Slots: list of named slot locations this plugin populates.
|
||||
# The frontend exposes ``registerSlot(pluginName, slotName, Component)``
|
||||
# on window; plugins with non-empty slots call it from their JS bundle.
|
||||
slots_src = data.get("slots")
|
||||
slots: List[str] = []
|
||||
if isinstance(slots_src, list):
|
||||
slots = [s for s in slots_src if isinstance(s, str) and s]
|
||||
plugins.append({
|
||||
"name": name,
|
||||
"label": data.get("label", name),
|
||||
"description": data.get("description", ""),
|
||||
"icon": data.get("icon", "Puzzle"),
|
||||
"version": data.get("version", "0.0.0"),
|
||||
"tab": data.get("tab", {"path": f"/{name}", "position": "end"}),
|
||||
"tab": tab_info,
|
||||
"slots": slots,
|
||||
"entry": data.get("entry", "dist/index.js"),
|
||||
"css": data.get("css"),
|
||||
"has_api": bool(data.get("api")),
|
||||
|
||||
+1
-1
@@ -142,7 +142,7 @@ class _ComponentFilter(logging.Filter):
|
||||
# Used by _ComponentFilter and exposed for ``hermes logs --component``.
|
||||
COMPONENT_PREFIXES = {
|
||||
"gateway": ("gateway",),
|
||||
"agent": ("agent", "run_agent", "model_tools", "scripts.batch_runner"),
|
||||
"agent": ("agent", "run_agent", "model_tools", "batch_runner"),
|
||||
"tools": ("tools",),
|
||||
"cli": ("hermes_cli", "cli"),
|
||||
"cron": ("cron",),
|
||||
|
||||
@@ -26,13 +26,10 @@ Usage:
|
||||
python mini_swe_runner.py --prompts_file prompts.jsonl --output_file trajectories.jsonl --env docker
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
+52
-23
@@ -288,30 +288,34 @@ def get_tool_definitions(
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic_schema}
|
||||
break
|
||||
|
||||
# Rebuild discord_server schema based on the bot's privileged intents
|
||||
# (detected from GET /applications/@me) and the user's action allowlist
|
||||
# in config. Hides actions the bot's intents don't support so the
|
||||
# model never attempts them, and annotates fetch_messages when the
|
||||
# Rebuild discord / discord_admin schemas based on the bot's privileged
|
||||
# intents (detected from GET /applications/@me) and the user's action
|
||||
# allowlist in config. Hides actions the bot's intents don't support so
|
||||
# the model never attempts them, and annotates fetch_messages when the
|
||||
# MESSAGE_CONTENT intent is missing.
|
||||
if "discord_server" in available_tool_names:
|
||||
try:
|
||||
from tools.discord_tool import get_dynamic_schema
|
||||
dynamic = get_dynamic_schema()
|
||||
except Exception: # pragma: no cover — defensive, fall back to static
|
||||
dynamic = None
|
||||
if dynamic is None:
|
||||
# Tool filtered out entirely (empty allowlist or detection disabled
|
||||
# the only remaining actions). Drop it from the schema list.
|
||||
filtered_tools = [
|
||||
t for t in filtered_tools
|
||||
if t.get("function", {}).get("name") != "discord_server"
|
||||
]
|
||||
available_tool_names.discard("discord_server")
|
||||
else:
|
||||
for i, td in enumerate(filtered_tools):
|
||||
if td.get("function", {}).get("name") == "discord_server":
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic}
|
||||
break
|
||||
_discord_schema_fns = {
|
||||
"discord": "get_dynamic_schema_core",
|
||||
"discord_admin": "get_dynamic_schema_admin",
|
||||
}
|
||||
for discord_tool_name in _discord_schema_fns:
|
||||
if discord_tool_name in available_tool_names:
|
||||
try:
|
||||
from tools import discord_tool as _dt
|
||||
schema_fn = getattr(_dt, _discord_schema_fns[discord_tool_name])
|
||||
dynamic = schema_fn()
|
||||
except Exception:
|
||||
dynamic = None
|
||||
if dynamic is None:
|
||||
filtered_tools = [
|
||||
t for t in filtered_tools
|
||||
if t.get("function", {}).get("name") != discord_tool_name
|
||||
]
|
||||
available_tool_names.discard(discord_tool_name)
|
||||
else:
|
||||
for i, td in enumerate(filtered_tools):
|
||||
if td.get("function", {}).get("name") == discord_tool_name:
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic}
|
||||
break
|
||||
|
||||
# Strip web tool cross-references from browser_navigate description when
|
||||
# web_search / web_extract are not available. The static schema says
|
||||
@@ -418,6 +422,31 @@ def _coerce_value(value: str, expected_type):
|
||||
return _coerce_number(value, integer_only=(expected_type == "integer"))
|
||||
if expected_type == "boolean":
|
||||
return _coerce_boolean(value)
|
||||
if expected_type == "array":
|
||||
return _coerce_json(value, list)
|
||||
if expected_type == "object":
|
||||
return _coerce_json(value, dict)
|
||||
return value
|
||||
|
||||
|
||||
def _coerce_json(value: str, expected_python_type: type):
|
||||
"""Parse *value* as JSON when the schema expects an array or object.
|
||||
|
||||
Handles model output drift where a complex oneOf/discriminated-union schema
|
||||
causes the LLM to emit the array/object as a JSON string instead of a native
|
||||
structure. Returns the original string if parsing fails or yields the wrong
|
||||
Python type.
|
||||
"""
|
||||
try:
|
||||
parsed = json.loads(value)
|
||||
except (ValueError, TypeError):
|
||||
return value
|
||||
if isinstance(parsed, expected_python_type):
|
||||
logger.debug(
|
||||
"coerce_tool_args: coerced string to %s via json.loads",
|
||||
expected_python_type.__name__,
|
||||
)
|
||||
return parsed
|
||||
return value
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,313 @@
|
||||
"""xAI image generation backend.
|
||||
|
||||
Exposes xAI's ``grok-imagine-image`` model as an
|
||||
:class:`ImageGenProvider` implementation.
|
||||
|
||||
Features:
|
||||
- Text-to-image generation
|
||||
- Multiple aspect ratios (1:1, 16:9, 9:16, etc.)
|
||||
- Multiple resolutions (1K, 2K)
|
||||
- Base64 output saved to cache
|
||||
|
||||
Selection precedence (first hit wins):
|
||||
1. ``XAI_IMAGE_MODEL`` env var
|
||||
2. ``image_gen.xai.model`` in ``config.yaml``
|
||||
3. :data:`DEFAULT_MODEL`
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
from agent.image_gen_provider import (
|
||||
DEFAULT_ASPECT_RATIO,
|
||||
ImageGenProvider,
|
||||
error_response,
|
||||
resolve_aspect_ratio,
|
||||
save_b64_image,
|
||||
success_response,
|
||||
)
|
||||
from tools.xai_http import hermes_xai_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model catalog
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
API_MODEL = "grok-imagine-image"
|
||||
|
||||
_MODELS: Dict[str, Dict[str, Any]] = {
|
||||
"grok-imagine-image": {
|
||||
"display": "Grok Imagine Image",
|
||||
"speed": "~5-10s",
|
||||
"strengths": "Fast, high-quality",
|
||||
},
|
||||
}
|
||||
|
||||
DEFAULT_MODEL = "grok-imagine-image"
|
||||
|
||||
# xAI aspect ratios (more options than FAL/OpenAI)
|
||||
_XAI_ASPECT_RATIOS = {
|
||||
"landscape": "16:9",
|
||||
"square": "1:1",
|
||||
"portrait": "9:16",
|
||||
"4:3": "4:3",
|
||||
"3:4": "3:4",
|
||||
"3:2": "3:2",
|
||||
"2:3": "2:3",
|
||||
}
|
||||
|
||||
# xAI resolutions
|
||||
_XAI_RESOLUTIONS = {
|
||||
"1k": "1024",
|
||||
"2k": "2048",
|
||||
}
|
||||
|
||||
DEFAULT_RESOLUTION = "1k"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _load_xai_config() -> Dict[str, Any]:
    """Return the ``image_gen.xai`` mapping from config.yaml.

    Yields an empty dict when the config cannot be loaded or when any level
    of the ``image_gen`` → ``xai`` path is missing or not a mapping.  Load
    failures are logged at debug level and never propagate.
    """
    try:
        from hermes_cli.config import load_config

        config = load_config()
        if not isinstance(config, dict):
            return {}
        image_gen = config.get("image_gen")
        if not isinstance(image_gen, dict):
            return {}
        xai = image_gen.get("xai")
        if not isinstance(xai, dict):
            return {}
        return xai
    except Exception as exc:
        logger.debug("Could not load image_gen.xai config: %s", exc)
        return {}
|
||||
|
||||
|
||||
def _resolve_model() -> Tuple[str, Dict[str, Any]]:
    """Pick the model to use and return ``(model_id, meta)``.

    The ``XAI_IMAGE_MODEL`` env var wins, then ``model`` from the
    ``image_gen.xai`` config section, then :data:`DEFAULT_MODEL`.  A
    candidate is only honoured when it is a key of ``_MODELS``.
    """
    from_env = os.environ.get("XAI_IMAGE_MODEL")
    if from_env and from_env in _MODELS:
        return from_env, _MODELS[from_env]

    # The config is only consulted when the env var did not settle it.
    from_config = _load_xai_config().get("model")
    if isinstance(from_config, str) and from_config and from_config in _MODELS:
        return from_config, _MODELS[from_config]

    return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL]
|
||||
|
||||
|
||||
def _resolve_resolution() -> str:
    """Return the resolution key from config, or :data:`DEFAULT_RESOLUTION`.

    The configured value is used only when it is a string that is a key of
    ``_XAI_RESOLUTIONS``.
    """
    configured = _load_xai_config().get("resolution")
    if not isinstance(configured, str) or not configured:
        return DEFAULT_RESOLUTION
    if configured not in _XAI_RESOLUTIONS:
        return DEFAULT_RESOLUTION
    return configured
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provider
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class XAIImageGenProvider(ImageGenProvider):
|
||||
"""xAI ``grok-imagine-image`` backend."""
|
||||
|
||||
@property
def name(self) -> str:
    """Short provider identifier."""
    provider_id = "xai"
    return provider_id
|
||||
|
||||
@property
def display_name(self) -> str:
    """Human-readable provider name."""
    label = "xAI (Grok)"
    return label
|
||||
|
||||
def is_available(self) -> bool:
    """Return True when a usable ``XAI_API_KEY`` is set in the environment.

    The key is stripped before the check, matching ``generate``, so a
    whitespace-only value is not reported as available only to be rejected
    as missing at generation time.
    """
    return bool(os.getenv("XAI_API_KEY", "").strip())
|
||||
|
||||
def list_models(self) -> List[Dict[str, Any]]:
    """List every entry of ``_MODELS`` as an ``id``/``display``/``speed``/``strengths`` dict.

    Missing ``display`` falls back to the model id; missing ``speed`` and
    ``strengths`` fall back to an empty string.
    """
    catalog: List[Dict[str, Any]] = []
    for model_id, meta in _MODELS.items():
        entry = {"id": model_id}
        entry["display"] = meta.get("display", model_id)
        entry["speed"] = meta.get("speed", "")
        entry["strengths"] = meta.get("strengths", "")
        catalog.append(entry)
    return catalog
|
||||
|
||||
def get_setup_schema(self) -> Dict[str, Any]:
    """Return the setup metadata for this provider.

    The schema carries a display name, a badge, a one-line tag, and the
    single environment variable (``XAI_API_KEY``) to prompt for.
    """
    api_key_var = {
        "key": "XAI_API_KEY",
        "prompt": "xAI API key",
        "url": "https://console.x.ai/",
    }
    schema: Dict[str, Any] = {}
    schema["name"] = "xAI (Grok)"
    schema["badge"] = "paid"
    schema["tag"] = "Native xAI image generation via grok-imagine-image"
    schema["env_vars"] = [api_key_var]
    return schema
|
||||
|
||||
def generate(
|
||||
self,
|
||||
prompt: str,
|
||||
aspect_ratio: str = DEFAULT_ASPECT_RATIO,
|
||||
**kwargs: Any,
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate an image using xAI's grok-imagine-image."""
|
||||
api_key = os.getenv("XAI_API_KEY", "").strip()
|
||||
if not api_key:
|
||||
return error_response(
|
||||
error="XAI_API_KEY not set. Get one at https://console.x.ai/",
|
||||
error_type="missing_api_key",
|
||||
provider="xai",
|
||||
aspect_ratio=aspect_ratio,
|
||||
)
|
||||
|
||||
model_id, meta = _resolve_model()
|
||||
aspect = resolve_aspect_ratio(aspect_ratio)
|
||||
xai_ar = _XAI_ASPECT_RATIOS.get(aspect, "1:1")
|
||||
resolution = _resolve_resolution()
|
||||
xai_res = _XAI_RESOLUTIONS.get(resolution, "1024")
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"model": API_MODEL,
|
||||
"prompt": prompt,
|
||||
"aspect_ratio": xai_ar,
|
||||
"resolution": xai_res,
|
||||
}
|
||||
|
||||
headers = {
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json",
|
||||
"User-Agent": hermes_xai_user_agent(),
|
||||
}
|
||||
|
||||
base_url = (os.getenv("XAI_BASE_URL") or "https://api.x.ai/v1").strip().rstrip("/")
|
||||
|
||||
try:
|
||||
response = requests.post(
|
||||
f"{base_url}/images/generations",
|
||||
headers=headers,
|
||||
json=payload,
|
||||
timeout=120,
|
||||
)
|
||||
response.raise_for_status()
|
||||
except requests.HTTPError as exc:
|
||||
status = exc.response.status_code if exc.response else 0
|
||||
try:
|
||||
err_msg = exc.response.json().get("error", {}).get("message", exc.response.text[:300])
|
||||
except Exception:
|
||||
err_msg = exc.response.text[:300] if exc.response else str(exc)
|
||||
logger.error("xAI image gen failed (%d): %s", status, err_msg)
|
||||
return error_response(
|
||||
error=f"xAI image generation failed ({status}): {err_msg}",
|
||||
error_type="api_error",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
except requests.Timeout:
|
||||
return error_response(
|
||||
error="xAI image generation timed out (120s)",
|
||||
error_type="timeout",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
except requests.ConnectionError as exc:
|
||||
return error_response(
|
||||
error=f"xAI connection error: {exc}",
|
||||
error_type="connection_error",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
try:
|
||||
result = response.json()
|
||||
except Exception as exc:
|
||||
return error_response(
|
||||
error=f"xAI returned invalid JSON: {exc}",
|
||||
error_type="invalid_response",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
# Parse response — xAI returns data[0].b64_json or data[0].url
|
||||
data = result.get("data", [])
|
||||
if not data:
|
||||
return error_response(
|
||||
error="xAI returned no image data",
|
||||
error_type="empty_response",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
first = data[0]
|
||||
b64 = first.get("b64_json")
|
||||
url = first.get("url")
|
||||
|
||||
if b64:
|
||||
try:
|
||||
saved_path = save_b64_image(b64, prefix=f"xai_{model_id}")
|
||||
except Exception as exc:
|
||||
return error_response(
|
||||
error=f"Could not save image to cache: {exc}",
|
||||
error_type="io_error",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
image_ref = str(saved_path)
|
||||
elif url:
|
||||
image_ref = url
|
||||
else:
|
||||
return error_response(
|
||||
error="xAI response contained neither b64_json nor URL",
|
||||
error_type="empty_response",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
extra: Dict[str, Any] = {
|
||||
"resolution": xai_res,
|
||||
}
|
||||
|
||||
return success_response(
|
||||
image=image_ref,
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
provider="xai",
|
||||
extra=extra,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plugin registration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def register(ctx: Any) -> None:
    """Plugin entry point: hand a new xAI provider to the image gen registry."""
    provider = XAIImageGenProvider()
    ctx.register_image_gen_provider(provider)
|
||||
@@ -0,0 +1,7 @@
|
||||
name: xai
|
||||
version: 1.0.0
|
||||
description: "xAI image generation backend (grok-imagine-image). Text-to-image."
|
||||
author: Julien Talbot
|
||||
kind: backend
|
||||
requires_env:
|
||||
- XAI_API_KEY
|
||||
@@ -0,0 +1,70 @@
|
||||
# Strike Freedom Cockpit — dashboard skin demo
|
||||
|
||||
Demonstrates how the dashboard skin+plugin system can be used to build a
|
||||
fully custom cockpit-style reskin without touching the core dashboard.
|
||||
|
||||
Two pieces:
|
||||
|
||||
- `theme/strike-freedom.yaml` — a dashboard theme YAML that paints the
|
||||
palette, typography, layout variant (`cockpit`), component chrome
|
||||
(notched card corners, scanlines, accent colors), and declares asset
|
||||
slots (`hero`, `crest`, `bg`).
|
||||
- `dashboard/` — a plugin that populates the `sidebar`, `header-left`,
|
||||
and `footer-right` slots reserved by the cockpit layout. The sidebar
|
||||
renders an MS-STATUS panel with segmented telemetry bars driven by
|
||||
real agent status; the header-left injects a COMPASS crest; the
|
||||
footer-right replaces the default org tagline.
|
||||
|
||||
## Install
|
||||
|
||||
1. **Theme** — copy the theme YAML into your Hermes home:
|
||||
|
||||
```
|
||||
cp theme/strike-freedom.yaml ~/.hermes/dashboard-themes/
|
||||
```
|
||||
|
||||
2. **Plugin** — the `dashboard/` directory gets auto-discovered because
|
||||
it lives under `plugins/` in the repo. On a user install, copy the
|
||||
whole plugin directory into `~/.hermes/plugins/`:
|
||||
|
||||
```
|
||||
cp -r . ~/.hermes/plugins/strike-freedom-cockpit
|
||||
```
|
||||
|
||||
3. Restart the web UI (or `GET /api/dashboard/plugins/rescan`), open it,
|
||||
pick **Strike Freedom** from the theme switcher.
|
||||
|
||||
## Customising the artwork
|
||||
|
||||
The sidebar plugin reads `--theme-asset-hero` and `--theme-asset-crest`
|
||||
from the active theme. Drop your own URLs into the theme YAML:
|
||||
|
||||
```yaml
|
||||
assets:
|
||||
hero: "/my-images/strike-freedom.png"
|
||||
crest: "/my-images/compass-crest.svg"
|
||||
bg: "/my-images/cosmic-era-bg.jpg"
|
||||
```
|
||||
|
||||
The plugin reads those at render time — no plugin code changes needed
|
||||
to swap artwork across themes.
|
||||
|
||||
## What this demo proves
|
||||
|
||||
The dashboard skin+plugin system supports (ref: `web/src/themes/types.ts`,
|
||||
`web/src/plugins/slots.ts`):
|
||||
|
||||
- Palette, typography, font URLs, density, radius — already present
|
||||
- **Asset URLs exposed as CSS vars** (bg / hero / crest / logo /
|
||||
sidebar / header + arbitrary `custom.*`)
|
||||
- **Raw `customCSS` blocks** injected as scoped `<style>` tags
|
||||
- **Per-component style overrides** (card / header / sidebar / backdrop /
|
||||
tab / progress / footer / badge / page) via CSS vars
|
||||
- **`layoutVariant`** — `standard`, `cockpit`, or `tiled`
|
||||
- **Plugin slots** — 10 named shell slots plugins can inject into
|
||||
(`backdrop`, `header-left/right/banner`, `sidebar`, `pre-main`,
|
||||
`post-main`, `footer-left/right`, `overlay`)
|
||||
- **Route overrides** — plugins can replace a built-in page entirely
|
||||
(`tab.override: "/"`) instead of just adding a tab
|
||||
- **Hidden plugins** — slot-only plugins that never show in the nav
|
||||
(`tab.hidden: true`) — as used here
|
||||
@@ -0,0 +1,309 @@
|
||||
/**
|
||||
* Strike Freedom Cockpit — dashboard plugin demo.
|
||||
*
|
||||
* A slot-only plugin (manifest sets tab.hidden: true) that populates
|
||||
* three shell slots when the user has the ``strike-freedom`` theme
|
||||
* selected (or any theme that picks layoutVariant: cockpit):
|
||||
*
|
||||
* - sidebar → MS-STATUS panel: ENERGY / SHIELD / POWER bars,
|
||||
* ZGMF-X20A identity line, pilot block, hero
|
||||
* render (from --theme-asset-hero when the theme
|
||||
* provides one).
|
||||
* - header-left → COMPASS faction crest (uses --theme-asset-crest
|
||||
* if provided, falls back to a geometric SVG).
|
||||
* - footer-right → COSMIC ERA tagline that replaces the default
|
||||
* footer org line.
|
||||
*
|
||||
* The plugin demonstrates every extension point added alongside the
|
||||
* slot system: registerSlot, tab.hidden, reading theme asset CSS vars
|
||||
* from plugin code, and rendering above the built-in route content.
|
||||
*/
|
||||
(function () {
|
||||
"use strict";
|
||||
|
||||
const SDK = window.__HERMES_PLUGIN_SDK__;
|
||||
const PLUGINS = window.__HERMES_PLUGINS__;
|
||||
if (!SDK || !PLUGINS || !PLUGINS.registerSlot) {
|
||||
// Old dashboard bundle without slot support — bail silently rather
|
||||
// than breaking the page.
|
||||
return;
|
||||
}
|
||||
|
||||
const { React } = SDK;
|
||||
const { useState, useEffect } = SDK.hooks;
|
||||
const { api } = SDK;
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
/**
 * Look up a CSS custom property on the document root element and return
 * its trimmed computed value. Returns "" when no `document` global exists.
 */
function cssVar(name) {
  const hasDocument = typeof document !== "undefined";
  if (!hasDocument) {
    return "";
  }
  const rootStyle = getComputedStyle(document.documentElement);
  const rawValue = rootStyle.getPropertyValue(name);
  return rawValue.trim();
}
|
||||
|
||||
/**
 * Segmented progress bar: a label/percentage row above ten chip-shaped
 * cells, of which the first `Math.round(value / 10)` are painted in `color`.
 */
function TelemetryBar(props) {
  const label = props.label;
  const value = props.value;
  const color = props.color;

  // Cell i is lit when the rounded tens count exceeds its index.
  const litCount = Math.round(value / 10);
  const cells = Array.from({ length: 10 }, function (_unused, i) {
    return React.createElement("span", {
      key: i,
      style: {
        flex: 1,
        height: 8,
        background: litCount > i ? color : "rgba(255,255,255,0.06)",
        transition: "background 200ms",
        clipPath: "polygon(2px 0, 100% 0, calc(100% - 2px) 100%, 0 100%)",
      },
    });
  });

  const captionStyle = {
    display: "flex",
    justifyContent: "space-between",
    fontSize: "0.65rem",
    letterSpacing: "0.12em",
    opacity: 0.75,
  };
  const caption = React.createElement(
    "div",
    { style: captionStyle },
    React.createElement("span", null, label),
    React.createElement("span", { style: { color: color, fontWeight: 700 } }, value + "%"),
  );

  const cellRow = React.createElement(
    "div",
    { style: { display: "flex", gap: 2 } },
    cells,
  );

  return React.createElement(
    "div",
    { style: { display: "flex", flexDirection: "column", gap: 4 } },
    caption,
    cellRow,
  );
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Sidebar: MS-STATUS panel
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
/**
 * Sidebar slot: MS-STATUS panel with an identity header, optional hero
 * artwork, a pilot block, three telemetry bars and an online/offline badge.
 */
function SidebarSlot() {
  // Fetch the agent status once on mount so the panel isn't just a static
  // decoration. Until the response arrives, or if the request fails,
  // `status` stays null and the placeholder values below are shown.
  const [status, setStatus] = useState(null);
  useEffect(function () {
    let cancel = false;
    api.getStatus()
      .then(function (s) { if (!cancel) setStatus(s); })
      .catch(function () {});
    // Ignore a late response after unmount.
    return function () { cancel = true; };
  }, []);

  // The status schema is not visible from this file, so accept either a
  // collection (anything with a numeric `length`) or a plain numeric count.
  // Returns null when neither applies so callers can fall back instead of
  // rendering "NaN%".
  function countOf(v) {
    if (v && typeof v.length === "number") return v.length;
    if (typeof v === "number" && isFinite(v)) return v;
    return null;
  }

  // Map real status signals to HUD telemetry. Energy/shield/power aren't
  // literal concepts on a software agent, so they are derived from adjacent
  // signals: energy from gateway-online health, shield from the connected
  // platforms, power from the active sessions.
  const online = Boolean(status && status.gateway_online);
  const platformCount = status && status.connected_platforms
    ? countOf(status.connected_platforms)
    : null;
  const sessionCount = status && status.active_sessions
    ? countOf(status.active_sessions)
    : null;

  const energy = online ? 92 : 18;
  const shield = platformCount !== null ? Math.min(100, 40 + (platformCount * 15)) : 70;
  const power = sessionCount !== null ? Math.min(100, 55 + (sessionCount * 10)) : 87;

  const hero = cssVar("--theme-asset-hero");

  // Badge colours follow the state: green when online, red when offline, so
  // "system offline" is not painted in the success colour.
  const badgeColor = online ? "#4ade80" : "#ff3a5e";
  const badgeBorder = online
    ? "1px solid rgba(74,222,128,0.4)"
    : "1px solid rgba(255,58,94,0.4)";

  return React.createElement(
    "div",
    {
      style: {
        padding: "1rem 0.75rem",
        display: "flex",
        flexDirection: "column",
        gap: "1rem",
        fontFamily: "var(--theme-font-display, sans-serif)",
        letterSpacing: "0.08em",
        textTransform: "uppercase",
        fontSize: "0.65rem",
      },
    },
    // Header line
    React.createElement(
      "div",
      {
        style: {
          borderBottom: "1px solid rgba(64,200,255,0.3)",
          paddingBottom: 8,
          display: "flex",
          flexDirection: "column",
          gap: 2,
        },
      },
      React.createElement("span", { style: { opacity: 0.6 } }, "ms status"),
      React.createElement("span", { style: { fontWeight: 700, fontSize: "0.85rem" } }, "zgmf-x20a"),
      React.createElement("span", { style: { opacity: 0.6, fontSize: "0.6rem" } }, "strike freedom"),
    ),
    // Hero artwork when the theme provides one, otherwise a dashed
    // placeholder telling the user how to set it.
    hero
      ? React.createElement("div", {
          style: {
            width: "100%",
            aspectRatio: "3 / 4",
            backgroundImage: hero,
            backgroundSize: "contain",
            backgroundPosition: "center",
            backgroundRepeat: "no-repeat",
            opacity: 0.85,
          },
          "aria-hidden": true,
        })
      : React.createElement("div", {
          style: {
            width: "100%",
            aspectRatio: "3 / 4",
            border: "1px dashed rgba(64,200,255,0.25)",
            display: "flex",
            alignItems: "center",
            justifyContent: "center",
            fontSize: "0.55rem",
            opacity: 0.4,
          },
        }, "hero slot — set assets.hero in theme"),
    // Pilot block
    React.createElement(
      "div",
      {
        style: {
          borderTop: "1px solid rgba(64,200,255,0.18)",
          borderBottom: "1px solid rgba(64,200,255,0.18)",
          padding: "8px 0",
          display: "flex",
          flexDirection: "column",
          gap: 2,
        },
      },
      React.createElement("span", { style: { opacity: 0.5, fontSize: "0.55rem" } }, "pilot"),
      React.createElement("span", { style: { fontWeight: 700 } }, "hermes agent"),
      React.createElement("span", { style: { opacity: 0.5, fontSize: "0.55rem" } }, "compass"),
    ),
    // Telemetry bars
    React.createElement(TelemetryBar, { label: "energy", value: energy, color: "#ffce3a" }),
    React.createElement(TelemetryBar, { label: "shield", value: shield, color: "#3fd3ff" }),
    React.createElement(TelemetryBar, { label: "power", value: power, color: "#ff3a5e" }),
    // Online/offline badge
    React.createElement(
      "div",
      {
        style: {
          marginTop: 4,
          padding: "6px 8px",
          border: badgeBorder,
          color: badgeColor,
          textAlign: "center",
          fontWeight: 700,
          fontSize: "0.6rem",
        },
      },
      online ? "system online" : "system offline",
    ),
  );
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Header-left: COMPASS crest
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
/**
 * Header-left slot: shows the theme's crest image when
 * `--theme-asset-crest` is set, otherwise a geometric SVG emblem.
 */
function HeaderCrestSlot() {
  const crestImage = cssVar("--theme-asset-crest");

  let emblem;
  if (crestImage) {
    // Theme-supplied artwork, painted as a background image.
    emblem = React.createElement("div", {
      style: {
        width: 28,
        height: 28,
        backgroundImage: crestImage,
        backgroundSize: "contain",
        backgroundPosition: "center",
        backgroundRepeat: "no-repeat",
      },
      "aria-hidden": true,
    });
  } else {
    // Fallback: two nested diamonds around a filled centre dot.
    const svgProps = {
      width: 28,
      height: 28,
      viewBox: "0 0 28 28",
      fill: "none",
      stroke: "currentColor",
      strokeWidth: 1.5,
      "aria-hidden": true,
    };
    emblem = React.createElement(
      "svg",
      svgProps,
      React.createElement("path", { d: "M14 2 L26 14 L14 26 L2 14 Z" }),
      React.createElement("path", { d: "M14 8 L20 14 L14 20 L8 14 Z" }),
      React.createElement("circle", { cx: 14, cy: 14, r: 2, fill: "currentColor" }),
    );
  }

  const wrapperStyle = {
    display: "flex",
    alignItems: "center",
    paddingLeft: 12,
    paddingRight: 8,
    color: "var(--color-accent, #3fd3ff)",
  };
  return React.createElement("div", { style: wrapperStyle }, emblem);
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Footer-right: COSMIC ERA tagline
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
/** Footer-right slot: a small uppercase tagline rendered in the display font. */
function FooterTaglineSlot() {
  const taglineStyle = {
    fontFamily: "var(--theme-font-display, sans-serif)",
    fontSize: "0.6rem",
    letterSpacing: "0.18em",
    textTransform: "uppercase",
    opacity: 0.75,
    mixBlendMode: "plus-lighter",
  };
  const tagline = "compass hermes systems / cosmic era 71";
  return React.createElement("span", { style: taglineStyle }, tagline);
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Hidden tab placeholder — tab.hidden=true means this never renders in
|
||||
// the nav, but we still register something sensible in case someone
|
||||
// manually navigates to /strike-freedom-cockpit (e.g. via a bookmark).
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
/**
 * Page component registered for the plugin's hidden tab; it only explains
 * that the plugin works through slots.
 */
function HiddenPage() {
  const noticeStyle = { padding: "2rem", opacity: 0.6, fontSize: "0.8rem" };
  const notice =
    "Strike Freedom cockpit is a slot-only plugin — it populates the sidebar, header, and footer instead of showing a tab page.";
  return React.createElement("div", { style: noticeStyle }, notice);
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Registration
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
const NAME = "strike-freedom-cockpit";

// Register the page component first, then each slot component in order.
PLUGINS.register(NAME, HiddenPage);
[
  ["sidebar", SidebarSlot],
  ["header-left", HeaderCrestSlot],
  ["footer-right", FooterTaglineSlot],
].forEach(function (entry) {
  PLUGINS.registerSlot(NAME, entry[0], entry[1]);
});
|
||||
})();
|
||||
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"name": "strike-freedom-cockpit",
|
||||
"label": "Strike Freedom Cockpit",
|
||||
"description": "MS-STATUS sidebar + header crest for the Strike Freedom theme",
|
||||
"icon": "Shield",
|
||||
"version": "1.0.0",
|
||||
"tab": {
|
||||
"path": "/strike-freedom-cockpit",
|
||||
"position": "end",
|
||||
"hidden": true
|
||||
},
|
||||
"slots": ["sidebar", "header-left", "footer-right"],
|
||||
"entry": "dist/index.js"
|
||||
}
|
||||
@@ -0,0 +1,126 @@
|
||||
# Strike Freedom — Hermes dashboard theme demo
|
||||
#
|
||||
# Copy this file to ~/.hermes/dashboard-themes/strike-freedom.yaml and
|
||||
# restart the web UI (or hit `/api/dashboard/plugins/rescan`). Pair with
|
||||
# the `strike-freedom-cockpit` plugin (plugins/strike-freedom-cockpit/)
|
||||
# for the full cockpit experience — this theme paints the palette,
|
||||
# chrome, and layout; the plugin supplies the MS-STATUS sidebar + header
|
||||
# crest that the cockpit layout variant reserves space for.
|
||||
#
|
||||
# Demonstrates every theme extension point added alongside the plugin
|
||||
# slot system: palette, typography, layoutVariant, assets, customCSS,
|
||||
# componentStyles, colorOverrides.
|
||||
name: strike-freedom
|
||||
label: "Strike Freedom"
|
||||
description: "Cockpit HUD — deep navy + cyan + gold accents"
|
||||
|
||||
# ------- palette (3-layer) -------
|
||||
palette:
|
||||
background: "#05091a"
|
||||
midground: "#d8f0ff"
|
||||
foreground:
|
||||
hex: "#ffffff"
|
||||
alpha: 0
|
||||
warmGlow: "rgba(255, 199, 55, 0.24)"
|
||||
noiseOpacity: 0.7
|
||||
|
||||
# ------- typography -------
|
||||
typography:
|
||||
fontSans: '"Orbitron", "Eurostile", "Bank Gothic", "Impact", sans-serif'
|
||||
fontMono: '"Share Tech Mono", "JetBrains Mono", ui-monospace, monospace'
|
||||
fontDisplay: '"Orbitron", "Eurostile", "Impact", sans-serif'
|
||||
fontUrl: "https://fonts.googleapis.com/css2?family=Orbitron:wght@400;500;600;700;800&family=Share+Tech+Mono&display=swap"
|
||||
baseSize: "14px"
|
||||
lineHeight: "1.5"
|
||||
letterSpacing: "0.04em"
|
||||
|
||||
# ------- layout -------
|
||||
layout:
|
||||
radius: "0"
|
||||
density: "compact"
|
||||
|
||||
# ``cockpit`` reserves a 260px left rail that the shell renders when the
|
||||
# user is on this theme. A paired plugin populates the rail via the
|
||||
# ``sidebar`` slot; with no plugin the rail shows a placeholder.
|
||||
layoutVariant: cockpit
|
||||
|
||||
# ------- assets -------
|
||||
# Use any URL (https, data:, /dashboard-plugins/...) or a pre-wrapped
|
||||
# ``url(...)``/``linear-gradient(...)`` expression. The shell exposes
|
||||
# each as a CSS var so plugins can read the same imagery.
|
||||
assets:
|
||||
bg: "linear-gradient(140deg, #05091a 0%, #0a1530 55%, #102048 100%)"
|
||||
# Plugin reads --theme-asset-hero / --theme-asset-crest to populate
|
||||
# its sidebar hero render + header crest. Replace these URLs with your
|
||||
# own artwork (copy files into ~/.hermes/dashboard-themes/assets/ and
|
||||
# reference them as /dashboard-themes-assets/strike-freedom/hero.png
|
||||
# once that static route is wired up — for now use inline data URLs or
|
||||
# remote URLs).
|
||||
hero: ""
|
||||
crest: ""
|
||||
|
||||
# ------- component chrome -------
|
||||
# Each bucket's props become CSS vars (--component-<bucket>-<kebab>) that
|
||||
# built-in shell components (Card, header, sidebar, backdrop) consume.
|
||||
componentStyles:
|
||||
card:
|
||||
# Notched corners on the top-left + bottom-right — classic mecha UI.
|
||||
clipPath: "polygon(12px 0, 100% 0, 100% calc(100% - 12px), calc(100% - 12px) 100%, 0 100%, 0 12px)"
|
||||
background: "linear-gradient(180deg, rgba(10, 22, 52, 0.85) 0%, rgba(5, 9, 26, 0.92) 100%)"
|
||||
boxShadow: "inset 0 0 0 1px rgba(64, 200, 255, 0.28), 0 0 18px -6px rgba(64, 200, 255, 0.4)"
|
||||
header:
|
||||
background: "linear-gradient(180deg, rgba(16, 32, 72, 0.95) 0%, rgba(5, 9, 26, 0.9) 100%)"
|
||||
sidebar:
|
||||
background: "linear-gradient(180deg, rgba(8, 18, 42, 0.88) 0%, rgba(5, 9, 26, 0.85) 100%)"
|
||||
tab:
|
||||
clipPath: "polygon(6px 0, 100% 0, calc(100% - 6px) 100%, 0 100%)"
|
||||
backdrop:
|
||||
backgroundSize: "cover"
|
||||
backgroundPosition: "center"
|
||||
fillerOpacity: "1"
|
||||
fillerBlendMode: "normal"
|
||||
|
||||
# ------- color overrides -------
|
||||
colorOverrides:
|
||||
primary: "#ffce3a"
|
||||
primaryForeground: "#05091a"
|
||||
accent: "#3fd3ff"
|
||||
accentForeground: "#05091a"
|
||||
ring: "#3fd3ff"
|
||||
success: "#4ade80"
|
||||
warning: "#ffce3a"
|
||||
destructive: "#ff3a5e"
|
||||
border: "rgba(64, 200, 255, 0.28)"
|
||||
|
||||
# ------- customCSS -------
|
||||
# Raw CSS injected as a scoped <style> tag on theme apply. Use this for
|
||||
# selector-level tweaks componentStyles can't express (pseudo-elements,
|
||||
# animations, media queries). Bounded to 32 KiB per theme.
|
||||
customCSS: |
|
||||
/* Scanline overlay — subtle, only when theme is active. */
|
||||
:root[data-layout-variant="cockpit"] body::before {
|
||||
content: "";
|
||||
position: fixed;
|
||||
inset: 0;
|
||||
pointer-events: none;
|
||||
z-index: 100;
|
||||
background: repeating-linear-gradient(
|
||||
to bottom,
|
||||
transparent 0px,
|
||||
transparent 2px,
|
||||
rgba(64, 200, 255, 0.035) 3px,
|
||||
rgba(64, 200, 255, 0.035) 4px
|
||||
);
|
||||
mix-blend-mode: screen;
|
||||
}
|
||||
|
||||
/* Chevron pips on card corners. */
|
||||
[data-layout-variant="cockpit"] .border-border::before,
|
||||
[data-layout-variant="cockpit"] .border-border::after {
|
||||
content: "";
|
||||
position: absolute;
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
border: 1px solid rgba(64, 200, 255, 0.55);
|
||||
pointer-events: none;
|
||||
}
|
||||
+6
-8
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "hermes-agent"
|
||||
version = "0.10.0"
|
||||
version = "0.11.0"
|
||||
description = "The self-improving AI agent — creates skills from experience, improves them during use, and runs anywhere"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
@@ -40,11 +40,11 @@ dependencies = [
|
||||
modal = ["modal>=1.0.0,<2"]
|
||||
daytona = ["daytona>=0.148.0,<1"]
|
||||
dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2", "ty>=0.0.1a29,<0.0.22", "ruff"]
|
||||
messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4", "qrcode>=7.0,<8", "mutagen>=1.45,<2", "aiohttp-socks>=0.9,<1"]
|
||||
messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4", "qrcode>=7.0,<8"]
|
||||
cron = ["croniter>=6.0.0,<7"]
|
||||
slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
|
||||
matrix = ["mautrix[encryption]>=0.20,<1", "Markdown>=3.6,<4", "aiosqlite>=0.20", "asyncpg>=0.29"]
|
||||
cli = ["simple-term-menu>=1.0,<2", "tiktoken>=0.7,<1", "Pillow>=10,<12"]
|
||||
cli = ["simple-term-menu>=1.0,<2"]
|
||||
tts-premium = ["elevenlabs>=1.0,<2"]
|
||||
voice = [
|
||||
# Local STT pulls in wheel-only transitive deps (ctranslate2, onnxruntime),
|
||||
@@ -58,7 +58,7 @@ pty = [
|
||||
"pywinpty>=2.0.0,<3; sys_platform == 'win32'",
|
||||
]
|
||||
honcho = ["honcho-ai>=2.0.1,<3"]
|
||||
mcp = ["mcp>=1.2.0,<2", "psutil>=5.9,<7"]
|
||||
mcp = ["mcp>=1.2.0,<2"]
|
||||
homeassistant = ["aiohttp>=3.9.0,<4"]
|
||||
sms = ["aiohttp>=3.9.0,<4"]
|
||||
acp = ["agent-client-protocol>=0.9.0,<1.0"]
|
||||
@@ -85,9 +85,7 @@ rl = [
|
||||
"fastapi>=0.104.0,<1",
|
||||
"uvicorn[standard]>=0.24.0,<1",
|
||||
"wandb>=0.15.0,<1",
|
||||
"datasets>=2.14,<3",
|
||||
]
|
||||
tts-local = ["neutts[all]", "soundfile>=0.12,<1"]
|
||||
yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git@bfb0c88062450f46341bd9a5298903fc2e952a5c ; python_version >= '3.12'"]
|
||||
all = [
|
||||
"hermes-agent[modal]",
|
||||
@@ -122,13 +120,13 @@ hermes-agent = "run_agent:main"
|
||||
hermes-acp = "acp_adapter.entry:main"
|
||||
|
||||
[tool.setuptools]
|
||||
py-modules = ["run_agent", "model_tools", "toolsets", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils"]
|
||||
py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "rl_cli", "utils"]
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
hermes_cli = ["web_dist/**/*"]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*", "scripts"]
|
||||
include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
|
||||
@@ -19,23 +19,18 @@ Environment Variables:
|
||||
OPENROUTER_API_KEY: API key for OpenRouter (required for agent)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
import fire
|
||||
import yaml
|
||||
|
||||
from hermes_constants import get_hermes_home, OPENROUTER_BASE_URL
|
||||
|
||||
# Load .env from ~/.hermes/.env first, then project root as dev fallback.
|
||||
# User-managed env files should override stale shell exports on restart.
|
||||
_hermes_home = get_hermes_home()
|
||||
_project_env = Path(__file__).parent.parent / '.env'
|
||||
_project_env = Path(__file__).parent / '.env'
|
||||
|
||||
from hermes_cli.env_loader import load_hermes_dotenv
|
||||
|
||||
@@ -65,6 +60,8 @@ from tools.rl_training_tool import get_missing_keys
|
||||
# Config Loading
|
||||
# ============================================================================
|
||||
|
||||
from hermes_constants import get_hermes_home, OPENROUTER_BASE_URL
|
||||
|
||||
DEFAULT_MODEL = "anthropic/claude-opus-4.5"
|
||||
DEFAULT_BASE_URL = OPENROUTER_BASE_URL
|
||||
|
||||
+56
-29
@@ -37,10 +37,7 @@ import time
|
||||
import threading
|
||||
from types import SimpleNamespace
|
||||
import uuid
|
||||
from typing import Callable, List, Dict, Any, Optional, TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from agent.rate_limit_tracker import RateLimitState
|
||||
from typing import List, Dict, Any, Optional
|
||||
from openai import OpenAI
|
||||
import fire
|
||||
from datetime import datetime
|
||||
@@ -265,6 +262,7 @@ _MAX_TOOL_WORKERS = 8
|
||||
_DESTRUCTIVE_PATTERNS = re.compile(
|
||||
r"""(?:^|\s|&&|\|\||;|`)(?:
|
||||
rm\s|rmdir\s|
|
||||
cp\s|install\s|
|
||||
mv\s|
|
||||
sed\s+-i|
|
||||
truncate\s|
|
||||
@@ -725,17 +723,17 @@ class AIAgent:
|
||||
provider_require_parameters: bool = False,
|
||||
provider_data_collection: str = None,
|
||||
session_id: str = None,
|
||||
tool_progress_callback: Callable[..., Any] = None,
|
||||
tool_start_callback: Callable[..., Any] = None,
|
||||
tool_complete_callback: Callable[..., Any] = None,
|
||||
thinking_callback: Callable[..., Any] = None,
|
||||
reasoning_callback: Callable[..., Any] = None,
|
||||
clarify_callback: Callable[..., Any] = None,
|
||||
step_callback: Callable[..., Any] = None,
|
||||
stream_delta_callback: Callable[..., Any] = None,
|
||||
interim_assistant_callback: Callable[..., Any] = None,
|
||||
tool_gen_callback: Callable[..., Any] = None,
|
||||
status_callback: Callable[..., Any] = None,
|
||||
tool_progress_callback: callable = None,
|
||||
tool_start_callback: callable = None,
|
||||
tool_complete_callback: callable = None,
|
||||
thinking_callback: callable = None,
|
||||
reasoning_callback: callable = None,
|
||||
clarify_callback: callable = None,
|
||||
step_callback: callable = None,
|
||||
stream_delta_callback: callable = None,
|
||||
interim_assistant_callback: callable = None,
|
||||
tool_gen_callback: callable = None,
|
||||
status_callback: callable = None,
|
||||
max_tokens: int = None,
|
||||
reasoning_config: Dict[str, Any] = None,
|
||||
service_tier: str = None,
|
||||
@@ -1051,7 +1049,7 @@ class AIAgent:
|
||||
for quiet_logger in [
|
||||
'tools', # all tools.* (terminal, browser, web, file, etc.)
|
||||
'run_agent', # agent runner internals
|
||||
'scripts.trajectory_compressor',
|
||||
'trajectory_compressor',
|
||||
'cron', # scheduler (only relevant in daemon mode)
|
||||
'hermes_cli', # CLI helpers
|
||||
]:
|
||||
@@ -1551,6 +1549,17 @@ class AIAgent:
|
||||
_agent_section = {}
|
||||
self._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
|
||||
|
||||
# App-level API retry count (wraps each model API call). Default 3,
|
||||
# overridable via agent.api_max_retries in config.yaml. See #11616.
|
||||
try:
|
||||
_raw_api_retries = _agent_section.get("api_max_retries", 3)
|
||||
_api_retries = int(_raw_api_retries)
|
||||
if _api_retries < 1:
|
||||
_api_retries = 1 # 1 = no retry (single attempt)
|
||||
except (TypeError, ValueError):
|
||||
_api_retries = 3
|
||||
self._api_max_retries = _api_retries
|
||||
|
||||
# Initialize context compressor for automatic context management
|
||||
# Compresses conversation when approaching model's context limit
|
||||
# Configuration via config.yaml (compression section)
|
||||
@@ -4770,7 +4779,7 @@ class AIAgent:
|
||||
def _close_request_openai_client(self, client: Any, *, reason: str) -> None:
|
||||
self._close_openai_client(client, reason=reason, shared=False)
|
||||
|
||||
def _run_codex_stream(self, api_kwargs: dict, client: Any = None, on_first_delta: Callable[..., Any] = None):
|
||||
def _run_codex_stream(self, api_kwargs: dict, client: Any = None, on_first_delta: callable = None):
|
||||
"""Execute one streaming Responses API request and return the final response."""
|
||||
import httpx as _httpx
|
||||
|
||||
@@ -5469,7 +5478,7 @@ class AIAgent:
|
||||
)
|
||||
|
||||
def _interruptible_streaming_api_call(
|
||||
self, api_kwargs: dict, *, on_first_delta: Callable[..., Any] = None
|
||||
self, api_kwargs: dict, *, on_first_delta: callable = None
|
||||
):
|
||||
"""Streaming variant of _interruptible_api_call for real-time token delivery.
|
||||
|
||||
@@ -7408,15 +7417,12 @@ class AIAgent:
|
||||
_flush_temperature = _fixed_temp
|
||||
else:
|
||||
_flush_temperature = 0.3
|
||||
_flush_llm_kwargs: dict = {}
|
||||
if _flush_temperature is not None:
|
||||
_flush_llm_kwargs["temperature"] = _flush_temperature
|
||||
try:
|
||||
response = _call_llm(
|
||||
task="flush_memories",
|
||||
messages=api_messages,
|
||||
tools=[memory_tool_def],
|
||||
**_flush_llm_kwargs,
|
||||
temperature=_flush_temperature,
|
||||
max_tokens=5120,
|
||||
# timeout resolved from auxiliary.flush_memories.timeout config
|
||||
)
|
||||
@@ -8625,9 +8631,9 @@ class AIAgent:
|
||||
self,
|
||||
user_message: str,
|
||||
system_message: str = None,
|
||||
conversation_history: List[Dict[str, Any]] | None = None,
|
||||
conversation_history: List[Dict[str, Any]] = None,
|
||||
task_id: str = None,
|
||||
stream_callback: Optional[Callable[..., Any]] = None,
|
||||
stream_callback: Optional[callable] = None,
|
||||
persist_user_message: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
@@ -9265,7 +9271,7 @@ class AIAgent:
|
||||
|
||||
api_start_time = time.time()
|
||||
retry_count = 0
|
||||
max_retries = 3
|
||||
max_retries = self._api_max_retries
|
||||
primary_recovery_attempted = False
|
||||
max_compression_attempts = 3
|
||||
codex_auth_retry_attempted=False
|
||||
@@ -10231,7 +10237,7 @@ class AIAgent:
|
||||
auth_method = "Bearer (OAuth/setup-token)" if _is_oauth_token(key) else "x-api-key (API key)"
|
||||
print(f"{self.log_prefix}🔐 Anthropic 401 — authentication failed.")
|
||||
print(f"{self.log_prefix} Auth method: {auth_method}")
|
||||
print(f"{self.log_prefix} Token prefix: {str(key)[:12]}..." if key and len(str(key)) > 12 else f"{self.log_prefix} Token: (empty or short)")
|
||||
print(f"{self.log_prefix} Token prefix: {key[:12]}..." if key and len(key) > 12 else f"{self.log_prefix} Token: (empty or short)")
|
||||
print(f"{self.log_prefix} Troubleshooting:")
|
||||
from hermes_constants import display_hermes_home as _dhh_fn
|
||||
_dhh = _dhh_fn()
|
||||
@@ -10569,9 +10575,30 @@ class AIAgent:
|
||||
# Error is about the INPUT being too large — reduce context_length.
|
||||
# Try to parse the actual limit from the error message
|
||||
parsed_limit = parse_context_limit_from_error(error_msg)
|
||||
_provider_lower = (getattr(self, "provider", "") or "").lower()
|
||||
_base_lower = (getattr(self, "base_url", "") or "").rstrip("/").lower()
|
||||
is_minimax_provider = (
|
||||
_provider_lower in {"minimax", "minimax-cn"}
|
||||
or _base_lower.startswith((
|
||||
"https://api.minimax.io/anthropic",
|
||||
"https://api.minimaxi.com/anthropic",
|
||||
))
|
||||
)
|
||||
minimax_delta_only_overflow = (
|
||||
is_minimax_provider
|
||||
and parsed_limit is None
|
||||
and "context window exceeds limit (" in error_msg
|
||||
)
|
||||
if parsed_limit and parsed_limit < old_ctx:
|
||||
new_ctx = parsed_limit
|
||||
self._vprint(f"{self.log_prefix}⚠️ Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True)
|
||||
self._vprint(f"{self.log_prefix}Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True)
|
||||
elif minimax_delta_only_overflow:
|
||||
new_ctx = old_ctx
|
||||
self._vprint(
|
||||
f"{self.log_prefix}Provider reported overflow amount only; "
|
||||
f"keeping context_length at {old_ctx:,} tokens and compressing.",
|
||||
force=True,
|
||||
)
|
||||
else:
|
||||
# Step down to the next probe tier
|
||||
new_ctx = get_next_probe_tier(old_ctx)
|
||||
@@ -11575,7 +11602,7 @@ class AIAgent:
|
||||
messages.append(assistant_msg)
|
||||
|
||||
if reasoning_text:
|
||||
reasoning_preview = str(reasoning_text)[:500] + "..." if len(str(reasoning_text)) > 500 else reasoning_text
|
||||
reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
|
||||
logger.warning(
|
||||
"Reasoning-only response (no visible content) "
|
||||
"after exhausting retries and fallback. "
|
||||
@@ -11914,7 +11941,7 @@ class AIAgent:
|
||||
|
||||
return result
|
||||
|
||||
def chat(self, message: str, stream_callback: Optional[Callable[..., Any]] = None) -> str:
|
||||
def chat(self, message: str, stream_callback: Optional[callable] = None) -> str:
|
||||
"""
|
||||
Simple chat interface that returns just the final response.
|
||||
|
||||
|
||||
+39
-2
@@ -26,7 +26,6 @@ import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
@@ -45,6 +44,9 @@ AUTHOR_MAP = {
|
||||
"teknium@nousresearch.com": "teknium1",
|
||||
"127238744+teknium1@users.noreply.github.com": "teknium1",
|
||||
"343873859@qq.com": "DrStrangerUJN",
|
||||
"jefferson@heimdallstrategy.com": "Mind-Dragon",
|
||||
"130918800+devorun@users.noreply.github.com": "devorun",
|
||||
"maks.mir@yahoo.com": "say8hi",
|
||||
# contributors (from noreply pattern)
|
||||
"david.vv@icloud.com": "davidvv",
|
||||
"wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
|
||||
@@ -113,6 +115,8 @@ AUTHOR_MAP = {
|
||||
"josephzcan@gmail.com": "j0sephz",
|
||||
# contributors (manual mapping from git names)
|
||||
"ahmedsherif95@gmail.com": "asheriif",
|
||||
"dyxushuai@gmail.com": "dyxushuai",
|
||||
"33860762+etcircle@users.noreply.github.com": "etcircle",
|
||||
"liujinkun@bytedance.com": "liujinkun2025",
|
||||
"dmayhem93@gmail.com": "dmahan93",
|
||||
"fr@tecompanytea.com": "ifrederico",
|
||||
@@ -163,7 +167,10 @@ AUTHOR_MAP = {
|
||||
"socrates1024@gmail.com": "socrates1024",
|
||||
"seanalt555@gmail.com": "Salt-555",
|
||||
"satelerd@gmail.com": "satelerd",
|
||||
"dan@danlynn.com": "danklynn",
|
||||
"mattmaximo@hotmail.com": "MattMaximo",
|
||||
"numman.ali@gmail.com": "nummanali",
|
||||
"rohithsaimidigudla@gmail.com": "whitehatjr1001",
|
||||
"0xNyk@users.noreply.github.com": "0xNyk",
|
||||
"0xnykcd@googlemail.com": "0xNyk",
|
||||
"buraysandro9@gmail.com": "buray",
|
||||
@@ -408,6 +415,36 @@ AUTHOR_MAP = {
|
||||
"caliberoviv@gmail.com": "vivganes",
|
||||
"michaelfackerell@gmail.com": "MikeFac",
|
||||
"18024642@qq.com": "GuyCui",
|
||||
"eumael.mkt@gmail.com": "maelrx",
|
||||
# v0.11.0 additions
|
||||
"benbarclay@gmail.com": "benbarclay",
|
||||
"lijiawen@umich.edu": "Jiawen-lee",
|
||||
"oleksiy@kovyrin.net": "kovyrin",
|
||||
"kovyrin.claw@gmail.com": "kovyrin",
|
||||
"kaiobarb@gmail.com": "liftaris",
|
||||
"me@arihantsethia.com": "arihantsethia",
|
||||
"zhuofengwang2003@gmail.com": "coekfung",
|
||||
"teknium@noreply.github.com": "teknium1",
|
||||
"2114364329@qq.com": "cuyua9",
|
||||
"2557058999@qq.com": "Disaster-Terminator",
|
||||
"cine.dreamer.one@gmail.com": "LeonSGP43",
|
||||
"leozeli@qq.com": "leozeli",
|
||||
"linlehao@cuhk.edu.cn": "LehaoLin",
|
||||
"liutong@isacas.ac.cn": "I3eg1nner",
|
||||
"peterberthelsen@Peters-MacBook-Air.local": "PeterBerthelsen",
|
||||
"root@debian.debian": "lengxii",
|
||||
"roque@priveperfumeshn.com": "priveperfumes",
|
||||
"shijianzhi@shijianzhideMacBook-Pro.local": "sjz-ks",
|
||||
"topcheer@me.com": "topcheer",
|
||||
"walli@tencent.com": "walli",
|
||||
"zhuofengwang@tencent.com": "Zhuofeng-Wang",
|
||||
# no-github-match — keep as display names
|
||||
"clio-agent@sisyphuslabs.ai": "Sisyphus",
|
||||
"marco@rutimka.de": "Marco Rutsch",
|
||||
"paul@gamma.app": "Paul Bergeron",
|
||||
"zhangxicen@example.com": "zhangxicen",
|
||||
"codex@openai.invalid": "teknium1",
|
||||
"screenmachine@gmail.com": "teknium1",
|
||||
}
|
||||
|
||||
|
||||
@@ -686,7 +723,7 @@ def get_commits(since_tag=None):
|
||||
return commits
|
||||
|
||||
|
||||
def get_pr_number(subject: str) -> Optional[str]:
|
||||
def get_pr_number(subject: str) -> str:
|
||||
"""Extract PR number from commit subject if present."""
|
||||
match = re.search(r"#(\d+)", subject)
|
||||
if match:
|
||||
|
||||
@@ -267,7 +267,7 @@ def run_compression(input_dir: Path, output_dir: Path, config_path: str):
|
||||
# Import the compressor
|
||||
import sys
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
from scripts.trajectory_compressor import TrajectoryCompressor, CompressionConfig
|
||||
from trajectory_compressor import TrajectoryCompressor, CompressionConfig
|
||||
|
||||
print(f"\n🗜️ Running trajectory compression...")
|
||||
print(f" Input: {input_dir}")
|
||||
|
||||
@@ -0,0 +1,196 @@
|
||||
---
|
||||
name: design-md
|
||||
description: Author, validate, diff, and export DESIGN.md files — Google's open-source format spec that gives coding agents a persistent, structured understanding of a design system (tokens + rationale in one file). Use when building a design system, porting style rules between projects, generating UI with consistent brand, or auditing accessibility/contrast.
|
||||
version: 1.0.0
|
||||
author: Hermes Agent
|
||||
license: MIT
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [design, design-system, tokens, ui, accessibility, wcag, tailwind, dtcg, google]
|
||||
related_skills: [popular-web-designs, excalidraw, architecture-diagram]
|
||||
---
|
||||
|
||||
# DESIGN.md Skill
|
||||
|
||||
DESIGN.md is Google's open spec (Apache-2.0, `google-labs-code/design.md`) for
|
||||
describing a visual identity to coding agents. One file combines:
|
||||
|
||||
- **YAML front matter** — machine-readable design tokens (normative values)
|
||||
- **Markdown body** — human-readable rationale, organized into canonical sections
|
||||
|
||||
Tokens give exact values. Prose tells agents *why* those values exist and how to
|
||||
apply them. The CLI (`npx @google/design.md`) lints structure + WCAG contrast,
|
||||
diffs versions for regressions, and exports to Tailwind or W3C DTCG JSON.
|
||||
|
||||
## When to use this skill
|
||||
|
||||
- User asks for a DESIGN.md file, design tokens, or a design system spec
|
||||
- User wants consistent UI/brand across multiple projects or tools
|
||||
- User pastes an existing DESIGN.md and asks to lint, diff, export, or extend it
|
||||
- User asks to port a style guide into a format agents can consume
|
||||
- User wants contrast / WCAG accessibility validation on their color palette
|
||||
|
||||
For purely visual inspiration or layout examples, use `popular-web-designs`
|
||||
instead. This skill is for the *formal spec file* itself.
|
||||
|
||||
## File anatomy
|
||||
|
||||
```md
|
||||
---
|
||||
version: alpha
|
||||
name: Heritage
|
||||
description: Architectural minimalism meets journalistic gravitas.
|
||||
colors:
|
||||
primary: "#1A1C1E"
|
||||
secondary: "#6C7278"
|
||||
tertiary: "#B8422E"
|
||||
neutral: "#F7F5F2"
|
||||
typography:
|
||||
h1:
|
||||
fontFamily: Public Sans
|
||||
fontSize: 3rem
|
||||
fontWeight: 700
|
||||
lineHeight: 1.1
|
||||
letterSpacing: "-0.02em"
|
||||
body-md:
|
||||
fontFamily: Public Sans
|
||||
fontSize: 1rem
|
||||
rounded:
|
||||
sm: 4px
|
||||
md: 8px
|
||||
lg: 16px
|
||||
spacing:
|
||||
sm: 8px
|
||||
md: 16px
|
||||
lg: 24px
|
||||
components:
|
||||
button-primary:
|
||||
backgroundColor: "{colors.tertiary}"
|
||||
textColor: "#FFFFFF"
|
||||
rounded: "{rounded.sm}"
|
||||
padding: 12px
|
||||
button-primary-hover:
|
||||
backgroundColor: "{colors.primary}"
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Architectural Minimalism meets Journalistic Gravitas...
|
||||
|
||||
## Colors
|
||||
|
||||
- **Primary (#1A1C1E):** Deep ink for headlines and core text.
|
||||
- **Tertiary (#B8422E):** "Boston Clay" — the sole driver for interaction.
|
||||
|
||||
## Typography
|
||||
|
||||
Public Sans for everything except small all-caps labels...
|
||||
|
||||
## Components
|
||||
|
||||
`button-primary` is the only high-emphasis action on a page...
|
||||
```
|
||||
|
||||
## Token types
|
||||
|
||||
| Type | Format | Example |
|
||||
|------|--------|---------|
|
||||
| Color | `#` + hex (sRGB) | `"#1A1C1E"` |
|
||||
| Dimension | number + unit (`px`, `em`, `rem`) | `48px`, `-0.02em` |
|
||||
| Token reference | `{path.to.token}` | `{colors.primary}` |
|
||||
| Typography | object with `fontFamily`, `fontSize`, `fontWeight`, `lineHeight`, `letterSpacing`, `fontFeature`, `fontVariation` | see above |
|
||||
|
||||
Component property whitelist: `backgroundColor`, `textColor`, `typography`,
|
||||
`rounded`, `padding`, `size`, `height`, `width`. Variants (hover, active,
|
||||
pressed) are **separate component entries** with related key names
|
||||
(`button-primary-hover`), not nested.
|
||||
|
||||
## Canonical section order
|
||||
|
||||
Sections are optional, but present ones MUST appear in this order. A file with
|
||||
duplicate headings is rejected.
|
||||
|
||||
1. Overview (alias: Brand & Style)
|
||||
2. Colors
|
||||
3. Typography
|
||||
4. Layout (alias: Layout & Spacing)
|
||||
5. Elevation & Depth (alias: Elevation)
|
||||
6. Shapes
|
||||
7. Components
|
||||
8. Do's and Don'ts
|
||||
|
||||
Unknown sections are preserved rather than reported as errors. Unknown token names are accepted
|
||||
if the value type is valid. Unknown component properties produce a warning.
|
||||
|
||||
## Workflow: authoring a new DESIGN.md
|
||||
|
||||
1. **Ask the user** (or infer) the brand tone, accent color, and typography
|
||||
direction. If they provided a site, image, or vibe, translate it to the
|
||||
token shape above.
|
||||
2. **Write `DESIGN.md`** in their project root using `write_file`. Always
|
||||
include `name:` and `colors:`; other sections are optional but encouraged.
|
||||
3. **Use token references** (`{colors.primary}`) in the `components:` section
|
||||
instead of re-typing hex values. Keeps the palette single-source.
|
||||
4. **Lint it** (see below). Fix any broken references or WCAG failures
|
||||
before returning.
|
||||
5. **If the user has an existing project**, also write Tailwind or DTCG
|
||||
exports next to the file (`tailwind.theme.json`, `tokens.json`).
|
||||
|
||||
## Workflow: lint / diff / export
|
||||
|
||||
The CLI is `@google/design.md` (Node). Use `npx` — no global install needed.
|
||||
|
||||
```bash
|
||||
# Validate structure + token references + WCAG contrast
|
||||
npx -y @google/design.md lint DESIGN.md
|
||||
|
||||
# Compare two versions, fail on regression (exit 1 = regression)
|
||||
npx -y @google/design.md diff DESIGN.md DESIGN-v2.md
|
||||
|
||||
# Export to Tailwind theme JSON
|
||||
npx -y @google/design.md export --format tailwind DESIGN.md > tailwind.theme.json
|
||||
|
||||
# Export to W3C DTCG (Design Tokens Format Module) JSON
|
||||
npx -y @google/design.md export --format dtcg DESIGN.md > tokens.json
|
||||
|
||||
# Print the spec itself — useful when injecting into an agent prompt
|
||||
npx -y @google/design.md spec --rules-only --format json
|
||||
```
|
||||
|
||||
All commands accept `-` for stdin. `lint` returns exit 1 on errors. Use the
|
||||
`--format json` flag and parse the output if you need to report findings
|
||||
structurally.
|
||||
|
||||
### Lint rule reference (what the 7 rules catch)
|
||||
|
||||
- `broken-ref` (error) — `{colors.missing}` points at a non-existent token
|
||||
- `duplicate-section` (error) — same `## Heading` appears twice
|
||||
- `invalid-color`, `invalid-dimension`, `invalid-typography` (error)
|
||||
- `wcag-contrast` (warning/info) — component `textColor` vs `backgroundColor`
|
||||
ratio against WCAG AA (4.5:1) and AAA (7:1)
|
||||
- `unknown-component-property` (warning) — outside the whitelist above
|
||||
|
||||
When the user cares about accessibility, call this out explicitly in your
|
||||
summary — WCAG findings are the most load-bearing reason to use the CLI.
|
||||
|
||||
## Pitfalls
|
||||
|
||||
- **Don't nest component variants.** `button-primary.hover` is wrong;
|
||||
`button-primary-hover` as a sibling key is right.
|
||||
- **Hex colors must be quoted strings.** YAML treats an unquoted `#` that follows
|
||||
whitespace as the start of a comment, so `primary: #1A1C1E` parses as an empty (null) value.
|
||||
- **Negative dimensions need quotes too.** A bare leading `-` (as in `letterSpacing: -0.02em`) is
|
||||
easy to confuse with YAML sequence syntax — write `letterSpacing: "-0.02em"`.
|
||||
- **Section order is enforced.** If the user gives you prose in a random order,
|
||||
reorder it to match the canonical list before saving.
|
||||
- **`version: alpha` is the current spec version** (as of Apr 2026). The spec
|
||||
is marked alpha — watch for breaking changes.
|
||||
- **Token references resolve by dotted path.** `{colors.primary}` works;
|
||||
`{primary}` does not.
|
||||
|
||||
## Spec source of truth
|
||||
|
||||
- Repo: https://github.com/google-labs-code/design.md (Apache-2.0)
|
||||
- CLI: `@google/design.md` on npm
|
||||
- License of generated DESIGN.md files: whatever the user's project uses;
|
||||
the spec itself is Apache-2.0.
|
||||
@@ -0,0 +1,99 @@
|
||||
---
|
||||
version: alpha
|
||||
name: MyBrand
|
||||
description: One-sentence description of the visual identity.
|
||||
colors:
|
||||
primary: "#0F172A"
|
||||
secondary: "#64748B"
|
||||
tertiary: "#2563EB"
|
||||
neutral: "#F8FAFC"
|
||||
on-primary: "#FFFFFF"
|
||||
on-tertiary: "#FFFFFF"
|
||||
typography:
|
||||
h1:
|
||||
fontFamily: Inter
|
||||
fontSize: 3rem
|
||||
fontWeight: 700
|
||||
lineHeight: 1.1
|
||||
letterSpacing: "-0.02em"
|
||||
h2:
|
||||
fontFamily: Inter
|
||||
fontSize: 2rem
|
||||
fontWeight: 600
|
||||
lineHeight: 1.2
|
||||
body-md:
|
||||
fontFamily: Inter
|
||||
fontSize: 1rem
|
||||
lineHeight: 1.5
|
||||
label-caps:
|
||||
fontFamily: Inter
|
||||
fontSize: 0.75rem
|
||||
fontWeight: 600
|
||||
letterSpacing: "0.08em"
|
||||
rounded:
|
||||
sm: 4px
|
||||
md: 8px
|
||||
lg: 16px
|
||||
full: 9999px
|
||||
spacing:
|
||||
xs: 4px
|
||||
sm: 8px
|
||||
md: 16px
|
||||
lg: 24px
|
||||
xl: 48px
|
||||
components:
|
||||
button-primary:
|
||||
backgroundColor: "{colors.tertiary}"
|
||||
textColor: "{colors.on-tertiary}"
|
||||
rounded: "{rounded.sm}"
|
||||
padding: 12px
|
||||
button-primary-hover:
|
||||
backgroundColor: "{colors.primary}"
|
||||
textColor: "{colors.on-primary}"
|
||||
card:
|
||||
backgroundColor: "{colors.neutral}"
|
||||
textColor: "{colors.primary}"
|
||||
rounded: "{rounded.md}"
|
||||
padding: 24px
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Describe the voice and feel of the brand in one or two paragraphs. What mood
|
||||
does it evoke? What emotional response should a user have on first impression?
|
||||
|
||||
## Colors
|
||||
|
||||
- **Primary ({colors.primary}):** Core text, headlines, high-emphasis surfaces.
|
||||
- **Secondary ({colors.secondary}):** Supporting text, borders, metadata.
|
||||
- **Tertiary ({colors.tertiary}):** Interaction driver — buttons, links,
|
||||
selected states. Use sparingly to preserve its signal.
|
||||
- **Neutral ({colors.neutral}):** Page background and surface fills.
|
||||
|
||||
## Typography
|
||||
|
||||
Inter for everything. Weight and size carry hierarchy, not font family. Tight
|
||||
letter-spacing on display sizes; default tracking on body.
|
||||
|
||||
## Layout
|
||||
|
||||
Spacing scale is a 4px baseline. Use `md` (16px) for intra-component gaps,
|
||||
`lg` (24px) for inter-component gaps, `xl` (48px) for section breaks.
|
||||
|
||||
## Shapes
|
||||
|
||||
Rounded corners are modest — `sm` on interactive elements, `md` on cards.
|
||||
`full` is reserved for avatars and pill badges.
|
||||
|
||||
## Components
|
||||
|
||||
- `button-primary` is the only high-emphasis action per screen.
|
||||
- `card` is the default surface for grouped content. No shadow by default.
|
||||
|
||||
## Do's and Don'ts
|
||||
|
||||
- **Do** use token references (`{colors.primary}`) instead of literal hex in
|
||||
component definitions.
|
||||
- **Don't** introduce colors outside the palette — extend the palette first.
|
||||
- **Don't** nest component variants. `button-primary-hover` is a sibling,
|
||||
not a child.
|
||||
@@ -447,6 +447,34 @@ class TestExplicitProviderRouting:
|
||||
adapter = client.chat.completions
|
||||
assert adapter._is_oauth is False
|
||||
|
||||
def test_explicit_openrouter_pool_exhausted_logs_precise_warning(self, monkeypatch, caplog):
|
||||
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
||||
with patch("agent.auxiliary_client._select_pool_entry", return_value=(True, None)):
|
||||
with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
|
||||
client, model = resolve_provider_client("openrouter")
|
||||
assert client is None
|
||||
assert model is None
|
||||
assert any(
|
||||
"credential pool has no usable entries" in record.message
|
||||
for record in caplog.records
|
||||
)
|
||||
assert not any(
|
||||
"OPENROUTER_API_KEY not set" in record.message
|
||||
for record in caplog.records
|
||||
)
|
||||
|
||||
def test_explicit_openrouter_missing_env_keeps_not_set_warning(self, monkeypatch, caplog):
|
||||
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
||||
with patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
|
||||
with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
|
||||
client, model = resolve_provider_client("openrouter")
|
||||
assert client is None
|
||||
assert model is None
|
||||
assert any(
|
||||
"OPENROUTER_API_KEY not set" in record.message
|
||||
for record in caplog.records
|
||||
)
|
||||
|
||||
class TestGetTextAuxiliaryClient:
|
||||
"""Test the full resolution chain for get_text_auxiliary_client."""
|
||||
|
||||
|
||||
@@ -245,7 +245,7 @@ class TestResolveVisionMainFirst:
|
||||
assert model == "xiaomi/mimo-v2-omni"
|
||||
|
||||
def test_exotic_provider_with_vision_override_preserved(self):
|
||||
"""xiaomi → mimo-v2-omni override still wins over main_model."""
|
||||
"""xiaomi → mimo-v2.5 override still wins over main_model."""
|
||||
with patch(
|
||||
"agent.auxiliary_client._read_main_provider", return_value="xiaomi",
|
||||
), patch(
|
||||
@@ -257,15 +257,15 @@ class TestResolveVisionMainFirst:
|
||||
"agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("auto", None, None, None, None),
|
||||
):
|
||||
mock_resolve.return_value = (MagicMock(), "mimo-v2-omni")
|
||||
mock_resolve.return_value = (MagicMock(), "mimo-v2.5")
|
||||
|
||||
from agent.auxiliary_client import resolve_vision_provider_client
|
||||
|
||||
provider, client, model = resolve_vision_provider_client()
|
||||
|
||||
assert provider == "xiaomi"
|
||||
# Should use mimo-v2-omni (vision override), not mimo-v2-pro (text main)
|
||||
assert mock_resolve.call_args.args[1] == "mimo-v2-omni"
|
||||
# Should use mimo-v2.5 (vision override), not mimo-v2-pro (text main)
|
||||
assert mock_resolve.call_args.args[1] == "mimo-v2.5"
|
||||
|
||||
def test_main_unavailable_vision_falls_through_to_aggregators(self):
|
||||
"""Main provider fails → fall back to OpenRouter/Nous strict backends."""
|
||||
|
||||
@@ -333,66 +333,6 @@ def test_mark_exhausted_and_rotate_persists_status(tmp_path, monkeypatch):
|
||||
assert persisted["last_error_code"] == 402
|
||||
|
||||
|
||||
def test_try_refresh_current_updates_only_current_entry(tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
_write_auth_store(
|
||||
tmp_path,
|
||||
{
|
||||
"version": 1,
|
||||
"credential_pool": {
|
||||
"openai-codex": [
|
||||
{
|
||||
"id": "cred-1",
|
||||
"label": "primary",
|
||||
"auth_type": "oauth",
|
||||
"priority": 0,
|
||||
"source": "device_code",
|
||||
"access_token": "access-old",
|
||||
"refresh_token": "refresh-old",
|
||||
"base_url": "https://chatgpt.com/backend-api/codex",
|
||||
},
|
||||
{
|
||||
"id": "cred-2",
|
||||
"label": "secondary",
|
||||
"auth_type": "oauth",
|
||||
"priority": 1,
|
||||
"source": "device_code",
|
||||
"access_token": "access-other",
|
||||
"refresh_token": "refresh-other",
|
||||
"base_url": "https://chatgpt.com/backend-api/codex",
|
||||
},
|
||||
]
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
from agent.credential_pool import load_pool
|
||||
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.auth.refresh_codex_oauth_pure",
|
||||
lambda access_token, refresh_token, timeout_seconds=20.0: {
|
||||
"access_token": "access-new",
|
||||
"refresh_token": "refresh-new",
|
||||
},
|
||||
)
|
||||
|
||||
pool = load_pool("openai-codex")
|
||||
current = pool.select()
|
||||
assert current.id == "cred-1"
|
||||
|
||||
refreshed = pool.try_refresh_current()
|
||||
|
||||
assert refreshed is not None
|
||||
assert refreshed.access_token == "access-new"
|
||||
|
||||
auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
|
||||
primary, secondary = auth_payload["credential_pool"]["openai-codex"]
|
||||
assert primary["access_token"] == "access-new"
|
||||
assert primary["refresh_token"] == "refresh-new"
|
||||
assert secondary["access_token"] == "access-other"
|
||||
assert secondary["refresh_token"] == "refresh-other"
|
||||
|
||||
|
||||
def test_load_pool_seeds_env_api_key(tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-seeded")
|
||||
@@ -1162,75 +1102,3 @@ def test_load_pool_does_not_seed_qwen_oauth_when_no_token(tmp_path, monkeypatch)
|
||||
|
||||
assert not pool.has_credentials()
|
||||
assert pool.entries() == []
|
||||
|
||||
|
||||
def _build_pool_with_entries(tmp_path, monkeypatch, provider="openrouter", entries=None):
|
||||
"""Helper: build a CredentialPool directly without seeding side-effects."""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
monkeypatch.setattr("agent.credential_pool._seed_from_singletons", lambda p, e: (False, set()))
|
||||
monkeypatch.setattr("agent.credential_pool._seed_from_env", lambda p, e: (False, set()))
|
||||
if entries is None:
|
||||
entries = [
|
||||
{
|
||||
"id": "cred-1",
|
||||
"label": "primary",
|
||||
"auth_type": "api_key",
|
||||
"priority": 0,
|
||||
"source": "manual",
|
||||
"access_token": "tok-1",
|
||||
},
|
||||
{
|
||||
"id": "cred-2",
|
||||
"label": "secondary",
|
||||
"auth_type": "api_key",
|
||||
"priority": 1,
|
||||
"source": "manual",
|
||||
"access_token": "tok-2",
|
||||
},
|
||||
]
|
||||
_write_auth_store(tmp_path, {"version": 1, "credential_pool": {provider: entries}})
|
||||
from agent.credential_pool import load_pool
|
||||
return load_pool(provider)
|
||||
|
||||
|
||||
def test_remove_entry_removes_by_id(tmp_path, monkeypatch):
|
||||
"""remove_entry should remove the entry with matching id and return it."""
|
||||
pool = _build_pool_with_entries(tmp_path, monkeypatch)
|
||||
|
||||
removed = pool.remove_entry("cred-1")
|
||||
|
||||
assert removed is not None
|
||||
assert removed.id == "cred-1"
|
||||
remaining_ids = [e.id for e in pool.entries()]
|
||||
assert "cred-1" not in remaining_ids
|
||||
assert "cred-2" in remaining_ids
|
||||
|
||||
|
||||
def test_remove_entry_returns_none_for_unknown_id(tmp_path, monkeypatch):
|
||||
"""remove_entry returns None when no entry matches the given id."""
|
||||
pool = _build_pool_with_entries(tmp_path, monkeypatch)
|
||||
|
||||
result = pool.remove_entry("nonexistent-id")
|
||||
|
||||
assert result is None
|
||||
# Pool should still have both original entries
|
||||
assert len(pool.entries()) == 2
|
||||
|
||||
|
||||
def test_remove_entry_renumbers_priorities(tmp_path, monkeypatch):
|
||||
"""After remove_entry, remaining entries receive sequential priorities 0, 1, ..."""
|
||||
pool = _build_pool_with_entries(
|
||||
tmp_path,
|
||||
monkeypatch,
|
||||
entries=[
|
||||
{"id": "cred-1", "label": "a", "auth_type": "api_key", "priority": 0, "source": "manual", "access_token": "tok-1"},
|
||||
{"id": "cred-2", "label": "b", "auth_type": "api_key", "priority": 1, "source": "manual", "access_token": "tok-2"},
|
||||
{"id": "cred-3", "label": "c", "auth_type": "api_key", "priority": 2, "source": "manual", "access_token": "tok-3"},
|
||||
],
|
||||
)
|
||||
|
||||
pool.remove_entry("cred-2")
|
||||
|
||||
remaining = sorted(pool.entries(), key=lambda e: e.priority)
|
||||
assert [e.priority for e in remaining] == [0, 1]
|
||||
assert [e.id for e in remaining] == ["cred-1", "cred-3"]
|
||||
|
||||
@@ -56,6 +56,7 @@ class TestFailoverReason:
|
||||
"overloaded", "server_error", "timeout",
|
||||
"context_overflow", "payload_too_large",
|
||||
"model_not_found", "format_error",
|
||||
"provider_policy_blocked",
|
||||
"thinking_signature", "long_context_tier", "unknown",
|
||||
}
|
||||
actual = {r.value for r in FailoverReason}
|
||||
@@ -308,6 +309,59 @@ class TestClassifyApiError:
|
||||
assert result.retryable is True
|
||||
assert result.should_fallback is False
|
||||
|
||||
# ── Provider policy-block (OpenRouter privacy/guardrail) ──
|
||||
|
||||
def test_404_openrouter_policy_blocked(self):
|
||||
# Real OpenRouter error when the user's account privacy setting
|
||||
# excludes the only endpoint serving a model (e.g. DeepSeek V4 Pro
|
||||
# which is hosted only by DeepSeek, and their endpoint may log
|
||||
# inputs). Must NOT classify as model_not_found — the model
|
||||
# exists, falling back won't help (same account setting applies),
|
||||
# and the error body already tells the user where to fix it.
|
||||
e = MockAPIError(
|
||||
"No endpoints available matching your guardrail restrictions "
|
||||
"and data policy. Configure: https://openrouter.ai/settings/privacy",
|
||||
status_code=404,
|
||||
)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.provider_policy_blocked
|
||||
assert result.retryable is False
|
||||
assert result.should_fallback is False
|
||||
|
||||
def test_400_openrouter_policy_blocked(self):
|
||||
# Defense-in-depth: if OpenRouter ever returns this as 400 instead
|
||||
# of 404, still classify it distinctly rather than as format_error
|
||||
# or model_not_found.
|
||||
e = MockAPIError(
|
||||
"No endpoints available matching your data policy",
|
||||
status_code=400,
|
||||
)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.provider_policy_blocked
|
||||
assert result.retryable is False
|
||||
assert result.should_fallback is False
|
||||
|
||||
def test_message_only_openrouter_policy_blocked(self):
|
||||
# No status code — classifier should still catch the fingerprint
|
||||
# via the message-pattern fallback.
|
||||
e = Exception(
|
||||
"No endpoints available matching your guardrail restrictions "
|
||||
"and data policy"
|
||||
)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.provider_policy_blocked
|
||||
|
||||
def test_404_model_not_found_still_works(self):
|
||||
# Regression guard: the new policy-block check must not swallow
|
||||
# genuine model_not_found 404s.
|
||||
e = MockAPIError(
|
||||
"openrouter/nonexistent-model is not a valid model ID",
|
||||
status_code=404,
|
||||
)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.model_not_found
|
||||
assert result.should_fallback is True
|
||||
|
||||
# ── Payload too large ──
|
||||
|
||||
def test_413_payload_too_large(self):
|
||||
|
||||
@@ -200,6 +200,126 @@ class TestDefaultContextLengths:
|
||||
assert len(DEFAULT_CONTEXT_LENGTHS) >= 10
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Codex OAuth context-window resolution (provider="openai-codex")
|
||||
# =========================================================================
|
||||
|
||||
class TestCodexOAuthContextLength:
    """Context-window resolution for ``provider="openai-codex"``.

    ChatGPT Codex OAuth imposes lower context limits than the direct
    OpenAI API for the same slugs.  Verified Apr 2026 via live probe of
    chatgpt.com/backend-api/codex/models: every model returns 272k, while
    models.dev reports 1.05M for gpt-5.5/gpt-5.4 and 400k for the rest.
    """

    def setup_method(self):
        # Reset the module-level Codex probe cache and its timestamp so no
        # test sees values left behind by another one.
        import agent.model_metadata as metadata

        metadata._codex_oauth_context_cache_time = 0.0
        metadata._codex_oauth_context_cache = {}

    def test_fallback_table_used_without_token(self):
        """Without an access token the hardcoded Codex table is used.

        models.dev reports 1.05M for gpt-5.5, but Codex is 272k, so the
        fallback table has to win.
        """
        from agent.model_metadata import get_model_context_length

        codex_slugs = (
            "gpt-5.5",
            "gpt-5.4",
            "gpt-5.4-mini",
            "gpt-5.3-codex",
            "gpt-5.2-codex",
            "gpt-5.1-codex-max",
            "gpt-5.1-codex-mini",
        )
        no_cached_value = patch(
            "agent.model_metadata.get_cached_context_length", return_value=None
        )
        no_persist = patch("agent.model_metadata.save_context_length")

        with no_cached_value, no_persist:
            for model in codex_slugs:
                ctx = get_model_context_length(
                    model=model,
                    base_url="https://chatgpt.com/backend-api/codex",
                    api_key="",
                    provider="openai-codex",
                )
                assert ctx == 272_000, (
                    f"Codex {model}: expected 272000 fallback, got {ctx} "
                    "(models.dev leakage?)"
                )

    def test_live_probe_overrides_fallback(self):
        """With a token, the live /models probe's context_window is used."""
        from agent.model_metadata import get_model_context_length

        probe_reply = MagicMock(status_code=200)
        probe_reply.json.return_value = {
            "models": [
                {"slug": "gpt-5.5", "context_window": 300_000},
                {"slug": "gpt-5.4", "context_window": 400_000},
            ]
        }

        with patch("agent.model_metadata.requests.get", return_value=probe_reply):
            with patch(
                "agent.model_metadata.get_cached_context_length", return_value=None
            ):
                with patch("agent.model_metadata.save_context_length"):
                    ctx_55 = get_model_context_length(
                        model="gpt-5.5",
                        base_url="https://chatgpt.com/backend-api/codex",
                        api_key="fake-token",
                        provider="openai-codex",
                    )
                    ctx_54 = get_model_context_length(
                        model="gpt-5.4",
                        base_url="https://chatgpt.com/backend-api/codex",
                        api_key="fake-token",
                        provider="openai-codex",
                    )

        assert ctx_55 == 300_000
        assert ctx_54 == 400_000

    def test_probe_failure_falls_back_to_hardcoded(self):
        """A failed probe yields the hardcoded 272k, not models.dev's 1.05M.

        The failure simulated here is a 401 response with an empty body.
        """
        from agent.model_metadata import get_model_context_length

        rejected = MagicMock(status_code=401)
        rejected.json.return_value = {}

        with patch("agent.model_metadata.requests.get", return_value=rejected):
            with patch(
                "agent.model_metadata.get_cached_context_length", return_value=None
            ):
                with patch("agent.model_metadata.save_context_length"):
                    ctx = get_model_context_length(
                        model="gpt-5.5",
                        base_url="https://chatgpt.com/backend-api/codex",
                        api_key="expired-token",
                        provider="openai-codex",
                    )

        assert ctx == 272_000

    def test_non_codex_providers_unaffected(self):
        """gpt-5.5 on a non-Codex provider must NOT get the 272k override.

        OpenRouter and the direct OpenAI API have different limits.
        """
        from agent.model_metadata import get_model_context_length

        # OpenRouter should consult its own catalog first; with every lookup
        # mocked empty it falls through to DEFAULT_CONTEXT_LENGTHS (400k).
        empty_catalog = patch(
            "agent.model_metadata.fetch_model_metadata", return_value={}
        )
        empty_endpoint = patch(
            "agent.model_metadata.fetch_endpoint_model_metadata", return_value={}
        )
        no_cached_value = patch(
            "agent.model_metadata.get_cached_context_length", return_value=None
        )
        no_models_dev = patch(
            "agent.models_dev.lookup_models_dev_context", return_value=None
        )

        with empty_catalog, empty_endpoint, no_cached_value, no_models_dev:
            ctx = get_model_context_length(
                model="openai/gpt-5.5",
                base_url="https://openrouter.ai/api/v1",
                api_key="",
                provider="openrouter",
            )

        assert ctx == 400_000, (
            f"Non-Codex gpt-5.5 resolved to {ctx}; Codex 272k override "
            "leaked outside openai-codex provider"
        )
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# get_model_context_length — resolution order
|
||||
# =========================================================================
|
||||
@@ -621,6 +741,10 @@ class TestParseContextLimitFromError:
|
||||
msg = "Error: context window of 4096 tokens exceeded"
|
||||
assert parse_context_limit_from_error(msg) == 4096
|
||||
|
||||
def test_minimax_delta_only_message_returns_none(self):
    """The bare ``(2013)`` in this MiniMax-style message is not parsed as a limit."""
    parsed = parse_context_limit_from_error(
        "invalid params, context window exceeds limit (2013)"
    )
    assert parsed is None
|
||||
|
||||
def test_completely_unrelated_error(self):
    """An error that says nothing about context size yields ``None``."""
    parsed = parse_context_limit_from_error("Invalid API key")
    assert parsed is None
|
||||
|
||||
|
||||
@@ -0,0 +1,254 @@
|
||||
"""Tests for Moonshot/Kimi flavored-JSON-Schema sanitizer.
|
||||
|
||||
Moonshot's tool-parameter validator rejects several shapes that the rest of
|
||||
the JSON Schema ecosystem accepts:
|
||||
|
||||
1. Properties without ``type`` — Moonshot requires ``type`` on every node.
|
||||
2. ``type`` at the parent of ``anyOf`` — Moonshot requires it only inside
|
||||
``anyOf`` children.
|
||||
|
||||
These tests cover the repairs applied by ``agent/moonshot_schema.py``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.moonshot_schema import (
|
||||
is_moonshot_model,
|
||||
sanitize_moonshot_tool_parameters,
|
||||
sanitize_moonshot_tools,
|
||||
)
|
||||
|
||||
|
||||
class TestMoonshotModelDetection:
    """is_moonshot_model() must match across aggregator prefixes."""

    # Bare slugs, vendor-prefixed slugs, aggregator-prefixed slugs and an
    # all-caps variant.
    _MOONSHOT_SLUGS = [
        "kimi-k2.6",
        "kimi-k2-thinking",
        "moonshotai/Kimi-K2.6",
        "moonshotai/kimi-k2.6",
        "nous/moonshotai/kimi-k2.6",
        "openrouter/moonshotai/kimi-k2-thinking",
        "MOONSHOTAI/KIMI-K2.6",
    ]

    # Empty / missing values plus slugs belonging to other vendors.
    _OTHER_SLUGS = [
        "",
        None,
        "anthropic/claude-sonnet-4.6",
        "openai/gpt-5.4",
        "google/gemini-3-flash-preview",
        "deepseek-chat",
    ]

    @pytest.mark.parametrize("model", _MOONSHOT_SLUGS)
    def test_positive_matches(self, model):
        detected = is_moonshot_model(model)
        assert detected is True

    @pytest.mark.parametrize("model", _OTHER_SLUGS)
    def test_negative_matches(self, model):
        detected = is_moonshot_model(model)
        assert detected is False
|
||||
|
||||
|
||||
class TestMissingTypeFilled:
    """Rule 1: every property must carry a type."""

    @staticmethod
    def _sanitize_object(properties, **extra):
        """Wrap *properties* in a top-level object schema and sanitize it."""
        schema = {"type": "object", "properties": properties}
        schema.update(extra)
        return sanitize_moonshot_tool_parameters(schema)

    def test_property_without_type_gets_string(self):
        out = self._sanitize_object({"query": {"description": "a bare property"}})
        assert out["properties"]["query"]["type"] == "string"

    def test_property_with_enum_infers_type_from_first_value(self):
        out = self._sanitize_object({"flag": {"enum": [True, False]}})
        assert out["properties"]["flag"]["type"] == "boolean"

    def test_nested_properties_are_repaired(self):
        nested = {
            "type": "object",
            "properties": {
                "field": {"description": "no type"},
            },
        }
        out = self._sanitize_object({"filter": nested})
        repaired = out["properties"]["filter"]["properties"]["field"]
        assert repaired["type"] == "string"

    def test_array_items_without_type_get_repaired(self):
        tags = {
            "type": "array",
            "items": {"description": "tag entry"},
        }
        out = self._sanitize_object({"tags": tags})
        assert out["properties"]["tags"]["items"]["type"] == "string"

    def test_ref_node_is_not_given_synthetic_type(self):
        """A ``$ref`` node must be left without a synthetic type.

        The referenced definition supplies the type, and Moonshot would
        reject the conflict.
        """
        out = self._sanitize_object(
            {"payload": {"$ref": "#/$defs/Payload"}},
            **{"$defs": {"Payload": {"type": "object", "properties": {}}}},
        )
        payload = out["properties"]["payload"]
        assert "type" not in payload
        assert payload["$ref"] == "#/$defs/Payload"
|
||||
|
||||
|
||||
class TestAnyOfParentType:
    """Rule 2: type must not appear at the anyOf parent level."""

    def test_parent_type_stripped_when_anyof_present(self):
        nullable_string = {
            "type": "string",
            "anyOf": [
                {"type": "string"},
                {"type": "null"},
            ],
        }
        schema = {
            "type": "object",
            "properties": {"from_format": nullable_string},
        }

        sanitized = sanitize_moonshot_tool_parameters(schema)

        from_format = sanitized["properties"]["from_format"]
        assert "type" not in from_format
        assert "anyOf" in from_format

    def test_anyof_children_missing_type_get_filled(self):
        branches = [
            {"type": "string"},
            {"description": "A typeless option"},
        ]
        schema = {
            "type": "object",
            "properties": {"value": {"anyOf": branches}},
        }

        sanitized = sanitize_moonshot_tool_parameters(schema)

        typed, formerly_untyped = sanitized["properties"]["value"]["anyOf"][:2]
        assert typed["type"] == "string"
        assert "type" in formerly_untyped
|
||||
|
||||
|
||||
class TestTopLevelGuarantees:
    """The returned top-level schema is always a well-formed object."""

    def test_non_dict_input_returns_empty_object(self):
        """Non-dict inputs collapse to an empty object schema."""
        assert sanitize_moonshot_tool_parameters(None) == {"type": "object", "properties": {}}
        assert sanitize_moonshot_tool_parameters("garbage") == {"type": "object", "properties": {}}
        assert sanitize_moonshot_tool_parameters([]) == {"type": "object", "properties": {}}

    def test_non_object_top_level_coerced(self):
        """A top-level schema that is not ``object`` is coerced into one."""
        params = {"type": "string"}
        out = sanitize_moonshot_tool_parameters(params)
        assert out["type"] == "object"
        assert "properties" in out

    def test_does_not_mutate_input(self):
        """Sanitizing must leave the caller's schema untouched."""
        params = {
            "type": "object",
            "properties": {"q": {"description": "no type"}},
        }
        # Independent copy of every level the sanitizer could write to.
        snapshot = {
            "type": params["type"],
            "properties": {"q": dict(params["properties"]["q"])},
        }
        sanitize_moonshot_tool_parameters(params)
        assert params["type"] == snapshot["type"]
        # The repair for a type-less property adds ``type``; it must land on
        # the returned copy, never on the input.
        assert "type" not in params["properties"]["q"]
        # Compare the whole structure so that any other in-place change
        # (added, removed or rewritten keys) is caught as well.
        assert params == snapshot
|
||||
|
||||
|
||||
class TestToolListSanitizer:
    """sanitize_moonshot_tools() walks an OpenAI-format tool list."""

    @staticmethod
    def _function_tool(name, description, parameters):
        """Build one OpenAI-format function tool entry."""
        return {
            "type": "function",
            "function": {
                "name": name,
                "description": description,
                "parameters": parameters,
            },
        }

    def test_applies_per_tool(self):
        needs_repair = self._function_tool(
            "search",
            "Search",
            {
                "type": "object",
                "properties": {"q": {"description": "query"}},
            },
        )
        already_clean = self._function_tool(
            "noop",
            "Does nothing",
            {"type": "object", "properties": {}},
        )

        out = sanitize_moonshot_tools([needs_repair, already_clean])

        repaired_params = out[0]["function"]["parameters"]
        assert repaired_params["properties"]["q"]["type"] == "string"
        # Second tool already clean — should be structurally equivalent
        assert out[1]["function"]["parameters"] == {"type": "object", "properties": {}}

    def test_empty_list_is_passthrough(self):
        assert sanitize_moonshot_tools([]) == []
        assert sanitize_moonshot_tools(None) is None

    def test_skips_malformed_entries(self):
        """Entries without a function dict are passed through untouched."""
        malformed = [{"type": "function"}, {"not": "a tool"}]
        result = sanitize_moonshot_tools(malformed)
        assert result == malformed
|
||||
|
||||
|
||||
class TestRealWorldMCPShape:
    """End-to-end: a realistic MCP-style schema that used to 400 on Moonshot."""

    def test_combined_rewrites(self):
        # One tool exhibiting all three shapes at once: a property with no
        # type, an anyOf whose parent carries a type, and type-less array
        # items.
        untyped_query = {"description": "search text"}
        typed_anyof_parent = {
            "type": "string",
            "anyOf": [
                {"type": "string"},
                {"type": "null"},
            ],
        }
        untyped_items = {
            "type": "array",
            "items": {"description": "tag"},
        }
        schema = {
            "type": "object",
            "properties": {
                "query": untyped_query,
                "filter": typed_anyof_parent,
                "tags": untyped_items,
            },
            "required": ["query"],
        }

        sanitized = sanitize_moonshot_tool_parameters(schema)
        props = sanitized["properties"]

        assert props["query"]["type"] == "string"
        assert "type" not in props["filter"]
        assert props["filter"]["anyOf"][0]["type"] == "string"
        assert props["tags"]["items"]["type"] == "string"
        assert sanitized["required"] == ["query"]
|
||||
@@ -38,6 +38,18 @@ description: Description for {name}.
|
||||
return skill_dir
|
||||
|
||||
|
||||
def _symlink_category(skills_dir: Path, linked_root: Path, category: str) -> Path:
|
||||
"""Create a category symlink under skills_dir pointing outside the tree."""
|
||||
external_category = linked_root / category
|
||||
external_category.mkdir(parents=True, exist_ok=True)
|
||||
symlink_path = skills_dir / category
|
||||
try:
|
||||
symlink_path.symlink_to(external_category, target_is_directory=True)
|
||||
except (OSError, NotImplementedError) as exc:
|
||||
pytest.skip(f"symlinks unavailable in test environment: {exc}")
|
||||
return external_category
|
||||
|
||||
|
||||
class TestScanSkillCommands:
|
||||
def test_finds_skills(self, tmp_path):
|
||||
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
|
||||
@@ -101,6 +113,20 @@ class TestScanSkillCommands:
|
||||
assert "/enabled-skill" in result
|
||||
assert "/disabled-skill" not in result
|
||||
|
||||
def test_finds_skills_in_symlinked_category_dir(self, tmp_path):
    """A skill inside a symlinked category directory is still discovered."""
    skills_root = tmp_path / "skills"
    skills_root.mkdir()
    repo_root = tmp_path / "repo"

    # The category lives outside the skills tree and is linked into it.
    linked_category = _symlink_category(skills_root, repo_root, "linked")
    _make_skill(linked_category.parent, "knowledge-brain", category="linked")

    with patch("tools.skills_tool.SKILLS_DIR", skills_root):
        commands = scan_skill_commands()

    assert "/knowledge-brain" in commands
    assert commands["/knowledge-brain"]["name"] == "knowledge-brain"
|
||||
|
||||
|
||||
def test_special_chars_stripped_from_cmd_key(self, tmp_path):
|
||||
"""Skill names with +, /, or other special chars produce clean cmd keys."""
|
||||
|
||||
@@ -238,6 +238,56 @@ class TestChatCompletionsKimi:
|
||||
)
|
||||
assert kw["extra_body"]["thinking"] == {"type": "disabled"}
|
||||
|
||||
def test_moonshot_tool_schemas_are_sanitized_by_model_name(self, transport):
    """Aggregator routes (Nous, OpenRouter) hit Moonshot by model name, not base URL."""
    untyped_params = {
        "type": "object",
        "properties": {
            "q": {"description": "query"},  # missing type
        },
    }
    search_tool = {
        "type": "function",
        "function": {
            "name": "search",
            "description": "Search",
            "parameters": untyped_params,
        },
    }

    kw = transport.build_kwargs(
        model="moonshotai/kimi-k2.6",
        messages=[{"role": "user", "content": "Hi"}],
        tools=[search_tool],
        max_tokens_param_fn=lambda n: {"max_tokens": n},
    )

    sent_q = kw["tools"][0]["function"]["parameters"]["properties"]["q"]
    assert sent_q["type"] == "string"
|
||||
|
||||
def test_non_moonshot_tools_are_not_mutated(self, transport):
    """Other models don't go through the Moonshot sanitizer."""
    original_params = {
        "type": "object",
        "properties": {"q": {"description": "query"}},  # missing type
    }
    search_tool = {
        "type": "function",
        "function": {
            "name": "search",
            "description": "Search",
            "parameters": original_params,
        },
    }

    kw = transport.build_kwargs(
        model="anthropic/claude-sonnet-4.6",
        messages=[{"role": "user", "content": "Hi"}],
        tools=[search_tool],
        max_tokens_param_fn=lambda n: {"max_tokens": n},
    )

    # The parameters dict is passed through untouched (no synthetic type)
    sent_q = kw["tools"][0]["function"]["parameters"]["properties"]["q"]
    assert "type" not in sent_q
|
||||
|
||||
|
||||
class TestChatCompletionsValidate:
|
||||
|
||||
|
||||
@@ -200,6 +200,35 @@ class TestToolCallBackwardCompat:
|
||||
tc_no_pd = ToolCall(id="1", name="fn", arguments="{}")
|
||||
assert getattr(tc_no_pd, "call_id", None) is None
|
||||
|
||||
def test_extra_content_from_provider_data(self):
    """Gemini thought_signature stored in provider_data is exposed via property."""
    signature_blob = {"google": {"thought_signature": "SIG_ABC123"}}
    call = ToolCall(
        id="1",
        name="fn",
        arguments="{}",
        provider_data={"extra_content": signature_blob},
    )
    assert call.extra_content == signature_blob
|
||||
|
||||
def test_extra_content_none_when_no_provider_data(self):
    """With ``provider_data=None`` the property yields ``None``."""
    call = ToolCall(id="1", name="fn", arguments="{}", provider_data=None)
    assert call.extra_content is None
|
||||
|
||||
def test_extra_content_none_when_key_absent(self):
    """provider_data lacking an ``extra_content`` key yields ``None``."""
    unrelated_data = {"call_id": "c1"}
    call = ToolCall(id="1", name="fn", arguments="{}", provider_data=unrelated_data)
    assert call.extra_content is None
|
||||
|
||||
def test_extra_content_getattr_pattern(self):
    """_build_assistant_message uses getattr(tc, 'extra_content', None).

    Before the extra_content property existed, that exact lookup always
    returned None because ToolCall had no such attribute.  The Gemini
    thought_signature was silently dropped and subsequent turns failed
    with HTTP 400 (issue #14488).
    """
    signature_blob = {"google": {"thought_signature": "SIG_ABC123"}}
    with_extra = ToolCall(
        id="1",
        name="fn",
        arguments="{}",
        provider_data={"extra_content": signature_blob},
    )
    assert getattr(with_extra, "extra_content", None) == signature_blob

    without_extra = ToolCall(id="1", name="fn", arguments="{}")
    assert getattr(without_extra, "extra_content", None) is None
|
||||
|
||||
|
||||
class TestNormalizedResponseBackwardCompat:
|
||||
"""Test properties that replaced _nr_to_assistant_message() shim."""
|
||||
|
||||
@@ -566,6 +566,35 @@ class TestGetDueJobs:
|
||||
assert get_job("oneshot-stale")["next_run_at"] is None
|
||||
|
||||
|
||||
class TestEnabledToolsets:
    """Storage and normalization of a cron job's ``enabled_toolsets``."""

    # Arguments shared by every job created in this class.
    _JOB = {"prompt": "monitor", "schedule": "every 1h"}

    def test_enabled_toolsets_stored(self, tmp_cron_dir):
        created = create_job(**self._JOB, enabled_toolsets=["web", "terminal"])
        assert created["enabled_toolsets"] == ["web", "terminal"]

    def test_enabled_toolsets_persisted(self, tmp_cron_dir):
        created = create_job(**self._JOB, enabled_toolsets=["web", "file"])
        reloaded = get_job(created["id"])
        assert reloaded["enabled_toolsets"] == ["web", "file"]

    def test_enabled_toolsets_none_when_omitted(self, tmp_cron_dir):
        created = create_job(**self._JOB)
        assert created["enabled_toolsets"] is None

    def test_enabled_toolsets_empty_list_normalizes_to_none(self, tmp_cron_dir):
        created = create_job(**self._JOB, enabled_toolsets=[])
        assert created["enabled_toolsets"] is None

    def test_enabled_toolsets_whitespace_entries_stripped(self, tmp_cron_dir):
        created = create_job(**self._JOB, enabled_toolsets=["web", "  ", "file"])
        assert created["enabled_toolsets"] == ["web", "file"]

    def test_enabled_toolsets_updated_via_update_job(self, tmp_cron_dir):
        created = create_job(**self._JOB)
        update_job(created["id"], {"enabled_toolsets": ["web", "delegation"]})
        reloaded = get_job(created["id"])
        assert reloaded["enabled_toolsets"] == ["web", "delegation"]
|
||||
|
||||
|
||||
class TestSaveJobOutput:
|
||||
def test_creates_output_file(self, tmp_cron_dir):
|
||||
output_file = save_job_output("test123", "# Results\nEverything ok.")
|
||||
|
||||
@@ -673,6 +673,100 @@ class TestRunJobSessionPersistence:
|
||||
assert call_args[0][1] == "cron_complete"
|
||||
fake_db.close.assert_called_once()
|
||||
|
||||
def _make_run_job_patches(self, tmp_path):
    """Build the patches shared by the run_job tests.

    Returns ``(fake_db, patches)``.  ``patches`` is a list of five
    un-entered patchers, in this order: the scheduler's hermes home,
    origin resolution, ``dotenv.load_dotenv``, ``SessionDB`` (which
    returns ``fake_db``), and the runtime-provider resolver.
    """
    fake_db = MagicMock()
    runtime = {
        "api_key": "test-key",
        "base_url": "https://example.invalid/v1",
        "provider": "openrouter",
        "api_mode": "chat_completions",
    }
    patchers = [
        patch("cron.scheduler._hermes_home", tmp_path),
        patch("cron.scheduler._resolve_origin", return_value=None),
        patch("dotenv.load_dotenv"),
        patch("hermes_state.SessionDB", return_value=fake_db),
        patch(
            "hermes_cli.runtime_provider.resolve_runtime_provider",
            return_value=runtime,
        ),
    ]
    return fake_db, patchers
|
||||
|
||||
def test_run_job_passes_enabled_toolsets_to_agent(self, tmp_path):
    """A job's explicit enabled_toolsets reach the AIAgent constructor."""
    job = {
        "id": "toolset-job",
        "name": "test",
        "prompt": "hello",
        "enabled_toolsets": ["web", "terminal", "file"],
    }
    fake_db, patches = self._make_run_job_patches(tmp_path)
    home_p, origin_p, dotenv_p, db_p, provider_p = patches

    with home_p, origin_p, dotenv_p, db_p, provider_p, \
            patch("run_agent.AIAgent") as mock_agent_cls:
        agent_double = MagicMock()
        agent_double.run_conversation.return_value = {"final_response": "ok"}
        mock_agent_cls.return_value = agent_double
        run_job(job)

    ctor_kwargs = mock_agent_cls.call_args.kwargs
    assert ctor_kwargs["enabled_toolsets"] == ["web", "terminal", "file"]
|
||||
|
||||
def test_run_job_enabled_toolsets_resolves_from_platform_config_when_not_set(self, tmp_path):
    """A job without explicit enabled_toolsets gets them from platform config.

    The scheduler now resolves them from the ``hermes tools`` platform
    config for ``cron`` (PR #14xxx — blanket fix for Norbert's surprise
    ``moa`` run).

    The legacy "pass None → AIAgent loads full default" path is still
    reachable, but only when ``_get_platform_tools`` raises (safety net
    for any unexpected config shape).
    """
    job = {
        "id": "no-toolset-job",
        "name": "test",
        "prompt": "hello",
    }
    fake_db, patches = self._make_run_job_patches(tmp_path)
    home_p, origin_p, dotenv_p, db_p, provider_p = patches

    with home_p, origin_p, dotenv_p, db_p, provider_p, \
            patch("run_agent.AIAgent") as mock_agent_cls:
        agent_double = MagicMock()
        agent_double.run_conversation.return_value = {"final_response": "ok"}
        mock_agent_cls.return_value = agent_double
        run_job(job)

    resolved = mock_agent_cls.call_args.kwargs["enabled_toolsets"]
    # Resolution happened — not None, is a list.
    assert isinstance(resolved, list)
    # The cron default is _HERMES_CORE_TOOLS with _DEFAULT_OFF_TOOLSETS
    # (``moa``, ``homeassistant``, ``rl``) removed.  The most important
    # invariant: ``moa`` is NOT in the default cron toolset, so a cron
    # run cannot accidentally spin up frontier models.
    assert "moa" not in resolved
|
||||
|
||||
def test_run_job_per_job_toolsets_win_over_platform_config(self, tmp_path):
    """Per-job enabled_toolsets (via cronjob tool) always take precedence
    over the platform-level ``hermes tools`` config."""
    job = {
        "id": "override-job",
        "name": "test",
        "prompt": "hello",
        "enabled_toolsets": ["terminal"],
    }
    fake_db, patches = self._make_run_job_patches(tmp_path)
    home_p, origin_p, dotenv_p, db_p, provider_p = patches

    # Even if the user has ``hermes tools`` configured to enable web+file
    # for cron, the per-job override wins.
    platform_tools = patch(
        "hermes_cli.tools_config._get_platform_tools",
        return_value={"web", "file"},
    )
    with home_p, origin_p, dotenv_p, db_p, provider_p, \
            patch("run_agent.AIAgent") as mock_agent_cls, \
            platform_tools:
        agent_double = MagicMock()
        agent_double.run_conversation.return_value = {"final_response": "ok"}
        mock_agent_cls.return_value = agent_double
        run_job(job)

    ctor_kwargs = mock_agent_cls.call_args.kwargs
    assert ctor_kwargs["enabled_toolsets"] == ["terminal"]
|
||||
|
||||
def test_run_job_empty_response_returns_empty_not_placeholder(self, tmp_path):
|
||||
"""Empty final_response should stay empty for delivery logic (issue #2234).
|
||||
|
||||
|
||||
@@ -95,6 +95,7 @@ class TestBusySessionAck:
|
||||
async def test_sends_ack_when_agent_running(self):
|
||||
"""First message during busy session should get a status ack."""
|
||||
runner, sentinel = _make_runner()
|
||||
runner._busy_input_mode = "interrupt"
|
||||
adapter = _make_adapter()
|
||||
|
||||
event = _make_event(text="Are you working?")
|
||||
@@ -127,16 +128,42 @@ class TestBusySessionAck:
|
||||
assert "Interrupting" in content or "respond" in content
|
||||
assert "/stop" not in content # no need — we ARE interrupting
|
||||
|
||||
# Verify message was queued in adapter pending
|
||||
assert sk in adapter._pending_messages
|
||||
|
||||
# Verify agent interrupt was called
|
||||
agent.interrupt.assert_called_once_with("Are you working?")
|
||||
|
||||
@pytest.mark.asyncio
async def test_queue_mode_suppresses_interrupt_and_updates_ack(self):
    """When busy_input_mode is 'queue', message is queued WITHOUT interrupt."""
    runner, _sentinel = _make_runner()
    runner._busy_input_mode = "queue"
    adapter = _make_adapter()

    event = _make_event(text="Add this to queue")
    session_key = build_session_key(event.source)
    runner.adapters[event.source.platform] = adapter

    running_agent = MagicMock()
    runner._running_agents[session_key] = running_agent

    with patch("gateway.run.merge_pending_message_event"):
        await runner._handle_active_session_busy_message(event, session_key)

    # VERIFY: Agent was NOT interrupted
    running_agent.interrupt.assert_not_called()

    # VERIFY: Ack sent with queue-specific wording
    adapter._send_with_retry.assert_called_once()
    sent = adapter._send_with_retry.call_args
    content = sent.kwargs.get("content") or sent[1].get("content", "")
    assert "Queued for the next turn" in content
    assert "respond once the current task finishes" in content
    assert "Interrupting" not in content
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_debounce_suppresses_rapid_acks(self):
|
||||
"""Second message within 30s should NOT send another ack."""
|
||||
runner, sentinel = _make_runner()
|
||||
runner._busy_input_mode = "interrupt"
|
||||
adapter = _make_adapter()
|
||||
|
||||
event1 = _make_event(text="hello?")
|
||||
@@ -172,13 +199,14 @@ class TestBusySessionAck:
|
||||
assert result2 is True
|
||||
assert adapter._send_with_retry.call_count == 1 # still 1, no new ack
|
||||
|
||||
# But interrupt should still be called for both
|
||||
# But interrupt should still be called for both (since we are in interrupt mode)
|
||||
assert agent.interrupt.call_count == 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_ack_after_cooldown_expires(self):
|
||||
"""After 30s cooldown, a new message should send a fresh ack."""
|
||||
runner, sentinel = _make_runner()
|
||||
runner._busy_input_mode = "interrupt"
|
||||
adapter = _make_adapter()
|
||||
|
||||
event = _make_event(text="hello?")
|
||||
@@ -212,6 +240,7 @@ class TestBusySessionAck:
|
||||
async def test_includes_status_detail(self):
|
||||
"""Ack message should include iteration and tool info when available."""
|
||||
runner, sentinel = _make_runner()
|
||||
runner._busy_input_mode = "interrupt"
|
||||
adapter = _make_adapter()
|
||||
|
||||
event = _make_event(text="yo")
|
||||
@@ -243,6 +272,7 @@ class TestBusySessionAck:
|
||||
"""Draining case should still produce the drain-specific message."""
|
||||
runner, sentinel = _make_runner()
|
||||
runner._draining = True
|
||||
runner._busy_input_mode = "interrupt"
|
||||
adapter = _make_adapter()
|
||||
|
||||
event = _make_event(text="hello")
|
||||
@@ -264,6 +294,7 @@ class TestBusySessionAck:
|
||||
async def test_pending_sentinel_no_interrupt(self):
|
||||
"""When agent is PENDING_SENTINEL, don't call interrupt (it has no method)."""
|
||||
runner, sentinel = _make_runner()
|
||||
runner._busy_input_mode = "interrupt"
|
||||
adapter = _make_adapter()
|
||||
|
||||
event = _make_event(text="hey")
|
||||
|
||||
@@ -1,22 +1,28 @@
|
||||
"""Regression tests for the TUI gateway's `complete.path` handler.
|
||||
|
||||
Reported during the TUI v2 blitz retest: typing `@folder:` (and `@folder`
|
||||
with no colon yet) still surfaced files alongside directories in the
|
||||
TUI composer, because the gateway-side completion lives in
|
||||
`tui_gateway/server.py` and was never touched by the earlier fix to
|
||||
`hermes_cli/commands.py`.
|
||||
Reported during the TUI v2 blitz retest:
|
||||
- typing `@folder:` (and `@folder` with no colon yet) surfaced files
|
||||
alongside directories — the gateway-side completion lives in
|
||||
`tui_gateway/server.py` and was never touched by the earlier fix to
|
||||
`hermes_cli/commands.py`.
|
||||
- typing `@appChrome` required the full `@ui-tui/src/components/app…`
|
||||
path to find the file — users expect Cmd-P-style fuzzy basename
|
||||
matching across the repo, not a strict directory prefix filter.
|
||||
|
||||
Covers:
|
||||
- `@folder:` only yields directories
|
||||
- `@file:` only yields regular files
|
||||
- Bare `@folder` / `@file` (no colon) lists cwd directly
|
||||
- Explicit prefix is preserved in the completion text
|
||||
- `@<name>` with no slash fuzzy-matches basenames anywhere in the tree
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from tui_gateway import server
|
||||
|
||||
|
||||
@@ -33,6 +39,15 @@ def _items(word: str):
|
||||
return [(it["text"], it["display"], it.get("meta", "")) for it in resp["result"]["items"]]
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_fuzzy_cache(monkeypatch):
    """Empty the gateway's fuzzy-listing cache before and after each test.

    Every test walks a fresh tmp dir, so a listing cached for an earlier
    root must not leak through the TTL window.
    """
    server._fuzzy_cache.clear()
    yield
    server._fuzzy_cache.clear()
|
||||
|
||||
|
||||
def test_at_folder_colon_only_dirs(tmp_path, monkeypatch):
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_fixture(tmp_path)
|
||||
@@ -89,3 +104,176 @@ def test_bare_at_still_shows_static_refs(tmp_path, monkeypatch):
|
||||
|
||||
for expected in ("@diff", "@staged", "@file:", "@folder:", "@url:", "@git:"):
|
||||
assert expected in texts, f"missing static ref {expected!r} in {texts!r}"
|
||||
|
||||
|
||||
# ── Fuzzy basename matching ──────────────────────────────────────────────
|
||||
# Users shouldn't have to know the full path — typing `@appChrome` should
|
||||
# find `ui-tui/src/components/appChrome.tsx`.
|
||||
|
||||
|
||||
def _nested_fixture(tmp_path: Path):
    """Populate *tmp_path* with the nested tree used by the fuzzy tests.

    Files created (each containing the single character ``x``)::

        readme.md
        .env
        ui-tui/src/components/appChrome.tsx
        ui-tui/src/components/appLayout.tsx
        ui-tui/src/components/thinking.tsx
        ui-tui/src/hooks/useCompletion.ts
        tui_gateway/server.py
    """
    for root_file in ("readme.md", ".env"):
        (tmp_path / root_file).write_text("x")

    components_dir = tmp_path / "ui-tui/src/components"
    components_dir.mkdir(parents=True)
    for component in ("appChrome.tsx", "appLayout.tsx", "thinking.tsx"):
        (components_dir / component).write_text("x")

    hooks_dir = tmp_path / "ui-tui/src/hooks"
    hooks_dir.mkdir(parents=True)
    (hooks_dir / "useCompletion.ts").write_text("x")

    gateway_dir = tmp_path / "tui_gateway"
    gateway_dir.mkdir()
    (gateway_dir / "server.py").write_text("x")
|
||||
|
||||
|
||||
def test_fuzzy_at_finds_file_without_directory_prefix(tmp_path, monkeypatch):
    """`@appChrome` — with no slash — should surface the nested file."""
    monkeypatch.chdir(tmp_path)
    _nested_fixture(tmp_path)

    wanted = "@file:ui-tui/src/components/appChrome.tsx"
    entries = _items("@appChrome")
    texts = [text for text, _, _ in entries]

    assert wanted in texts, texts

    # Display is the basename, meta is the containing directory, so the
    # picker can show `appChrome.tsx ui-tui/src/components` on one row.
    _, display, meta = next(entry for entry in entries if entry[0] == wanted)
    assert display == "appChrome.tsx"
    assert meta == "ui-tui/src/components"
|
||||
|
||||
|
||||
def test_fuzzy_ranks_exact_before_prefix_before_subseq(tmp_path, monkeypatch):
    """Better matches sort before weaker matches regardless of path depth."""
    monkeypatch.chdir(tmp_path)
    _nested_fixture(tmp_path)
    # Add a second `server.py` at the root, next to the nested one.
    (tmp_path / "server.py").write_text("x")

    texts = [text for text, _, _ in _items("@server")]

    # The root-level `server.py` must come first; per the original test
    # note, both candidates rank equally on basename and the shorter
    # relative path breaks the tie.
    assert texts[0] == "@file:server.py", texts
    assert "@file:tui_gateway/server.py" in texts
|
||||
|
||||
|
||||
def test_fuzzy_camelcase_word_boundary(tmp_path, monkeypatch):
    """Mid-basename camelCase pieces match without substring scanning."""
    monkeypatch.chdir(tmp_path)
    _nested_fixture(tmp_path)

    texts = [text for text, _, _ in _items("@Chrome")]

    # `Chrome` starts a camelCase word inside `appChrome.tsx`.
    assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts
|
||||
|
||||
|
||||
def test_fuzzy_subsequence_catches_sparse_queries(tmp_path, monkeypatch):
    """`@uCo` → `useCompletion.ts` via subsequence, last-resort tier."""
    monkeypatch.chdir(tmp_path)
    _nested_fixture(tmp_path)

    texts = [text for text, _, _ in _items("@uCo")]

    assert "@file:ui-tui/src/hooks/useCompletion.ts" in texts, texts
|
||||
|
||||
|
||||
def test_fuzzy_at_file_prefix_preserved(tmp_path, monkeypatch):
    """Explicit `@file:` prefix still wins the completion tag."""
    monkeypatch.chdir(tmp_path)
    _nested_fixture(tmp_path)

    texts = [text for text, _, _ in _items("@file:appChrome")]

    assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts
|
||||
|
||||
|
||||
def test_fuzzy_skipped_when_path_has_slash(tmp_path, monkeypatch):
    """Any `/` in the query = user is navigating; keep directory listing."""
    monkeypatch.chdir(tmp_path)
    _nested_fixture(tmp_path)

    texts = [text for text, _, _ in _items("@ui-tui/src/components/app")]

    # Directory-listing mode prefixes with `@file:` / `@folder:` per entry.
    # It should only surface direct children of the named dir — not the
    # nested `useCompletion.ts`.
    assert any("appChrome.tsx" in text for text in texts), texts
    assert not any("useCompletion.ts" in text for text in texts), texts
|
||||
|
||||
|
||||
def test_fuzzy_skipped_when_folder_tag(tmp_path, monkeypatch):
    """`@folder:<name>` still lists directories — fuzzy scanner only walks
    files (git-tracked + untracked), so defer to the dir-listing path."""
    monkeypatch.chdir(tmp_path)
    _nested_fixture(tmp_path)

    texts = [text for text, _, _ in _items("@folder:ui")]

    # Root has `ui-tui/` as a directory; the listing branch should surface it.
    assert any(text.startswith("@folder:ui-tui") for text in texts), texts
|
||||
|
||||
|
||||
def test_fuzzy_hides_dotfiles_unless_asked(tmp_path, monkeypatch):
    """`.env` doesn't leak into `@env` but does show for `@.env`."""
    monkeypatch.chdir(tmp_path)
    _nested_fixture(tmp_path)

    plain_texts = [text for text, _, _ in _items("@env")]
    assert not any(".env" in text for text in plain_texts)

    dotted_texts = [text for text, _, _ in _items("@.env")]
    assert any(text.endswith(".env") for text in dotted_texts)
|
||||
|
||||
|
||||
def test_fuzzy_caps_results(tmp_path, monkeypatch):
    """The 30-item cap survives a big tree."""
    monkeypatch.chdir(tmp_path)
    # 60 matching candidates — twice the expected cap.
    for index in range(60):
        (tmp_path / f"mod_{index:03d}.py").write_text("x")

    items = _items("@mod")

    assert len(items) == 30
|
||||
|
||||
|
||||
def test_fuzzy_paths_relative_to_cwd_inside_subdir(tmp_path, monkeypatch):
    """When the gateway runs from a subdirectory of a git repo, fuzzy
    completion paths must resolve under that cwd — not under the repo root.

    Without this, `@appChrome` from inside `apps/web/` would suggest
    `@file:apps/web/src/foo.tsx` but the agent (resolving from cwd) would
    look for `apps/web/apps/web/src/foo.tsx` and fail. We translate every
    `git ls-files` result back to a `relpath(root)` and drop anything
    outside `root` so the completion contract stays "paths are cwd-relative".
    """
    import subprocess

    def _git(*args):
        # Every git call runs in the repo root and must succeed.
        subprocess.run(["git", *args], cwd=tmp_path, check=True)

    _git("init", "-q")
    _git("config", "user.email", "test@example.com")
    _git("config", "user.name", "test")

    web_src = tmp_path / "apps" / "web" / "src"
    web_src.mkdir(parents=True)
    (web_src / "appChrome.tsx").write_text("x")
    api_src = tmp_path / "apps" / "api" / "src"
    api_src.mkdir(parents=True)
    (api_src / "server.ts").write_text("x")
    (tmp_path / "README.md").write_text("x")

    _git("add", ".")
    _git("commit", "-q", "-m", "init")

    # Run from `apps/web/` — completions should be relative to here, and
    # files outside this subtree (apps/api, README.md at root) shouldn't
    # appear at all.
    monkeypatch.chdir(tmp_path / "apps" / "web")

    texts = [text for text, _, _ in _items("@appChrome")]

    assert "@file:src/appChrome.tsx" in texts, texts
    assert not any("apps/web/" in text for text in texts), texts

    # Drop the cached listing before each further query.
    server._fuzzy_cache.clear()
    other_texts = [text for text, _, _ in _items("@server")]

    assert not any("server.ts" in text for text in other_texts), other_texts

    server._fuzzy_cache.clear()
    readme_texts = [text for text, _, _ in _items("@README")]

    assert not any("README.md" in text for text in readme_texts), readme_texts
|
||||
|
||||
@@ -73,18 +73,29 @@ from gateway.platforms.discord import DiscordAdapter # noqa: E402
|
||||
class FakeTree:
    """Test double for the bot's command tree.

    ``sync`` and ``fetch_commands`` are ``AsyncMock`` objects that resolve
    to empty lists; ``_commands`` backs ``get_commands``.
    """

    def __init__(self):
        self.sync = AsyncMock(return_value=[])
        self.fetch_commands = AsyncMock(return_value=[])
        self._commands = []

    def command(self, *args, **kwargs):
        """Return a decorator that hands the function back unchanged."""
        def _passthrough(fn):
            return fn

        return _passthrough

    def get_commands(self, *args, **kwargs):
        """Return a shallow copy of the stored commands."""
        return list(self._commands)
|
||||
|
||||
|
||||
class FakeBot:
|
||||
def __init__(self, *, intents, proxy=None, allowed_mentions=None, **_):
|
||||
self.intents = intents
|
||||
self.allowed_mentions = allowed_mentions
|
||||
self.application_id = 999
|
||||
self.user = SimpleNamespace(id=999, name="Hermes")
|
||||
self._events = {}
|
||||
self.tree = FakeTree()
|
||||
self.http = SimpleNamespace(
|
||||
upsert_global_command=AsyncMock(),
|
||||
edit_global_command=AsyncMock(),
|
||||
delete_global_command=AsyncMock(),
|
||||
)
|
||||
|
||||
def event(self, fn):
|
||||
self._events[fn.__name__] = fn
|
||||
@@ -199,6 +210,7 @@ async def test_connect_releases_token_lock_on_timeout(monkeypatch):
|
||||
async def test_connect_does_not_wait_for_slash_sync(monkeypatch):
|
||||
adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
|
||||
|
||||
monkeypatch.setenv("DISCORD_COMMAND_SYNC_POLICY", "bulk")
|
||||
monkeypatch.setattr("gateway.status.acquire_scoped_lock", lambda scope, identity, metadata=None: (True, None))
|
||||
monkeypatch.setattr("gateway.status.release_scoped_lock", lambda scope, identity: None)
|
||||
|
||||
@@ -226,3 +238,420 @@ async def test_connect_does_not_wait_for_slash_sync(monkeypatch):
|
||||
created["bot"].tree.allow_finish.set()
|
||||
await asyncio.sleep(0)
|
||||
await adapter.disconnect()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_connect_respects_slash_commands_opt_out(monkeypatch):
    """connect() succeeds without registering slash commands when the
    platform config sets ``slash_commands`` to False."""
    config = PlatformConfig(enabled=True, token="test-token", extra={"slash_commands": False})
    adapter = DiscordAdapter(config)

    monkeypatch.setenv("DISCORD_COMMAND_SYNC_POLICY", "off")
    monkeypatch.setattr("gateway.status.acquire_scoped_lock", lambda scope, identity, metadata=None: (True, None))
    monkeypatch.setattr("gateway.status.release_scoped_lock", lambda scope, identity: None)

    intents = SimpleNamespace(message_content=False, dm_messages=False, guild_messages=False, members=False, voice_states=False)
    monkeypatch.setattr(discord_platform.Intents, "default", lambda: intents)

    def _build_fake_bot(**kwargs):
        # Forward only the keyword arguments FakeBot is given in this test.
        return FakeBot(
            intents=kwargs["intents"],
            proxy=kwargs.get("proxy"),
            allowed_mentions=kwargs.get("allowed_mentions"),
        )

    monkeypatch.setattr(discord_platform.commands, "Bot", _build_fake_bot)
    register_mock = MagicMock()
    monkeypatch.setattr(adapter, "_register_slash_commands", register_mock)
    monkeypatch.setattr(adapter, "_resolve_allowed_usernames", AsyncMock())

    ok = await adapter.connect()

    assert ok is True
    register_mock.assert_not_called()

    await adapter.disconnect()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_safe_sync_slash_commands_only_mutates_diffs():
    """Safe sync edits, creates and deletes only the commands that differ.

    Three desired commands meet three existing ones: one identical, one
    with a changed description, one desired-only and one existing-only.
    """
    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))

    class _DesiredCommand:
        """Local command whose ``to_dict(tree)`` yields the desired payload."""

        def __init__(self, payload):
            self._payload = payload

        def to_dict(self, tree):
            assert tree is not None
            return dict(self._payload)

    class _ExistingCommand:
        """Already-registered command as returned by ``fetch_commands``."""

        def __init__(self, command_id, payload):
            self.id = command_id
            self.name = payload["name"]
            self.type = SimpleNamespace(value=payload["type"])
            self._payload = payload

        def to_dict(self):
            return {
                "id": self.id,
                "application_id": 999,
                **self._payload,
                "name_localizations": {},
                "description_localizations": {},
            }

    def _payload(name, description):
        # Fields shared by every command in this test; only name and
        # description vary.
        return {
            "name": name,
            "description": description,
            "type": 1,
            "options": [],
            "nsfw": False,
            "dm_permission": True,
            "default_member_permissions": None,
        }

    desired_same = _payload("status", "Show Hermes session status")
    desired_updated = _payload("help", "Show available commands")
    desired_created = _payload("metricas", "Show Colmeio metrics dashboard")

    existing_same = _ExistingCommand(11, desired_same)
    existing_updated = _ExistingCommand(
        12,
        {**desired_updated, "description": "Old help text"},
    )
    existing_deleted = _ExistingCommand(13, _payload("old-command", "To be deleted"))

    fake_tree = SimpleNamespace(
        get_commands=lambda: [
            _DesiredCommand(desired_same),
            _DesiredCommand(desired_updated),
            _DesiredCommand(desired_created),
        ],
        fetch_commands=AsyncMock(return_value=[existing_same, existing_updated, existing_deleted]),
    )
    fake_http = SimpleNamespace(
        upsert_global_command=AsyncMock(),
        edit_global_command=AsyncMock(),
        delete_global_command=AsyncMock(),
    )
    adapter._client = SimpleNamespace(
        tree=fake_tree,
        http=fake_http,
        application_id=999,
        user=SimpleNamespace(id=999),
    )

    summary = await adapter._safe_sync_slash_commands()

    assert summary == {
        "total": 3,
        "unchanged": 1,
        "updated": 1,
        "recreated": 0,
        "created": 1,
        "deleted": 1,
    }
    fake_http.edit_global_command.assert_awaited_once_with(999, 12, desired_updated)
    fake_http.upsert_global_command.assert_awaited_once_with(999, desired_created)
    fake_http.delete_global_command.assert_awaited_once_with(999, 13)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_safe_sync_slash_commands_recreates_metadata_only_diffs():
    """A command differing only in ``default_member_permissions`` is deleted
    and re-created rather than edited in place."""
    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))

    class _DesiredCommand:
        """Local command whose ``to_dict(tree)`` yields the desired payload."""

        def __init__(self, payload):
            self._payload = payload

        def to_dict(self, tree):
            assert tree is not None
            return dict(self._payload)

    class _ExistingCommand:
        """Already-registered command as returned by ``fetch_commands``."""

        def __init__(self, command_id, payload):
            self.id = command_id
            self.name = payload["name"]
            self.type = SimpleNamespace(value=payload["type"])
            self._payload = payload

        def to_dict(self):
            return {
                "id": self.id,
                "application_id": 999,
                **self._payload,
                "name_localizations": {},
                "description_localizations": {},
            }

    desired = {
        "name": "help",
        "description": "Show available commands",
        "type": 1,
        "options": [],
        "nsfw": False,
        "dm_permission": True,
        "default_member_permissions": "8",
    }
    # Same command on the remote side, but with no permission restriction.
    existing = _ExistingCommand(
        12,
        {**desired, "default_member_permissions": None},
    )

    fake_tree = SimpleNamespace(
        get_commands=lambda: [_DesiredCommand(desired)],
        fetch_commands=AsyncMock(return_value=[existing]),
    )
    fake_http = SimpleNamespace(
        upsert_global_command=AsyncMock(),
        edit_global_command=AsyncMock(),
        delete_global_command=AsyncMock(),
    )
    adapter._client = SimpleNamespace(
        tree=fake_tree,
        http=fake_http,
        application_id=999,
        user=SimpleNamespace(id=999),
    )

    summary = await adapter._safe_sync_slash_commands()

    assert summary == {
        "total": 1,
        "unchanged": 0,
        "updated": 0,
        "recreated": 1,
        "created": 0,
        "deleted": 0,
    }
    fake_http.edit_global_command.assert_not_awaited()
    fake_http.delete_global_command.assert_awaited_once_with(999, 12)
    fake_http.upsert_global_command.assert_awaited_once_with(999, desired)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_post_connect_initialization_skips_sync_when_policy_off(monkeypatch):
    """With ``DISCORD_COMMAND_SYNC_POLICY=off`` the post-connect step must
    not call ``tree.sync``."""
    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
    monkeypatch.setenv("DISCORD_COMMAND_SYNC_POLICY", "off")

    sync_mock = AsyncMock()
    adapter._client = SimpleNamespace(tree=SimpleNamespace(sync=sync_mock))

    await adapter._run_post_connect_initialization()

    sync_mock.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_safe_sync_reads_permission_attrs_from_existing_command():
    """Regression: AppCommand.to_dict() in discord.py does NOT include
    nsfw, dm_permission, or default_member_permissions — they live only
    on the attributes. Without reading those attrs, any command with
    non-default permissions false-diffs on every startup.
    """
    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))

    class _DesiredCommand:
        """Local command whose ``to_dict(tree)`` yields the desired payload."""

        def __init__(self, payload):
            self._payload = payload

        def to_dict(self, tree):
            return dict(self._payload)

    class _ExistingCommand:
        """Mirrors discord.py's AppCommand — to_dict() omits nsfw/dm/perms."""

        def __init__(self, command_id, name, description, *, nsfw, guild_only, default_permissions):
            self.id = command_id
            self.name = name
            self.description = description
            self.type = SimpleNamespace(value=1)
            self.nsfw = nsfw
            self.guild_only = guild_only
            # Permissions are exposed as an object with `.value`, or None.
            if default_permissions is None:
                self.default_member_permissions = None
            else:
                self.default_member_permissions = SimpleNamespace(value=default_permissions)

        def to_dict(self):
            # Match real AppCommand.to_dict() — no nsfw/dm_permission/default_member_permissions
            return {
                "id": self.id,
                "type": 1,
                "application_id": 999,
                "name": self.name,
                "description": self.description,
                "name_localizations": {},
                "description_localizations": {},
                "options": [],
            }

    desired = {
        "name": "admin",
        "description": "Admin-only command",
        "type": 1,
        "options": [],
        "nsfw": True,
        "dm_permission": False,
        "default_member_permissions": "8",
    }
    # Existing command has matching attrs — should report unchanged, NOT falsely diff.
    existing = _ExistingCommand(
        42,
        "admin",
        "Admin-only command",
        nsfw=True,
        guild_only=True,
        default_permissions=8,
    )

    fake_tree = SimpleNamespace(
        get_commands=lambda: [_DesiredCommand(desired)],
        fetch_commands=AsyncMock(return_value=[existing]),
    )
    fake_http = SimpleNamespace(
        upsert_global_command=AsyncMock(),
        edit_global_command=AsyncMock(),
        delete_global_command=AsyncMock(),
    )
    adapter._client = SimpleNamespace(
        tree=fake_tree,
        http=fake_http,
        application_id=999,
        user=SimpleNamespace(id=999),
    )

    summary = await adapter._safe_sync_slash_commands()

    # Without the fix, this would be unchanged=0, recreated=1 (false diff).
    assert summary == {
        "total": 1,
        "unchanged": 1,
        "updated": 0,
        "recreated": 0,
        "created": 0,
        "deleted": 0,
    }
    fake_http.edit_global_command.assert_not_awaited()
    fake_http.delete_global_command.assert_not_awaited()
    fake_http.upsert_global_command.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_safe_sync_detects_contexts_drift():
    """Regression: contexts and integration_types must be canonicalized
    so drift in those fields triggers reconciliation. Without this, the
    diff silently reports 'unchanged' and never reconciles.
    """
    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))

    class _DesiredCommand:
        """Local command whose ``to_dict(tree)`` yields the desired payload."""

        def __init__(self, payload):
            self._payload = payload

        def to_dict(self, tree):
            return dict(self._payload)

    class _ExistingCommand:
        """Remote command; ``to_dict()`` exposes contexts/integration_types
        but not nsfw/dm_permission/default_member_permissions."""

        def __init__(self, command_id, payload):
            self.id = command_id
            self.name = payload["name"]
            self.description = payload["description"]
            self.type = SimpleNamespace(value=1)
            self.nsfw = payload.get("nsfw", False)
            # guild_only is the inverse of the payload's dm_permission flag.
            self.guild_only = not payload.get("dm_permission", True)
            self.default_member_permissions = None
            self._payload = payload

        def to_dict(self):
            return {
                "id": self.id,
                "type": 1,
                "application_id": 999,
                "name": self.name,
                "description": self.description,
                "name_localizations": {},
                "description_localizations": {},
                "options": [],
                "contexts": self._payload.get("contexts"),
                "integration_types": self._payload.get("integration_types"),
            }

    desired = {
        "name": "help",
        "description": "Show available commands",
        "type": 1,
        "options": [],
        "nsfw": False,
        "dm_permission": True,
        "default_member_permissions": None,
        "contexts": [0, 1, 2],
        "integration_types": [0, 1],
    }
    existing = _ExistingCommand(
        77,
        {
            **desired,
            "contexts": [0],  # server-side only
            "integration_types": [0],
        },
    )

    fake_tree = SimpleNamespace(
        get_commands=lambda: [_DesiredCommand(desired)],
        fetch_commands=AsyncMock(return_value=[existing]),
    )
    fake_http = SimpleNamespace(
        upsert_global_command=AsyncMock(),
        edit_global_command=AsyncMock(),
        delete_global_command=AsyncMock(),
    )
    adapter._client = SimpleNamespace(
        tree=fake_tree,
        http=fake_http,
        application_id=999,
        user=SimpleNamespace(id=999),
    )

    summary = await adapter._safe_sync_slash_commands()

    # contexts and integration_types are not patchable by
    # edit_global_command, so the command must be recreated.
    assert summary["unchanged"] == 0
    assert summary["recreated"] == 1
    assert summary["updated"] == 0
    fake_http.edit_global_command.assert_not_awaited()
    fake_http.delete_global_command.assert_awaited_once_with(999, 77)
    fake_http.upsert_global_command.assert_awaited_once_with(999, desired)
|
||||
|
||||
@@ -145,3 +145,86 @@ async def test_drain_active_agents_throttles_status_updates():
|
||||
# Start, one count-change update, and final update. Allow one extra update
|
||||
# if the loop observes the zero-agent state before exiting.
|
||||
assert 3 <= runner._update_runtime_status.call_count <= 4
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_gateway_stop_kills_tool_subprocesses_before_adapter_disconnect_on_timeout(monkeypatch):
    """On drain timeout, tool subprocesses must be killed BEFORE adapter
    disconnect so systemd's TimeoutStopSec doesn't SIGKILL the cgroup with
    bash/sleep children still attached (#8202)."""
    runner, adapter = make_restart_runner()
    runner._restart_drain_timeout = 0.01  # force timeout path

    # Records the order in which the patched cleanup hooks fire.
    call_order: list[str] = []

    def _fake_kill_all(task_id=None):
        call_order.append("kill_all")
        return 2

    def _fake_cleanup_envs():
        call_order.append("cleanup_environments")

    def _fake_cleanup_browsers():
        call_order.append("cleanup_browsers")

    async def _disconnect():
        call_order.append("disconnect")

    # Patch the module-level names the stop() helper imports lazily.
    import tools.process_registry as _pr
    import tools.terminal_tool as _tt
    import tools.browser_tool as _bt

    monkeypatch.setattr(_pr.process_registry, "kill_all", _fake_kill_all)
    monkeypatch.setattr(_tt, "cleanup_all_environments", _fake_cleanup_envs)
    monkeypatch.setattr(_bt, "cleanup_all_browsers", _fake_cleanup_browsers)

    adapter.disconnect = _disconnect

    # One running agent so the drain has something to wait on.
    runner._running_agents = {"session": MagicMock()}

    with patch("gateway.status.remove_pid_file"), patch("gateway.status.write_runtime_status"):
        await runner.stop()

    # First kill_all must precede the first disconnect. (Both the eager
    # post-interrupt cleanup and the final catch-all call _kill_tool_
    # subprocesses, so we expect kill_all to appear twice total.)
    assert "kill_all" in call_order
    assert "disconnect" in call_order
    first_kill = call_order.index("kill_all")
    first_disconnect = call_order.index("disconnect")
    assert first_kill < first_disconnect, (
        "Tool subprocesses must be killed before adapter disconnect on "
        f"drain timeout, got order: {call_order}"
    )
    # Defense-in-depth final cleanup still runs.
    assert call_order.count("kill_all") >= 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_gateway_stop_kills_tool_subprocesses_on_graceful_path(monkeypatch):
    """Graceful shutdown (no drain timeout) must still kill tool subprocesses
    exactly once via the final catch-all — regression guard against
    accidentally removing that call when refactoring."""
    runner, adapter = make_restart_runner()
    adapter.disconnect = AsyncMock()

    kill_count = 0

    def _fake_kill_all(task_id=None):
        # Count invocations; report zero processes killed.
        nonlocal kill_count
        kill_count += 1
        return 0

    import tools.process_registry as _pr
    import tools.terminal_tool as _tt
    import tools.browser_tool as _bt

    monkeypatch.setattr(_pr.process_registry, "kill_all", _fake_kill_all)
    monkeypatch.setattr(_tt, "cleanup_all_environments", lambda: None)
    monkeypatch.setattr(_bt, "cleanup_all_browsers", lambda: None)

    # No running agents → drain returns immediately, no timeout, no eager cleanup.
    with patch("gateway.status.remove_pid_file"), patch("gateway.status.write_runtime_status"):
        await runner.stop()

    # Only the final catch-all fires on the graceful path.
    assert kill_count == 1
|
||||
|
||||
@@ -193,7 +193,10 @@ async def test_start_gateway_replace_force_uses_terminate_pid(monkeypatch, tmp_p
|
||||
_pid_state["alive"] = False
|
||||
monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid)
|
||||
monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file)
|
||||
monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0)
|
||||
monkeypatch.setattr(
|
||||
"gateway.status.release_all_scoped_locks",
|
||||
lambda **kwargs: 0,
|
||||
)
|
||||
monkeypatch.setattr("gateway.status.terminate_pid", lambda pid, force=False: calls.append((pid, force)))
|
||||
monkeypatch.setattr("gateway.run.os.getpid", lambda: 100)
|
||||
monkeypatch.setattr("gateway.run.os.kill", lambda pid, sig: None)
|
||||
@@ -267,7 +270,10 @@ async def test_start_gateway_replace_writes_takeover_marker_before_sigterm(
|
||||
_pid_state["alive"] = False
|
||||
monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid)
|
||||
monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file)
|
||||
monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0)
|
||||
monkeypatch.setattr(
|
||||
"gateway.status.release_all_scoped_locks",
|
||||
lambda **kwargs: 0,
|
||||
)
|
||||
monkeypatch.setattr("gateway.status.write_takeover_marker", record_write_marker)
|
||||
monkeypatch.setattr("gateway.status.terminate_pid", record_terminate)
|
||||
monkeypatch.setattr("gateway.run.os.getpid", lambda: 100)
|
||||
|
||||
@@ -0,0 +1,399 @@
|
||||
"""Regression tests for issue #11016 — Telegram sessions trapped in
|
||||
repeated 'Interrupting current task...' while /stop reports no active task.
|
||||
|
||||
Covers three layers of the fix:
|
||||
|
||||
1. Adapter-side task ownership (_session_tasks map): /stop, /new, /reset
|
||||
actually cancel the in-flight adapter task and release the guard in
|
||||
order, so follow-up messages reach the new session.
|
||||
|
||||
2. Adapter-side on-entry self-heal: if _active_sessions still has an
|
||||
entry but the recorded owner task is already done/cancelled, clear it
|
||||
on the next inbound message rather than trapping the user.
|
||||
|
||||
3. Runner-side generation guard: a stale async run can't promote itself
|
||||
into _running_agents after /stop or /new bumped the generation, and
|
||||
can't clear a newer run's slot on the way out.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
)
|
||||
from gateway.run import GatewayRunner, _AGENT_PENDING_SENTINEL
|
||||
from gateway.session import SessionSource, build_session_key
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Adapter helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _StubAdapter(BasePlatformAdapter):
    """Adapter subclass whose platform methods are all no-ops.

    ``connect``, ``disconnect`` and ``send`` do nothing; ``get_chat_info``
    returns an empty dict.
    """

    async def connect(self):
        pass

    async def disconnect(self):
        pass

    async def send(self, chat_id, text, **kwargs):
        pass

    async def get_chat_info(self, chat_id):
        return {}
|
||||
|
||||
|
||||
def _make_adapter():
    """Build a Telegram ``_StubAdapter`` that records outgoing content.

    ``_send_with_retry`` is replaced by a coroutine that appends each
    ``content`` argument to ``adapter.sent_responses``.
    """
    adapter = _StubAdapter(
        PlatformConfig(enabled=True, token="test-token"),
        Platform.TELEGRAM,
    )
    adapter.sent_responses = []

    async def _record_send(chat_id, content, **kwargs):
        adapter.sent_responses.append(content)

    adapter._send_with_retry = _record_send
    return adapter
|
||||
|
||||
|
||||
def _make_event(text="hello", chat_id="12345"):
    """Return a TEXT ``MessageEvent`` from a Telegram DM source."""
    dm_source = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id=chat_id,
        chat_type="dm",
    )
    return MessageEvent(text=text, message_type=MessageType.TEXT, source=dm_source)
|
||||
|
||||
|
||||
def _session_key(chat_id="12345"):
    """Return the session key for a Telegram DM with *chat_id*.

    Builds the same kind of ``SessionSource`` as ``_make_event`` and passes
    it to ``build_session_key``.
    """
    dm_source = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id=chat_id,
        chat_type="dm",
    )
    return build_session_key(dm_source)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Runner helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_runner():
    """Build a bare ``GatewayRunner`` without running its ``__init__``.

    Only the attributes assigned below are set; ``_update_runtime_status``
    is a ``MagicMock``.
    """
    runner = object.__new__(GatewayRunner)
    telegram_config = PlatformConfig(enabled=True, token="***")
    runner.config = GatewayConfig(platforms={Platform.TELEGRAM: telegram_config})
    runner.adapters = {}
    runner._running_agents = {}
    runner._running_agents_ts = {}
    runner._session_run_generation = {}
    runner._pending_messages = {}
    runner._draining = False
    runner._update_runtime_status = MagicMock()
    return runner
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Layer 1: Adapter-side session cancellation on /stop /new /reset
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class TestAdapterSessionCancellation:
    """Adapter-level behaviour of /stop, /new and /reset against a busy session."""

    @pytest.mark.asyncio
    @pytest.mark.parametrize("command_text", ["/stop", "/new", "/reset"])
    async def test_command_cancels_active_task_and_unblocks_follow_up(
        self, command_text
    ):
        """/stop /new /reset must cancel the adapter task and let follow-ups through."""
        adapter = _make_adapter()
        sk = _session_key()
        processing_started = asyncio.Event()
        processing_cancelled = asyncio.Event()
        blocked_first_message = True

        async def _handler(event):
            nonlocal blocked_first_message
            cmd = event.get_command()
            if cmd in {"stop", "new", "reset", "model"}:
                return f"handled:{cmd}"

            if blocked_first_message:
                # Only the first plain message blocks; it waits on an event
                # nobody sets, so it can only end by cancellation.
                blocked_first_message = False
                processing_started.set()
                try:
                    await asyncio.Event().wait()
                except asyncio.CancelledError:
                    processing_cancelled.set()
                    raise
            return f"handled:text:{event.text}"

        adapter._message_handler = _handler

        await adapter.handle_message(_make_event("hello world"))
        await processing_started.wait()
        await asyncio.sleep(0)

        assert sk in adapter._active_sessions
        assert sk in adapter._session_tasks

        await adapter.handle_message(_make_event(command_text))

        assert processing_cancelled.is_set(), (
            f"{command_text} did not cancel the active processing task"
        )
        assert sk not in adapter._active_sessions
        assert sk not in adapter._pending_messages
        assert sk not in adapter._session_tasks
        expected = command_text.lstrip("/")
        assert any(f"handled:{expected}" in r for r in adapter.sent_responses)

        # Follow-up must go through normally now that the session is clean.
        await adapter.handle_message(
            _make_event("/model xiaomi/mimo-v2-pro --provider nous")
        )
        await asyncio.sleep(0)
        await asyncio.sleep(0)

        assert any("handled:model" in r for r in adapter.sent_responses), (
            f"follow-up /model stayed blocked after {command_text}"
        )
        assert sk not in adapter._pending_messages

    @pytest.mark.asyncio
    async def test_new_keeps_guard_until_command_finishes_then_runs_follow_up(self):
        """/new must finish runner logic before cancelling old work or releasing the guard."""
        adapter = _make_adapter()
        sk = _session_key()
        processing_started = asyncio.Event()
        command_started = asyncio.Event()
        allow_command_finish = asyncio.Event()
        follow_up_processed = asyncio.Event()
        call_order = []

        async def _handler(event):
            cmd = event.get_command()
            if cmd == "new":
                # The command handler parks until the test releases it so the
                # intermediate state can be inspected.
                call_order.append("command:start")
                command_started.set()
                await allow_command_finish.wait()
                call_order.append("command:end")
                return "handled:new"

            if event.text == "hello world":
                processing_started.set()
                try:
                    await asyncio.Event().wait()
                except asyncio.CancelledError:
                    call_order.append("original:cancelled")
                    raise

            if event.text == "after reset":
                call_order.append("followup:processed")
                follow_up_processed.set()
            return f"handled:text:{event.text}"

        adapter._message_handler = _handler

        await adapter.handle_message(_make_event("hello world"))
        await processing_started.wait()

        command_task = asyncio.create_task(adapter.handle_message(_make_event("/new")))
        await command_started.wait()
        await asyncio.sleep(0)

        assert sk in adapter._active_sessions

        await adapter.handle_message(_make_event("after reset"))
        await asyncio.sleep(0)
        await asyncio.sleep(0)

        assert sk in adapter._active_sessions, "guard must stay active while /new is still running"
        assert sk in adapter._pending_messages, "follow-up should stay queued until /new finishes"
        assert not follow_up_processed.is_set(), "follow-up ran before /new completed"
        assert "original:cancelled" not in call_order, "old task was cancelled before runner completed /new"

        allow_command_finish.set()
        await command_task
        await asyncio.wait_for(follow_up_processed.wait(), timeout=1.0)

        assert any("handled:new" in r for r in adapter.sent_responses)
        assert call_order.index("command:end") < call_order.index("original:cancelled")
        assert call_order.index("original:cancelled") < call_order.index("followup:processed")
        assert sk not in adapter._pending_messages
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Layer 2: Adapter-side on-entry self-heal for stale session locks
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class TestStaleSessionLockSelfHeal:
    """On-entry self-heal of session guards whose owner task has finished."""

    @pytest.mark.asyncio
    async def test_stale_lock_with_done_task_is_healed_on_next_message(self):
        """A split-brain guard (owner task done but entry still live) heals on next inbound."""
        adapter = _make_adapter()
        sk = _session_key()

        # Simulate the production split-brain: an _active_sessions entry
        # remains AND a recorded owner task, but that task is already done.
        async def _done():
            return None

        done_task = asyncio.create_task(_done())
        await done_task
        assert done_task.done()

        adapter._active_sessions[sk] = asyncio.Event()
        adapter._session_tasks[sk] = done_task

        assert adapter._session_task_is_stale(sk)

        async def _handler(event):
            return f"handled:{event.get_command() or 'text'}"

        adapter._message_handler = _handler

        # An ordinary message should heal the stale lock, then fall through
        # to normal dispatch. User gets a reply instead of a busy ack.
        await adapter.handle_message(_make_event("hello"))
        # Drain any spawned background tasks.
        for _ in range(5):
            await asyncio.sleep(0)

        assert any("handled:text" in r for r in adapter.sent_responses), (
            "stale lock trapped a normal message — split-brain not healed"
        )

    def test_no_owner_task_is_not_treated_as_stale(self):
        """If _session_tasks has no entry at all, the guard isn't stale.

        Tests and rare legitimate code paths install _active_sessions
        entries directly. Auto-healing those would break real fixtures.
        """
        adapter = _make_adapter()
        sk = _session_key()

        adapter._active_sessions[sk] = asyncio.Event()
        # No _session_tasks entry.

        assert adapter._session_task_is_stale(sk) is False
        assert adapter._heal_stale_session_lock(sk) is False

    def test_live_owner_task_is_not_stale(self):
        """When the owner task is alive, do NOT heal — agent is really busy."""
        adapter = _make_adapter()
        sk = _session_key()

        fake_task = MagicMock()
        fake_task.done.return_value = False
        adapter._active_sessions[sk] = asyncio.Event()
        adapter._session_tasks[sk] = fake_task

        assert adapter._session_task_is_stale(sk) is False
        assert adapter._heal_stale_session_lock(sk) is False
        # Lock still in place.
        assert sk in adapter._active_sessions
        assert sk in adapter._session_tasks
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Layer 3: Runner-side generation guard on slot promotion + release
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class TestRunnerSessionGenerationGuard:
    """Runner-side generation checks around releasing a running-agent slot."""

    def test_release_without_generation_behaves_as_before(self):
        """Releasing with no ``run_generation`` clears both slot dicts and returns True."""
        runner = _make_runner()
        sk = "agent:main:telegram:dm:12345"
        runner._running_agents[sk] = "agent"
        runner._running_agents_ts[sk] = 1.0
        assert runner._release_running_agent_state(sk) is True
        assert sk not in runner._running_agents
        assert sk not in runner._running_agents_ts

    def test_release_with_current_generation_clears_slot(self):
        """A release carrying the current generation is honoured."""
        runner = _make_runner()
        sk = "agent:main:telegram:dm:12345"
        gen = runner._begin_session_run_generation(sk)
        runner._running_agents[sk] = "agent"
        runner._running_agents_ts[sk] = 1.0

        assert runner._release_running_agent_state(sk, run_generation=gen) is True
        assert sk not in runner._running_agents

    def test_release_with_stale_generation_blocks(self):
        """A release carrying an invalidated generation must leave the slot alone."""
        runner = _make_runner()
        sk = "agent:main:telegram:dm:12345"
        stale_gen = runner._begin_session_run_generation(sk)
        # /stop bumps the generation — stale run's generation is no longer current.
        runner._invalidate_session_run_generation(sk, reason="stop")
        # The fresh run lands next; imagine it has its own state installed.
        runner._running_agents[sk] = "fresh_agent"
        runner._running_agents_ts[sk] = 2.0

        # Stale run's unwind MUST NOT clobber the fresh run's state.
        released = runner._release_running_agent_state(sk, run_generation=stale_gen)

        assert released is False
        assert runner._running_agents[sk] == "fresh_agent"
        assert runner._running_agents_ts[sk] == 2.0

    def test_is_session_run_current_tracks_bumps(self):
        """``_is_session_run_current`` flips to False after an invalidation; a new generation is larger."""
        runner = _make_runner()
        sk = "agent:main:telegram:dm:12345"
        gen1 = runner._begin_session_run_generation(sk)
        assert runner._is_session_run_current(sk, gen1) is True

        runner._invalidate_session_run_generation(sk, reason="test")
        assert runner._is_session_run_current(sk, gen1) is False

        gen2 = runner._begin_session_run_generation(sk)
        assert gen2 > gen1
        assert runner._is_session_run_current(sk, gen2) is True
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Layer 1 (regression): old task's finally must NOT delete a newer guard
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class TestOldTaskCannotClobberNewerGuard:
    """Direct regression for the unconditional-delete bug.

    Before the guard-match fix, a task in its finally would delete
    ``_active_sessions[session_key]`` unconditionally — even if a
    /stop/ /new command had already swapped in its own command_guard
    (which then gets clobbered, opening a race for follow-up messages).
    """

    def test_release_session_guard_matches_on_event_identity(self):
        """Release with a non-matching guard is a no-op; the matching guard removes the entry."""
        adapter = _make_adapter()
        sk = _session_key()

        old_guard = asyncio.Event()
        new_guard = asyncio.Event()
        # Command swapped in a newer guard.
        adapter._active_sessions[sk] = new_guard

        # Old task tries to release using its captured (stale) guard.
        adapter._release_session_guard(sk, guard=old_guard)

        # The newer guard survives.
        assert adapter._active_sessions.get(sk) is new_guard

        # Now the command itself releases using the matching guard.
        adapter._release_session_guard(sk, guard=new_guard)
        assert sk not in adapter._active_sessions

    def test_release_session_guard_without_guard_releases_unconditionally(self):
        """Omitting ``guard`` removes whatever entry is present."""
        adapter = _make_adapter()
        sk = _session_key()
        adapter._active_sessions[sk] = asyncio.Event()
        # Callers that don't know the guard (e.g. cancel_session_processing's
        # default path) still work.
        adapter._release_session_guard(sk)
        assert sk not in adapter._active_sessions
|
||||
|
||||
@@ -404,6 +404,53 @@ class TestScopedLocks:
|
||||
status.release_scoped_lock("telegram-bot-token", "secret")
|
||||
assert not lock_path.exists()
|
||||
|
||||
def test_release_all_scoped_locks_can_target_single_owner(self, tmp_path, monkeypatch):
    """Only the lock file whose pid/start_time match the given owner is removed.

    Method of ``TestScopedLocks`` (named in the hunk header); the class
    header lies outside this excerpt.
    """
    monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
    lock_dir = tmp_path / "locks"
    lock_dir.mkdir(parents=True, exist_ok=True)

    target_lock = lock_dir / "telegram-bot-token-target.lock"
    other_lock = lock_dir / "slack-app-token-other.lock"
    target_lock.write_text(json.dumps({
        "pid": 111,
        "start_time": 222,
        "kind": "hermes-gateway",
    }))
    other_lock.write_text(json.dumps({
        "pid": 999,
        "start_time": 333,
        "kind": "hermes-gateway",
    }))

    removed = status.release_all_scoped_locks(
        owner_pid=111,
        owner_start_time=222,
    )

    assert removed == 1
    assert not target_lock.exists()
    assert other_lock.exists()
|
||||
|
||||
def test_release_all_scoped_locks_skips_pid_reuse_mismatch(self, tmp_path, monkeypatch):
    """A lock with the same pid but a different start_time must be left in place.

    Method of ``TestScopedLocks`` (named in the hunk header); the class
    header lies outside this excerpt.
    """
    monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
    lock_dir = tmp_path / "locks"
    lock_dir.mkdir(parents=True, exist_ok=True)

    reused_pid_lock = lock_dir / "telegram-bot-token-reused.lock"
    reused_pid_lock.write_text(json.dumps({
        "pid": 111,
        "start_time": 999,
        "kind": "hermes-gateway",
    }))

    removed = status.release_all_scoped_locks(
        owner_pid=111,
        owner_start_time=222,
    )

    assert removed == 0
    assert reused_pid_lock.exists()
|
||||
|
||||
|
||||
class TestTakeoverMarker:
|
||||
"""Tests for the --replace takeover marker.
|
||||
|
||||
@@ -164,7 +164,7 @@ class TestArceeURLMapping:
|
||||
assert "arceeai" in _PROVIDER_PREFIXES
|
||||
|
||||
def test_trajectory_compressor_detects_arcee(self):
|
||||
import scripts.trajectory_compressor as tc
|
||||
import trajectory_compressor as tc
|
||||
comp = tc.TrajectoryCompressor.__new__(tc.TrajectoryCompressor)
|
||||
comp.config = types.SimpleNamespace(base_url="https://api.arcee.ai/api/v1")
|
||||
assert comp._detect_provider() == "arcee"
|
||||
|
||||
@@ -68,3 +68,68 @@ def test_build_welcome_banner_uses_normalized_toolset_names():
|
||||
assert "homeassistant_tools:" not in output
|
||||
assert "honcho_tools:" not in output
|
||||
assert "web_tools:" not in output
|
||||
|
||||
|
||||
def test_build_welcome_banner_title_is_hyperlinked_to_release():
    """Panel title (version label) is wrapped in an OSC-8 hyperlink to the GitHub release."""
    import io
    from unittest.mock import patch as _patch
    import hermes_cli.banner as _banner
    import model_tools as _mt
    import tools.mcp_tool as _mcp

    # Reset the module-level cache before rendering.
    _banner._latest_release_cache = None
    tag_url = ("v2026.4.23", "https://github.com/NousResearch/hermes-agent/releases/tag/v2026.4.23")

    buf = io.StringIO()
    with (
        _patch.object(_mt, "check_tool_availability", return_value=(["web"], [])),
        _patch.object(_banner, "get_available_skills", return_value={}),
        _patch.object(_banner, "get_update_result", return_value=None),
        _patch.object(_mcp, "get_mcp_status", return_value=[]),
        _patch.object(_banner, "get_latest_release_tag", return_value=tag_url),
    ):
        console = Console(file=buf, force_terminal=True, color_system="truecolor", width=160)
        _banner.build_welcome_banner(
            console=console, model="x", cwd="/tmp",
            session_id="abc123",
            tools=[{"function": {"name": "read_file"}}],
            get_toolset_for_tool=lambda n: "file",
        )

    raw = buf.getvalue()
    # The existing version label must still be present in the title
    assert "Hermes Agent v" in raw, "Version label missing from title"
    # OSC-8 hyperlink escape sequence present with the release URL
    assert "\x1b]8;" in raw, "OSC-8 hyperlink not emitted"
    assert "releases/tag/v2026.4.23" in raw, "Release URL missing from banner output"
|
||||
|
||||
|
||||
def test_build_welcome_banner_title_falls_back_when_no_tag():
    """Without a resolvable tag, the panel title renders as plain text (no hyperlink escape)."""
    import io
    from unittest.mock import patch as _patch
    import hermes_cli.banner as _banner
    import model_tools as _mt
    import tools.mcp_tool as _mcp

    # Reset the module-level cache before rendering.
    _banner._latest_release_cache = None
    buf = io.StringIO()
    with (
        _patch.object(_mt, "check_tool_availability", return_value=(["web"], [])),
        _patch.object(_banner, "get_available_skills", return_value={}),
        _patch.object(_banner, "get_update_result", return_value=None),
        _patch.object(_mcp, "get_mcp_status", return_value=[]),
        _patch.object(_banner, "get_latest_release_tag", return_value=None),
    ):
        console = Console(file=buf, force_terminal=True, color_system="truecolor", width=160)
        _banner.build_welcome_banner(
            console=console, model="x", cwd="/tmp",
            session_id="abc123",
            tools=[{"function": {"name": "read_file"}}],
            get_toolset_for_tool=lambda n: "file",
        )

    raw = buf.getvalue()
    assert "Hermes Agent v" in raw, "Version label missing from title"
    assert "\x1b]8;" not in raw, "OSC-8 hyperlink should not be emitted without a tag"
|
||||
|
||||
@@ -95,7 +95,10 @@ class TestGeneratedSystemdUnits:
|
||||
assert "ExecStop=" not in unit
|
||||
assert "ExecReload=/bin/kill -USR1 $MAINPID" in unit
|
||||
assert f"RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}" in unit
|
||||
assert "TimeoutStopSec=60" in unit
|
||||
# TimeoutStopSec must exceed the default drain_timeout (60s) so
|
||||
# systemd doesn't SIGKILL the cgroup before post-interrupt cleanup
|
||||
# (tool subprocess kill, adapter disconnect) runs — issue #8202.
|
||||
assert "TimeoutStopSec=90" in unit
|
||||
|
||||
def test_user_unit_includes_resolved_node_directory_in_path(self, monkeypatch):
|
||||
monkeypatch.setattr(gateway_cli.shutil, "which", lambda cmd: "/home/test/.nvm/versions/node/v24.14.0/bin/node" if cmd == "node" else None)
|
||||
@@ -111,7 +114,10 @@ class TestGeneratedSystemdUnits:
|
||||
assert "ExecStop=" not in unit
|
||||
assert "ExecReload=/bin/kill -USR1 $MAINPID" in unit
|
||||
assert f"RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}" in unit
|
||||
assert "TimeoutStopSec=60" in unit
|
||||
# TimeoutStopSec must exceed the default drain_timeout (60s) so
|
||||
# systemd doesn't SIGKILL the cgroup before post-interrupt cleanup
|
||||
# (tool subprocess kill, adapter disconnect) runs — issue #8202.
|
||||
assert "TimeoutStopSec=90" in unit
|
||||
assert "WantedBy=multi-user.target" in unit
|
||||
|
||||
|
||||
|
||||
@@ -463,7 +463,7 @@ class TestPlatformToolsetConsistency:
|
||||
|
||||
gateway_includes = set(TOOLSETS["hermes-gateway"]["includes"])
|
||||
# Exclude non-messaging platforms from the check
|
||||
non_messaging = {"cli", "api_server"}
|
||||
non_messaging = {"cli", "api_server", "cron"}
|
||||
for platform, meta in PLATFORMS.items():
|
||||
if platform in non_messaging:
|
||||
continue
|
||||
@@ -601,3 +601,122 @@ class TestImagegenModelPicker:
|
||||
_configure_imagegen_model("fal", config)
|
||||
assert isinstance(config["image_gen"], dict)
|
||||
assert config["image_gen"]["model"] == "fal-ai/flux-2/klein/9b"
|
||||
|
||||
|
||||
def test_get_platform_tools_recovers_non_configurable_toolsets_from_composite():
    """Non-configurable toolsets whose tools are in the composite but not in
    CONFIGURABLE_TOOLSETS should still appear in the result.
    """
    from toolsets import TOOLSETS
    from hermes_cli.tools_config import PLATFORMS
    from unittest.mock import patch as mock_patch

    # Work on a copy so the real registry is never mutated.
    fake_toolsets = dict(TOOLSETS)
    fake_toolsets["_test_platform_tool"] = {
        "description": "test",
        "tools": ["_test_special_tool"],
        "includes": [],
    }
    fake_toolsets["hermes-_test_platform"] = {
        "description": "test composite",
        "tools": ["web_search", "web_extract", "terminal", "process", "_test_special_tool"],
        "includes": [],
    }

    test_platforms = {
        "_test_platform": {"label": "Test", "default_toolset": "hermes-_test_platform"},
    }

    with mock_patch("hermes_cli.tools_config.PLATFORMS", {**PLATFORMS, **test_platforms}):
        with mock_patch("toolsets.TOOLSETS", fake_toolsets):
            enabled = _get_platform_tools({}, "_test_platform")

    assert "_test_platform_tool" in enabled
    assert "web" in enabled
    assert "terminal" in enabled
|
||||
|
||||
|
||||
def test_get_platform_tools_second_pass_skips_fully_claimed_toolsets():
    """Toolsets whose tools are fully covered by configurable keys should NOT
    be added by the second pass (prevents 'search', 'hermes-acp' noise).
    """
    enabled = _get_platform_tools({}, "cli")

    assert "search" not in enabled
|
||||
|
||||
|
||||
def test_get_platform_tools_discord_includes_discord_not_admin():
    """With an empty config, discord enables ``discord`` but not ``discord_admin``."""
    enabled = _get_platform_tools({}, "discord")
    assert "discord" in enabled
    assert "discord_admin" not in enabled
|
||||
|
||||
|
||||
def test_discord_admin_in_configurable_toolsets():
    """``discord_admin`` appears as a key in the ``CONFIGURABLE_TOOLSETS`` triples."""
    assert any(ts_key == "discord_admin" for ts_key, _, _ in CONFIGURABLE_TOOLSETS)
|
||||
|
||||
|
||||
def test_discord_admin_in_default_off():
    """``discord_admin`` is listed in ``_DEFAULT_OFF_TOOLSETS``."""
    assert "discord_admin" in _DEFAULT_OFF_TOOLSETS
|
||||
|
||||
|
||||
def test_get_platform_tools_feishu_includes_doc_and_drive():
    """With an empty config, feishu enables both ``feishu_doc`` and ``feishu_drive``."""
    enabled = _get_platform_tools({}, "feishu")
    assert "feishu_doc" in enabled
    assert "feishu_drive" in enabled
|
||||
|
||||
|
||||
def test_get_platform_tools_feishu_tools_not_on_other_platforms():
    """The feishu toolsets must not show up for cli, telegram or discord."""
    for plat in ["cli", "telegram", "discord"]:
        enabled = _get_platform_tools({}, plat)
        assert "feishu_doc" not in enabled, f"feishu_doc leaked onto {plat}"
        assert "feishu_drive" not in enabled, f"feishu_drive leaked onto {plat}"
|
||||
|
||||
|
||||
def test_save_platform_tools_normalizes_numeric_entries():
    """YAML may parse bare numeric toolset names as int. They should be
    normalized to str so they survive the save round-trip.
    """
    config = {
        "platform_toolsets": {
            "cli": ["web", "terminal", 12306, "custom-mcp"]
        }
    }

    # save_config is patched out so nothing is written to disk.
    with patch("hermes_cli.tools_config.save_config"):
        _save_platform_tools(config, "cli", {"web", "browser"})

    saved = config["platform_toolsets"]["cli"]
    assert "12306" in saved
    assert 12306 not in saved
|
||||
|
||||
|
||||
def test_save_platform_tools_clears_stale_no_mcp():
    """When the new selection doesn't include no_mcp, the sentinel should
    be stripped from preserved entries so MCP servers are re-enabled.
    """
    config = {
        "platform_toolsets": {
            "cli": ["web", "terminal", "no_mcp"]
        }
    }

    # save_config is patched out so nothing is written to disk.
    with patch("hermes_cli.tools_config.save_config"):
        _save_platform_tools(config, "cli", {"web", "browser"})

    saved = config["platform_toolsets"]["cli"]
    assert "no_mcp" not in saved
|
||||
|
||||
|
||||
def test_save_platform_tools_preserves_explicit_no_mcp():
    """When the new selection explicitly includes no_mcp, it should be kept."""
    config = {
        "platform_toolsets": {
            "cli": ["web", "no_mcp"]
        }
    }

    # save_config is patched out so nothing is written to disk.
    with patch("hermes_cli.tools_config.save_config"):
        _save_platform_tools(config, "cli", {"web", "no_mcp"})

    saved = config["platform_toolsets"]["cli"]
    assert "no_mcp" in saved
|
||||
|
||||
@@ -422,6 +422,152 @@ class TestCmdUpdateLaunchdRestart:
|
||||
]
|
||||
assert len(restart_calls) == 1
|
||||
|
||||
@patch("shutil.which", return_value=None)
@patch("subprocess.run")
def test_update_prefers_sigusr1_over_systemctl_restart_when_mainpid_known(
    self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
):
    """Drain-aware update: when systemctl show reports a MainPID, the
    update path sends SIGUSR1 and waits for graceful exit + respawn,
    instead of ``systemctl restart`` (which SIGKILLs in-flight agents).

    Method of ``TestCmdUpdateLaunchdRestart`` (named in the hunk header);
    the class header lies outside this excerpt.
    """
    monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
    monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
    monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)

    # Track state: before kill → "active" (old PID),
    # after kill + exit → briefly inactive, then "active" again (new PID).
    state = {"killed": False}

    def side_effect(cmd, **kwargs):
        joined = " ".join(str(c) for c in cmd)

        if "rev-parse" in joined and "--abbrev-ref" in joined:
            return subprocess.CompletedProcess(cmd, 0, stdout="main\n", stderr="")
        if "rev-parse" in joined and "--verify" in joined:
            return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
        if "rev-list" in joined:
            return subprocess.CompletedProcess(cmd, 0, stdout="3\n", stderr="")

        # Only expose a user-scope service.
        if "systemctl" in joined and "list-units" in joined:
            if "--user" in joined:
                return subprocess.CompletedProcess(
                    cmd, 0,
                    stdout="hermes-gateway.service loaded active running\n",
                    stderr="",
                )
            return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")

        if "systemctl" in joined and "is-active" in joined:
            # Pre-kill: active. Post-kill: active again (respawned by
            # Restart=on-failure). The drain loop verifies liveness
            # separately via os.kill(pid, 0).
            return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="")

        # The new code path.
        if "systemctl" in joined and "show" in joined and "MainPID" in joined:
            return subprocess.CompletedProcess(cmd, 0, stdout="4242\n", stderr="")

        # If systemctl restart is called, this test fails its intent —
        # but still let it succeed so we can assert it was NOT called.
        if "systemctl" in joined and "restart" in joined:
            return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")

        return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")

    mock_run.side_effect = side_effect

    # Track SIGUSR1 delivery and simulate the gateway draining + exiting.
    sigusr1_sent = {"value": False}

    def fake_kill(pid, sig):
        import signal as _s
        if pid == 4242 and sig == _s.SIGUSR1:
            sigusr1_sent["value"] = True
            state["killed"] = True
            return
        if pid == 4242 and sig == 0:
            # Liveness probe — report dead once SIGUSR1 has been sent.
            if state["killed"]:
                raise ProcessLookupError()
            return
        # For any other PID/sig combination, succeed silently.
        return

    monkeypatch.setattr("os.kill", fake_kill)

    with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
        cmd_update(mock_args)

    # SIGUSR1 must have been delivered to the gateway MainPID.
    assert sigusr1_sent["value"], "Expected SIGUSR1 to be sent to MainPID"

    # And `systemctl restart` must NOT have been used (that's the
    # non-draining kill-everything path we're moving away from).
    restart_calls = [
        c for c in mock_run.call_args_list
        if "systemctl" in " ".join(str(a) for a in c.args[0])
        and "restart" in " ".join(str(a) for a in c.args[0])
    ]
    assert restart_calls == [], (
        "Graceful SIGUSR1 succeeded; `systemctl restart` should not "
        f"have been called. Got: {restart_calls}"
    )

    captured = capsys.readouterr().out
    assert "draining" in captured.lower()
    assert "Restarted hermes-gateway" in captured
|
||||
|
||||
@patch("shutil.which", return_value=None)
@patch("subprocess.run")
def test_update_falls_back_to_systemctl_restart_when_sigusr1_times_out(
    self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
):
    """If the gateway doesn't exit within the drain budget (e.g. old unit
    missing ``Restart=on-failure`` or an agent ignoring SIGUSR1), the
    update path falls back to ``systemctl restart``.

    Method of ``TestCmdUpdateLaunchdRestart`` (named in the hunk header);
    the class header lies outside this excerpt.
    """
    monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
    monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
    monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)

    mock_run.side_effect = _make_run_side_effect(
        commit_count="3",
        systemd_active=True,
    )

    # Patch systemctl show to report MainPID=4242 so cmd_update attempts
    # the graceful path.
    orig = mock_run.side_effect

    def wrapped(cmd, **kwargs):
        joined = " ".join(str(c) for c in cmd)
        if "systemctl" in joined and "show" in joined and "MainPID" in joined:
            return subprocess.CompletedProcess(cmd, 0, stdout="4242\n", stderr="")
        return orig(cmd, **kwargs)

    mock_run.side_effect = wrapped

    # Simulate the drain helper failing to confirm a clean exit — either
    # because the gateway ignored SIGUSR1 or the drain budget was
    # exceeded. cmd_update() should detect this and escalate.
    monkeypatch.setattr(
        "hermes_cli.gateway._graceful_restart_via_sigusr1",
        lambda pid, drain_timeout: False,
    )

    with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
        cmd_update(mock_args)

    # Fallback kicked in → systemctl restart was called.
    restart_calls = [
        c for c in mock_run.call_args_list
        if "systemctl" in " ".join(str(a) for a in c.args[0])
        and "restart" in " ".join(str(a) for a in c.args[0])
    ]
    assert len(restart_calls) >= 1, (
        "Drain path failed; expected fallback `systemctl restart`."
    )
|
||||
|
||||
@patch("shutil.which", return_value=None)
|
||||
@patch("subprocess.run")
|
||||
def test_update_no_gateway_running_skips_restart(
|
||||
|
||||
@@ -0,0 +1,255 @@
|
||||
"""Tests for ``hermes_cli.voice`` — the TUI gateway's voice wrapper.
|
||||
|
||||
The module is imported *lazily* by ``tui_gateway/server.py`` so that a
|
||||
box with missing audio deps fails at call time (returning a clean RPC
|
||||
error) rather than at gateway startup. These tests therefore only
|
||||
assert the public contract the gateway depends on: the three symbols
|
||||
exist, ``stop_and_transcribe`` is a no-op when nothing is recording,
|
||||
and ``speak_text`` tolerates empty input without touching the provider
|
||||
stack.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
|
||||
class TestPublicAPI:
    """The three voice entry points must be importable and callable."""

    def test_gateway_symbols_importable(self):
        """Match the exact import shape tui_gateway/server.py uses."""
        from hermes_cli.voice import (
            speak_text,
            start_recording,
            stop_and_transcribe,
        )

        assert callable(start_recording)
        assert callable(stop_and_transcribe)
        assert callable(speak_text)
|
||||
|
||||
|
||||
class TestStopWithoutStart:
    """``stop_and_transcribe`` when no recorder has been installed."""

    def test_returns_none_when_no_recording_active(self, monkeypatch):
        """Idempotent no-op: stop before start must not raise or touch state."""
        import hermes_cli.voice as voice

        # Force the "nothing is recording" state regardless of earlier tests.
        monkeypatch.setattr(voice, "_recorder", None)

        assert voice.stop_and_transcribe() is None
|
||||
|
||||
|
||||
class TestSpeakTextGuards:
    """``speak_text`` input guards for empty / whitespace-only text."""

    @pytest.mark.parametrize("text", ["", " ", "\n\t "])
    def test_empty_text_is_noop(self, text):
        """Empty / whitespace-only text must return without importing tts_tool
        (the gateway spawns a thread per call, so a no-op on empty input
        keeps the thread pool from churning on trivial inputs)."""
        from hermes_cli.voice import speak_text

        # Should simply return None without raising.
        assert speak_text(text) is None
|
||||
|
||||
|
||||
class TestContinuousAPI:
    """Continuous (VAD) mode API — CLI-parity loop entry points."""

    def test_continuous_exports(self):
        """The three continuous-mode entry points import and are callable."""
        from hermes_cli.voice import (
            is_continuous_active,
            start_continuous,
            stop_continuous,
        )

        for entry_point in (start_continuous, stop_continuous, is_continuous_active):
            assert callable(entry_point)

    def test_not_active_by_default(self, monkeypatch):
        """With the module globals reset, the loop reports inactive."""
        import hermes_cli.voice as voice

        # Isolate from any state left behind by other tests in the session.
        idle_state = (
            ("_continuous_active", False),
            ("_continuous_recorder", None),
        )
        for attr_name, attr_value in idle_state:
            monkeypatch.setattr(voice, attr_name, attr_value)

        active = voice.is_continuous_active()
        assert active is False

    def test_stop_continuous_idempotent_when_inactive(self, monkeypatch):
        """stop_continuous must not raise when no loop is active — the
        gateway's voice.toggle off path calls it unconditionally."""
        import hermes_cli.voice as voice

        idle_state = (
            ("_continuous_active", False),
            ("_continuous_recorder", None),
        )
        for attr_name, attr_value in idle_state:
            monkeypatch.setattr(voice, attr_name, attr_value)

        # Should return cleanly without exceptions
        stop_result = voice.stop_continuous()
        assert stop_result is None
        assert voice.is_continuous_active() is False

    def test_double_start_is_idempotent(self, monkeypatch):
        """A second start_continuous while already active is a no-op — prevents
        two overlapping capture threads fighting over the microphone when the
        UI double-fires (e.g. both /voice on and Ctrl+B within the same tick)."""
        import hermes_cli.voice as voice

        monkeypatch.setattr(voice, "_continuous_active", True)

        # One entry is appended per recorder start() call.
        start_log = []

        class FakeRecorder:
            def start(self, on_silence_stop=None):
                start_log.append(on_silence_stop)

            def cancel(self):
                return None

        monkeypatch.setattr(voice, "_continuous_recorder", FakeRecorder())

        voice.start_continuous(on_transcript=lambda _t: None)

        # The guard inside start_continuous short-circuits before rec.start()
        assert len(start_log) == 0
|
||||
|
||||
|
||||
class TestContinuousLoopSimulation:
    """End-to-end simulation of the VAD loop with a fake recorder.

    Proves auto-restart works: the silence callback must trigger transcribe →
    on_transcript → re-call rec.start(on_silence_stop=same_cb). Also covers
    the 3-strikes no-speech halt and a stop that arrives mid-transcription.
    """

    @pytest.fixture
    def fake_recorder(self, monkeypatch):
        """Reset continuous-mode module state and install a scripted recorder.

        Returns the ``FakeRecorder`` instance that ``create_audio_recorder``
        is patched to hand out, so tests can inspect its counters and fire
        the captured silence callback by hand.
        """
        import hermes_cli.voice as voice

        # Reset module state between tests.
        monkeypatch.setattr(voice, "_continuous_active", False)
        monkeypatch.setattr(voice, "_continuous_recorder", None)
        monkeypatch.setattr(voice, "_continuous_no_speech_count", 0)
        monkeypatch.setattr(voice, "_continuous_on_transcript", None)
        monkeypatch.setattr(voice, "_continuous_on_status", None)
        monkeypatch.setattr(voice, "_continuous_on_silent_limit", None)

        class FakeRecorder:
            _silence_threshold = 200
            _silence_duration = 3.0
            is_recording = False

            def __init__(self):
                self.start_calls = 0
                self.last_callback = None
                self.stopped = 0
                self.cancelled = 0
                # Preset WAV path returned by stop()
                self.next_stop_wav = "/tmp/fake.wav"

            def start(self, on_silence_stop=None):
                # Remember the callback so a test can simulate silence.
                self.start_calls += 1
                self.last_callback = on_silence_stop
                self.is_recording = True

            def stop(self):
                self.stopped += 1
                self.is_recording = False
                return self.next_stop_wav

            def cancel(self):
                self.cancelled += 1
                self.is_recording = False

        rec = FakeRecorder()
        monkeypatch.setattr(voice, "create_audio_recorder", lambda: rec)
        # Skip real file ops in the silence callback.
        # NOTE(review): ``voice.os.path`` is presumably the shared ``os.path``
        # module, so this patch is process-wide until teardown — confirm.
        monkeypatch.setattr(voice.os.path, "isfile", lambda _p: False)
        return rec

    def test_loop_auto_restarts_after_transcript(self, fake_recorder, monkeypatch):
        """A successful transcript is delivered and the recorder is restarted."""
        import hermes_cli.voice as voice

        monkeypatch.setattr(
            voice,
            "transcribe_recording",
            lambda _p: {"success": True, "transcript": "hello world"},
        )
        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)

        transcripts = []
        statuses = []

        voice.start_continuous(
            on_transcript=lambda t: transcripts.append(t),
            on_status=lambda s: statuses.append(s),
        )

        assert fake_recorder.start_calls == 1
        assert statuses == ["listening"]

        # Simulate AudioRecorder's silence detector firing.
        fake_recorder.last_callback()

        assert transcripts == ["hello world"]
        assert fake_recorder.start_calls == 2  # auto-restarted
        assert statuses == ["listening", "transcribing", "listening"]
        assert voice.is_continuous_active() is True

        voice.stop_continuous()

    def test_silent_limit_halts_loop_after_three_strikes(self, fake_recorder, monkeypatch):
        """Three consecutive empty transcripts fire on_silent_limit once and stop the loop."""
        import hermes_cli.voice as voice

        # Transcription returns no speech — fake_recorder.stop() returns the
        # path, but transcribe returns empty text, counting as silence.
        monkeypatch.setattr(
            voice,
            "transcribe_recording",
            lambda _p: {"success": True, "transcript": ""},
        )
        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)

        transcripts = []
        silent_limit_fired = []

        voice.start_continuous(
            on_transcript=lambda t: transcripts.append(t),
            on_silent_limit=lambda: silent_limit_fired.append(True),
        )

        # Fire silence callback 3 times
        for _ in range(3):
            fake_recorder.last_callback()

        assert transcripts == []
        assert silent_limit_fired == [True]
        assert voice.is_continuous_active() is False
        assert fake_recorder.cancelled >= 1

    def test_stop_during_transcription_discards_restart(self, fake_recorder, monkeypatch):
        """User hits Ctrl+B mid-transcription: the loop must NOT restart, and
        the in-flight transcript is dropped rather than delivered to
        on_transcript (see the assertions below)."""
        import hermes_cli.voice as voice

        stop_triggered = {"flag": False}

        def late_transcribe(_p):
            # Simulate stop_continuous arriving while we're inside transcribe
            voice.stop_continuous()
            stop_triggered["flag"] = True
            return {"success": True, "transcript": "final word"}

        monkeypatch.setattr(voice, "transcribe_recording", late_transcribe)
        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)

        transcripts = []
        voice.start_continuous(on_transcript=lambda t: transcripts.append(t))

        initial_starts = fake_recorder.start_calls  # 1
        fake_recorder.last_callback()

        assert stop_triggered["flag"] is True
        # Loop is stopped — no auto-restart
        assert fake_recorder.start_calls == initial_starts
        # The in-flight transcript was suppressed because we stopped mid-flight
        assert transcripts == []
        assert voice.is_continuous_active() is False
|
||||
@@ -110,12 +110,12 @@ class TestWebServerEndpoints:
|
||||
|
||||
import hermes_state
|
||||
from hermes_constants import get_hermes_home
|
||||
from hermes_cli.web_server import app, _SESSION_TOKEN
|
||||
from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN
|
||||
|
||||
monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", get_hermes_home() / "state.db")
|
||||
|
||||
self.client = TestClient(app)
|
||||
self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}"
|
||||
self.client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN
|
||||
|
||||
def test_get_status(self):
|
||||
resp = self.client.get("/api/status")
|
||||
@@ -221,12 +221,12 @@ class TestWebServerEndpoints:
|
||||
def test_reveal_env_var(self, tmp_path):
|
||||
"""POST /api/env/reveal should return the real unredacted value."""
|
||||
from hermes_cli.config import save_env_value
|
||||
from hermes_cli.web_server import _SESSION_TOKEN
|
||||
from hermes_cli.web_server import _SESSION_HEADER_NAME, _SESSION_TOKEN
|
||||
save_env_value("TEST_REVEAL_KEY", "super-secret-value-12345")
|
||||
resp = self.client.post(
|
||||
"/api/env/reveal",
|
||||
json={"key": "TEST_REVEAL_KEY"},
|
||||
headers={"Authorization": f"Bearer {_SESSION_TOKEN}"},
|
||||
headers={_SESSION_HEADER_NAME: _SESSION_TOKEN},
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
@@ -235,11 +235,11 @@ class TestWebServerEndpoints:
|
||||
|
||||
def test_reveal_env_var_not_found(self):
|
||||
"""POST /api/env/reveal should 404 for unknown keys."""
|
||||
from hermes_cli.web_server import _SESSION_TOKEN
|
||||
from hermes_cli.web_server import _SESSION_HEADER_NAME, _SESSION_TOKEN
|
||||
resp = self.client.post(
|
||||
"/api/env/reveal",
|
||||
json={"key": "NONEXISTENT_KEY_XYZ"},
|
||||
headers={"Authorization": f"Bearer {_SESSION_TOKEN}"},
|
||||
headers={_SESSION_HEADER_NAME: _SESSION_TOKEN},
|
||||
)
|
||||
assert resp.status_code == 404
|
||||
|
||||
@@ -249,7 +249,7 @@ class TestWebServerEndpoints:
|
||||
from hermes_cli.web_server import app
|
||||
from hermes_cli.config import save_env_value
|
||||
save_env_value("TEST_REVEAL_NOAUTH", "secret-value")
|
||||
# Use a fresh client WITHOUT the Authorization header
|
||||
# Use a fresh client WITHOUT the dashboard session header
|
||||
unauth_client = TestClient(app)
|
||||
resp = unauth_client.post(
|
||||
"/api/env/reveal",
|
||||
@@ -260,14 +260,47 @@ class TestWebServerEndpoints:
|
||||
def test_reveal_env_var_bad_token(self, tmp_path):
|
||||
"""POST /api/env/reveal with wrong token should return 401."""
|
||||
from hermes_cli.config import save_env_value
|
||||
from hermes_cli.web_server import _SESSION_HEADER_NAME
|
||||
save_env_value("TEST_REVEAL_BADAUTH", "secret-value")
|
||||
resp = self.client.post(
|
||||
"/api/env/reveal",
|
||||
json={"key": "TEST_REVEAL_BADAUTH"},
|
||||
headers={"Authorization": "Bearer wrong-token-here"},
|
||||
headers={_SESSION_HEADER_NAME: "wrong-token-here"},
|
||||
)
|
||||
assert resp.status_code == 401
|
||||
|
||||
def test_reveal_env_var_custom_session_header_ignores_proxy_authorization(self, tmp_path):
    """A valid dashboard session header should coexist with proxy auth."""
    from hermes_cli.config import save_env_value
    from hermes_cli.web_server import _SESSION_HEADER_NAME, _SESSION_TOKEN

    save_env_value("TEST_REVEAL_PROXY_AUTH", "secret-value")

    # The session token travels in the dashboard header while Authorization
    # carries unrelated Basic credentials in the same request.
    request_headers = {
        _SESSION_HEADER_NAME: _SESSION_TOKEN,
        "Authorization": "Basic dXNlcjpwYXNz",
    }
    payload = {"key": "TEST_REVEAL_PROXY_AUTH"}
    resp = self.client.post("/api/env/reveal", json=payload, headers=request_headers)

    assert resp.status_code == 200
    assert resp.json()["value"] == "secret-value"
|
||||
|
||||
def test_reveal_env_var_legacy_authorization_header_still_works(self, tmp_path):
    """Keep old dashboard bundles working while the new header rolls out."""
    from hermes_cli.config import save_env_value
    from hermes_cli.web_server import _SESSION_TOKEN

    save_env_value("TEST_REVEAL_LEGACY_AUTH", "secret-value")

    # Old-style credential: the session token sent as a Bearer token.
    legacy_headers = {"Authorization": f"Bearer {_SESSION_TOKEN}"}
    payload = {"key": "TEST_REVEAL_LEGACY_AUTH"}
    resp = self.client.post("/api/env/reveal", json=payload, headers=legacy_headers)

    assert resp.status_code == 200
|
||||
|
||||
def test_session_token_endpoint_removed(self):
|
||||
"""GET /api/auth/session-token should no longer exist (token injected via HTML)."""
|
||||
resp = self.client.get("/api/auth/session-token")
|
||||
@@ -285,7 +318,7 @@ class TestWebServerEndpoints:
|
||||
"""API requests without the session token should be rejected."""
|
||||
from starlette.testclient import TestClient
|
||||
from hermes_cli.web_server import app
|
||||
# Create a client WITHOUT the Authorization header
|
||||
# Create a client WITHOUT the dashboard session header
|
||||
unauth_client = TestClient(app)
|
||||
resp = unauth_client.get("/api/env")
|
||||
assert resp.status_code == 401
|
||||
@@ -388,9 +421,9 @@ class TestConfigRoundTrip:
|
||||
from starlette.testclient import TestClient
|
||||
except ImportError:
|
||||
pytest.skip("fastapi/starlette not installed")
|
||||
from hermes_cli.web_server import app, _SESSION_TOKEN
|
||||
from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN
|
||||
self.client = TestClient(app)
|
||||
self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}"
|
||||
self.client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN
|
||||
|
||||
def test_get_config_no_internal_keys(self):
|
||||
"""GET /api/config should not expose _config_version or _model_meta."""
|
||||
@@ -524,12 +557,12 @@ class TestNewEndpoints:
|
||||
|
||||
import hermes_state
|
||||
from hermes_constants import get_hermes_home
|
||||
from hermes_cli.web_server import app, _SESSION_TOKEN
|
||||
from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN
|
||||
|
||||
monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", get_hermes_home() / "state.db")
|
||||
|
||||
self.client = TestClient(app)
|
||||
self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}"
|
||||
self.client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN
|
||||
|
||||
def test_get_logs_default(self):
|
||||
resp = self.client.get("/api/logs")
|
||||
@@ -1176,9 +1209,9 @@ class TestStatusRemoteGateway:
|
||||
except ImportError:
|
||||
pytest.skip("fastapi/starlette not installed")
|
||||
|
||||
from hermes_cli.web_server import app, _SESSION_TOKEN
|
||||
from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN
|
||||
self.client = TestClient(app)
|
||||
self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}"
|
||||
self.client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN
|
||||
|
||||
def test_status_falls_back_to_remote_probe(self, monkeypatch):
|
||||
"""When local PID check fails and remote probe succeeds, gateway shows running."""
|
||||
@@ -1256,3 +1289,391 @@ class TestStatusRemoteGateway:
|
||||
assert data["gateway_running"] is True
|
||||
assert data["gateway_pid"] is None
|
||||
assert data["gateway_state"] == "running"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dashboard theme normaliser tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNormaliseThemeDefinition:
    """Tests for _normalise_theme_definition() — parses YAML theme files."""

    def test_rejects_missing_name(self):
        """A missing, empty, or whitespace-only name yields None."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        for nameless in ({}, {"name": ""}, {"name": " "}):
            assert normalise(nameless) is None

    def test_rejects_non_dict(self):
        """Anything that is not a mapping yields None."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        for not_a_dict in ("string", None, [1, 2, 3]):
            assert normalise(not_a_dict) is None

    def test_loose_colors_shorthand(self):
        """Bare hex strings under `colors` parse as {hex, alpha=1.0}."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        theme = normalise({
            "name": "loose",
            "colors": {"background": "#000000", "midground": "#ffffff"},
        })
        assert theme is not None

        palette = theme["palette"]
        assert palette["background"] == {"hex": "#000000", "alpha": 1.0}
        assert palette["midground"] == {"hex": "#ffffff", "alpha": 1.0}
        # foreground falls back to default (transparent white)
        foreground = palette["foreground"]
        assert foreground["hex"] == "#ffffff"
        assert foreground["alpha"] == 0.0

    def test_full_palette_form(self):
        """Explicit {hex, alpha} layers and scalar palette extras pass through."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        definition = {
            "name": "full",
            "palette": {
                "background": {"hex": "#0a1628", "alpha": 1.0},
                "midground": {"hex": "#a8d0ff", "alpha": 0.9},
                "warmGlow": "rgba(255, 0, 0, 0.5)",
                "noiseOpacity": 0.5,
            },
        }
        palette = normalise(definition)["palette"]

        assert palette["background"]["hex"] == "#0a1628"
        assert palette["midground"]["alpha"] == 0.9
        assert palette["warmGlow"] == "rgba(255, 0, 0, 0.5)"
        assert palette["noiseOpacity"] == 0.5

    def test_default_typography_applied_when_missing(self):
        """A name-only theme receives the default typography block."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        typo = normalise({"name": "minimal"})["typography"]

        for font_key in ("fontSans", "fontMono"):
            assert font_key in typo
        expected_defaults = {
            "baseSize": "15px",
            "lineHeight": "1.55",
            "letterSpacing": "0",
        }
        for key, expected in expected_defaults.items():
            assert typo[key] == expected

    def test_partial_typography_merges_with_defaults(self):
        """Supplied typography keys win; omitted ones fall back to defaults."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        typo = normalise({
            "name": "partial",
            "typography": {
                "fontSans": "MyFont, sans-serif",
                "baseSize": "12px",
            },
        })["typography"]

        assert typo["fontSans"] == "MyFont, sans-serif"
        assert typo["baseSize"] == "12px"
        # fontMono defaulted
        assert "monospace" in typo["fontMono"]

    def test_layout_defaults(self):
        """A name-only theme receives the default radius and density."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        layout = normalise({"name": "minimal"})["layout"]

        assert layout["radius"] == "0.5rem"
        assert layout["density"] == "comfortable"

    def test_invalid_density_falls_back(self):
        """An unrecognised density value is replaced by "comfortable"."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        theme = normalise({
            "name": "bad",
            "layout": {"density": "ultra-spacious"},
        })

        assert theme["layout"]["density"] == "comfortable"

    def test_valid_densities_accepted(self):
        """Each of the three known densities is kept as given."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        for density in ("compact", "comfortable", "spacious"):
            theme = normalise({"name": "x", "layout": {"density": density}})
            assert theme["layout"]["density"] == density

    def test_color_overrides_filter_unknown_keys(self):
        """Unknown tokens and non-string values are dropped from colorOverrides."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        overrides_in = {
            "card": "#123456",
            "fakeToken": "#abcdef",
            "primary": 42,  # non-string rejected
            "destructive": "#ff0000",
        }
        theme = normalise({"name": "o", "colorOverrides": overrides_in})

        assert theme["colorOverrides"] == {
            "card": "#123456",
            "destructive": "#ff0000",
        }

    def test_color_overrides_omitted_when_empty(self):
        """No colorOverrides key is emitted when none were supplied."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        theme = normalise({"name": "x"})

        assert "colorOverrides" not in theme

    def test_alpha_clamped_to_unit_range(self):
        """Out-of-range alpha values are clamped to 1.0 / 0.0."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        # (alpha supplied, alpha expected), checked in the original order.
        for given, clamped in ((99.5, 1.0), (-5, 0.0)):
            theme = normalise({
                "name": "c",
                "palette": {"background": {"hex": "#000", "alpha": given}},
            })
            assert theme["palette"]["background"]["alpha"] == clamped

    def test_invalid_alpha_uses_default(self):
        """A non-numeric alpha comes back as 1.0 for the background layer."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        theme = normalise({
            "name": "c",
            "palette": {"background": {"hex": "#000", "alpha": "not a number"}},
        })

        assert theme["palette"]["background"]["alpha"] == 1.0
|
||||
|
||||
|
||||
class TestDiscoverUserThemes:
    """Tests for _discover_user_themes() — scans ~/.hermes/dashboard-themes/."""

    def test_returns_empty_when_dir_missing(self, tmp_path, monkeypatch):
        """With no dashboard-themes directory under HERMES_HOME, the result is []."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        from hermes_cli import web_server

        assert web_server._discover_user_themes() == []

    def test_loads_and_normalises_yaml(self, tmp_path, monkeypatch):
        """A valid YAML theme is loaded, and missing sections are defaulted."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        themes_dir = tmp_path / "dashboard-themes"
        themes_dir.mkdir()
        # Indentation is significant in YAML: ``hex``/``alpha`` must be
        # indented deeper than ``background`` to be its children. If they sit
        # at the same level they become siblings, ``background`` is null, and
        # the palette["background"]["hex"] assertion below stops exercising
        # the file's value.
        (themes_dir / "ocean.yaml").write_text(
            "name: ocean\n"
            "label: Ocean\n"
            "palette:\n"
            "  background:\n"
            "    hex: \"#0a1628\"\n"
            "    alpha: 1.0\n"
            "layout:\n"
            "  density: spacious\n"
        )
        from hermes_cli import web_server

        results = web_server._discover_user_themes()

        assert len(results) == 1
        assert results[0]["name"] == "ocean"
        assert results[0]["label"] == "Ocean"
        assert results[0]["palette"]["background"]["hex"] == "#0a1628"
        assert results[0]["layout"]["density"] == "spacious"
        # defaults filled in
        assert "fontSans" in results[0]["typography"]

    def test_malformed_yaml_skipped(self, tmp_path, monkeypatch):
        """Unparseable and nameless theme files are skipped; valid ones still load."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        themes_dir = tmp_path / "dashboard-themes"
        themes_dir.mkdir()
        (themes_dir / "bad.yaml").write_text("::: not valid yaml :::\n\tindent wrong")
        (themes_dir / "nameless.yaml").write_text("label: No Name Here\n")
        (themes_dir / "ok.yaml").write_text("name: ok\n")
        from hermes_cli import web_server

        results = web_server._discover_user_themes()
        names = [r["name"] for r in results]

        assert "ok" in names
        assert "bad" not in names  # malformed YAML
        assert len(results) == 1  # only the valid one
|
||||
|
||||
|
||||
class TestNormaliseThemeExtensions:
    """Tests for the extended normaliser fields (assets, customCSS,
    componentStyles, layoutVariant) — the surfaces themes use to reskin
    the dashboard without shipping code."""

    def test_layout_variant_defaults_to_standard(self):
        """Without a layoutVariant key the normaliser reports "standard"."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        theme = normalise({"name": "t"})

        assert theme["layoutVariant"] == "standard"

    def test_layout_variant_accepts_known_values(self):
        """Each recognised layout variant is kept as given."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        for variant in ("standard", "cockpit", "tiled"):
            theme = normalise({"name": "t", "layoutVariant": variant})
            assert theme["layoutVariant"] == variant

    def test_layout_variant_rejects_unknown(self):
        """Unknown strings and non-string values fall back to "standard"."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        for unsupported in ("warship", 12):
            theme = normalise({"name": "t", "layoutVariant": unsupported})
            assert theme["layoutVariant"] == "standard"

    def test_assets_named_slots_passthrough(self):
        """Known asset slots pass through; blank values and unknown slots do not."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        theme = normalise({
            "name": "t",
            "assets": {
                "bg": "https://example.com/bg.jpg",
                "hero": "linear-gradient(180deg, red, blue)",
                "crest": "/ds-assets/crest.svg",
                "logo": " ",  # whitespace-only — dropped
                "notAKnownKey": "ignored",
            },
        })

        assert theme["assets"]["bg"] == "https://example.com/bg.jpg"
        assert theme["assets"]["hero"].startswith("linear-gradient")
        assert theme["assets"]["crest"] == "/ds-assets/crest.svg"
        assert "logo" not in theme["assets"]  # whitespace-only rejected
        assert "notAKnownKey" not in theme["assets"]  # unknown slot ignored

    def test_assets_custom_block(self):
        """assets.custom keeps well-formed keys with non-empty values only."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        custom_in = {
            "scan-lines": "/img/scan.png",
            "my_overlay": "/img/ov.png",
            "bad key!": "x",  # non-alnum key — rejected
            "empty": "",  # empty value — rejected
        }
        theme = normalise({"name": "t", "assets": {"custom": custom_in}})

        assert theme["assets"]["custom"] == {
            "scan-lines": "/img/scan.png",
            "my_overlay": "/img/ov.png",
        }

    def test_assets_absent_means_no_field(self):
        """No assets key is emitted when the theme defines none."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        theme = normalise({"name": "t"})

        assert "assets" not in theme

    def test_custom_css_passthrough_and_capped(self):
        """Short CSS is kept verbatim; oversized CSS is cut to at most 32 KiB."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        # Small CSS passes through verbatim.
        small_css = "body { color: red; }"
        small_theme = normalise({"name": "t", "customCSS": small_css})
        assert small_theme["customCSS"] == "body { color: red; }"

        # 40 KiB of CSS gets clipped to the 32 KiB cap.
        huge = "/* x */ " * (40 * 1024 // 8 + 10)
        capped_theme = normalise({"name": "t", "customCSS": huge})
        assert len(capped_theme["customCSS"]) <= 32 * 1024

    def test_custom_css_empty_dropped(self):
        """Empty, whitespace-only, and None customCSS produce no field."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        for blank in ("", " \n\t", None):
            theme = normalise({"name": "t", "customCSS": blank})
            assert "customCSS" not in theme

    def test_component_styles_per_bucket(self):
        """Known buckets keep valid props; bad props and unknown buckets are dropped."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        theme = normalise({
            "name": "t",
            "componentStyles": {
                "card": {
                    "clipPath": "polygon(0 0, 100% 0, 100% 100%, 0 100%)",
                    "boxShadow": "inset 0 0 0 1px red",
                    "bad prop!": "ignored",  # non-alnum prop rejected
                },
                "header": {"background": "linear-gradient(red, blue)"},
                "rogueBucket": {"foo": "bar"},  # not a known bucket — rejected
            },
        })

        assert theme["componentStyles"]["card"] == {
            "clipPath": "polygon(0 0, 100% 0, 100% 100%, 0 100%)",
            "boxShadow": "inset 0 0 0 1px red",
        }
        assert theme["componentStyles"]["header"]["background"].startswith("linear-gradient")
        assert "rogueBucket" not in theme["componentStyles"]

    def test_component_styles_empty_buckets_dropped(self):
        """Buckets left with no valid props are omitted from the result."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        theme = normalise({
            "name": "t",
            "componentStyles": {
                "card": {},  # empty — dropped entirely
                "header": {"bad prop!": "ignored"},  # all props rejected — bucket dropped
                "footer": {"background": "black"},
            },
        })

        for dropped_bucket in ("card", "header"):
            assert dropped_bucket not in theme.get("componentStyles", {})
        assert theme["componentStyles"]["footer"]["background"] == "black"

    def test_component_styles_accepts_numeric_values(self):
        """Numeric values (e.g. opacity: 0.8) are coerced to strings."""
        from hermes_cli.web_server import _normalise_theme_definition as normalise

        card_in = {"opacity": 0.8, "zIndex": 5}
        theme = normalise({"name": "t", "componentStyles": {"card": card_in}})

        assert theme["componentStyles"]["card"] == {"opacity": "0.8", "zIndex": "5"}
|
||||
|
||||
|
||||
class TestDashboardPluginManifestExtensions:
    """Tests for the extended plugin manifest fields (tab.override,
    tab.hidden, slots) read by _discover_dashboard_plugins()."""

    def _write_plugin(self, tmp_path, name, manifest):
        """Write ``manifest`` as plugins/<name>/dashboard/manifest.json under tmp_path.

        Returns the created ``dashboard`` directory.
        """
        import json

        plug_dir = tmp_path / "plugins" / name / "dashboard"
        plug_dir.mkdir(parents=True)
        (plug_dir / "manifest.json").write_text(json.dumps(manifest))
        return plug_dir

    def _rescan_entry(self, monkeypatch, name):
        """Force a plugin rescan and return the discovered entry called ``name``.

        The module-level cache is cleared through ``monkeypatch`` rather than
        by direct assignment, so its previous value is put back at teardown
        and plugins discovered under this test's temporary HERMES_HOME are
        not left behind in the module global.
        """
        from hermes_cli import web_server

        # Bust the process-level cache so the test plugin is picked up.
        # raising=False mirrors plain assignment if the attribute is absent.
        monkeypatch.setattr(web_server, "_dashboard_plugins_cache", None, raising=False)
        plugins = web_server._get_dashboard_plugins(force_rescan=True)
        return next(p for p in plugins if p["name"] == name)

    def test_override_and_hidden_carried_through(self, tmp_path, monkeypatch):
        """tab.override, tab.hidden and slots appear in the discovered entry."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        self._write_plugin(tmp_path, "skin-home", {
            "name": "skin-home",
            "label": "Skin Home",
            "tab": {"path": "/skin-home", "override": "/", "hidden": True},
            "slots": ["sidebar", "header-left"],
            "entry": "dist/index.js",
        })

        entry = self._rescan_entry(monkeypatch, "skin-home")

        assert entry["tab"]["override"] == "/"
        assert entry["tab"]["hidden"] is True
        assert entry["slots"] == ["sidebar", "header-left"]

    def test_override_requires_leading_slash(self, tmp_path, monkeypatch):
        """An override path without a leading slash is dropped from the tab."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        self._write_plugin(tmp_path, "bad-override", {
            "name": "bad-override",
            "label": "Bad",
            "tab": {"path": "/bad", "override": "no-leading-slash"},
            "entry": "dist/index.js",
        })

        entry = self._rescan_entry(monkeypatch, "bad-override")

        assert "override" not in entry["tab"]

    def test_slots_default_empty(self, tmp_path, monkeypatch):
        """With no extension fields, slots is [] and the tab has no extras."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        self._write_plugin(tmp_path, "no-slots", {
            "name": "no-slots",
            "label": "No Slots",
            "tab": {"path": "/no-slots"},
            "entry": "dist/index.js",
        })

        entry = self._rescan_entry(monkeypatch, "no-slots")

        assert entry["slots"] == []
        assert "hidden" not in entry["tab"]
        assert "override" not in entry["tab"]

    def test_slots_filters_non_string_entries(self, tmp_path, monkeypatch):
        """Empty strings and non-string values are removed from slots."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        self._write_plugin(tmp_path, "mixed-slots", {
            "name": "mixed-slots",
            "label": "Mixed",
            "tab": {"path": "/mixed-slots"},
            "slots": ["sidebar", "", 42, None, "header-right"],
            "entry": "dist/index.js",
        })

        entry = self._rescan_entry(monkeypatch, "mixed-slots")

        assert entry["slots"] == ["sidebar", "header-right"]
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user