Compare commits
101 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 7efd91d4b4 | |||
| 0aa1269e56 | |||
| 3c29834354 | |||
| 0eb85906b0 | |||
| ff9b0528a2 | |||
| 8feaa7cd1b | |||
| 57a2b97ae8 | |||
| bd9afb027a | |||
| 6051fba9dc | |||
| 2acc8783d1 | |||
| acdcb167fb | |||
| 51f4c9827f | |||
| 2e78a2b6b2 | |||
| 5a1c599412 | |||
| 0f6eabb890 | |||
| eb93f88e1d | |||
| 3ccda2aa05 | |||
| 983bbe2d40 | |||
| 379b2273d9 | |||
| 7db2703b33 | |||
| 7c59e1a871 | |||
| 6fdbf2f2d7 | |||
| 0a679cb7ad | |||
| 41b4d69167 | |||
| 3f343cf7cf | |||
| 4ae5b58cb1 | |||
| 2258a181f0 | |||
| 11b2942f16 | |||
| b08cbc7a79 | |||
| c95c6bdb7c | |||
| bd929ea514 | |||
| 6a20e187dd | |||
| 9ff21437a0 | |||
| 44a0cbe525 | |||
| 2af0848f3c | |||
| 7baf370d3d | |||
| eeda18a9b7 | |||
| 3a9598337f | |||
| 98418afd5d | |||
| 42ff785771 | |||
| 04c489b587 | |||
| 0bb460b070 | |||
| 3504bd401b | |||
| 50d97edbe1 | |||
| e26c4f0e34 | |||
| 24f139e16a | |||
| ef5eaf8d87 | |||
| bf196a3fc0 | |||
| f593c367be | |||
| 470389e6a3 | |||
| 18d5ba8676 | |||
| 8b79acb8de | |||
| 0086fd894d | |||
| 5e67b38437 | |||
| 1df35a93b2 | |||
| 9599271180 | |||
| a5e4a86ebe | |||
| d42b6a2edd | |||
| d001814e3f | |||
| 9d147f7fde | |||
| 692ae6dd07 | |||
| b61ac8964b | |||
| a1ff6b45ea | |||
| 4a0c02b7dc | |||
| 83859b4da0 | |||
| 67c8f837fc | |||
| c7d023937c | |||
| 78d1e252fa | |||
| d0821b0573 | |||
| a0d8dd7ba3 | |||
| e020f46bec | |||
| a884f6d5d8 | |||
| b848ce2c79 | |||
| 1dfcda4e3c | |||
| 1cc0bdd5f3 | |||
| 07046096d9 | |||
| 97b9b3d6a6 | |||
| 165b2e481a | |||
| 327b57da91 | |||
| 64e6165686 | |||
| b5333abc30 | |||
| 255ba5bf26 | |||
| 8f5fee3e3e | |||
| b6ca3c28dc | |||
| 882278520b | |||
| 9bf6e1cd6e | |||
| 9a885fba31 | |||
| aa47812edf | |||
| c8ff70fe03 | |||
| f5af6520d0 | |||
| 1e445b2547 | |||
| f28f07e98e | |||
| 7c4dd7d660 | |||
| e91be4d7dc | |||
| 60d1edc38a | |||
| 3e01de0b09 | |||
| f7e86577bc | |||
| 2e75460066 | |||
| 82a0ed1afb | |||
| 071bdb5a3f | |||
| bc9518f660 |
@@ -53,6 +53,9 @@ jobs:
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Build skills index (if not already present)
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
@@ -36,6 +36,9 @@ jobs:
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Lint docs diagrams
|
||||
run: npm run lint:diagrams
|
||||
working-directory: website
|
||||
|
||||
@@ -5,78 +5,61 @@ Instructions for AI coding assistants and developers working on the hermes-agent
|
||||
## Development Environment
|
||||
|
||||
```bash
|
||||
source venv/bin/activate # ALWAYS activate before running Python
|
||||
# Prefer .venv; fall back to venv if that's what your checkout has.
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
```
|
||||
|
||||
`scripts/run_tests.sh` probes `.venv` first, then `venv`, then
|
||||
`$HOME/.hermes/hermes-agent/venv` (for worktrees that share a venv with the
|
||||
main checkout).
|
||||
|
||||
## Project Structure
|
||||
|
||||
File counts shift constantly — don't treat the tree below as exhaustive.
|
||||
The canonical source is the filesystem. The notes call out the load-bearing
|
||||
entry points you'll actually edit.
|
||||
|
||||
```
|
||||
hermes-agent/
|
||||
├── run_agent.py # AIAgent class — core conversation loop
|
||||
├── run_agent.py # AIAgent class — core conversation loop (~12k LOC)
|
||||
├── model_tools.py # Tool orchestration, discover_builtin_tools(), handle_function_call()
|
||||
├── toolsets.py # Toolset definitions, _HERMES_CORE_TOOLS list
|
||||
├── cli.py # HermesCLI class — interactive CLI orchestrator
|
||||
├── cli.py # HermesCLI class — interactive CLI orchestrator (~11k LOC)
|
||||
├── hermes_state.py # SessionDB — SQLite session store (FTS5 search)
|
||||
├── agent/ # Agent internals
|
||||
│ ├── prompt_builder.py # System prompt assembly
|
||||
│ ├── context_compressor.py # Auto context compression
|
||||
│ ├── prompt_caching.py # Anthropic prompt caching
|
||||
│ ├── auxiliary_client.py # Auxiliary LLM client (vision, summarization)
|
||||
│ ├── model_metadata.py # Model context lengths, token estimation
|
||||
│ ├── models_dev.py # models.dev registry integration (provider-aware context)
|
||||
│ ├── display.py # KawaiiSpinner, tool preview formatting
|
||||
│ ├── skill_commands.py # Skill slash commands (shared CLI/gateway)
|
||||
│ └── trajectory.py # Trajectory saving helpers
|
||||
├── hermes_cli/ # CLI subcommands and setup
|
||||
│ ├── main.py # Entry point — all `hermes` subcommands
|
||||
│ ├── config.py # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
|
||||
│ ├── commands.py # Slash command definitions + SlashCommandCompleter
|
||||
│ ├── callbacks.py # Terminal callbacks (clarify, sudo, approval)
|
||||
│ ├── setup.py # Interactive setup wizard
|
||||
│ ├── skin_engine.py # Skin/theme engine — CLI visual customization
|
||||
│ ├── skills_config.py # `hermes skills` — enable/disable skills per platform
|
||||
│ ├── tools_config.py # `hermes tools` — enable/disable tools per platform
|
||||
│ ├── skills_hub.py # `/skills` slash command (search, browse, install)
|
||||
│ ├── models.py # Model catalog, provider model lists
|
||||
│ ├── model_switch.py # Shared /model switch pipeline (CLI + gateway)
|
||||
│ └── auth.py # Provider credential resolution
|
||||
├── tools/ # Tool implementations (one file per tool)
|
||||
│ ├── registry.py # Central tool registry (schemas, handlers, dispatch)
|
||||
│ ├── approval.py # Dangerous command detection
|
||||
│ ├── terminal_tool.py # Terminal orchestration
|
||||
│ ├── process_registry.py # Background process management
|
||||
│ ├── file_tools.py # File read/write/search/patch
|
||||
│ ├── web_tools.py # Web search/extract (Parallel + Firecrawl)
|
||||
│ ├── browser_tool.py # Browserbase browser automation
|
||||
│ ├── code_execution_tool.py # execute_code sandbox
|
||||
│ ├── delegate_tool.py # Subagent delegation
|
||||
│ ├── mcp_tool.py # MCP client (~1050 lines)
|
||||
├── hermes_constants.py # get_hermes_home(), display_hermes_home() — profile-aware paths
|
||||
├── hermes_logging.py # setup_logging() — agent.log / errors.log / gateway.log (profile-aware)
|
||||
├── batch_runner.py # Parallel batch processing
|
||||
├── agent/ # Agent internals (provider adapters, memory, caching, compression, etc.)
|
||||
├── hermes_cli/ # CLI subcommands, setup wizard, plugins loader, skin engine
|
||||
├── tools/ # Tool implementations — auto-discovered via tools/registry.py
|
||||
│ └── environments/ # Terminal backends (local, docker, ssh, modal, daytona, singularity)
|
||||
├── gateway/ # Messaging platform gateway
|
||||
│ ├── run.py # Main loop, slash commands, message dispatch
|
||||
│ ├── session.py # SessionStore — conversation persistence
|
||||
│ └── platforms/ # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
|
||||
├── gateway/ # Messaging gateway — run.py + session.py + platforms/
|
||||
│ ├── platforms/ # Adapter per platform (telegram, discord, slack, whatsapp,
|
||||
│ │ # homeassistant, signal, matrix, mattermost, email, sms,
|
||||
│ │ # dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
|
||||
│ │ # webhook, api_server, ...). See ADDING_A_PLATFORM.md.
|
||||
│ └── builtin_hooks/ # Always-registered gateway hooks (boot-md, ...)
|
||||
├── plugins/ # Plugin system (see "Plugins" section below)
|
||||
│ ├── memory/ # Memory-provider plugins (honcho, mem0, supermemory, ...)
|
||||
│ ├── context_engine/ # Context-engine plugins
|
||||
│ └── <others>/ # Dashboard, image-gen, disk-cleanup, examples, ...
|
||||
├── optional-skills/ # Heavier/niche skills shipped but NOT active by default
|
||||
├── skills/ # Built-in skills bundled with the repo
|
||||
├── ui-tui/ # Ink (React) terminal UI — `hermes --tui`
|
||||
│ ├── src/entry.tsx # TTY gate + render()
|
||||
│ ├── src/app.tsx # Main state machine and UI
|
||||
│ ├── src/gatewayClient.ts # Child process + JSON-RPC bridge
|
||||
│ ├── src/app/ # Decomposed app logic (event handler, slash handler, stores, hooks)
|
||||
│ ├── src/components/ # Ink components (branding, markdown, prompts, pickers, etc.)
|
||||
│ ├── src/hooks/ # useCompletion, useInputHistory, useQueue, useVirtualHistory
|
||||
│ └── src/lib/ # Pure helpers (history, osc52, text, rpc, messages)
|
||||
│ └── src/ # entry.tsx, app.tsx, gatewayClient.ts + app/components/hooks/lib
|
||||
├── tui_gateway/ # Python JSON-RPC backend for the TUI
|
||||
│ ├── entry.py # stdio entrypoint
|
||||
│ ├── server.py # RPC handlers and session logic
|
||||
│ ├── render.py # Optional rich/ANSI bridge
|
||||
│ └── slash_worker.py # Persistent HermesCLI subprocess for slash commands
|
||||
├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration)
|
||||
├── cron/ # Scheduler (jobs.py, scheduler.py)
|
||||
├── cron/ # Scheduler — jobs.py, scheduler.py
|
||||
├── environments/ # RL training environments (Atropos)
|
||||
├── tests/ # Pytest suite (~3000 tests)
|
||||
└── batch_runner.py # Parallel batch processing
|
||||
├── scripts/ # run_tests.sh, release.py, auxiliary scripts
|
||||
├── website/ # Docusaurus docs site
|
||||
└── tests/ # Pytest suite (~15k tests across ~700 files as of Apr 2026)
|
||||
```
|
||||
|
||||
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)
|
||||
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
|
||||
**Logs:** `~/.hermes/logs/` — `agent.log` (INFO+), `errors.log` (WARNING+),
|
||||
`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
|
||||
Browse with `hermes logs [--follow] [--level ...] [--session ...]`.
|
||||
|
||||
## File Dependency Chain
|
||||
|
||||
@@ -94,20 +77,30 @@ run_agent.py, cli.py, batch_runner.py, environments/
|
||||
|
||||
## AIAgent Class (run_agent.py)
|
||||
|
||||
The real `AIAgent.__init__` takes ~60 parameters (credentials, routing, callbacks,
|
||||
session context, budget, credential pool, etc.). The signature below is the
|
||||
minimum subset you'll usually touch — read `run_agent.py` for the full list.
|
||||
|
||||
```python
|
||||
class AIAgent:
|
||||
def __init__(self,
|
||||
model: str = "anthropic/claude-opus-4.6",
|
||||
max_iterations: int = 90,
|
||||
base_url: str = None,
|
||||
api_key: str = None,
|
||||
provider: str = None,
|
||||
api_mode: str = None, # "chat_completions" | "codex_responses" | ...
|
||||
model: str = "", # empty → resolved from config/provider later
|
||||
max_iterations: int = 90, # tool-calling iterations (shared with subagents)
|
||||
enabled_toolsets: list = None,
|
||||
disabled_toolsets: list = None,
|
||||
quiet_mode: bool = False,
|
||||
save_trajectories: bool = False,
|
||||
platform: str = None, # "cli", "telegram", etc.
|
||||
platform: str = None, # "cli", "telegram", etc.
|
||||
session_id: str = None,
|
||||
skip_context_files: bool = False,
|
||||
skip_memory: bool = False,
|
||||
# ... plus provider, api_mode, callbacks, routing params
|
||||
credential_pool=None,
|
||||
# ... plus callbacks, thread/user/chat IDs, iteration_budget, fallback_model,
|
||||
# checkpoints config, prefill_messages, service_tier, reasoning_config, etc.
|
||||
): ...
|
||||
|
||||
def chat(self, message: str) -> str:
|
||||
@@ -120,10 +113,13 @@ class AIAgent:
|
||||
|
||||
### Agent Loop
|
||||
|
||||
The core loop is inside `run_conversation()` — entirely synchronous:
|
||||
The core loop is inside `run_conversation()` — entirely synchronous, with
|
||||
interrupt checks, budget tracking, and a one-turn grace call:
|
||||
|
||||
```python
|
||||
while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
|
||||
while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) \
|
||||
or self._budget_grace_call:
|
||||
if self._interrupt_requested: break
|
||||
response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas)
|
||||
if response.tool_calls:
|
||||
for tool_call in response.tool_calls:
|
||||
@@ -134,7 +130,8 @@ while api_call_count < self.max_iterations and self.iteration_budget.remaining >
|
||||
return response.content
|
||||
```
|
||||
|
||||
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`.
|
||||
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`.
|
||||
Reasoning content is stored in `assistant_msg["reasoning"]`.
|
||||
|
||||
---
|
||||
|
||||
@@ -280,7 +277,7 @@ The registry handles schema collection, dispatch, availability checking, and err
|
||||
|
||||
**State files**: If a tool stores persistent state (caches, logs, checkpoints), use `get_hermes_home()` for the base directory — never `Path.home() / ".hermes"`. This ensures each profile gets its own state.
|
||||
|
||||
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern.
|
||||
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `tools/todo_tool.py` for the pattern.
|
||||
|
||||
---
|
||||
|
||||
@@ -288,9 +285,13 @@ The registry handles schema collection, dispatch, availability checking, and err
|
||||
|
||||
### config.yaml options:
|
||||
1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
|
||||
2. Bump `_config_version` (currently 5) to trigger migration for existing users
|
||||
2. Bump `_config_version` (check the current value at the top of `DEFAULT_CONFIG`)
|
||||
ONLY if you need to actively migrate/transform existing user config
|
||||
(renaming keys, changing structure). Adding a new key to an existing
|
||||
section is handled automatically by the deep-merge and does NOT require
|
||||
a version bump.
|
||||
|
||||
### .env variables:
|
||||
### .env variables (SECRETS ONLY — API keys, tokens, passwords):
|
||||
1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
|
||||
```python
|
||||
"NEW_API_KEY": {
|
||||
@@ -302,13 +303,29 @@ The registry handles schema collection, dispatch, availability checking, and err
|
||||
},
|
||||
```
|
||||
|
||||
### Config loaders (two separate systems):
|
||||
Non-secret settings (timeouts, thresholds, feature flags, paths, display
|
||||
preferences) belong in `config.yaml`, not `.env`. If internal code needs an
|
||||
env var mirror for backward compatibility, bridge it from `config.yaml` to
|
||||
the env var in code (see `gateway_timeout`, `terminal.cwd` → `TERMINAL_CWD`).
|
||||
|
||||
### Config loaders (three paths — know which one you're in):
|
||||
|
||||
| Loader | Used by | Location |
|
||||
|--------|---------|----------|
|
||||
| `load_cli_config()` | CLI mode | `cli.py` |
|
||||
| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` |
|
||||
| Direct YAML load | Gateway | `gateway/run.py` |
|
||||
| `load_cli_config()` | CLI mode | `cli.py` — merges CLI-specific defaults + user YAML |
|
||||
| `load_config()` | `hermes tools`, `hermes setup`, most CLI subcommands | `hermes_cli/config.py` — merges `DEFAULT_CONFIG` + user YAML |
|
||||
| Direct YAML load | Gateway runtime | `gateway/run.py` + `gateway/config.py` — reads user YAML raw |
|
||||
|
||||
If you add a new key and the CLI sees it but the gateway doesn't (or vice
|
||||
versa), you're on the wrong loader. Check `DEFAULT_CONFIG` coverage.
|
||||
|
||||
### Working directory:
|
||||
- **CLI** — uses the process's current directory (`os.getcwd()`).
|
||||
- **Messaging** — uses `terminal.cwd` from `config.yaml`. The gateway bridges this
|
||||
to the `TERMINAL_CWD` env var for child tools. **`MESSAGING_CWD` has been
|
||||
removed** — the config loader prints a deprecation warning if it's set in
|
||||
`.env`. Same for `TERMINAL_CWD` in `.env`; the canonical setting is
|
||||
`terminal.cwd` in `config.yaml`.
|
||||
|
||||
---
|
||||
|
||||
@@ -401,7 +418,95 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml.
|
||||
|
||||
---
|
||||
|
||||
## Plugins
|
||||
|
||||
Hermes has two plugin surfaces. Both live under `plugins/` in the repo so
|
||||
repo-shipped plugins can be discovered alongside user-installed ones in
|
||||
`~/.hermes/plugins/` and pip-installed entry points.
|
||||
|
||||
### General plugins (`hermes_cli/plugins.py` + `plugins/<name>/`)
|
||||
|
||||
`PluginManager` discovers plugins from `~/.hermes/plugins/`, `./.hermes/plugins/`,
|
||||
and pip entry points. Each plugin exposes a `register(ctx)` function that
|
||||
can:
|
||||
|
||||
- Register Python-callback lifecycle hooks:
|
||||
`pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`,
|
||||
`on_session_start`, `on_session_end`
|
||||
- Register new tools via `ctx.register_tool(...)`
|
||||
- Register CLI subcommands via `ctx.register_cli_command(...)` — the
|
||||
plugin's argparse tree is wired into `hermes` at startup so
|
||||
`hermes <pluginname> <subcmd>` works with no change to `main.py`
|
||||
|
||||
Hooks are invoked from `model_tools.py` (pre/post tool) and `run_agent.py`
|
||||
(lifecycle). **Discovery timing pitfall:** `discover_plugins()` only runs
|
||||
as a side effect of importing `model_tools.py`. Code paths that read plugin
|
||||
state without importing `model_tools.py` first must call `discover_plugins()`
|
||||
explicitly (it's idempotent).
|
||||
|
||||
### Memory-provider plugins (`plugins/memory/<name>/`)
|
||||
|
||||
Separate discovery system for pluggable memory backends. Current built-in
|
||||
providers include **honcho, mem0, supermemory, byterover, hindsight,
|
||||
holographic, openviking, retaindb**.
|
||||
|
||||
Each provider implements the `MemoryProvider` ABC (see `agent/memory_provider.py`)
|
||||
and is orchestrated by `agent/memory_manager.py`. Lifecycle hooks include
|
||||
`sync_turn(turn_messages)`, `prefetch(query)`, `shutdown()`, and optional
|
||||
`post_setup(hermes_home, config)` for setup-wizard integration.
|
||||
|
||||
**CLI commands via `plugins/memory/<name>/cli.py`:** if a memory plugin
|
||||
defines `register_cli(subparser)`, `discover_plugin_cli_commands()` finds
|
||||
it at argparse setup time and wires it into `hermes <plugin>`. The
|
||||
framework only exposes CLI commands for the **currently active** memory
|
||||
provider (read from `memory.provider` in config.yaml), so disabled
|
||||
providers don't clutter `hermes --help`.
|
||||
|
||||
**Rule (Teknium, May 2026):** plugins MUST NOT modify core files
|
||||
(`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.).
|
||||
If a plugin needs a capability the framework doesn't expose, expand the
|
||||
generic plugin surface (new hook, new ctx method) — never hardcode
|
||||
plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
|
||||
honcho argparse from `main.py` for exactly this reason.
|
||||
|
||||
### Dashboard / context-engine / image-gen plugin directories
|
||||
|
||||
`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
|
||||
etc. follow the same pattern (ABC + orchestrator + per-plugin directory).
|
||||
Context engines plug into `agent/context_engine.py`; image-gen providers
|
||||
into `agent/image_gen_provider.py`.
|
||||
|
||||
---
|
||||
|
||||
## Skills
|
||||
|
||||
Two parallel surfaces:
|
||||
|
||||
- **`skills/`** — built-in skills shipped and loadable by default.
|
||||
Organized by category directories (e.g. `skills/github/`, `skills/mlops/`).
|
||||
- **`optional-skills/`** — heavier or niche skills shipped with the repo but
|
||||
NOT active by default. Installed explicitly via
|
||||
`hermes skills install official/<category>/<skill>`. Adapter lives in
|
||||
`tools/skills_hub.py` (`OptionalSkillSource`). Categories include
|
||||
`autonomous-ai-agents`, `blockchain`, `communication`, `creative`,
|
||||
`devops`, `email`, `health`, `mcp`, `migration`, `mlops`, `productivity`,
|
||||
`research`, `security`, `web-development`.
|
||||
|
||||
When reviewing skill PRs, check which directory they target — heavy-dep or
|
||||
niche skills belong in `optional-skills/`.
|
||||
|
||||
### SKILL.md frontmatter
|
||||
|
||||
Standard fields: `name`, `description`, `version`, `platforms`
|
||||
(OS-gating list: `[macos]`, `[linux, macos]`, ...),
|
||||
`metadata.hermes.tags`, `metadata.hermes.category`,
|
||||
`metadata.hermes.config` (config.yaml settings the skill needs — stored
|
||||
under `skills.config.<key>`, prompted during setup, injected at load time).
|
||||
|
||||
---
|
||||
|
||||
## Important Policies
|
||||
|
||||
### Prompt Caching Must Not Break
|
||||
|
||||
Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:**
|
||||
@@ -411,9 +516,10 @@ Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT i
|
||||
|
||||
Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression.
|
||||
|
||||
### Working Directory Behavior
|
||||
- **CLI**: Uses current directory (`.` → `os.getcwd()`)
|
||||
- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory)
|
||||
Slash commands that mutate system-prompt state (skills, tools, memory, etc.)
|
||||
must be **cache-aware**: default to deferred invalidation (change takes
|
||||
effect next session), with an opt-in `--now` flag for immediate
|
||||
invalidation. See `/skills install --now` for the canonical pattern.
|
||||
|
||||
### Background Process Notifications (Gateway)
|
||||
|
||||
@@ -435,7 +541,7 @@ Hermes supports **profiles** — multiple fully isolated instances, each with it
|
||||
`HERMES_HOME` directory (config, API keys, memory, sessions, skills, gateway, etc.).
|
||||
|
||||
The core mechanism: `_apply_profile_override()` in `hermes_cli/main.py` sets
|
||||
`HERMES_HOME` before any module imports. All 119+ references to `get_hermes_home()`
|
||||
`HERMES_HOME` before any module imports. All `get_hermes_home()` references
|
||||
automatically scope to the active profile.
|
||||
|
||||
### Rules for profile-safe code
|
||||
@@ -492,8 +598,12 @@ Use `get_hermes_home()` from `hermes_constants` for code paths. Use `display_her
|
||||
for user-facing print/log messages. Hardcoding `~/.hermes` breaks profiles — each profile
|
||||
has its own `HERMES_HOME` directory. This was the source of 5 bugs fixed in PR #3575.
|
||||
|
||||
### DO NOT use `simple_term_menu` for interactive menus
|
||||
Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern.
|
||||
### DO NOT introduce new `simple_term_menu` usage
|
||||
Existing call sites in `hermes_cli/main.py` remain for legacy fallback only;
|
||||
the preferred UI is curses (stdlib) because `simple_term_menu` has
|
||||
ghost-duplication rendering bugs in tmux/iTerm2 with arrow keys. New
|
||||
interactive menus must use `hermes_cli/curses_ui.py` — see
|
||||
`hermes_cli/tools_config.py` for the canonical pattern.
|
||||
|
||||
### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
|
||||
Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`.
|
||||
@@ -504,6 +614,30 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p
|
||||
### DO NOT hardcode cross-tool references in schema descriptions
|
||||
Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.
|
||||
|
||||
### The gateway has TWO message guards — both must bypass approval/control commands
|
||||
When an agent is running, messages pass through two sequential guards:
|
||||
(1) **base adapter** (`gateway/platforms/base.py`) queues messages in
|
||||
`_pending_messages` when `session_key in self._active_sessions`, and
|
||||
(2) **gateway runner** (`gateway/run.py`) intercepts `/stop`, `/new`,
|
||||
`/queue`, `/status`, `/approve`, `/deny` before they reach
|
||||
`running_agent.interrupt()`. Any new command that must reach the runner
|
||||
while the agent is blocked (e.g. approval prompts) MUST bypass BOTH
|
||||
guards and be dispatched inline, not via `_process_message_background()`
|
||||
(which races session lifecycle).
|
||||
|
||||
### Squash merges from stale branches silently revert recent fixes
|
||||
Before squash-merging a PR, ensure the branch is up to date with `main`
|
||||
(`git fetch origin main && git reset --hard origin/main` in the worktree,
|
||||
then re-apply the PR's commits). A stale branch's version of an unrelated
|
||||
file will silently overwrite recent fixes on main when squashed. Verify
|
||||
with `git diff HEAD~1..HEAD` after merging — unexpected deletions are a
|
||||
red flag.
|
||||
|
||||
### Don't wire in dead code without E2E validation
|
||||
Unused code that was never shipped was dead for a reason. Before wiring an
|
||||
unused module into a live code path, E2E test the real resolution chain
|
||||
with actual imports (not mocks) against a temp `HERMES_HOME`.
|
||||
|
||||
### Tests must not write to `~/.hermes/`
|
||||
The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.
|
||||
|
||||
@@ -559,7 +693,7 @@ If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
|
||||
pytest directly), at minimum activate the venv and pass `-n 4`:
|
||||
|
||||
```bash
|
||||
source venv/bin/activate
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
python -m pytest tests/ -q -n 4
|
||||
```
|
||||
|
||||
|
||||
+3
-3
@@ -9,7 +9,7 @@ Thank you for contributing to Hermes Agent! This guide covers everything you nee
|
||||
We value contributions in this order:
|
||||
|
||||
1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
|
||||
2. **Cross-platform compatibility** — Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere.
|
||||
2. **Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. We want Hermes to work everywhere.
|
||||
3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
|
||||
4. **Performance and robustness** — retry logic, error handling, graceful degradation.
|
||||
5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
|
||||
@@ -515,7 +515,7 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl
|
||||
|
||||
## Cross-Platform Compatibility
|
||||
|
||||
Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS:
|
||||
Hermes runs on Linux, macOS, and WSL2 on Windows. When writing code that touches the OS:
|
||||
|
||||
### Critical rules
|
||||
|
||||
@@ -597,7 +597,7 @@ refactor/description # Code restructuring
|
||||
|
||||
1. **Run tests**: `pytest tests/ -v`
|
||||
2. **Test manually**: Run `hermes` and exercise the code path you changed
|
||||
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS
|
||||
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2
|
||||
4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.
|
||||
|
||||
### PR description
|
||||
|
||||
@@ -76,7 +76,7 @@ Hermes has two entry points: start the terminal UI with `hermes`, or run the gat
|
||||
| Set a personality | `/personality [name]` | `/personality [name]` |
|
||||
| Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` |
|
||||
| Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` |
|
||||
| Browse skills | `/skills` or `/<skill-name>` | `/skills` or `/<skill-name>` |
|
||||
| Browse skills | `/skills` or `/<skill-name>` | `/<skill-name>` |
|
||||
| Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message |
|
||||
| Platform-specific status | `/platforms` | `/status`, `/sethome` |
|
||||
|
||||
@@ -157,14 +157,10 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv venv venv --python 3.11
|
||||
source venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
python -m pytest tests/ -q
|
||||
scripts/run_tests.sh
|
||||
```
|
||||
|
||||
> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
|
||||
> ```bash
|
||||
> git submodule update --init tinker-atropos
|
||||
> uv pip install -e "./tinker-atropos"
|
||||
> ```
|
||||
> **RL Training (optional):** The RL/Atropos integration (`environments/`) ships via the `atroposlib` and `tinker` dependencies pulled in by `.[all,dev]` — no submodule setup required.
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -0,0 +1,453 @@
|
||||
# Hermes Agent v0.11.0 (v2026.4.23)
|
||||
|
||||
**Release Date:** April 23, 2026
|
||||
**Since v0.9.0:** 1,556 commits · 761 merged PRs · 1,314 files changed · 224,174 insertions · 29 community contributors (290 including co-authors)
|
||||
|
||||
> The Interface release — a full React/Ink rewrite of the interactive CLI, a pluggable transport architecture underneath every provider, native AWS Bedrock support, five new inference paths, a 17th messaging platform (QQBot), a dramatically expanded plugin surface, and GPT-5.5 via Codex OAuth.
|
||||
|
||||
This release also folds in all the highlights deferred from v0.10.0 (which shipped only the Nous Tool Gateway) — so it covers roughly two weeks of work across the whole stack.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **New Ink-based TUI** — `hermes --tui` is now a full React/Ink rewrite of the interactive CLI, with a Python JSON-RPC backend (`tui_gateway`). Sticky composer, live streaming with OSC-52 clipboard support, stable picker keys, status bar with per-turn stopwatch and git branch, `/clear` confirm, light-theme preset, and a subagent spawn observability overlay. ~310 commits to `ui-tui/` + `tui_gateway/`. (@OutThisLife + Teknium)
|
||||
|
||||
- **Transport ABC + Native AWS Bedrock** — Format conversion and HTTP transport were extracted from `run_agent.py` into a pluggable `agent/transports/` layer. `AnthropicTransport`, `ChatCompletionsTransport`, `ResponsesApiTransport`, and `BedrockTransport` each own their own format conversion and API shape. Native AWS Bedrock support via the Converse API ships on top of the new abstraction. ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549), [#13347](https://github.com/NousResearch/hermes-agent/pull/13347), [#13366](https://github.com/NousResearch/hermes-agent/pull/13366), [#13430](https://github.com/NousResearch/hermes-agent/pull/13430), [#13805](https://github.com/NousResearch/hermes-agent/pull/13805), [#13814](https://github.com/NousResearch/hermes-agent/pull/13814) — @kshitijk4poor + Teknium)
|
||||
|
||||
- **Five new inference paths** — Native NVIDIA NIM ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774)), Arcee AI ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276)), Step Plan ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893)), Google Gemini CLI OAuth ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270)), and Vercel ai-gateway with pricing + dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223) — @jerilynzheng). Plus Gemini routed through the native AI Studio API for better performance ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674)).
|
||||
|
||||
- **GPT-5.5 over Codex OAuth** — OpenAI's new GPT-5.5 reasoning model is now available through your ChatGPT Codex OAuth, with live model discovery wired into the model picker so new OpenAI releases show up without catalog updates. ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
|
||||
|
||||
- **QQBot — 17th supported platform** — Native QQBot adapter via QQ Official API v2, with QR scan-to-configure setup wizard, streaming cursor, emoji reactions, and DM/group policy gating that matches WeCom/Weixin parity. ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
|
||||
|
||||
- **Plugin surface expanded** — Plugins can now register slash commands (`register_command`), dispatch tools directly (`dispatch_tool`), block tool execution from hooks (`pre_tool_call` can veto), rewrite tool results (`transform_tool_result`), transform terminal output (`transform_terminal_output`), ship image_gen backends, and add custom dashboard tabs. The bundled disk-cleanup plugin is opt-in by default as a reference implementation. ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377), [#10626](https://github.com/NousResearch/hermes-agent/pull/10626), [#10763](https://github.com/NousResearch/hermes-agent/pull/10763), [#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#12929](https://github.com/NousResearch/hermes-agent/pull/12929), [#12944](https://github.com/NousResearch/hermes-agent/pull/12944), [#12972](https://github.com/NousResearch/hermes-agent/pull/12972), [#13799](https://github.com/NousResearch/hermes-agent/pull/13799), [#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
|
||||
|
||||
- **`/steer` — mid-run agent nudges** — `/steer <prompt>` injects a note that the running agent sees after its next tool call, without interrupting the turn or breaking prompt cache. For when you want to course-correct an agent in-flight. ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
|
||||
|
||||
- **Shell hooks** — Wire any shell script as a Hermes lifecycle hook (pre_tool_call, post_tool_call, on_session_start, etc.) without writing a Python plugin. ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
|
||||
|
||||
- **Webhook direct-delivery mode** — Webhook subscriptions can now forward payloads straight to a platform chat without going through the agent — zero-LLM push notifications for alerting, uptime checks, and event streams. ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
|
||||
|
||||
- **Smarter delegation** — Subagents now have an explicit `orchestrator` role that can spawn their own workers, with configurable `max_spawn_depth` (default flat). Concurrent sibling subagents share filesystem state through a file-coordination layer so they don't clobber each other's edits. ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691), [#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
|
||||
- **Auxiliary models — configurable UI + main-model-first** — `hermes model` has a dedicated "Configure auxiliary models" screen for per-task overrides (compression, vision, session_search, title_generation). `auto` routing now defaults to the main model for side tasks across all users (previously aggregator users were silently routed to a cheap provider-side default). ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891), [#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
|
||||
|
||||
- **Dashboard plugin system + live theme switching** — The web dashboard is now extensible. Third-party plugins can add custom tabs, widgets, and views without forking. Paired with a live-switching theme system — themes now control colors, fonts, layout, and density — so users can hot-swap the dashboard look without a reload. Same theming discipline the CLI has, now on the web. ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#10687](https://github.com/NousResearch/hermes-agent/pull/10687), [#14725](https://github.com/NousResearch/hermes-agent/pull/14725))
|
||||
|
||||
- **Dashboard polish** — i18n (English + Chinese), react-router sidebar layout, mobile-responsive, Vercel deployment, real per-session API call tracking, and one-click update + gateway restart buttons. ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), [#9370](https://github.com/NousResearch/hermes-agent/pull/9370), [#9453](https://github.com/NousResearch/hermes-agent/pull/9453), [#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#13526](https://github.com/NousResearch/hermes-agent/pull/13526), [#14004](https://github.com/NousResearch/hermes-agent/pull/14004) — @austinpickett + @DeployFaith + Teknium)
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Core Agent & Architecture
|
||||
|
||||
### Transport Layer (NEW)
|
||||
- **Transport ABC** abstracts format conversion and HTTP transport from `run_agent.py` into `agent/transports/` ([#13347](https://github.com/NousResearch/hermes-agent/pull/13347))
|
||||
- **AnthropicTransport** — Anthropic Messages API path ([#13366](https://github.com/NousResearch/hermes-agent/pull/13366), @kshitijk4poor)
|
||||
- **ChatCompletionsTransport** — default path for OpenAI-compatible providers ([#13805](https://github.com/NousResearch/hermes-agent/pull/13805))
|
||||
- **ResponsesApiTransport** — OpenAI Responses API + Codex build_kwargs wiring ([#13430](https://github.com/NousResearch/hermes-agent/pull/13430), @kshitijk4poor)
|
||||
- **BedrockTransport** — AWS Bedrock Converse API transport ([#13814](https://github.com/NousResearch/hermes-agent/pull/13814))
|
||||
|
||||
### Provider & Model Support
|
||||
- **Native AWS Bedrock provider** via Converse API ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549))
|
||||
- **NVIDIA NIM native provider** (salvage of #11703) ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774))
|
||||
- **Arcee AI direct provider** ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276))
|
||||
- **Step Plan provider** (salvage #6005) ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893), @kshitijk4poor)
|
||||
- **Google Gemini CLI OAuth** inference provider ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270))
|
||||
- **Vercel ai-gateway** with pricing, attribution, and dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223), @jerilynzheng)
|
||||
- **GPT-5.5 over Codex OAuth** with live model discovery in the picker ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
|
||||
- **Gemini routed through native AI Studio API** ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674))
|
||||
- **xAI Grok upgraded to Responses API** ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
|
||||
- **Ollama improvements** — Cloud provider support, GLM continuation, `think=false` control, surrogate sanitization, `/v1` hint ([#10782](https://github.com/NousResearch/hermes-agent/pull/10782))
|
||||
- **Kimi K2.6** across OpenRouter, Nous Portal, native Kimi, and HuggingFace ([#13148](https://github.com/NousResearch/hermes-agent/pull/13148), [#13152](https://github.com/NousResearch/hermes-agent/pull/13152), [#13169](https://github.com/NousResearch/hermes-agent/pull/13169))
|
||||
- **Kimi K2.5** promoted to first position in all model suggestion lists ([#11745](https://github.com/NousResearch/hermes-agent/pull/11745), @kshitijk4poor)
|
||||
- **Xiaomi MiMo v2.5-pro + v2.5** on OpenRouter, Nous Portal, and native ([#14184](https://github.com/NousResearch/hermes-agent/pull/14184), [#14635](https://github.com/NousResearch/hermes-agent/pull/14635), @kshitijk4poor)
|
||||
- **GLM-5V-Turbo** for coding plan ([#9907](https://github.com/NousResearch/hermes-agent/pull/9907))
|
||||
- **Claude Opus 4.7** in Nous Portal catalog ([#11398](https://github.com/NousResearch/hermes-agent/pull/11398))
|
||||
- **OpenRouter elephant-alpha** in curated lists ([#9378](https://github.com/NousResearch/hermes-agent/pull/9378))
|
||||
- **OpenCode-Go** — Kimi K2.6 and Qwen3.5/3.6 Plus in curated catalog ([#13429](https://github.com/NousResearch/hermes-agent/pull/13429))
|
||||
- **minimax/minimax-m2.5:free** in OpenRouter catalog ([#13836](https://github.com/NousResearch/hermes-agent/pull/13836))
|
||||
- **`/model` merges models.dev entries** for lesser-loved providers ([#14221](https://github.com/NousResearch/hermes-agent/pull/14221))
|
||||
- **Per-provider + per-model `request_timeout_seconds`** config ([#12652](https://github.com/NousResearch/hermes-agent/pull/12652))
|
||||
- **Configurable API retry count** via `agent.api_max_retries` ([#14730](https://github.com/NousResearch/hermes-agent/pull/14730))
|
||||
- **ctx_size context length key** for Lemonade server (salvage #8536) ([#14215](https://github.com/NousResearch/hermes-agent/pull/14215))
|
||||
- **Custom provider display name prompt** ([#9420](https://github.com/NousResearch/hermes-agent/pull/9420))
|
||||
- **Recommendation badges** on tool provider selection ([#9929](https://github.com/NousResearch/hermes-agent/pull/9929))
|
||||
- Fix: correct GPT-5 family context lengths in fallback defaults ([#9309](https://github.com/NousResearch/hermes-agent/pull/9309))
|
||||
- Fix: clamp `minimal` reasoning effort to `low` on Responses API ([#9429](https://github.com/NousResearch/hermes-agent/pull/9429))
|
||||
- Fix: strip reasoning item IDs from Responses API input when `store=False` ([#10217](https://github.com/NousResearch/hermes-agent/pull/10217))
|
||||
- Fix: OpenViking correct account default + commit session on `/new` and compress ([#10463](https://github.com/NousResearch/hermes-agent/pull/10463))
|
||||
- Fix: Kimi `/coding` thinking block survival + empty reasoning_content + block ordering (multiple PRs)
|
||||
- Fix: don't send Anthropic thinking to api.kimi.com/coding ([#13826](https://github.com/NousResearch/hermes-agent/pull/13826))
|
||||
- Fix: send `max_tokens`, `reasoning_effort`, and `thinking` for Kimi/Moonshot
|
||||
- Fix: stream reasoning content through OpenAI-compatible providers that emit it
|
||||
|
||||
### Agent Loop & Conversation
|
||||
- **`/steer <prompt>`** — mid-run agent nudges after next tool call ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
|
||||
- **Orchestrator role + configurable spawn depth** for `delegate_task` (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
|
||||
- **Cross-agent file state coordination** for concurrent subagents ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
- **Compressor smart collapse, dedup, anti-thrashing**, template upgrade, hardening ([#10088](https://github.com/NousResearch/hermes-agent/pull/10088))
|
||||
- **Compression summaries respect the conversation's language** ([#12556](https://github.com/NousResearch/hermes-agent/pull/12556))
|
||||
- **Compression model falls back to main model** on permanent 503/404 ([#10093](https://github.com/NousResearch/hermes-agent/pull/10093))
|
||||
- **Auto-continue interrupted agent work** after gateway restart ([#9934](https://github.com/NousResearch/hermes-agent/pull/9934))
|
||||
- **Activity heartbeats** prevent false gateway inactivity timeouts ([#10501](https://github.com/NousResearch/hermes-agent/pull/10501))
|
||||
- **Auxiliary models UI** — dedicated screen for per-task overrides ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891))
|
||||
- **Auxiliary auto routing defaults to main model** for all users ([#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
|
||||
- **PLATFORM_HINTS for Matrix, Mattermost, Feishu** ([#14428](https://github.com/NousResearch/hermes-agent/pull/14428), @alt-glitch)
|
||||
- Fix: reset retry counters after compression; stop poisoning conversation history ([#10055](https://github.com/NousResearch/hermes-agent/pull/10055))
|
||||
- Fix: break compression-exhaustion infinite loop and auto-reset session ([#10063](https://github.com/NousResearch/hermes-agent/pull/10063))
|
||||
- Fix: stale agent timeout, uv venv detection, empty response after tools ([#10065](https://github.com/NousResearch/hermes-agent/pull/10065))
|
||||
- Fix: prevent premature loop exit when weak models return empty after substantive tool calls ([#10472](https://github.com/NousResearch/hermes-agent/pull/10472))
|
||||
- Fix: preserve pre-start terminal interrupts ([#10504](https://github.com/NousResearch/hermes-agent/pull/10504))
|
||||
- Fix: improve interrupt responsiveness during concurrent tool execution ([#10935](https://github.com/NousResearch/hermes-agent/pull/10935))
|
||||
- Fix: word-wrap spinner, interruptable agent join, and delegate_task interrupt ([#10940](https://github.com/NousResearch/hermes-agent/pull/10940))
|
||||
- Fix: `/stop` no longer resets the session ([#9224](https://github.com/NousResearch/hermes-agent/pull/9224))
|
||||
- Fix: honor interrupts during MCP tool waits ([#9382](https://github.com/NousResearch/hermes-agent/pull/9382), @helix4u)
|
||||
- Fix: break stuck session resume loops after repeated restarts ([#9941](https://github.com/NousResearch/hermes-agent/pull/9941))
|
||||
- Fix: empty response nudge crash + placeholder leak to cron targets ([#11021](https://github.com/NousResearch/hermes-agent/pull/11021))
|
||||
- Fix: streaming cursor sanitization to prevent message truncation (multiple PRs)
|
||||
- Fix: resolve `context_length` for plugin context engines ([#9238](https://github.com/NousResearch/hermes-agent/pull/9238))
|
||||
|
||||
### Session & Memory
|
||||
- **Auto-prune old sessions + VACUUM state.db** at startup ([#13861](https://github.com/NousResearch/hermes-agent/pull/13861))
|
||||
- **Honcho overhaul** — context injection, 5-tool surface, cost safety, session isolation ([#10619](https://github.com/NousResearch/hermes-agent/pull/10619))
|
||||
- **Hindsight richer session-scoped retain metadata** (salvage of #6290) ([#13987](https://github.com/NousResearch/hermes-agent/pull/13987))
|
||||
- Fix: deduplicate memory provider tools to prevent 400 on strict providers ([#10511](https://github.com/NousResearch/hermes-agent/pull/10511))
|
||||
- Fix: discover user-installed memory providers from `$HERMES_HOME/plugins/` ([#10529](https://github.com/NousResearch/hermes-agent/pull/10529))
|
||||
- Fix: add `on_memory_write` bridge to sequential tool execution path ([#10507](https://github.com/NousResearch/hermes-agent/pull/10507))
|
||||
- Fix: preserve `session_id` across `previous_response_id` chains in `/v1/responses` ([#10059](https://github.com/NousResearch/hermes-agent/pull/10059))
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ New Ink-based TUI
|
||||
|
||||
A full React/Ink rewrite of the interactive CLI — invoked via `hermes --tui` or `HERMES_TUI=1`. Shipped across ~310 commits to `ui-tui/` and `tui_gateway/`.
|
||||
|
||||
### TUI Foundations
|
||||
- New TUI based on Ink + Python JSON-RPC backend
|
||||
- Prettier + ESLint + vitest tooling for `ui-tui/`
|
||||
- Entry split between `src/entry.tsx` (TTY gate) and `src/app.tsx` (state machine)
|
||||
- Persistent `_SlashWorker` subprocess for slash command dispatch
|
||||
|
||||
### UX & Features
|
||||
- **Stable picker keys, /clear confirm, light-theme preset** ([#12312](https://github.com/NousResearch/hermes-agent/pull/12312), @OutThisLife)
|
||||
- **Git branch in status bar** cwd label ([#12305](https://github.com/NousResearch/hermes-agent/pull/12305), @OutThisLife)
|
||||
- **Per-turn elapsed stopwatch in FaceTicker + done-in sys line** ([#13105](https://github.com/NousResearch/hermes-agent/pull/13105), @OutThisLife)
|
||||
- **Subagent spawn observability overlay** ([#14045](https://github.com/NousResearch/hermes-agent/pull/14045), @OutThisLife)
|
||||
- **Per-prompt elapsed stopwatch in status bar** ([#12948](https://github.com/NousResearch/hermes-agent/pull/12948))
|
||||
- Sticky composer that freezes during scroll
|
||||
- OSC-52 clipboard support for copy across SSH sessions
|
||||
- Virtualized history rendering for performance
|
||||
- Slash command autocomplete via `complete.slash` RPC
|
||||
- Path autocomplete via `complete.path` RPC
|
||||
- Dozens of resize/ghosting/sticky-prompt fixes landed through the week
|
||||
|
||||
### Structural Refactors
|
||||
- Decomposed `app.tsx` into `app/event-handler`, `app/slash-handler`, `app/stores`, `app/hooks` ([#14640](https://github.com/NousResearch/hermes-agent/pull/14640) and surrounding)
|
||||
- Component split: `branding.tsx`, `markdown.tsx`, `prompts.tsx`, `sessionPicker.tsx`, `messageLine.tsx`, `thinking.tsx`, `maskedPrompt.tsx`
|
||||
- Hook split: `useCompletion`, `useInputHistory`, `useQueue`, `useVirtualHistory`
|
||||
|
||||
---
|
||||
|
||||
## 📱 Messaging Platforms (Gateway)
|
||||
|
||||
### New Platforms
|
||||
- **QQBot (17th platform)** — QQ Official API v2 adapter with QR setup, streaming, package split ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
|
||||
|
||||
### Telegram
|
||||
- **Dedicated `TELEGRAM_PROXY` env var + config.yaml proxy support** (closes #9414, #6530, #9074, #7786) ([#10681](https://github.com/NousResearch/hermes-agent/pull/10681))
|
||||
- **`ignored_threads` config** for Telegram groups ([#9530](https://github.com/NousResearch/hermes-agent/pull/9530))
|
||||
- **Config option to disable link previews** (closes #8728) ([#10610](https://github.com/NousResearch/hermes-agent/pull/10610))
|
||||
- **Auto-wrap markdown tables** in code blocks ([#11794](https://github.com/NousResearch/hermes-agent/pull/11794))
|
||||
- Fix: prevent duplicate replies when stream task is cancelled ([#9319](https://github.com/NousResearch/hermes-agent/pull/9319))
|
||||
- Fix: prevent streaming cursor (▉) from appearing as standalone messages ([#9538](https://github.com/NousResearch/hermes-agent/pull/9538))
|
||||
- Fix: retry transient tool sends + cold-boot budget ([#10947](https://github.com/NousResearch/hermes-agent/pull/10947))
|
||||
- Fix: Markdown special char escaping in `send_exec_approval`
|
||||
- Fix: parentheses in URLs during MarkdownV2 link conversion
|
||||
- Fix: Unicode dash normalization in model switch (closes iOS smart-punctuation issue)
|
||||
- Many platform hint / streaming / session-key fixes
|
||||
|
||||
### Discord
|
||||
- **Forum channel support** (salvage of #10145 + media + polish) ([#11920](https://github.com/NousResearch/hermes-agent/pull/11920))
|
||||
- **`DISCORD_ALLOWED_ROLES`** for role-based access control ([#11608](https://github.com/NousResearch/hermes-agent/pull/11608))
|
||||
- **Config option to disable slash commands** (salvage #13130) ([#14315](https://github.com/NousResearch/hermes-agent/pull/14315))
|
||||
- **Native `send_animation`** for inline GIF playback ([#10283](https://github.com/NousResearch/hermes-agent/pull/10283))
|
||||
- **`send_message` Discord media attachments** ([#10246](https://github.com/NousResearch/hermes-agent/pull/10246))
|
||||
- **`/skill` command group** with category subcommands ([#9909](https://github.com/NousResearch/hermes-agent/pull/9909))
|
||||
- **Extract reply text from message references** ([#9781](https://github.com/NousResearch/hermes-agent/pull/9781))
|
||||
|
||||
### Feishu
|
||||
- **Intelligent reply on document comments** with 3-tier access control ([#11898](https://github.com/NousResearch/hermes-agent/pull/11898))
|
||||
- **Show processing state via reactions** on user messages ([#12927](https://github.com/NousResearch/hermes-agent/pull/12927))
|
||||
- **Preserve @mention context for agent consumption** (salvage #13874) ([#14167](https://github.com/NousResearch/hermes-agent/pull/14167))
|
||||
|
||||
### DingTalk
|
||||
- **`require_mention` + `allowed_users` gating** (parity with Slack/Telegram/Discord) ([#11564](https://github.com/NousResearch/hermes-agent/pull/11564))
|
||||
- **QR-code device-flow authorization** for setup wizard ([#11574](https://github.com/NousResearch/hermes-agent/pull/11574))
|
||||
- **AI Cards streaming, emoji reactions, and media handling** (salvage of #10985) ([#11910](https://github.com/NousResearch/hermes-agent/pull/11910))
|
||||
|
||||
### WhatsApp
|
||||
- **`send_voice`** — native audio message delivery ([#13002](https://github.com/NousResearch/hermes-agent/pull/13002))
|
||||
- **`dm_policy` and `group_policy`** parity with WeCom/Weixin/QQ adapters ([#13151](https://github.com/NousResearch/hermes-agent/pull/13151))
|
||||
|
||||
### WeCom / Weixin
|
||||
- **WeCom QR-scan bot creation + interactive setup wizard** (salvage #13923) ([#13961](https://github.com/NousResearch/hermes-agent/pull/13961))
|
||||
|
||||
### Signal
|
||||
- **Media delivery support** via `send_message` ([#13178](https://github.com/NousResearch/hermes-agent/pull/13178))
|
||||
|
||||
### Slack
|
||||
- **Per-thread sessions for DMs by default** ([#10987](https://github.com/NousResearch/hermes-agent/pull/10987))
|
||||
|
||||
### BlueBubbles (iMessage)
|
||||
- Group chat session separation, webhook registration & auth fixes ([#9806](https://github.com/NousResearch/hermes-agent/pull/9806))
|
||||
|
||||
### Gateway Core
|
||||
- **Gateway proxy mode** — forward messages to a remote API server ([#9787](https://github.com/NousResearch/hermes-agent/pull/9787))
|
||||
- **Per-channel ephemeral prompts** (Discord, Telegram, Slack, Mattermost) ([#10564](https://github.com/NousResearch/hermes-agent/pull/10564))
|
||||
- **Surface plugin slash commands** natively on all platforms + decision-capable command hook ([#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
|
||||
- **Support document/archive extensions in MEDIA: tag extraction** (salvage #8255) ([#14307](https://github.com/NousResearch/hermes-agent/pull/14307))
|
||||
- **Recognize `.pdf` in MEDIA: tag extraction** ([#13683](https://github.com/NousResearch/hermes-agent/pull/13683))
|
||||
- **`--all` flag for `gateway start` and `restart`** ([#10043](https://github.com/NousResearch/hermes-agent/pull/10043))
|
||||
- **Notify active sessions on gateway shutdown** + update health check ([#9850](https://github.com/NousResearch/hermes-agent/pull/9850))
|
||||
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
|
||||
- Fix: suppress duplicate replies on interrupt and streaming flood control ([#10235](https://github.com/NousResearch/hermes-agent/pull/10235))
|
||||
- Fix: close temporary agents after one-off tasks ([#11028](https://github.com/NousResearch/hermes-agent/pull/11028), @kshitijk4poor)
|
||||
- Fix: busy-session ack when user messages during active agent run ([#10068](https://github.com/NousResearch/hermes-agent/pull/10068))
|
||||
- Fix: route watch-pattern notifications to the originating session ([#10460](https://github.com/NousResearch/hermes-agent/pull/10460))
|
||||
- Fix: preserve notify context in executor threads ([#10921](https://github.com/NousResearch/hermes-agent/pull/10921), @kshitijk4poor)
|
||||
- Fix: avoid duplicate replies after interrupted long tasks ([#11018](https://github.com/NousResearch/hermes-agent/pull/11018))
|
||||
- Fix: unlink stale PID + lock files on cleanup
|
||||
- Fix: force-unlink stale PID file after `--replace` takeover
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Tool System
|
||||
|
||||
### Plugin Surface (major expansion)
|
||||
- **`register_command()`** — plugins can now add slash commands ([#10626](https://github.com/NousResearch/hermes-agent/pull/10626))
|
||||
- **`dispatch_tool()`** — plugins can invoke tools from their code ([#10763](https://github.com/NousResearch/hermes-agent/pull/10763))
|
||||
- **`pre_tool_call` blocking** — plugins can veto tool execution ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377))
|
||||
- **`transform_tool_result`** — plugins rewrite tool results generically ([#12972](https://github.com/NousResearch/hermes-agent/pull/12972))
|
||||
- **`transform_terminal_output`** — plugins rewrite terminal tool output ([#12929](https://github.com/NousResearch/hermes-agent/pull/12929))
|
||||
- **Namespaced skill registration** for plugin skill bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
|
||||
- **Opt-in-by-default + bundled disk-cleanup plugin** (salvage #12212) ([#12944](https://github.com/NousResearch/hermes-agent/pull/12944))
|
||||
- **Pluggable `image_gen` backends + OpenAI provider** ([#13799](https://github.com/NousResearch/hermes-agent/pull/13799))
|
||||
- **`openai-codex` image_gen plugin** (gpt-image-2 via Codex OAuth) ([#14317](https://github.com/NousResearch/hermes-agent/pull/14317))
|
||||
- **Shell hooks** — wire shell scripts as hook callbacks ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
|
||||
|
||||
### Browser
|
||||
- **`browser_cdp` raw DevTools Protocol passthrough** ([#12369](https://github.com/NousResearch/hermes-agent/pull/12369))
|
||||
- Camofox hardening + connection stability across the window
|
||||
|
||||
### Execute Code
|
||||
- **Project/strict execution modes** (default: project) ([#11971](https://github.com/NousResearch/hermes-agent/pull/11971))
|
||||
|
||||
### Image Generation
|
||||
- **Multi-model FAL support** with picker in `hermes tools` ([#11265](https://github.com/NousResearch/hermes-agent/pull/11265))
|
||||
- **Recraft V3 → V4 Pro, Nano Banana → Pro upgrades** ([#11406](https://github.com/NousResearch/hermes-agent/pull/11406))
|
||||
- **GPT Image 2** in FAL catalog ([#13677](https://github.com/NousResearch/hermes-agent/pull/13677))
|
||||
- **xAI image generation provider** (grok-imagine-image) ([#14765](https://github.com/NousResearch/hermes-agent/pull/14765))
|
||||
|
||||
### TTS / STT / Voice
|
||||
- **Google Gemini TTS provider** ([#11229](https://github.com/NousResearch/hermes-agent/pull/11229))
|
||||
- **xAI Grok STT provider** ([#14473](https://github.com/NousResearch/hermes-agent/pull/14473))
|
||||
- **xAI TTS** (shipped with Responses API upgrade) ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
|
||||
- **KittenTTS local provider** (salvage of #2109) ([#13395](https://github.com/NousResearch/hermes-agent/pull/13395))
|
||||
- **CLI record beep toggle** ([#13247](https://github.com/NousResearch/hermes-agent/pull/13247), @helix4u)
|
||||
|
||||
### Webhook / Cron
|
||||
- **Webhook direct-delivery mode** — zero-LLM push notifications ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
|
||||
- **Cron `wakeAgent` gate** — scripts can skip the agent entirely ([#12373](https://github.com/NousResearch/hermes-agent/pull/12373))
|
||||
- **Cron per-job `enabled_toolsets`** — cap token overhead + cost per job ([#14767](https://github.com/NousResearch/hermes-agent/pull/14767))
|
||||
|
||||
### Delegate
|
||||
- **Orchestrator role** + configurable spawn depth (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
|
||||
- **Cross-agent file state coordination** ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
|
||||
### File / Patch
|
||||
- **`patch` — "did you mean?" feedback** when patch fails to match ([#13435](https://github.com/NousResearch/hermes-agent/pull/13435))
|
||||
|
||||
### API Server
|
||||
- **Stream `/v1/responses` SSE tool events** (salvage #9779) ([#10049](https://github.com/NousResearch/hermes-agent/pull/10049))
|
||||
- **Inline image inputs** on `/v1/chat/completions` and `/v1/responses` ([#12969](https://github.com/NousResearch/hermes-agent/pull/12969))
|
||||
|
||||
### Docker / Podman
|
||||
- **Entry-level Podman support** — `find_docker()` + rootless entrypoint ([#10066](https://github.com/NousResearch/hermes-agent/pull/10066))
|
||||
- **Add docker-cli to Docker image** (salvage #10096) ([#14232](https://github.com/NousResearch/hermes-agent/pull/14232))
|
||||
- **File-sync back to host on teardown** (salvage of #8189 + hardening) ([#11291](https://github.com/NousResearch/hermes-agent/pull/11291))
|
||||
|
||||
### MCP
|
||||
- 12 MCP improvements across the window (status, timeout handling, tool-call forwarding, etc.)
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Skills Ecosystem
|
||||
|
||||
### Skill System
|
||||
- **Namespaced skill registration** for plugin bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
|
||||
- **`hermes skills reset`** to un-stick bundled skills ([#11468](https://github.com/NousResearch/hermes-agent/pull/11468))
|
||||
- **Skills guard opt-in** — `config.skills.guard_agent_created` (default off) ([#14557](https://github.com/NousResearch/hermes-agent/pull/14557))
|
||||
- **Bundled skill scripts runnable out of the box** ([#13384](https://github.com/NousResearch/hermes-agent/pull/13384))
|
||||
- **`xitter` replaced with `xurl`** — the official X API CLI ([#12303](https://github.com/NousResearch/hermes-agent/pull/12303))
|
||||
- **MiniMax-AI/cli as default skill tap** (salvage #7501) ([#14493](https://github.com/NousResearch/hermes-agent/pull/14493))
|
||||
- **Fuzzy `@` file completions + mtime sorting** ([#9467](https://github.com/NousResearch/hermes-agent/pull/9467))
|
||||
|
||||
### New Skills
|
||||
- **concept-diagrams** (salvage of #11045, @v1k22) ([#11363](https://github.com/NousResearch/hermes-agent/pull/11363))
|
||||
- **architecture-diagram** (Cocoon AI port) ([#9906](https://github.com/NousResearch/hermes-agent/pull/9906))
|
||||
- **pixel-art** with hardware palettes and video animation ([#12663](https://github.com/NousResearch/hermes-agent/pull/12663), [#12725](https://github.com/NousResearch/hermes-agent/pull/12725))
|
||||
- **baoyu-comic** ([#13257](https://github.com/NousResearch/hermes-agent/pull/13257), @JimLiu)
|
||||
- **baoyu-infographic** — 21 layouts × 21 styles (salvage #9901) ([#12254](https://github.com/NousResearch/hermes-agent/pull/12254))
|
||||
- **page-agent** — embed Alibaba's in-page GUI agent in your webapp ([#13976](https://github.com/NousResearch/hermes-agent/pull/13976))
|
||||
- **fitness-nutrition** optional skill + optional env var support ([#9355](https://github.com/NousResearch/hermes-agent/pull/9355))
|
||||
- **drug-discovery** — ChEMBL, PubChem, OpenFDA, ADMET ([#9443](https://github.com/NousResearch/hermes-agent/pull/9443))
|
||||
- **touchdesigner-mcp** (salvage of #10081) ([#12298](https://github.com/NousResearch/hermes-agent/pull/12298))
|
||||
- **adversarial-ux-test** optional skill (salvage of #2494, @omnissiah-comelse) ([#13425](https://github.com/NousResearch/hermes-agent/pull/13425))
|
||||
- **maps** — added `guest_house`, `camp_site`, and dual-key bakery lookup ([#13398](https://github.com/NousResearch/hermes-agent/pull/13398))
|
||||
- **llm-wiki** — port provenance markers, source hashing, and quality signals ([#13700](https://github.com/NousResearch/hermes-agent/pull/13700))
|
||||
|
||||
---
|
||||
|
||||
## 📊 Web Dashboard
|
||||
|
||||
- **i18n (English + Chinese) language switcher** ([#9453](https://github.com/NousResearch/hermes-agent/pull/9453))
|
||||
- **Live-switching theme system** ([#10687](https://github.com/NousResearch/hermes-agent/pull/10687))
|
||||
- **Dashboard plugin system** — extend the web UI with custom tabs ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951))
|
||||
- **react-router, sidebar layout, sticky header, dropdown component** ([#9370](https://github.com/NousResearch/hermes-agent/pull/9370), @austinpickett)
|
||||
- **Responsive for mobile** ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), @DeployFaith)
|
||||
- **Vercel deployment** ([#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#11061](https://github.com/NousResearch/hermes-agent/pull/11061), @austinpickett)
|
||||
- **Context window config support** ([#9357](https://github.com/NousResearch/hermes-agent/pull/9357))
|
||||
- **HTTP health probe for cross-container gateway detection** ([#9894](https://github.com/NousResearch/hermes-agent/pull/9894))
|
||||
- **Update + restart gateway buttons** ([#13526](https://github.com/NousResearch/hermes-agent/pull/13526), @austinpickett)
|
||||
- **Real API call count per session** (salvages #10140) ([#14004](https://github.com/NousResearch/hermes-agent/pull/14004))
|
||||
|
||||
---
|
||||
|
||||
## 🖱️ CLI & User Experience
|
||||
|
||||
- **Dynamic shell completion for bash, zsh, and fish** ([#9785](https://github.com/NousResearch/hermes-agent/pull/9785))
|
||||
- **Light-mode skins + skin-aware completion menus** ([#9461](https://github.com/NousResearch/hermes-agent/pull/9461))
|
||||
- **Numbered keyboard shortcuts** on approval and clarify prompts ([#13416](https://github.com/NousResearch/hermes-agent/pull/13416))
|
||||
- **Markdown stripping, compact multiline previews, external editor** ([#12934](https://github.com/NousResearch/hermes-agent/pull/12934))
|
||||
- **`--ignore-user-config` and `--ignore-rules` flags** (port codex#18646) ([#14277](https://github.com/NousResearch/hermes-agent/pull/14277))
|
||||
- **Account limits section in `/usage`** ([#13428](https://github.com/NousResearch/hermes-agent/pull/13428))
|
||||
- **Doctor: Command Installation check** for `hermes` bin symlink ([#10112](https://github.com/NousResearch/hermes-agent/pull/10112))
|
||||
- **ESC cancels secret/sudo prompts**, clearer skip messaging ([#9902](https://github.com/NousResearch/hermes-agent/pull/9902))
|
||||
- Fix: agent-facing text uses `display_hermes_home()` instead of hardcoded `~/.hermes` ([#10285](https://github.com/NousResearch/hermes-agent/pull/10285))
|
||||
- Fix: enforce `config.yaml` as sole CWD source + deprecate `.env` CWD vars + add `hermes memory reset` ([#11029](https://github.com/NousResearch/hermes-agent/pull/11029))
|
||||
|
||||
---
|
||||
|
||||
## 🔒 Security & Reliability
|
||||
|
||||
- **Global toggle to allow private/internal URL resolution** ([#14166](https://github.com/NousResearch/hermes-agent/pull/14166))
|
||||
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
|
||||
- **Telegram callback authorization** on update prompts ([#10536](https://github.com/NousResearch/hermes-agent/pull/10536))
|
||||
- **SECURITY.md** added ([#10532](https://github.com/NousResearch/hermes-agent/pull/10532), @I3eg1nner)
|
||||
- **Warn about legacy hermes.service units** during `hermes update` ([#11918](https://github.com/NousResearch/hermes-agent/pull/11918))
|
||||
- **Complete ASCII-locale UnicodeEncodeError recovery** for `api_messages`/`reasoning_content` (closes #6843) ([#10537](https://github.com/NousResearch/hermes-agent/pull/10537))
|
||||
- **Prevent stale `os.environ` leak** after `clear_session_vars` ([#10527](https://github.com/NousResearch/hermes-agent/pull/10527))
|
||||
- **Prevent agent hang when backgrounding processes** via terminal tool ([#10584](https://github.com/NousResearch/hermes-agent/pull/10584))
|
||||
- Many smaller session-resume, interrupt, streaming, and memory-race fixes throughout the window
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Notable Bug Fixes
|
||||
|
||||
The `fix:` category in this window covers 482 PRs. Highlights:
|
||||
|
||||
- Streaming cursor artifacts filtered from Matrix, Telegram, WhatsApp, Discord (multiple PRs)
|
||||
- `<think>` and `<thought>` blocks filtered from gateway stream consumers ([#9408](https://github.com/NousResearch/hermes-agent/pull/9408))
|
||||
- Gateway display.streaming root-config override regression ([#9799](https://github.com/NousResearch/hermes-agent/pull/9799))
|
||||
- Context `session_search` coerces limit to int (prevents TypeError) ([#10522](https://github.com/NousResearch/hermes-agent/pull/10522))
|
||||
- Memory tool stays available when `fcntl` is unavailable (Windows) ([#9783](https://github.com/NousResearch/hermes-agent/pull/9783))
|
||||
- Trajectory compressor credentials load from `HERMES_HOME/.env` ([#9632](https://github.com/NousResearch/hermes-agent/pull/9632), @Dusk1e)
|
||||
- `@_context_completions` no longer crashes on `@` mention ([#9683](https://github.com/NousResearch/hermes-agent/pull/9683), @kshitijk4poor)
|
||||
- Group session `user_id` no longer treated as `thread_id` in shutdown notifications ([#10546](https://github.com/NousResearch/hermes-agent/pull/10546))
|
||||
- Telegram `platform_hint` — markdown is supported (closes #8261) ([#10612](https://github.com/NousResearch/hermes-agent/pull/10612))
|
||||
- Doctor checks for Kimi China credentials fixed
|
||||
- Streaming: don't suppress final response when commentary message is sent ([#10540](https://github.com/NousResearch/hermes-agent/pull/10540))
|
||||
- Rapid Telegram follow-ups no longer get cut off
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing & CI
|
||||
|
||||
- **Contributor attribution CI check** on PRs ([#9376](https://github.com/NousResearch/hermes-agent/pull/9376))
|
||||
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
|
||||
- Test count stabilized post-Transport refactor; CI matrix held green through the transport rollout
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
- Atropos + wandb links in user guide
|
||||
- ACP / VS Code / Zed / JetBrains integration docs refresh
|
||||
- Webhook subscription docs updated for direct-delivery mode
|
||||
- Plugin author guide expanded for new hooks (`register_command`, `dispatch_tool`, `transform_tool_result`)
|
||||
- Transport layer developer guide added
|
||||
- Website removed Discussions link from README
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
### Core
|
||||
- **@teknium1** (Teknium)
|
||||
|
||||
### Top Community Contributors (by merged PR count)
|
||||
- **@kshitijk4poor** — 49 PRs · Transport refactor (AnthropicTransport, ResponsesApiTransport), Step Plan provider, Xiaomi MiMo v2.5 support, numerous gateway fixes, promoted Kimi K2.5, @ mention crash fix
|
||||
- **@OutThisLife** (Brooklyn) — 31 PRs · TUI polish, git branch in status bar, per-turn stopwatch, stable picker keys, `/clear` confirm, light-theme preset, subagent spawn observability overlay
|
||||
- **@helix4u** — 11 PRs · Voice CLI record beep, MCP tool interrupt handling, assorted stability fixes
|
||||
- **@austinpickett** — 8 PRs · Dashboard react-router + sidebar + sticky header + dropdown, Vercel deployment, update + restart buttons
|
||||
- **@alt-glitch** — 8 PRs · PLATFORM_HINTS for Matrix/Mattermost/Feishu, Matrix fixes
|
||||
- **@ethernet8023** — 3 PRs
|
||||
- **@benbarclay** — 3 PRs
|
||||
- **@Aslaaen** — 2 PRs
|
||||
|
||||
### Also contributing
|
||||
@jerilynzheng (ai-gateway pricing), @JimLiu (baoyu-comic skill), @Dusk1e (trajectory compressor credentials), @DeployFaith (mobile-responsive dashboard), @LeonSGP43, @v1k22 (concept-diagrams), @omnissiah-comelse (adversarial-ux-test), @coekfung (Telegram MarkdownV2 expandable blockquotes), @liftaris (TUI provider resolution), @arihantsethia (skill analytics dashboard), @topcheer + @xing8star (QQBot foundation), @kovyrin, @I3eg1nner (SECURITY.md), @PeterBerthelsen, @lengxii, @priveperfumes, @sjz-ks, @cuyua9, @Disaster-Terminator, @leozeli, @LehaoLin, @trevthefoolish, @loongfay, @MrNiceRicee, @WideLee, @bluefishs, @malaiwah, @bobashopcashier, @dsocolobsky, @iamagenius00, @IAvecilla, @aniruddhaadak80, @Es1la, @asheriif, @walli, @jquesnelle (original Tool Gateway work).
|
||||
|
||||
### All Contributors (alphabetical)
|
||||
|
||||
@0xyg3n, @10ishq, @A-afflatus, @Abnertheforeman, @admin28980, @adybag14-cyber, @akhater, @alexzhu0,
|
||||
@AllardQuek, @alt-glitch, @aniruddhaadak80, @anna-oake, @anniesurla, @anthhub, @areu01or00, @arihantsethia,
|
||||
@arthurbr11, @asheriif, @Aslaaen, @Asunfly, @austinpickett, @AviArora02-commits, @AxDSan, @azhengbot, @Bartok9,
|
||||
@benbarclay, @bennytimz, @bernylinville, @bingo906, @binhnt92, @bkadish, @bluefishs, @bobashopcashier,
|
||||
@brantzh6, @BrennerSpear, @brianclemens, @briandevans, @brooklynnicholson, @bugkill3r, @buray, @burtenshaw,
|
||||
@cdanis, @cgarwood82, @ChimingLiu, @chongweiliu, @christopherwoodall, @coekfung, @cola-runner, @corazzione,
|
||||
@counterposition, @cresslank, @cuyua9, @cypres0099, @danieldoderlein, @davetist, @davidvv, @DeployFaith,
|
||||
@Dev-Mriganka, @devorun, @dieutx, @Disaster-Terminator, @dodo-reach, @draix, @DrStrangerUJN, @dsocolobsky,
|
||||
@Dusk1e, @dyxushuai, @elkimek, @elmatadorgh, @emozilla, @entropidelic, @Erosika, @erosika, @Es1la, @etcircle,
|
||||
@etherman-os, @ethernet8023, @fancydirty, @farion1231, @fatinghenji, @Fatty911, @fengtianyu88, @Feranmi10,
|
||||
@flobo3, @francip, @fuleinist, @g-guthrie, @GenKoKo, @gianfrancopiana, @gnanam1990, @GuyCui, @haileymarshall,
|
||||
@haimu0x, @handsdiff, @hansnow, @hedgeho9X, @helix4u, @hengm3467, @HenkDz, @heykb, @hharry11, @HiddenPuppy,
|
||||
@honghua, @houko, @houziershi, @hsy5571616, @huangke19, @hxp-plus, @Hypn0sis, @I3eg1nner, @iacker,
|
||||
@iamagenius00, @IAvecilla, @iborazzi, @Ifkellx, @ifrederico, @imink, @isaachuangGMICLOUD, @ismell0992-afk,
|
||||
@j0sephz, @Jaaneek, @jackjin1997, @JackTheGit, @jaffarkeikei, @jerilynzheng, @JiaDe-Wu, @Jiawen-lee, @JimLiu,
|
||||
@jinzheng8115, @jneeee, @jplew, @jquesnelle, @Julientalbot, @Junass1, @jvcl, @kagura-agent, @keifergu,
|
||||
@kevinskysunny, @keyuyuan, @konsisumer, @kovyrin, @kshitijk4poor, @leeyang1990, @LehaoLin, @lengxii,
|
||||
@LeonSGP43, @leozeli, @li0near, @liftaris, @Lind3ey, @Linux2010, @liujinkun2025, @LLQWQ, @Llugaes, @lmoncany,
|
||||
@longsizhuo, @lrawnsley, @Lubrsy706, @lumenradley, @luyao618, @lvnilesh, @LVT382009, @m0n5t3r, @Magaav,
|
||||
@MagicRay1217, @malaiwah, @manuelschipper, @Marvae, @MassiveMassimo, @mavrickdeveloper, @maxchernin, @memosr,
|
||||
@meng93, @mengjian-github, @MestreY0d4-Uninter, @Mibayy, @MikeFac, @mikewaters, @milkoor, @minorgod,
|
||||
@MrNiceRicee, @ms-alan, @mvanhorn, @n-WN, @N0nb0at, @Nan93, @NIDNASSER-Abdelmajid, @nish3451, @niyoh120,
|
||||
@nocoo, @nosleepcassette, @NousResearch, @ogzerber, @omnissiah-comelse, @Only-Code-A, @opriz, @OwenYWT, @pedh,
|
||||
@pefontana, @PeterBerthelsen, @phpoh, @pinion05, @plgonzalezrx8, @pradeep7127, @priveperfumes,
|
||||
@projectadmin-dev, @PStarH, @rnijhara, @Roy-oss1, @roytian1217, @RucchiZ, @Ruzzgar, @RyanLee-Dev, @Salt-555,
|
||||
@Sanjays2402, @sgaofen, @sharziki, @shenuu, @shin4, @SHL0MS, @shushuzn, @sicnuyudidi, @simon-gtcl,
|
||||
@simon-marcus, @sirEven, @Sisyphus, @sjz-ks, @snreynolds, @Societus, @Somme4096, @sontianye, @sprmn24,
|
||||
@StefanIsMe, @stephenschoettler, @Swift42, @taeng0204, @taeuk178, @tannerfokkens-maker, @TaroballzChen,
|
||||
@ten-ltw, @teyrebaz33, @Tianworld, @topcheer, @Tranquil-Flow, @trevthefoolish, @TroyMitchell911, @UNLINEARITY,
|
||||
@v1k22, @vivganes, @vominh1919, @vrinek, @VTRiot, @WadydX, @walli, @wenhao7, @WhiteWorld, @WideLee, @wujhsu,
|
||||
@WuTianyi123, @Wysie, @xandersbell, @xiaoqiang243, @xiayh0107, @xinpengdr, @Xowiek, @ycbai, @yeyitech, @ygd58,
|
||||
@youngDoo, @yudaiyan, @Yukipukii1, @yule975, @yyq4193, @yzx9, @ZaynJarvis, @zhang9w0v5, @zhanggttry,
|
||||
@zhangxicen, @zhongyueming1121, @zhouxiaoya12, @zons-zhaozhy
|
||||
|
||||
Also: @maelrx, @Marco Rutsch, @MaxsolcuCrypto, @Mind-Dragon, @Paul Bergeron, @say8hi, @whitehatjr1001.
|
||||
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.4.13...v2026.4.23](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.23)
|
||||
@@ -151,7 +151,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||
# differs from their main chat model, map it here. The vision auto-detect
|
||||
# "exotic provider" branch checks this before falling back to the main model.
|
||||
_PROVIDER_VISION_MODELS: Dict[str, str] = {
|
||||
"xiaomi": "mimo-v2-omni",
|
||||
"xiaomi": "mimo-v2.5",
|
||||
"zai": "glm-5v-turbo",
|
||||
}
|
||||
|
||||
@@ -916,6 +916,19 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
|
||||
|
||||
|
||||
def _describe_openrouter_unavailable() -> str:
|
||||
"""Return a more precise OpenRouter auth failure reason for logs."""
|
||||
pool_present, entry = _select_pool_entry("openrouter")
|
||||
if pool_present:
|
||||
if entry is None:
|
||||
return "OpenRouter credential pool has no usable entries (credentials may be exhausted)"
|
||||
if not _pool_runtime_api_key(entry):
|
||||
return "OpenRouter credential pool entry is missing a runtime API key"
|
||||
if not str(os.getenv("OPENROUTER_API_KEY") or "").strip():
|
||||
return "OPENROUTER_API_KEY not set"
|
||||
return "no usable OpenRouter credentials found"
|
||||
|
||||
|
||||
def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
# Check cross-session rate limit guard before attempting Nous —
|
||||
# if another session already recorded a 429, skip Nous entirely
|
||||
@@ -1627,8 +1640,10 @@ def resolve_provider_client(
|
||||
if provider == "openrouter":
|
||||
client, default = _try_openrouter()
|
||||
if client is None:
|
||||
logger.warning("resolve_provider_client: openrouter requested "
|
||||
"but OPENROUTER_API_KEY not set")
|
||||
logger.warning(
|
||||
"resolve_provider_client: openrouter requested but %s",
|
||||
_describe_openrouter_unavailable(),
|
||||
)
|
||||
return None, None
|
||||
final_model = _normalize_resolved_model(model or default, provider)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
|
||||
@@ -45,6 +45,7 @@ class FailoverReason(enum.Enum):
|
||||
|
||||
# Model
|
||||
model_not_found = "model_not_found" # 404 or invalid model — fallback to different model
|
||||
provider_policy_blocked = "provider_policy_blocked" # Aggregator (e.g. OpenRouter) blocked the only endpoint due to account data/privacy policy
|
||||
|
||||
# Request format
|
||||
format_error = "format_error" # 400 bad request — abort or strip + retry
|
||||
@@ -194,6 +195,29 @@ _MODEL_NOT_FOUND_PATTERNS = [
|
||||
"unsupported model",
|
||||
]
|
||||
|
||||
# OpenRouter aggregator policy-block patterns.
|
||||
#
|
||||
# When a user's OpenRouter account privacy setting (or a per-request
|
||||
# `provider.data_collection: deny` preference) excludes the only endpoint
|
||||
# serving a model, OpenRouter returns 404 with a *specific* message that is
|
||||
# distinct from "model not found":
|
||||
#
|
||||
# "No endpoints available matching your guardrail restrictions and
|
||||
# data policy. Configure: https://openrouter.ai/settings/privacy"
|
||||
#
|
||||
# We classify this as `provider_policy_blocked` rather than
|
||||
# `model_not_found` because:
|
||||
# - The model *exists* — model_not_found is misleading in logs
|
||||
# - Provider fallback won't help: the account-level setting applies to
|
||||
# every call on the same OpenRouter account
|
||||
# - The error body already contains the fix URL, so the user gets
|
||||
# actionable guidance without us rewriting the message
|
||||
_PROVIDER_POLICY_BLOCKED_PATTERNS = [
|
||||
"no endpoints available matching your guardrail",
|
||||
"no endpoints available matching your data policy",
|
||||
"no endpoints found matching your data policy",
|
||||
]
|
||||
|
||||
# Auth patterns (non-status-code signals)
|
||||
_AUTH_PATTERNS = [
|
||||
"invalid api key",
|
||||
@@ -523,6 +547,17 @@ def _classify_by_status(
|
||||
return _classify_402(error_msg, result_fn)
|
||||
|
||||
if status_code == 404:
|
||||
# OpenRouter policy-block 404 — distinct from "model not found".
|
||||
# The model exists; the user's account privacy setting excludes the
|
||||
# only endpoint serving it. Falling back to another provider won't
|
||||
# help (same account setting applies). The error body already
|
||||
# contains the fix URL, so just surface it.
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
@@ -640,6 +675,12 @@ def _classify_400(
|
||||
)
|
||||
|
||||
# Some providers return model-not-found as 400 instead of 404 (e.g. OpenRouter).
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
@@ -812,6 +853,15 @@ def _classify_by_message(
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Provider policy-block (aggregator-side guardrail) — check before
|
||||
# model_not_found so we don't mis-label as a missing model.
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
|
||||
# Model not found patterns
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
|
||||
+128
-6
@@ -123,6 +123,10 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"claude": 200000,
|
||||
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
|
||||
# Source: https://developers.openai.com/api/docs/models
|
||||
# GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
|
||||
# can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
|
||||
# Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
|
||||
"gpt-5.5": 400000,
|
||||
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
|
||||
@@ -183,12 +187,12 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"moonshotai/Kimi-K2.6": 262144,
|
||||
"moonshotai/Kimi-K2-Thinking": 262144,
|
||||
"MiniMaxAI/MiniMax-M2.5": 204800,
|
||||
"XiaomiMiMo/MiMo-V2-Flash": 256000,
|
||||
"mimo-v2-pro": 1000000,
|
||||
"mimo-v2-omni": 256000,
|
||||
"mimo-v2-flash": 256000,
|
||||
"mimo-v2.5-pro": 1000000,
|
||||
"mimo-v2.5": 1000000,
|
||||
"XiaomiMiMo/MiMo-V2-Flash": 262144,
|
||||
"mimo-v2-pro": 1048576,
|
||||
"mimo-v2.5-pro": 1048576,
|
||||
"mimo-v2.5": 1048576,
|
||||
"mimo-v2-omni": 262144,
|
||||
"mimo-v2-flash": 262144,
|
||||
"zai-org/GLM-5": 202752,
|
||||
}
|
||||
|
||||
@@ -1002,6 +1006,115 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
|
||||
return None
|
||||
|
||||
|
||||
# Known ChatGPT Codex OAuth context windows (observed via live
|
||||
# chatgpt.com/backend-api/codex/models probe, Apr 2026). These are the
|
||||
# `context_window` values, which are what Codex actually enforces — the
|
||||
# direct OpenAI API has larger limits for the same slugs, but Codex OAuth
|
||||
# caps lower (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex).
|
||||
#
|
||||
# Used as a fallback when the live probe fails (no token, network error).
|
||||
# Longest keys first so substring match picks the most specific entry.
|
||||
_CODEX_OAUTH_CONTEXT_FALLBACK: Dict[str, int] = {
|
||||
"gpt-5.1-codex-max": 272_000,
|
||||
"gpt-5.1-codex-mini": 272_000,
|
||||
"gpt-5.3-codex": 272_000,
|
||||
"gpt-5.2-codex": 272_000,
|
||||
"gpt-5.4-mini": 272_000,
|
||||
"gpt-5.5": 272_000,
|
||||
"gpt-5.4": 272_000,
|
||||
"gpt-5.2": 272_000,
|
||||
"gpt-5": 272_000,
|
||||
}
|
||||
|
||||
|
||||
_codex_oauth_context_cache: Dict[str, int] = {}
|
||||
_codex_oauth_context_cache_time: float = 0.0
|
||||
_CODEX_OAUTH_CONTEXT_CACHE_TTL = 3600 # 1 hour
|
||||
|
||||
|
||||
def _fetch_codex_oauth_context_lengths(access_token: str) -> Dict[str, int]:
|
||||
"""Probe the ChatGPT Codex /models endpoint for per-slug context windows.
|
||||
|
||||
Codex OAuth imposes its own context limits that differ from the direct
|
||||
OpenAI API (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex). The
|
||||
`context_window` field in each model entry is the authoritative source.
|
||||
|
||||
Returns a ``{slug: context_window}`` dict. Empty on failure.
|
||||
"""
|
||||
global _codex_oauth_context_cache, _codex_oauth_context_cache_time
|
||||
now = time.time()
|
||||
if (
|
||||
_codex_oauth_context_cache
|
||||
and now - _codex_oauth_context_cache_time < _CODEX_OAUTH_CONTEXT_CACHE_TTL
|
||||
):
|
||||
return _codex_oauth_context_cache
|
||||
|
||||
try:
|
||||
resp = requests.get(
|
||||
"https://chatgpt.com/backend-api/codex/models?client_version=1.0.0",
|
||||
headers={"Authorization": f"Bearer {access_token}"},
|
||||
timeout=10,
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
logger.debug(
|
||||
"Codex /models probe returned HTTP %s; falling back to hardcoded defaults",
|
||||
resp.status_code,
|
||||
)
|
||||
return {}
|
||||
data = resp.json()
|
||||
except Exception as exc:
|
||||
logger.debug("Codex /models probe failed: %s", exc)
|
||||
return {}
|
||||
|
||||
entries = data.get("models", []) if isinstance(data, dict) else []
|
||||
result: Dict[str, int] = {}
|
||||
for item in entries:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
slug = item.get("slug")
|
||||
ctx = item.get("context_window")
|
||||
if isinstance(slug, str) and isinstance(ctx, int) and ctx > 0:
|
||||
result[slug.strip()] = ctx
|
||||
|
||||
if result:
|
||||
_codex_oauth_context_cache = result
|
||||
_codex_oauth_context_cache_time = now
|
||||
return result
|
||||
|
||||
|
||||
def _resolve_codex_oauth_context_length(
|
||||
model: str, access_token: str = ""
|
||||
) -> Optional[int]:
|
||||
"""Resolve a Codex OAuth model's real context window.
|
||||
|
||||
Prefers a live probe of chatgpt.com/backend-api/codex/models (when we
|
||||
have a bearer token), then falls back to ``_CODEX_OAUTH_CONTEXT_FALLBACK``.
|
||||
"""
|
||||
model_bare = _strip_provider_prefix(model).strip()
|
||||
if not model_bare:
|
||||
return None
|
||||
|
||||
if access_token:
|
||||
live = _fetch_codex_oauth_context_lengths(access_token)
|
||||
if model_bare in live:
|
||||
return live[model_bare]
|
||||
# Case-insensitive match in case casing drifts
|
||||
model_lower = model_bare.lower()
|
||||
for slug, ctx in live.items():
|
||||
if slug.lower() == model_lower:
|
||||
return ctx
|
||||
|
||||
# Fallback: longest-key-first substring match over hardcoded defaults.
|
||||
model_lower = model_bare.lower()
|
||||
for slug, ctx in sorted(
|
||||
_CODEX_OAUTH_CONTEXT_FALLBACK.items(), key=lambda x: len(x[0]), reverse=True
|
||||
):
|
||||
if slug in model_lower:
|
||||
return ctx
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_nous_context_length(model: str) -> Optional[int]:
|
||||
"""Resolve Nous Portal model context length via OpenRouter metadata.
|
||||
|
||||
@@ -1146,6 +1259,15 @@ def get_model_context_length(
|
||||
ctx = _resolve_nous_context_length(model)
|
||||
if ctx:
|
||||
return ctx
|
||||
if effective_provider == "openai-codex":
|
||||
# Codex OAuth enforces lower context limits than the direct OpenAI
|
||||
# API for the same slug (e.g. gpt-5.5 is 1.05M on the API but 272K
|
||||
# on Codex). Authoritative source is Codex's own /models endpoint.
|
||||
codex_ctx = _resolve_codex_oauth_context_length(model, access_token=api_key or "")
|
||||
if codex_ctx:
|
||||
if base_url:
|
||||
save_context_length(model, base_url, codex_ctx)
|
||||
return codex_ctx
|
||||
if effective_provider:
|
||||
from agent.models_dev import lookup_models_dev_context
|
||||
ctx = lookup_models_dev_context(effective_provider, model)
|
||||
|
||||
@@ -0,0 +1,190 @@
|
||||
"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset.
|
||||
|
||||
Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI
|
||||
tool calling. Requests that violate it fail with HTTP 400:
|
||||
|
||||
tools.function.parameters is not a valid moonshot flavored json schema,
|
||||
details: <...>
|
||||
|
||||
Known rejection modes documented at
|
||||
https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102
|
||||
and MoonshotAI/kimi-cli#1595:
|
||||
|
||||
1. Every property schema must carry a ``type``. Standard JSON Schema allows
|
||||
type to be omitted (the value is then unconstrained); Moonshot refuses.
|
||||
2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
|
||||
the parent. Presence of both causes "type should be defined in anyOf
|
||||
items instead of the parent schema".
|
||||
|
||||
The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
|
||||
handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
|
||||
applies at MCP registration time for all providers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Keys whose values are maps of name → schema (not schemas themselves).
|
||||
# When we recurse, we walk the values of these maps as schemas, but we do
|
||||
# NOT apply the missing-type repair to the map itself.
|
||||
_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"})
|
||||
|
||||
# Keys whose values are lists of schemas.
|
||||
_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"})
|
||||
|
||||
# Keys whose values are a single nested schema.
|
||||
_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"})
|
||||
|
||||
|
||||
def _repair_schema(node: Any, is_schema: bool = True) -> Any:
|
||||
"""Recursively apply Moonshot repairs to a schema node.
|
||||
|
||||
``is_schema=True`` means this dict is a JSON Schema node and gets the
|
||||
missing-type + anyOf-parent repairs applied. ``is_schema=False`` means
|
||||
it's a container map (e.g. the value of ``properties``) and we only
|
||||
recurse into its values.
|
||||
"""
|
||||
if isinstance(node, list):
|
||||
# Lists only show up under schema-list keys (anyOf/oneOf/allOf), so
|
||||
# every element is itself a schema.
|
||||
return [_repair_schema(item, is_schema=True) for item in node]
|
||||
if not isinstance(node, dict):
|
||||
return node
|
||||
|
||||
# Walk the dict, deciding per-key whether recursion is into a schema
|
||||
# node, a container map, or a scalar.
|
||||
repaired: Dict[str, Any] = {}
|
||||
for key, value in node.items():
|
||||
if key in _SCHEMA_MAP_KEYS and isinstance(value, dict):
|
||||
# Map of name → schema. Don't treat the map itself as a schema
|
||||
# (it has no type / properties of its own), but each value is.
|
||||
repaired[key] = {
|
||||
sub_key: _repair_schema(sub_val, is_schema=True)
|
||||
for sub_key, sub_val in value.items()
|
||||
}
|
||||
elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
|
||||
repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
|
||||
elif key in _SCHEMA_NODE_KEYS:
|
||||
# items / not / additionalProperties: single nested schema.
|
||||
# additionalProperties can also be a bool — leave those alone.
|
||||
if isinstance(value, dict):
|
||||
repaired[key] = _repair_schema(value, is_schema=True)
|
||||
else:
|
||||
repaired[key] = value
|
||||
else:
|
||||
# Scalars (description, title, format, enum values, etc.) pass through.
|
||||
repaired[key] = value
|
||||
|
||||
if not is_schema:
|
||||
return repaired
|
||||
|
||||
# Rule 2: when anyOf is present, type belongs only on the children.
|
||||
if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
|
||||
repaired.pop("type", None)
|
||||
return repaired
|
||||
|
||||
# Rule 1: property schemas without type need one. $ref nodes are exempt
|
||||
# — their type comes from the referenced definition.
|
||||
if "$ref" in repaired:
|
||||
return repaired
|
||||
return _fill_missing_type(repaired)
|
||||
|
||||
|
||||
def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Infer a reasonable ``type`` if this schema node has none."""
|
||||
if "type" in node and node["type"] not in (None, ""):
|
||||
return node
|
||||
|
||||
# Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
|
||||
# → type of first enum value, else fall back to ``string`` (safest scalar).
|
||||
if "properties" in node or "required" in node or "additionalProperties" in node:
|
||||
inferred = "object"
|
||||
elif "items" in node or "prefixItems" in node:
|
||||
inferred = "array"
|
||||
elif "enum" in node and isinstance(node["enum"], list) and node["enum"]:
|
||||
sample = node["enum"][0]
|
||||
if isinstance(sample, bool):
|
||||
inferred = "boolean"
|
||||
elif isinstance(sample, int):
|
||||
inferred = "integer"
|
||||
elif isinstance(sample, float):
|
||||
inferred = "number"
|
||||
else:
|
||||
inferred = "string"
|
||||
else:
|
||||
inferred = "string"
|
||||
|
||||
return {**node, "type": inferred}
|
||||
|
||||
|
||||
def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]:
|
||||
"""Normalize tool parameters to a Moonshot-compatible object schema.
|
||||
|
||||
Returns a deep-copied schema with the two flavored-JSON-Schema repairs
|
||||
applied. Input is not mutated.
|
||||
"""
|
||||
if not isinstance(parameters, dict):
|
||||
return {"type": "object", "properties": {}}
|
||||
|
||||
repaired = _repair_schema(copy.deepcopy(parameters), is_schema=True)
|
||||
if not isinstance(repaired, dict):
|
||||
return {"type": "object", "properties": {}}
|
||||
|
||||
# Top-level must be an object schema
|
||||
if repaired.get("type") != "object":
|
||||
repaired["type"] = "object"
|
||||
if "properties" not in repaired:
|
||||
repaired["properties"] = {}
|
||||
|
||||
return repaired
|
||||
|
||||
|
||||
def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters."""
|
||||
if not tools:
|
||||
return tools
|
||||
|
||||
sanitized: List[Dict[str, Any]] = []
|
||||
any_change = False
|
||||
for tool in tools:
|
||||
if not isinstance(tool, dict):
|
||||
sanitized.append(tool)
|
||||
continue
|
||||
fn = tool.get("function")
|
||||
if not isinstance(fn, dict):
|
||||
sanitized.append(tool)
|
||||
continue
|
||||
params = fn.get("parameters")
|
||||
repaired = sanitize_moonshot_tool_parameters(params)
|
||||
if repaired is not params:
|
||||
any_change = True
|
||||
new_fn = {**fn, "parameters": repaired}
|
||||
sanitized.append({**tool, "function": new_fn})
|
||||
else:
|
||||
sanitized.append(tool)
|
||||
|
||||
return sanitized if any_change else tools
|
||||
|
||||
|
||||
def is_moonshot_model(model: str | None) -> bool:
|
||||
"""True for any Kimi / Moonshot model slug, regardless of aggregator prefix.
|
||||
|
||||
Matches bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) and aggregator-
|
||||
prefixed slugs (``nous/moonshotai/kimi-k2.6``, ``openrouter/moonshotai/...``).
|
||||
Detection by model name covers Nous / OpenRouter / other aggregators that
|
||||
route to Moonshot's inference, where the base URL is the aggregator's, not
|
||||
``api.moonshot.ai``.
|
||||
"""
|
||||
if not model:
|
||||
return False
|
||||
bare = model.strip().lower()
|
||||
# Last path segment (covers aggregator-prefixed slugs)
|
||||
tail = bare.rsplit("/", 1)[-1]
|
||||
if tail.startswith("kimi-") or tail == "kimi":
|
||||
return True
|
||||
# Vendor-prefixed forms commonly used on aggregators
|
||||
if "moonshot" in bare or "/kimi" in bare or bare.startswith("kimi"):
|
||||
return True
|
||||
return False
|
||||
@@ -345,7 +345,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
_skill_commands = {}
|
||||
try:
|
||||
from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
|
||||
from agent.skill_utils import get_external_skills_dirs
|
||||
from agent.skill_utils import get_external_skills_dirs, iter_skill_index_files
|
||||
disabled = _get_disabled_skill_names()
|
||||
seen_names: set = set()
|
||||
|
||||
@@ -356,7 +356,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
dirs_to_scan.extend(get_external_skills_dirs())
|
||||
|
||||
for scan_dir in dirs_to_scan:
|
||||
for skill_md in scan_dir.rglob("SKILL.md"):
|
||||
for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
|
||||
if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
|
||||
continue
|
||||
try:
|
||||
|
||||
@@ -12,6 +12,7 @@ reasoning configuration, temperature handling, and extra_body assembly.
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
|
||||
from agent.prompt_builder import DEVELOPER_ROLE_MODELS
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
@@ -172,6 +173,11 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
|
||||
# Tools
|
||||
if tools:
|
||||
# Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting
|
||||
# tool parameters here keeps aggregator routes (Nous, OpenRouter,
|
||||
# etc.) compatible, in addition to direct moonshot.ai endpoints.
|
||||
if is_moonshot_model(model):
|
||||
tools = sanitize_moonshot_tools(tools)
|
||||
api_kwargs["tools"] = tools
|
||||
|
||||
# max_tokens resolution — priority: ephemeral > user > provider default
|
||||
|
||||
@@ -61,6 +61,20 @@ class ToolCall:
|
||||
"""Codex response_item_id from provider_data."""
|
||||
return (self.provider_data or {}).get("response_item_id")
|
||||
|
||||
@property
|
||||
def extra_content(self) -> Optional[Dict[str, Any]]:
|
||||
"""Gemini extra_content (thought_signature) from provider_data.
|
||||
|
||||
Gemini 3 thinking models attach ``extra_content`` with a
|
||||
``thought_signature`` to each tool call. This signature must be
|
||||
replayed on subsequent API calls — without it the API rejects the
|
||||
request with HTTP 400. The chat_completions transport stores this
|
||||
in ``provider_data["extra_content"]``; this property exposes it so
|
||||
``_build_assistant_message`` can ``getattr(tc, "extra_content")``
|
||||
uniformly.
|
||||
"""
|
||||
return (self.provider_data or {}).get("extra_content")
|
||||
|
||||
|
||||
@dataclass
|
||||
class Usage:
|
||||
|
||||
@@ -507,6 +507,13 @@ agent:
|
||||
# finish, then interrupts anything still running after this timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
# restart_drain_timeout: 60
|
||||
|
||||
# Max app-level retry attempts for API errors (connection drops, provider
|
||||
# timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this
|
||||
# to 1 if you use fallback providers and want fast failover on flaky
|
||||
# primaries (default 3). The OpenAI SDK does its own low-level retries
|
||||
# underneath this wrapper — this is the Hermes-level loop.
|
||||
# api_max_retries: 3
|
||||
|
||||
# Enable verbose logging
|
||||
verbose: false
|
||||
|
||||
@@ -6685,6 +6685,13 @@ class HermesCLI:
|
||||
print(f" ⚠ Port {_port} is not reachable at {cdp_url}")
|
||||
|
||||
os.environ["BROWSER_CDP_URL"] = cdp_url
|
||||
# Eagerly start the CDP supervisor so pending_dialogs + frame_tree
|
||||
# show up in the next browser_snapshot. No-op if already started.
|
||||
try:
|
||||
from tools.browser_tool import _ensure_cdp_supervisor # type: ignore[import-not-found]
|
||||
_ensure_cdp_supervisor("default")
|
||||
except Exception:
|
||||
pass
|
||||
print()
|
||||
print("🌐 Browser connected to live Chrome via CDP")
|
||||
print(f" Endpoint: {cdp_url}")
|
||||
@@ -6706,7 +6713,8 @@ class HermesCLI:
|
||||
if current:
|
||||
os.environ.pop("BROWSER_CDP_URL", None)
|
||||
try:
|
||||
from tools.browser_tool import cleanup_all_browsers
|
||||
from tools.browser_tool import cleanup_all_browsers, _stop_cdp_supervisor
|
||||
_stop_cdp_supervisor("default")
|
||||
cleanup_all_browsers()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -384,6 +384,7 @@ def create_job(
|
||||
provider: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
script: Optional[str] = None,
|
||||
enabled_toolsets: Optional[List[str]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Create a new cron job.
|
||||
@@ -403,6 +404,9 @@ def create_job(
|
||||
script: Optional path to a Python script whose stdout is injected into the
|
||||
prompt each run. The script runs before the agent turn, and its output
|
||||
is prepended as context. Useful for data collection / change detection.
|
||||
enabled_toolsets: Optional list of toolset names to restrict the agent to.
|
||||
When set, only tools from these toolsets are loaded, reducing
|
||||
token overhead. When omitted, all default tools are loaded.
|
||||
|
||||
Returns:
|
||||
The created job dict
|
||||
@@ -433,6 +437,8 @@ def create_job(
|
||||
normalized_base_url = normalized_base_url or None
|
||||
normalized_script = str(script).strip() if isinstance(script, str) else None
|
||||
normalized_script = normalized_script or None
|
||||
normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None
|
||||
normalized_toolsets = normalized_toolsets or None
|
||||
|
||||
label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
|
||||
job = {
|
||||
@@ -464,6 +470,7 @@ def create_job(
|
||||
# Delivery configuration
|
||||
"deliver": deliver,
|
||||
"origin": origin, # Tracks where job was created for "origin" delivery
|
||||
"enabled_toolsets": normalized_toolsets,
|
||||
}
|
||||
|
||||
jobs = load_jobs()
|
||||
|
||||
@@ -40,6 +40,37 @@ from hermes_time import now as _hermes_now
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None:
|
||||
"""Resolve the toolset list for a cron job.
|
||||
|
||||
Precedence:
|
||||
1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update).
|
||||
Keeps the agent's job-scoped toolset override intact — #6130.
|
||||
2. Per-platform ``hermes tools`` config for the ``cron`` platform.
|
||||
Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``)
|
||||
so users can gate cron toolsets globally without recreating every job.
|
||||
3. ``None`` on any lookup failure — AIAgent loads the full default set
|
||||
(legacy behavior before this change, preserved as the safety net).
|
||||
|
||||
_DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by
|
||||
``_get_platform_tools`` for unconfigured platforms, so fresh installs
|
||||
get cron WITHOUT ``moa`` by default (issue reported by Norbert —
|
||||
surprise $4.63 run).
|
||||
"""
|
||||
per_job = job.get("enabled_toolsets")
|
||||
if per_job:
|
||||
return per_job
|
||||
try:
|
||||
from hermes_cli.tools_config import _get_platform_tools # lazy: avoid heavy import at cron module load
|
||||
return sorted(_get_platform_tools(cfg or {}, "cron"))
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Cron toolset resolution failed, falling back to full default toolset: %s",
|
||||
exc,
|
||||
)
|
||||
return None
|
||||
|
||||
# Valid delivery platforms — used to validate user-supplied platform names
|
||||
# in cron delivery targets, preventing env var enumeration via crafted names.
|
||||
_KNOWN_DELIVERY_PLATFORMS = frozenset({
|
||||
@@ -886,6 +917,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
providers_ignored=pr.get("ignore"),
|
||||
providers_order=pr.get("order"),
|
||||
provider_sort=pr.get("sort"),
|
||||
enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
|
||||
disabled_toolsets=["cronjob", "messaging", "clarify"],
|
||||
quiet_mode=True,
|
||||
skip_context_files=True, # Don't inject SOUL.md/AGENTS.md from scheduler cwd
|
||||
|
||||
@@ -2440,6 +2440,9 @@ class BasePlatformAdapter(ABC):
|
||||
user_id_alt: Optional[str] = None,
|
||||
chat_id_alt: Optional[str] = None,
|
||||
is_bot: bool = False,
|
||||
guild_id: Optional[str] = None,
|
||||
parent_chat_id: Optional[str] = None,
|
||||
message_id: Optional[str] = None,
|
||||
) -> SessionSource:
|
||||
"""Helper to build a SessionSource for this platform."""
|
||||
# Normalize empty topic to None
|
||||
@@ -2457,6 +2460,9 @@ class BasePlatformAdapter(ABC):
|
||||
user_id_alt=user_id_alt,
|
||||
chat_id_alt=chat_id_alt,
|
||||
is_bot=is_bot,
|
||||
guild_id=str(guild_id) if guild_id else None,
|
||||
parent_chat_id=str(parent_chat_id) if parent_chat_id else None,
|
||||
message_id=str(message_id) if message_id else None,
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
|
||||
@@ -23,6 +23,7 @@ from typing import Callable, Dict, Optional, Any
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
|
||||
_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}
|
||||
|
||||
try:
|
||||
import discord
|
||||
@@ -802,8 +803,27 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if not self._client:
|
||||
return
|
||||
try:
|
||||
synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
|
||||
logger.info("[%s] Synced %d slash command(s)", self.name, len(synced))
|
||||
sync_policy = self._get_discord_command_sync_policy()
|
||||
if sync_policy == "off":
|
||||
logger.info("[%s] Skipping Discord slash command sync (policy=off)", self.name)
|
||||
return
|
||||
|
||||
if sync_policy == "bulk":
|
||||
synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
|
||||
logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
|
||||
return
|
||||
|
||||
summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
|
||||
logger.info(
|
||||
"[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
|
||||
self.name,
|
||||
summary["total"],
|
||||
summary["unchanged"],
|
||||
summary["updated"],
|
||||
summary["recreated"],
|
||||
summary["created"],
|
||||
summary["deleted"],
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning("[%s] Slash command sync timed out after 30s", self.name)
|
||||
except asyncio.CancelledError:
|
||||
@@ -811,6 +831,183 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
except Exception as e: # pragma: no cover - defensive logging
|
||||
logger.warning("[%s] Slash command sync failed: %s", self.name, e, exc_info=True)
|
||||
|
||||
def _get_discord_command_sync_policy(self) -> str:
|
||||
raw = str(os.getenv("DISCORD_COMMAND_SYNC_POLICY", "safe") or "").strip().lower()
|
||||
if raw in _DISCORD_COMMAND_SYNC_POLICIES:
|
||||
return raw
|
||||
if raw:
|
||||
logger.warning(
|
||||
"[%s] Invalid DISCORD_COMMAND_SYNC_POLICY=%r; falling back to 'safe'",
|
||||
self.name,
|
||||
raw,
|
||||
)
|
||||
return "safe"
|
||||
|
||||
def _canonicalize_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Reduce command payloads to the semantic fields Hermes manages."""
|
||||
contexts = payload.get("contexts")
|
||||
integration_types = payload.get("integration_types")
|
||||
return {
|
||||
"type": int(payload.get("type", 1) or 1),
|
||||
"name": str(payload.get("name", "") or ""),
|
||||
"description": str(payload.get("description", "") or ""),
|
||||
"default_member_permissions": self._normalize_permissions(
|
||||
payload.get("default_member_permissions")
|
||||
),
|
||||
"dm_permission": bool(payload.get("dm_permission", True)),
|
||||
"nsfw": bool(payload.get("nsfw", False)),
|
||||
"contexts": sorted(int(c) for c in contexts) if contexts else None,
|
||||
"integration_types": (
|
||||
sorted(int(i) for i in integration_types) if integration_types else None
|
||||
),
|
||||
"options": [
|
||||
self._canonicalize_app_command_option(item)
|
||||
for item in payload.get("options", []) or []
|
||||
if isinstance(item, dict)
|
||||
],
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _normalize_permissions(value: Any) -> Optional[str]:
|
||||
"""Discord emits default_member_permissions as str server-side but discord.py
|
||||
sets it as int locally. Normalize to str-or-None so the comparison is stable."""
|
||||
if value is None:
|
||||
return None
|
||||
return str(value)
|
||||
|
||||
def _existing_command_to_payload(self, command: Any) -> Dict[str, Any]:
|
||||
"""Build a canonical-ready dict from an AppCommand.
|
||||
|
||||
discord.py's AppCommand.to_dict() does NOT include nsfw,
|
||||
dm_permission, or default_member_permissions (they live only on the
|
||||
attributes). Pull them from the attributes so the canonicalizer sees
|
||||
the real server-side values instead of defaults — otherwise any
|
||||
command using non-default permissions would diff on every startup.
|
||||
"""
|
||||
payload = dict(command.to_dict())
|
||||
nsfw = getattr(command, "nsfw", None)
|
||||
if nsfw is not None:
|
||||
payload["nsfw"] = bool(nsfw)
|
||||
guild_only = getattr(command, "guild_only", None)
|
||||
if guild_only is not None:
|
||||
payload["dm_permission"] = not bool(guild_only)
|
||||
default_permissions = getattr(command, "default_member_permissions", None)
|
||||
if default_permissions is not None:
|
||||
payload["default_member_permissions"] = getattr(
|
||||
default_permissions, "value", default_permissions
|
||||
)
|
||||
return payload
|
||||
|
||||
def _canonicalize_app_command_option(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return {
|
||||
"type": int(payload.get("type", 0) or 0),
|
||||
"name": str(payload.get("name", "") or ""),
|
||||
"description": str(payload.get("description", "") or ""),
|
||||
"required": bool(payload.get("required", False)),
|
||||
"autocomplete": bool(payload.get("autocomplete", False)),
|
||||
"choices": [
|
||||
{
|
||||
"name": str(choice.get("name", "") or ""),
|
||||
"value": choice.get("value"),
|
||||
}
|
||||
for choice in payload.get("choices", []) or []
|
||||
if isinstance(choice, dict)
|
||||
],
|
||||
"channel_types": list(payload.get("channel_types", []) or []),
|
||||
"min_value": payload.get("min_value"),
|
||||
"max_value": payload.get("max_value"),
|
||||
"min_length": payload.get("min_length"),
|
||||
"max_length": payload.get("max_length"),
|
||||
"options": [
|
||||
self._canonicalize_app_command_option(item)
|
||||
for item in payload.get("options", []) or []
|
||||
if isinstance(item, dict)
|
||||
],
|
||||
}
|
||||
|
||||
def _patchable_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Fields supported by discord.py's edit_global_command route."""
|
||||
canonical = self._canonicalize_app_command_payload(payload)
|
||||
return {
|
||||
"name": canonical["name"],
|
||||
"description": canonical["description"],
|
||||
"options": canonical["options"],
|
||||
}
|
||||
|
||||
async def _safe_sync_slash_commands(self) -> Dict[str, int]:
|
||||
"""Diff existing global commands and only mutate the commands that changed."""
|
||||
if not self._client:
|
||||
return {
|
||||
"total": 0,
|
||||
"unchanged": 0,
|
||||
"updated": 0,
|
||||
"recreated": 0,
|
||||
"created": 0,
|
||||
"deleted": 0,
|
||||
}
|
||||
|
||||
tree = self._client.tree
|
||||
app_id = getattr(self._client, "application_id", None) or getattr(getattr(self._client, "user", None), "id", None)
|
||||
if not app_id:
|
||||
raise RuntimeError("Discord application ID is unavailable for slash command sync")
|
||||
|
||||
desired_payloads = [command.to_dict(tree) for command in tree.get_commands()]
|
||||
desired_by_key = {
|
||||
(int(payload.get("type", 1) or 1), str(payload.get("name", "") or "").lower()): payload
|
||||
for payload in desired_payloads
|
||||
}
|
||||
existing_commands = await tree.fetch_commands()
|
||||
existing_by_key = {
|
||||
(
|
||||
int(getattr(getattr(command, "type", None), "value", getattr(command, "type", 1)) or 1),
|
||||
str(command.name or "").lower(),
|
||||
): command
|
||||
for command in existing_commands
|
||||
}
|
||||
|
||||
unchanged = 0
|
||||
updated = 0
|
||||
recreated = 0
|
||||
created = 0
|
||||
deleted = 0
|
||||
http = self._client.http
|
||||
|
||||
for key, desired in desired_by_key.items():
|
||||
current = existing_by_key.pop(key, None)
|
||||
if current is None:
|
||||
await http.upsert_global_command(app_id, desired)
|
||||
created += 1
|
||||
continue
|
||||
|
||||
current_existing_payload = self._existing_command_to_payload(current)
|
||||
current_payload = self._canonicalize_app_command_payload(current_existing_payload)
|
||||
desired_payload = self._canonicalize_app_command_payload(desired)
|
||||
if current_payload == desired_payload:
|
||||
unchanged += 1
|
||||
continue
|
||||
|
||||
if self._patchable_app_command_payload(current_existing_payload) == self._patchable_app_command_payload(desired):
|
||||
await http.delete_global_command(app_id, current.id)
|
||||
await http.upsert_global_command(app_id, desired)
|
||||
recreated += 1
|
||||
continue
|
||||
|
||||
await http.edit_global_command(app_id, current.id, desired)
|
||||
updated += 1
|
||||
|
||||
for current in existing_by_key.values():
|
||||
await http.delete_global_command(app_id, current.id)
|
||||
deleted += 1
|
||||
|
||||
return {
|
||||
"total": len(desired_payloads),
|
||||
"unchanged": unchanged,
|
||||
"updated": updated,
|
||||
"recreated": recreated,
|
||||
"created": created,
|
||||
"deleted": deleted,
|
||||
}
|
||||
|
||||
async def _add_reaction(self, message: Any, emoji: str) -> bool:
|
||||
"""Add an emoji reaction to a Discord message."""
|
||||
if not message or not hasattr(message, "add_reaction"):
|
||||
@@ -3059,6 +3256,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
|
||||
thread = await self._auto_create_thread(message)
|
||||
if thread:
|
||||
parent_channel_id = str(message.channel.id)
|
||||
is_thread = True
|
||||
thread_id = str(thread.id)
|
||||
auto_threaded_channel = thread
|
||||
@@ -3118,6 +3316,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
thread_id=thread_id,
|
||||
chat_topic=chat_topic,
|
||||
is_bot=getattr(message.author, "bot", False),
|
||||
guild_id=str(message.guild.id) if message.guild else None,
|
||||
parent_chat_id=parent_channel_id,
|
||||
message_id=str(message.id),
|
||||
)
|
||||
|
||||
# Build media URLs -- download image attachments to local cache so the
|
||||
|
||||
+78
-36
@@ -1551,27 +1551,23 @@ class GatewayRunner:
|
||||
)
|
||||
return True
|
||||
|
||||
# --- Normal busy case (agent actively running a task) ---
|
||||
# The user sent a message while the agent is working. Interrupt the
|
||||
# agent immediately so it stops the current tool-calling loop and
|
||||
# processes the new message. The pending message is stored in the
|
||||
# adapter so the base adapter picks it up once the interrupted run
|
||||
# returns. A brief ack tells the user what's happening (debounced
|
||||
# to avoid spam when they fire multiple messages quickly).
|
||||
|
||||
# Normal busy case (agent actively running a task)
|
||||
adapter = self.adapters.get(event.source.platform)
|
||||
if not adapter:
|
||||
return False # let default path handle it
|
||||
|
||||
# Store the message so it's processed as the next turn after the
|
||||
# interrupt causes the current run to exit.
|
||||
# current run finishes (or is interrupted).
|
||||
from gateway.platforms.base import merge_pending_message_event
|
||||
merge_pending_message_event(adapter._pending_messages, session_key, event)
|
||||
|
||||
# Interrupt the running agent — this aborts in-flight tool calls and
|
||||
# causes the agent loop to exit at the next check point.
|
||||
is_queue_mode = self._busy_input_mode == "queue"
|
||||
|
||||
# If not in queue mode, interrupt the running agent immediately.
|
||||
# This aborts in-flight tool calls and causes the agent loop to exit
|
||||
# at the next check point.
|
||||
running_agent = self._running_agents.get(session_key)
|
||||
if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
|
||||
if not is_queue_mode and running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
|
||||
try:
|
||||
running_agent.interrupt(event.text)
|
||||
except Exception:
|
||||
@@ -1583,7 +1579,7 @@ class GatewayRunner:
|
||||
now = time.time()
|
||||
last_ack = self._busy_ack_ts.get(session_key, 0)
|
||||
if now - last_ack < _BUSY_ACK_COOLDOWN:
|
||||
return True # interrupt sent, ack already delivered recently
|
||||
return True # interrupt sent (if not queue), ack already delivered recently
|
||||
|
||||
self._busy_ack_ts[session_key] = now
|
||||
|
||||
@@ -1608,10 +1604,16 @@ class GatewayRunner:
|
||||
pass
|
||||
|
||||
status_detail = f" ({', '.join(status_parts)})" if status_parts else ""
|
||||
message = (
|
||||
f"⚡ Interrupting current task{status_detail}. "
|
||||
f"I'll respond to your message shortly."
|
||||
)
|
||||
if is_queue_mode:
|
||||
message = (
|
||||
f"⏳ Queued for the next turn{status_detail}. "
|
||||
f"I'll respond once the current task finishes."
|
||||
)
|
||||
else:
|
||||
message = (
|
||||
f"⚡ Interrupting current task{status_detail}. "
|
||||
f"I'll respond to your message shortly."
|
||||
)
|
||||
|
||||
thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
|
||||
try:
|
||||
@@ -2560,6 +2562,40 @@ class GatewayRunner:
|
||||
return
|
||||
|
||||
async def _stop_impl() -> None:
|
||||
def _kill_tool_subprocesses(phase: str) -> None:
|
||||
"""Kill tool subprocesses + tear down terminal envs + browsers.
|
||||
|
||||
Called twice in the shutdown path: once eagerly after a
|
||||
drain timeout forces agent interrupt (so we reclaim bash/
|
||||
sleep children before systemd TimeoutStopSec escalates to
|
||||
SIGKILL on the cgroup — #8202), and once as a final
|
||||
catch-all at the end of _stop_impl() for the graceful
|
||||
path or anything respawned mid-teardown.
|
||||
|
||||
All steps are best-effort; exceptions are swallowed so
|
||||
one subsystem's failure doesn't block the rest.
|
||||
"""
|
||||
try:
|
||||
from tools.process_registry import process_registry
|
||||
_killed = process_registry.kill_all()
|
||||
if _killed:
|
||||
logger.info(
|
||||
"Shutdown (%s): killed %d tool subprocess(es)",
|
||||
phase, _killed,
|
||||
)
|
||||
except Exception as _e:
|
||||
logger.debug("process_registry.kill_all (%s) error: %s", phase, _e)
|
||||
try:
|
||||
from tools.terminal_tool import cleanup_all_environments
|
||||
cleanup_all_environments()
|
||||
except Exception as _e:
|
||||
logger.debug("cleanup_all_environments (%s) error: %s", phase, _e)
|
||||
try:
|
||||
from tools.browser_tool import cleanup_all_browsers
|
||||
cleanup_all_browsers()
|
||||
except Exception as _e:
|
||||
logger.debug("cleanup_all_browsers (%s) error: %s", phase, _e)
|
||||
|
||||
logger.info(
|
||||
"Stopping gateway%s...",
|
||||
" for restart" if self._restart_requested else "",
|
||||
@@ -2621,6 +2657,16 @@ class GatewayRunner:
|
||||
self._update_runtime_status("draining")
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
# Kill lingering tool subprocesses NOW, before we spend more
|
||||
# budget on adapter disconnect / session DB close. Under
|
||||
# systemd (TimeoutStopSec bounded by drain_timeout+headroom),
|
||||
# deferring this to the end of stop() risks systemd escalating
|
||||
# to SIGKILL on the cgroup first — at which point bash/sleep
|
||||
# children left behind by an interrupted terminal tool get
|
||||
# killed by systemd instead of us (issue #8202). The final
|
||||
# catch-all cleanup below still runs for the graceful path.
|
||||
_kill_tool_subprocesses("post-interrupt")
|
||||
|
||||
if self._restart_requested and self._restart_detached:
|
||||
try:
|
||||
await self._launch_detached_restart_command()
|
||||
@@ -2656,22 +2702,13 @@ class GatewayRunner:
|
||||
self._shutdown_event.set()
|
||||
|
||||
# Global cleanup: kill any remaining tool subprocesses not tied
|
||||
# to a specific agent (catch-all for zombie prevention).
|
||||
try:
|
||||
from tools.process_registry import process_registry
|
||||
process_registry.kill_all()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
from tools.terminal_tool import cleanup_all_environments
|
||||
cleanup_all_environments()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
from tools.browser_tool import cleanup_all_browsers
|
||||
cleanup_all_browsers()
|
||||
except Exception:
|
||||
pass
|
||||
# to a specific agent (catch-all for zombie prevention). On the
|
||||
# drain-timeout path we already did this earlier after agent
|
||||
# interrupt — this second call catches (a) the graceful path
|
||||
# where drain succeeded without interrupt, and (b) anything
|
||||
# that got respawned between the earlier call and adapter
|
||||
# disconnect (defense in depth; safe to call repeatedly).
|
||||
_kill_tool_subprocesses("final-cleanup")
|
||||
|
||||
# Close SQLite session DBs so the WAL write lock is released.
|
||||
# Without this, --replace and similar restart flows leave the
|
||||
@@ -10338,9 +10375,9 @@ class GatewayRunner:
|
||||
# Periodic "still working" notifications for long-running tasks.
|
||||
# Fires every N seconds so the user knows the agent hasn't died.
|
||||
# Config: agent.gateway_notify_interval in config.yaml, or
|
||||
# HERMES_AGENT_NOTIFY_INTERVAL env var. Default 600s (10 min).
|
||||
# HERMES_AGENT_NOTIFY_INTERVAL env var. Default 180s (3 min).
|
||||
# 0 = disable notifications.
|
||||
_NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 600))
|
||||
_NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 180))
|
||||
_NOTIFY_INTERVAL = _NOTIFY_INTERVAL_RAW if _NOTIFY_INTERVAL_RAW > 0 else None
|
||||
_notify_start = time.time()
|
||||
|
||||
@@ -10919,6 +10956,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
||||
from gateway.status import (
|
||||
acquire_gateway_runtime_lock,
|
||||
get_running_pid,
|
||||
get_process_start_time,
|
||||
release_gateway_runtime_lock,
|
||||
remove_pid_file,
|
||||
terminate_pid,
|
||||
@@ -10926,6 +10964,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
||||
existing_pid = get_running_pid()
|
||||
if existing_pid is not None and existing_pid != os.getpid():
|
||||
if replace:
|
||||
existing_start_time = get_process_start_time(existing_pid)
|
||||
logger.info(
|
||||
"Replacing existing gateway instance (PID %d) with --replace.",
|
||||
existing_pid,
|
||||
@@ -10994,7 +11033,10 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
||||
# leaving stale lock files that block the new gateway from starting.
|
||||
try:
|
||||
from gateway.status import release_all_scoped_locks
|
||||
_released = release_all_scoped_locks()
|
||||
_released = release_all_scoped_locks(
|
||||
owner_pid=existing_pid,
|
||||
owner_start_time=existing_start_time,
|
||||
)
|
||||
if _released:
|
||||
logger.info("Released %d stale scoped lock(s) from old gateway.", _released)
|
||||
except Exception:
|
||||
|
||||
+41
-9
@@ -83,6 +83,9 @@ class SessionSource:
|
||||
user_id_alt: Optional[str] = None # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
|
||||
chat_id_alt: Optional[str] = None # Signal group internal ID
|
||||
is_bot: bool = False # True when the message author is a bot/webhook (Discord)
|
||||
guild_id: Optional[str] = None # Discord guild / Slack workspace / Matrix server scope
|
||||
parent_chat_id: Optional[str] = None # Parent channel when chat_id refers to a thread
|
||||
message_id: Optional[str] = None # ID of the triggering message (for pin/reply/react)
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
@@ -120,8 +123,14 @@ class SessionSource:
|
||||
d["user_id_alt"] = self.user_id_alt
|
||||
if self.chat_id_alt:
|
||||
d["chat_id_alt"] = self.chat_id_alt
|
||||
if self.guild_id:
|
||||
d["guild_id"] = self.guild_id
|
||||
if self.parent_chat_id:
|
||||
d["parent_chat_id"] = self.parent_chat_id
|
||||
if self.message_id:
|
||||
d["message_id"] = self.message_id
|
||||
return d
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
|
||||
return cls(
|
||||
@@ -135,6 +144,9 @@ class SessionSource:
|
||||
chat_topic=data.get("chat_topic"),
|
||||
user_id_alt=data.get("user_id_alt"),
|
||||
chat_id_alt=data.get("chat_id_alt"),
|
||||
guild_id=data.get("guild_id"),
|
||||
parent_chat_id=data.get("parent_chat_id"),
|
||||
message_id=data.get("message_id"),
|
||||
)
|
||||
|
||||
|
||||
@@ -273,14 +285,34 @@ def build_session_context_prompt(
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
elif context.source.platform == Platform.DISCORD:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"**Platform notes:** You are running inside Discord. "
|
||||
"You do NOT have access to Discord-specific APIs — you cannot search "
|
||||
"channel history, pin messages, manage roles, or list server members. "
|
||||
"Do not promise to perform these actions. If the user asks, explain "
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
# The discord tool self-gates on DISCORD_BOT_TOKEN at registry
|
||||
# check time. Match that condition so the prompt stays honest:
|
||||
# with a token the agent has fetch_messages/search_members/
|
||||
# create_thread (and optionally discord_admin) and should know
|
||||
# the IDs it can call them with; without one it really is
|
||||
# limited to reading/replying via the gateway.
|
||||
if (os.environ.get("DISCORD_BOT_TOKEN") or "").strip():
|
||||
src = context.source
|
||||
id_lines = ["", "**Discord IDs (for the `discord` / `discord_admin` tools):**"]
|
||||
if src.guild_id:
|
||||
id_lines.append(f" - Guild: `{src.guild_id}`")
|
||||
if src.thread_id and src.parent_chat_id:
|
||||
id_lines.append(f" - Parent channel: `{src.parent_chat_id}`")
|
||||
id_lines.append(f" - Thread: `{src.thread_id}` (use as `channel_id` for fetch_messages etc.)")
|
||||
else:
|
||||
id_lines.append(f" - Channel: `{src.chat_id}`")
|
||||
if src.message_id:
|
||||
id_lines.append(f" - Triggering message: `{src.message_id}`")
|
||||
lines.extend(id_lines)
|
||||
else:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"**Platform notes:** You are running inside Discord. "
|
||||
"You do NOT have access to Discord-specific APIs — you cannot search "
|
||||
"channel history, pin messages, manage roles, or list server members. "
|
||||
"Do not promise to perform these actions. If the user asks, explain "
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
|
||||
# Connected platforms
|
||||
platforms_list = ["local (files on this machine)"]
|
||||
|
||||
+34
-3
@@ -113,6 +113,11 @@ def _get_process_start_time(pid: int) -> Optional[int]:
|
||||
return None
|
||||
|
||||
|
||||
def get_process_start_time(pid: int) -> Optional[int]:
|
||||
"""Public wrapper for retrieving a process start time when available."""
|
||||
return _get_process_start_time(pid)
|
||||
|
||||
|
||||
def _read_process_cmdline(pid: int) -> Optional[str]:
|
||||
"""Return the process command line as a space-separated string."""
|
||||
cmdline_path = Path(f"/proc/{pid}/cmdline")
|
||||
@@ -562,17 +567,43 @@ def release_scoped_lock(scope: str, identity: str) -> None:
|
||||
pass
|
||||
|
||||
|
||||
def release_all_scoped_locks() -> int:
|
||||
"""Remove all scoped lock files in the lock directory.
|
||||
def release_all_scoped_locks(
|
||||
*,
|
||||
owner_pid: Optional[int] = None,
|
||||
owner_start_time: Optional[int] = None,
|
||||
) -> int:
|
||||
"""Remove scoped lock files in the lock directory.
|
||||
|
||||
Called during --replace to clean up stale locks left by stopped/killed
|
||||
gateway processes that did not release their locks gracefully.
|
||||
gateway processes that did not release their locks gracefully. When an
|
||||
``owner_pid`` is provided, only lock records belonging to that gateway
|
||||
process are removed. ``owner_start_time`` further narrows the match to
|
||||
protect against PID reuse.
|
||||
|
||||
When no owner is provided, preserves the legacy behavior and removes every
|
||||
scoped lock file in the directory.
|
||||
|
||||
Returns the number of lock files removed.
|
||||
"""
|
||||
lock_dir = _get_lock_dir()
|
||||
removed = 0
|
||||
if lock_dir.exists():
|
||||
for lock_file in lock_dir.glob("*.lock"):
|
||||
if owner_pid is not None:
|
||||
record = _read_json_file(lock_file)
|
||||
if not isinstance(record, dict):
|
||||
continue
|
||||
try:
|
||||
record_pid = int(record.get("pid"))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if record_pid != owner_pid:
|
||||
continue
|
||||
if (
|
||||
owner_start_time is not None
|
||||
and record.get("start_time") != owner_start_time
|
||||
):
|
||||
continue
|
||||
try:
|
||||
lock_file.unlink(missing_ok=True)
|
||||
removed += 1
|
||||
|
||||
@@ -11,5 +11,5 @@ Provides subcommands for:
|
||||
- hermes cron - Manage cron jobs
|
||||
"""
|
||||
|
||||
__version__ = "0.10.0"
|
||||
__release_date__ = "2026.4.16"
|
||||
__version__ = "0.11.0"
|
||||
__release_date__ = "2026.4.23"
|
||||
|
||||
+19
-1
@@ -619,7 +619,25 @@ def _oauth_trace(event: str, *, sequence_id: Optional[str] = None, **fields: Any
|
||||
# =============================================================================
|
||||
|
||||
def _auth_file_path() -> Path:
|
||||
return get_hermes_home() / "auth.json"
|
||||
path = get_hermes_home() / "auth.json"
|
||||
# Seat belt: if pytest is running and HERMES_HOME resolves to the real
|
||||
# user's auth store, refuse rather than silently corrupt it. This catches
|
||||
# tests that forgot to monkeypatch HERMES_HOME, tests invoked without the
|
||||
# hermetic conftest, or sandbox escapes via threads/subprocesses. In
|
||||
# production (no PYTEST_CURRENT_TEST) this is a single dict lookup.
|
||||
if os.environ.get("PYTEST_CURRENT_TEST"):
|
||||
real_home_auth = (Path.home() / ".hermes" / "auth.json").resolve(strict=False)
|
||||
try:
|
||||
resolved = path.resolve(strict=False)
|
||||
except Exception:
|
||||
resolved = path
|
||||
if resolved == real_home_auth:
|
||||
raise RuntimeError(
|
||||
f"Refusing to touch real user auth store during test run: {path}. "
|
||||
"Set HERMES_HOME to a tmp_path in your test fixture, or run "
|
||||
"via scripts/run_tests.sh for hermetic CI-parity env."
|
||||
)
|
||||
return path
|
||||
|
||||
|
||||
def _auth_lock_path() -> Path:
|
||||
|
||||
+54
-1
@@ -238,6 +238,52 @@ def get_git_banner_state(repo_dir: Optional[Path] = None) -> Optional[dict]:
|
||||
return {"upstream": upstream, "local": local, "ahead": max(ahead, 0)}
|
||||
|
||||
|
||||
_RELEASE_URL_BASE = "https://github.com/NousResearch/hermes-agent/releases/tag"
|
||||
_latest_release_cache: Optional[tuple] = None # (tag, url) once resolved
|
||||
|
||||
|
||||
def get_latest_release_tag(repo_dir: Optional[Path] = None) -> Optional[tuple]:
|
||||
"""Return ``(tag, release_url)`` for the latest git tag, or None.
|
||||
|
||||
Local-only — runs ``git describe --tags --abbrev=0`` against the
|
||||
Hermes checkout. Cached per-process. Release URL always points at the
|
||||
canonical NousResearch/hermes-agent repo (forks don't get a link).
|
||||
"""
|
||||
global _latest_release_cache
|
||||
if _latest_release_cache is not None:
|
||||
return _latest_release_cache or None
|
||||
|
||||
repo_dir = repo_dir or _resolve_repo_dir()
|
||||
if repo_dir is None:
|
||||
_latest_release_cache = () # falsy sentinel — skip future lookups
|
||||
return None
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "describe", "--tags", "--abbrev=0"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=3,
|
||||
cwd=str(repo_dir),
|
||||
)
|
||||
except Exception:
|
||||
_latest_release_cache = ()
|
||||
return None
|
||||
|
||||
if result.returncode != 0:
|
||||
_latest_release_cache = ()
|
||||
return None
|
||||
|
||||
tag = (result.stdout or "").strip()
|
||||
if not tag:
|
||||
_latest_release_cache = ()
|
||||
return None
|
||||
|
||||
url = f"{_RELEASE_URL_BASE}/{tag}"
|
||||
_latest_release_cache = (tag, url)
|
||||
return _latest_release_cache
|
||||
|
||||
|
||||
def format_banner_version_label() -> str:
|
||||
"""Return the version label shown in the startup banner title."""
|
||||
base = f"Hermes Agent v{VERSION} ({RELEASE_DATE})"
|
||||
@@ -519,9 +565,16 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
|
||||
agent_name = _skin_branding("agent_name", "Hermes Agent")
|
||||
title_color = _skin_color("banner_title", "#FFD700")
|
||||
border_color = _skin_color("banner_border", "#CD7F32")
|
||||
version_label = format_banner_version_label()
|
||||
release_info = get_latest_release_tag()
|
||||
if release_info:
|
||||
_tag, _url = release_info
|
||||
title_markup = f"[bold {title_color}][link={_url}]{version_label}[/link][/]"
|
||||
else:
|
||||
title_markup = f"[bold {title_color}]{version_label}[/]"
|
||||
outer_panel = Panel(
|
||||
layout_table,
|
||||
title=f"[bold {title_color}]{format_banner_version_label()}[/]",
|
||||
title=title_markup,
|
||||
border_style=border_color,
|
||||
padding=(0, 2),
|
||||
)
|
||||
|
||||
@@ -12,6 +12,7 @@ import os
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_CODEX_MODELS: List[str] = [
|
||||
"gpt-5.5",
|
||||
"gpt-5.4-mini",
|
||||
"gpt-5.4",
|
||||
"gpt-5.3-codex",
|
||||
@@ -21,6 +22,7 @@ DEFAULT_CODEX_MODELS: List[str] = [
|
||||
]
|
||||
|
||||
_FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [
|
||||
("gpt-5.5", ("gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex")),
|
||||
("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")),
|
||||
("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")),
|
||||
("gpt-5.3-codex", ("gpt-5.2-codex",)),
|
||||
|
||||
+47
-4
@@ -361,6 +361,15 @@ DEFAULT_CONFIG = {
|
||||
# to finish, then interrupts any remaining runs after the timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
"restart_drain_timeout": 60,
|
||||
# Max app-level retry attempts for API errors (connection drops,
|
||||
# provider timeouts, 5xx, etc.) before the agent surfaces the
|
||||
# failure. The OpenAI SDK already does its own low-level retries
|
||||
# (max_retries=2 default) for transient network errors; this is
|
||||
# the Hermes-level retry loop that wraps the whole call. Lower
|
||||
# this to 1 if you use fallback providers and want fast failover
|
||||
# on flaky primaries; raise it if you prefer to tolerate longer
|
||||
# provider hiccups on a single provider.
|
||||
"api_max_retries": 3,
|
||||
"service_tier": "",
|
||||
# Tool-use enforcement: injects system prompt guidance that tells the
|
||||
# model to actually call tools instead of describing intended actions.
|
||||
@@ -375,7 +384,11 @@ DEFAULT_CONFIG = {
|
||||
# Periodic "still working" notification interval (seconds).
|
||||
# Sends a status message every N seconds so the user knows the
|
||||
# agent hasn't died during long tasks. 0 = disable notifications.
|
||||
"gateway_notify_interval": 600,
|
||||
# Lower values mean faster feedback on slow tasks but more chat
|
||||
# noise; 180s is a compromise that catches spinning weak-model runs
|
||||
# (60+ tool iterations with tiny output) before users assume the
|
||||
# bot is dead and /restart.
|
||||
"gateway_notify_interval": 180,
|
||||
},
|
||||
|
||||
"terminal": {
|
||||
@@ -453,6 +466,12 @@ DEFAULT_CONFIG = {
|
||||
"record_sessions": False, # Auto-record browser sessions as WebM videos
|
||||
"allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
|
||||
"cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
|
||||
# CDP supervisor — dialog + frame detection via a persistent WebSocket.
|
||||
# Active only when a CDP-capable backend is attached (Browserbase or
|
||||
# local Chrome via /browser connect). See
|
||||
# website/docs/developer-guide/browser-supervisor.md.
|
||||
"dialog_policy": "must_respond", # must_respond | auto_dismiss | auto_accept
|
||||
"dialog_timeout_s": 300, # Safety auto-dismiss after N seconds under must_respond
|
||||
"camofox": {
|
||||
# When true, Hermes sends a stable profile-scoped userId to Camofox
|
||||
# so the server maps it to a persistent Firefox profile automatically.
|
||||
@@ -473,7 +492,27 @@ DEFAULT_CONFIG = {
|
||||
# exceed this are rejected with guidance to use offset+limit.
|
||||
# 100K chars ≈ 25–35K tokens across typical tokenisers.
|
||||
"file_read_max_chars": 100_000,
|
||||
|
||||
|
||||
# Tool-output truncation thresholds. When terminal output or a
|
||||
# single read_file page exceeds these limits, Hermes truncates the
|
||||
# payload sent to the model (keeping head + tail for terminal,
|
||||
# enforcing pagination for read_file). Tuning these trades context
|
||||
# footprint against how much raw output the model can see in one
|
||||
# shot. Ported from anomalyco/opencode PR #23770.
|
||||
#
|
||||
# - max_bytes: terminal_tool output cap, in chars
|
||||
# (default 50_000 ≈ 12-15K tokens).
|
||||
# - max_lines: read_file pagination cap — the maximum `limit`
|
||||
# a single read_file call can request before
|
||||
# being clamped (default 2000).
|
||||
# - max_line_length: per-line cap applied when read_file emits a
|
||||
# line-numbered view (default 2000 chars).
|
||||
"tool_output": {
|
||||
"max_bytes": 50_000,
|
||||
"max_lines": 2000,
|
||||
"max_line_length": 2000,
|
||||
},
|
||||
|
||||
"compression": {
|
||||
"enabled": True,
|
||||
"threshold": 0.50, # compress when context usage exceeds this ratio
|
||||
@@ -726,6 +765,10 @@ DEFAULT_CONFIG = {
|
||||
"inherit_mcp_toolsets": True,
|
||||
"max_iterations": 50, # per-subagent iteration cap (each subagent gets its own budget,
|
||||
# independent of the parent's max_iterations)
|
||||
"child_timeout_seconds": 600, # wall-clock timeout for each child agent (floor 30s,
|
||||
# no ceiling). High-reasoning models on large tasks
|
||||
# (e.g. gpt-5.5 xhigh, opus-4.6) need generous budgets;
|
||||
# raise if children time out before producing output.
|
||||
"reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium",
|
||||
# "low", "minimal", "none" (empty = inherit parent's level)
|
||||
"max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling
|
||||
@@ -790,7 +833,7 @@ DEFAULT_CONFIG = {
|
||||
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
|
||||
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing
|
||||
"channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads)
|
||||
# discord_server tool: restrict which actions the agent may call.
|
||||
# discord / discord_admin tools: restrict which actions the agent may call.
|
||||
# Default (empty) = all actions allowed (subject to bot privileged intents).
|
||||
# Accepts comma-separated string ("list_guilds,list_channels,fetch_messages")
|
||||
# or YAML list. Unknown names are dropped with a warning at load time.
|
||||
@@ -1291,7 +1334,7 @@ OPTIONAL_ENV_VARS = {
|
||||
"advanced": True,
|
||||
},
|
||||
"XIAOMI_API_KEY": {
|
||||
"description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
|
||||
"description": "Xiaomi MiMo API key for MiMo models (mimo-v2.5-pro, mimo-v2.5, mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
|
||||
"prompt": "Xiaomi MiMo API Key",
|
||||
"url": "https://platform.xiaomimimo.com",
|
||||
"password": True,
|
||||
|
||||
+62
-1
@@ -175,6 +175,60 @@ def _request_gateway_self_restart(pid: int) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool:
|
||||
"""Send SIGUSR1 to a gateway PID and wait for it to exit gracefully.
|
||||
|
||||
SIGUSR1 is wired in gateway/run.py to ``request_restart(via_service=True)``
|
||||
which drains in-flight agent runs (up to ``agent.restart_drain_timeout``
|
||||
seconds), then exits with code 75. Both systemd (``Restart=on-failure``
|
||||
+ ``RestartForceExitStatus=75``) and launchd (``KeepAlive.SuccessfulExit
|
||||
= false``) relaunch the process after the graceful exit.
|
||||
|
||||
This is the drain-aware alternative to ``systemctl restart`` / ``SIGTERM``,
|
||||
which SIGKILL in-flight agents after a short timeout.
|
||||
|
||||
Args:
|
||||
pid: Gateway process PID (systemd MainPID, launchd PID, or bare
|
||||
process PID).
|
||||
drain_timeout: Seconds to wait for the process to exit after sending
|
||||
SIGUSR1. Should be slightly larger than the gateway's
|
||||
``agent.restart_drain_timeout`` to allow the drain loop to
|
||||
finish cleanly.
|
||||
|
||||
Returns:
|
||||
True if the PID was signalled and exited within the timeout.
|
||||
False if SIGUSR1 couldn't be sent or the process didn't exit in
|
||||
time (caller should fall back to a harder restart path).
|
||||
"""
|
||||
if not hasattr(signal, "SIGUSR1"):
|
||||
return False
|
||||
if pid <= 0:
|
||||
return False
|
||||
try:
|
||||
os.kill(pid, signal.SIGUSR1)
|
||||
except ProcessLookupError:
|
||||
# Already gone — nothing to drain.
|
||||
return True
|
||||
except (PermissionError, OSError):
|
||||
return False
|
||||
|
||||
import time as _time
|
||||
|
||||
deadline = _time.monotonic() + max(drain_timeout, 1.0)
|
||||
while _time.monotonic() < deadline:
|
||||
try:
|
||||
os.kill(pid, 0) # signal 0 — probe liveness
|
||||
except ProcessLookupError:
|
||||
return True
|
||||
except PermissionError:
|
||||
# Process still exists but we can't signal it. Treat as alive
|
||||
# so the caller falls back.
|
||||
pass
|
||||
_time.sleep(0.5)
|
||||
# Drain didn't finish in time.
|
||||
return False
|
||||
|
||||
|
||||
def _append_unique_pid(pids: list[int], pid: int | None, exclude_pids: set[int]) -> None:
|
||||
if pid is None or pid <= 0:
|
||||
return
|
||||
@@ -1469,7 +1523,14 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
|
||||
path_entries.append(resolved_node_dir)
|
||||
|
||||
common_bin_paths = ["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"]
|
||||
restart_timeout = max(60, int(_get_restart_drain_timeout() or 0))
|
||||
# systemd's TimeoutStopSec must exceed the gateway's drain_timeout so
|
||||
# there's budget left for post-interrupt cleanup (tool subprocess kill,
|
||||
# adapter disconnect, session DB close) before systemd escalates to
|
||||
# SIGKILL on the cgroup — otherwise bash/sleep tool-call children left
|
||||
# by a force-interrupted agent get reaped by systemd instead of us
|
||||
# (#8202). 30s of headroom covers the worst case we've observed.
|
||||
_drain_timeout = int(_get_restart_drain_timeout() or 0)
|
||||
restart_timeout = max(60, _drain_timeout) + 30
|
||||
|
||||
if system:
|
||||
username, group_name, home_dir = _system_service_identity(run_as_user)
|
||||
|
||||
+144
-47
@@ -3984,7 +3984,18 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||
pass
|
||||
|
||||
if mdev_models:
|
||||
model_list = mdev_models
|
||||
# Merge models.dev with curated list so newly added models
|
||||
# (not yet in models.dev) still appear in the picker.
|
||||
if curated:
|
||||
seen = {m.lower() for m in mdev_models}
|
||||
merged = list(mdev_models)
|
||||
for m in curated:
|
||||
if m.lower() not in seen:
|
||||
merged.append(m)
|
||||
seen.add(m.lower())
|
||||
model_list = merged
|
||||
else:
|
||||
model_list = mdev_models
|
||||
print(f" Found {len(model_list)} model(s) from models.dev registry")
|
||||
elif curated and len(curated) >= 8:
|
||||
# Curated list is substantial — use it directly, skip live probe
|
||||
@@ -5853,12 +5864,15 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
# Write exit code *before* the gateway restart attempt.
|
||||
# When running as ``hermes update --gateway`` (spawned by the gateway's
|
||||
# /update command), this process lives inside the gateway's systemd
|
||||
# cgroup. ``systemctl restart hermes-gateway`` kills everything in the
|
||||
# cgroup (KillMode=mixed → SIGKILL to remaining processes), including
|
||||
# us and the wrapping bash shell. The shell never reaches its
|
||||
# ``printf $status > .update_exit_code`` epilogue, so the exit-code
|
||||
# marker file is never created. The new gateway's update watcher then
|
||||
# polls for 30 minutes and sends a spurious timeout message.
|
||||
# cgroup. A graceful SIGUSR1 restart keeps the drain loop alive long
|
||||
# enough for the exit-code marker to be written below, but the
|
||||
# fallback ``systemctl restart`` path (see below) kills everything in
|
||||
# the cgroup (KillMode=mixed → SIGKILL to remaining processes),
|
||||
# including us and the wrapping bash shell. The shell never reaches
|
||||
# its ``printf $status > .update_exit_code`` epilogue, so the
|
||||
# exit-code marker file would never be created. The new gateway's
|
||||
# update watcher would then poll for 30 minutes and send a spurious
|
||||
# timeout message.
|
||||
#
|
||||
# Writing the marker here — after git pull + pip install succeed but
|
||||
# before we attempt the restart — ensures the new gateway sees it
|
||||
@@ -5880,9 +5894,37 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
_ensure_user_systemd_env,
|
||||
find_gateway_pids,
|
||||
_get_service_pids,
|
||||
_graceful_restart_via_sigusr1,
|
||||
)
|
||||
import signal as _signal
|
||||
|
||||
# Drain budget for graceful SIGUSR1 restarts. The gateway drains
|
||||
# for up to ``agent.restart_drain_timeout`` (default 60s) before
|
||||
# exiting with code 75; we wait slightly longer so the drain
|
||||
# completes before we fall back to a hard restart. On older
|
||||
# systemd units without SIGUSR1 wiring this wait just times out
|
||||
# and we fall back to ``systemctl restart`` (the old behaviour).
|
||||
try:
|
||||
from hermes_constants import (
|
||||
DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT as _DEFAULT_DRAIN,
|
||||
)
|
||||
except Exception:
|
||||
_DEFAULT_DRAIN = 60.0
|
||||
_cfg_drain = None
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
_cfg_agent = (load_config().get("agent") or {})
|
||||
_cfg_drain = _cfg_agent.get("restart_drain_timeout")
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
_drain_budget = float(_cfg_drain) if _cfg_drain is not None else float(_DEFAULT_DRAIN)
|
||||
except (TypeError, ValueError):
|
||||
_drain_budget = float(_DEFAULT_DRAIN)
|
||||
# Add a 15s margin so the drain loop + final exit finish before
|
||||
# we escalate to ``systemctl restart`` / SIGTERM.
|
||||
_drain_budget = max(_drain_budget, 30.0) + 15.0
|
||||
|
||||
restarted_services = []
|
||||
killed_pids = set()
|
||||
|
||||
@@ -5929,59 +5971,114 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if check.stdout.strip() == "active":
|
||||
restart = subprocess.run(
|
||||
scope_cmd + ["restart", svc_name],
|
||||
if check.stdout.strip() != "active":
|
||||
continue
|
||||
|
||||
# Prefer a graceful SIGUSR1 restart so in-flight
|
||||
# agent runs drain instead of being SIGKILLed.
|
||||
# The gateway's SIGUSR1 handler calls
|
||||
# request_restart(via_service=True) → drain →
|
||||
# exit(75); systemd's Restart=on-failure (and
|
||||
# RestartForceExitStatus=75) respawns the unit.
|
||||
_main_pid = 0
|
||||
try:
|
||||
_show = subprocess.run(
|
||||
scope_cmd + [
|
||||
"show", svc_name,
|
||||
"--property=MainPID", "--value",
|
||||
],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
_main_pid = int((_show.stdout or "").strip() or 0)
|
||||
except (ValueError, subprocess.TimeoutExpired, FileNotFoundError):
|
||||
_main_pid = 0
|
||||
|
||||
_graceful_ok = False
|
||||
if _main_pid > 0:
|
||||
print(
|
||||
f" → {svc_name}: draining (up to {int(_drain_budget)}s)..."
|
||||
)
|
||||
_graceful_ok = _graceful_restart_via_sigusr1(
|
||||
_main_pid, drain_timeout=_drain_budget,
|
||||
)
|
||||
|
||||
if _graceful_ok:
|
||||
# Gateway exited 75; systemd should relaunch
|
||||
# via Restart=on-failure. Verify the new
|
||||
# process came up.
|
||||
_time.sleep(3)
|
||||
verify = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
if verify.stdout.strip() == "active":
|
||||
restarted_services.append(svc_name)
|
||||
continue
|
||||
# Process exited but wasn't respawned (older
|
||||
# unit without Restart=on-failure or
|
||||
# RestartForceExitStatus=75). Fall through
|
||||
# to systemctl start/restart.
|
||||
print(
|
||||
f" ⚠ {svc_name} drained but didn't relaunch — forcing restart"
|
||||
)
|
||||
|
||||
# Fallback: blunt systemctl restart. This is
|
||||
# what the old code always did; we get here only
|
||||
# when the graceful path failed (unit missing
|
||||
# SIGUSR1 wiring, drain exceeded the budget,
|
||||
# restart-policy mismatch).
|
||||
restart = subprocess.run(
|
||||
scope_cmd + ["restart", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=15,
|
||||
)
|
||||
if restart.returncode == 0:
|
||||
# Verify the service actually survived the
|
||||
# restart. systemctl restart returns 0 even
|
||||
# if the new process crashes immediately.
|
||||
_time.sleep(3)
|
||||
verify = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=15,
|
||||
timeout=5,
|
||||
)
|
||||
if restart.returncode == 0:
|
||||
# Verify the service actually survived the
|
||||
# restart. systemctl restart returns 0 even
|
||||
# if the new process crashes immediately.
|
||||
if verify.stdout.strip() == "active":
|
||||
restarted_services.append(svc_name)
|
||||
else:
|
||||
# Retry once — transient startup failures
|
||||
# (stale module cache, import race) often
|
||||
# resolve on the second attempt.
|
||||
print(
|
||||
f" ⚠ {svc_name} died after restart, retrying..."
|
||||
)
|
||||
retry = subprocess.run(
|
||||
scope_cmd + ["restart", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=15,
|
||||
)
|
||||
_time.sleep(3)
|
||||
verify = subprocess.run(
|
||||
verify2 = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if verify.stdout.strip() == "active":
|
||||
if verify2.stdout.strip() == "active":
|
||||
restarted_services.append(svc_name)
|
||||
print(f" ✓ {svc_name} recovered on retry")
|
||||
else:
|
||||
# Retry once — transient startup failures
|
||||
# (stale module cache, import race) often
|
||||
# resolve on the second attempt.
|
||||
print(
|
||||
f" ⚠ {svc_name} died after restart, retrying..."
|
||||
f" ✗ {svc_name} failed to stay running after restart.\n"
|
||||
f" Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n"
|
||||
f" Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}"
|
||||
)
|
||||
retry = subprocess.run(
|
||||
scope_cmd + ["restart", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=15,
|
||||
)
|
||||
_time.sleep(3)
|
||||
verify2 = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if verify2.stdout.strip() == "active":
|
||||
restarted_services.append(svc_name)
|
||||
print(f" ✓ {svc_name} recovered on retry")
|
||||
else:
|
||||
print(
|
||||
f" ✗ {svc_name} failed to stay running after restart.\n"
|
||||
f" Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n"
|
||||
f" Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}"
|
||||
)
|
||||
else:
|
||||
print(
|
||||
f" ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}"
|
||||
)
|
||||
else:
|
||||
print(
|
||||
f" ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}"
|
||||
)
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
pass
|
||||
|
||||
|
||||
+142
-19
@@ -304,6 +304,113 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
|
||||
# Alias resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _model_sort_key(model_id: str, prefix: str) -> tuple:
|
||||
"""Sort key for model version preference.
|
||||
|
||||
Extracts version numbers after the family prefix and returns a sort key
|
||||
that prefers higher versions. Suffix tokens (``pro``, ``omni``, etc.)
|
||||
are used as tiebreakers, with common quality indicators ranked.
|
||||
|
||||
Examples (with prefix ``"mimo"``)::
|
||||
|
||||
mimo-v2.5-pro → (-2.5, 0, 'pro') # highest version wins
|
||||
mimo-v2.5 → (-2.5, 1, '') # no suffix = lower than pro
|
||||
mimo-v2-pro → (-2.0, 0, 'pro')
|
||||
mimo-v2-omni → (-2.0, 1, 'omni')
|
||||
mimo-v2-flash → (-2.0, 1, 'flash')
|
||||
"""
|
||||
# Strip the prefix (and optional "/" separator for aggregator slugs)
|
||||
rest = model_id[len(prefix):]
|
||||
if rest.startswith("/"):
|
||||
rest = rest[1:]
|
||||
rest = rest.lstrip("-").strip()
|
||||
|
||||
# Parse version and suffix from the remainder.
|
||||
# "v2.5-pro" → version [2.5], suffix "pro"
|
||||
# "-omni" → version [], suffix "omni"
|
||||
# State machine: start → in_version → between → in_suffix
|
||||
nums: list[float] = []
|
||||
suffix_buf = ""
|
||||
state = "start"
|
||||
num_buf = ""
|
||||
|
||||
for ch in rest:
|
||||
if state == "start":
|
||||
if ch in "vV":
|
||||
state = "in_version"
|
||||
elif ch.isdigit():
|
||||
state = "in_version"
|
||||
num_buf += ch
|
||||
elif ch in "-_.":
|
||||
pass # skip separators before any content
|
||||
else:
|
||||
state = "in_suffix"
|
||||
suffix_buf += ch
|
||||
elif state == "in_version":
|
||||
if ch.isdigit():
|
||||
num_buf += ch
|
||||
elif ch == ".":
|
||||
if "." in num_buf:
|
||||
# Second dot — flush current number, start new component
|
||||
try:
|
||||
nums.append(float(num_buf.rstrip(".")))
|
||||
except ValueError:
|
||||
pass
|
||||
num_buf = ""
|
||||
else:
|
||||
num_buf += ch
|
||||
elif ch in "-_.":
|
||||
if num_buf:
|
||||
try:
|
||||
nums.append(float(num_buf.rstrip(".")))
|
||||
except ValueError:
|
||||
pass
|
||||
num_buf = ""
|
||||
state = "between"
|
||||
else:
|
||||
if num_buf:
|
||||
try:
|
||||
nums.append(float(num_buf.rstrip(".")))
|
||||
except ValueError:
|
||||
pass
|
||||
num_buf = ""
|
||||
state = "in_suffix"
|
||||
suffix_buf += ch
|
||||
elif state == "between":
|
||||
if ch.isdigit():
|
||||
state = "in_version"
|
||||
num_buf = ch
|
||||
elif ch in "vV":
|
||||
state = "in_version"
|
||||
elif ch in "-_.":
|
||||
pass
|
||||
else:
|
||||
state = "in_suffix"
|
||||
suffix_buf += ch
|
||||
elif state == "in_suffix":
|
||||
suffix_buf += ch
|
||||
|
||||
# Flush remaining buffer (strip trailing dots — "5.4." → "5.4")
|
||||
if num_buf and state == "in_version":
|
||||
try:
|
||||
nums.append(float(num_buf.rstrip(".")))
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
suffix = suffix_buf.lower().strip("-_.")
|
||||
suffix = suffix.strip()
|
||||
|
||||
# Negate versions so higher → sorts first
|
||||
version_key = tuple(-n for n in nums)
|
||||
|
||||
# Suffix quality ranking: pro/max > (no suffix) > omni/flash/mini/lite
|
||||
# Lower number = preferred
|
||||
_SUFFIX_RANK = {"pro": 0, "max": 0, "plus": 0, "turbo": 0}
|
||||
suffix_rank = _SUFFIX_RANK.get(suffix, 1)
|
||||
|
||||
return version_key + (suffix_rank, suffix)
|
||||
|
||||
|
||||
def resolve_alias(
|
||||
raw_input: str,
|
||||
current_provider: str,
|
||||
@@ -311,9 +418,9 @@ def resolve_alias(
|
||||
"""Resolve a short alias against the current provider's catalog.
|
||||
|
||||
Looks up *raw_input* in :data:`MODEL_ALIASES`, then searches the
|
||||
current provider's models.dev catalog for the first model whose ID
|
||||
starts with ``vendor/family`` (or just ``family`` for non-aggregator
|
||||
providers).
|
||||
current provider's models.dev catalog for the model whose ID starts
|
||||
with ``vendor/family`` (or just ``family`` for non-aggregator
|
||||
providers) and has the **highest version**.
|
||||
|
||||
Returns:
|
||||
``(provider, resolved_model_id, alias_name)`` if a match is
|
||||
@@ -341,28 +448,44 @@ def resolve_alias(
|
||||
|
||||
vendor, family = identity
|
||||
|
||||
# Search the provider's catalog from models.dev
|
||||
# Build catalog from models.dev, then merge in static _PROVIDER_MODELS
|
||||
# entries that models.dev may be missing (e.g. newly added models not
|
||||
# yet synced to the registry).
|
||||
catalog = list_provider_models(current_provider)
|
||||
if not catalog:
|
||||
return None
|
||||
try:
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
static = _PROVIDER_MODELS.get(current_provider, [])
|
||||
if static:
|
||||
seen = {m.lower() for m in catalog}
|
||||
for m in static:
|
||||
if m.lower() not in seen:
|
||||
catalog.append(m)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# For aggregators, models are vendor/model-name format
|
||||
aggregator = is_aggregator(current_provider)
|
||||
|
||||
for model_id in catalog:
|
||||
mid_lower = model_id.lower()
|
||||
if aggregator:
|
||||
# Match vendor/family prefix -- e.g. "anthropic/claude-sonnet"
|
||||
prefix = f"{vendor}/{family}".lower()
|
||||
if mid_lower.startswith(prefix):
|
||||
return (current_provider, model_id, key)
|
||||
else:
|
||||
# Non-aggregator: bare names -- e.g. "claude-sonnet-4-6"
|
||||
family_lower = family.lower()
|
||||
if mid_lower.startswith(family_lower):
|
||||
return (current_provider, model_id, key)
|
||||
if aggregator:
|
||||
prefix = f"{vendor}/{family}".lower()
|
||||
matches = [
|
||||
mid for mid in catalog
|
||||
if mid.lower().startswith(prefix)
|
||||
]
|
||||
else:
|
||||
family_lower = family.lower()
|
||||
matches = [
|
||||
mid for mid in catalog
|
||||
if mid.lower().startswith(family_lower)
|
||||
]
|
||||
|
||||
return None
|
||||
if not matches:
|
||||
return None
|
||||
|
||||
# Sort by version descending — prefer the latest/highest version
|
||||
prefix_for_sort = f"{vendor}/{family}" if aggregator else family
|
||||
matches.sort(key=lambda m: _model_sort_key(m, prefix_for_sort))
|
||||
return (current_provider, matches[0], key)
|
||||
|
||||
|
||||
def get_authenticated_provider_slugs(
|
||||
|
||||
+24
-2
@@ -33,6 +33,8 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
|
||||
# (model_id, display description shown in menus)
|
||||
OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("moonshotai/kimi-k2.6", "recommended"),
|
||||
("deepseek/deepseek-v4-pro", ""),
|
||||
("deepseek/deepseek-v4-flash", ""),
|
||||
("anthropic/claude-opus-4.7", ""),
|
||||
("anthropic/claude-opus-4.6", ""),
|
||||
("anthropic/claude-sonnet-4.6", ""),
|
||||
@@ -109,6 +111,8 @@ def _codex_curated_models() -> list[str]:
|
||||
_PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"nous": [
|
||||
"moonshotai/kimi-k2.6",
|
||||
"deepseek/deepseek-v4-pro",
|
||||
"deepseek/deepseek-v4-flash",
|
||||
"xiaomi/mimo-v2.5-pro",
|
||||
"xiaomi/mimo-v2.5",
|
||||
"anthropic/claude-opus-4.7",
|
||||
@@ -246,10 +250,14 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"claude-haiku-4-5-20251001",
|
||||
],
|
||||
"deepseek": [
|
||||
"deepseek-v4-pro",
|
||||
"deepseek-v4-flash",
|
||||
"deepseek-chat",
|
||||
"deepseek-reasoner",
|
||||
],
|
||||
"xiaomi": [
|
||||
"mimo-v2.5-pro",
|
||||
"mimo-v2.5",
|
||||
"mimo-v2-pro",
|
||||
"mimo-v2-omni",
|
||||
"mimo-v2-flash",
|
||||
@@ -301,6 +309,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"kimi-k2.5",
|
||||
"glm-5.1",
|
||||
"glm-5",
|
||||
"mimo-v2.5-pro",
|
||||
"mimo-v2.5",
|
||||
"mimo-v2-pro",
|
||||
"mimo-v2-omni",
|
||||
"minimax-m2.7",
|
||||
@@ -692,7 +702,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
|
||||
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
|
||||
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
|
||||
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
|
||||
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
|
||||
ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
|
||||
ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
|
||||
ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
|
||||
@@ -1674,7 +1684,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
|
||||
if normalized == "openai-codex":
|
||||
from hermes_cli.codex_models import get_codex_model_ids
|
||||
|
||||
return get_codex_model_ids()
|
||||
# Pass the live OAuth access token so the picker matches whatever
|
||||
# ChatGPT lists for this account right now (new models appear without
|
||||
# a Hermes release). Falls back to the hardcoded catalog if no token
|
||||
# or the endpoint is unreachable.
|
||||
access_token = None
|
||||
try:
|
||||
from hermes_cli.auth import resolve_codex_runtime_credentials
|
||||
|
||||
creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
|
||||
access_token = creds.get("api_key")
|
||||
except Exception:
|
||||
access_token = None
|
||||
return get_codex_model_ids(access_token=access_token)
|
||||
if normalized in {"copilot", "copilot-acp"}:
|
||||
try:
|
||||
live = _fetch_github_models(_resolve_copilot_catalog_api_key())
|
||||
|
||||
@@ -38,6 +38,7 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
|
||||
("qqbot", PlatformInfo(label="💬 QQBot", default_toolset="hermes-qqbot")),
|
||||
("webhook", PlatformInfo(label="🔗 Webhook", default_toolset="hermes-webhook")),
|
||||
("api_server", PlatformInfo(label="🌐 API Server", default_toolset="hermes-api-server")),
|
||||
("cron", PlatformInfo(label="⏰ Cron", default_toolset="hermes-cron")),
|
||||
])
|
||||
|
||||
|
||||
|
||||
+1
-1
@@ -103,7 +103,7 @@ _DEFAULT_PROVIDER_MODELS = {
|
||||
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
|
||||
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
|
||||
"opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
|
||||
"opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"],
|
||||
"opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2.5-pro", "mimo-v2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", "qwen3.6-plus", "qwen3.5-plus"],
|
||||
"huggingface": [
|
||||
"Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
|
||||
"Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
|
||||
|
||||
@@ -289,6 +289,7 @@ TIPS = [
|
||||
"When a provider returns HTTP 402 (payment required), the auxiliary client auto-falls back to the next one.",
|
||||
"agent.tool_use_enforcement steers models that describe actions instead of calling tools — auto for GPT/Codex.",
|
||||
"agent.restart_drain_timeout (default 60s) lets running agents finish before a gateway restart takes effect.",
|
||||
"agent.api_max_retries (default 3) controls how many times the agent retries a failed API call before surfacing the error — lower it for fast fallback.",
|
||||
"The gateway caches AIAgent instances per session — destroying this cache breaks Anthropic prompt caching.",
|
||||
"Any website can expose skills via /.well-known/skills/index.json — the skills hub discovers them automatically.",
|
||||
"The skills audit log at ~/.hermes/skills/.hub/audit.log tracks every install and removal operation.",
|
||||
|
||||
@@ -67,12 +67,13 @@ CONFIGURABLE_TOOLSETS = [
|
||||
("messaging", "📨 Cross-Platform Messaging", "send_message"),
|
||||
("rl", "🧪 RL Training", "Tinker-Atropos training tools"),
|
||||
("homeassistant", "🏠 Home Assistant", "smart home device control"),
|
||||
("discord_admin", "🛡️ Discord Server Admin", "list channels/roles, pin, assign roles"),
|
||||
]
|
||||
|
||||
# Toolsets that are OFF by default for new installs.
|
||||
# They're still in _HERMES_CORE_TOOLS (available at runtime if enabled),
|
||||
# but the setup checklist won't pre-select them for first-time users.
|
||||
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl"}
|
||||
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "discord_admin"}
|
||||
|
||||
|
||||
def _get_effective_configurable_toolsets():
|
||||
@@ -549,7 +550,7 @@ def _get_platform_tools(
|
||||
include_default_mcp_servers: bool = True,
|
||||
) -> Set[str]:
|
||||
"""Resolve which individual toolset names are enabled for a platform."""
|
||||
from toolsets import resolve_toolset
|
||||
from toolsets import resolve_toolset, TOOLSETS
|
||||
|
||||
platform_toolsets = config.get("platform_toolsets") or {}
|
||||
toolset_names = platform_toolsets.get(platform)
|
||||
@@ -563,6 +564,8 @@ def _get_platform_tools(
|
||||
toolset_names = [str(ts) for ts in toolset_names]
|
||||
|
||||
configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
|
||||
plugin_ts_keys = _get_plugin_toolset_keys()
|
||||
platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
|
||||
|
||||
# If the saved list contains any configurable keys directly, the user
|
||||
# has explicitly configured this platform — use direct membership.
|
||||
@@ -585,16 +588,46 @@ def _get_platform_tools(
|
||||
ts_tools = set(resolve_toolset(ts_key))
|
||||
if ts_tools and ts_tools.issubset(all_tool_names):
|
||||
enabled_toolsets.add(ts_key)
|
||||
|
||||
default_off = set(_DEFAULT_OFF_TOOLSETS)
|
||||
if platform in default_off:
|
||||
default_off.remove(platform)
|
||||
enabled_toolsets -= default_off
|
||||
|
||||
# Recover non-configurable platform toolsets (e.g. discord, feishu_doc,
|
||||
# feishu_drive). These are part of the platform's default composite but
|
||||
# absent from CONFIGURABLE_TOOLSETS, so they can't appear in the TUI
|
||||
# checklist or in a user-saved config. Must run in BOTH branches —
|
||||
# otherwise saving via `hermes tools` (which flips has_explicit_config
|
||||
# to True) silently drops them.
|
||||
platform_tool_universe = set(resolve_toolset(PLATFORMS[platform]["default_toolset"]))
|
||||
configurable_tool_universe = set()
|
||||
for ck in configurable_keys:
|
||||
configurable_tool_universe.update(resolve_toolset(ck))
|
||||
claimed = set()
|
||||
for ts_key in enabled_toolsets:
|
||||
claimed.update(resolve_toolset(ts_key))
|
||||
skip = configurable_keys | plugin_ts_keys | platform_default_keys
|
||||
skip |= {k for k in TOOLSETS if k.startswith("hermes-")}
|
||||
skip |= set(_DEFAULT_OFF_TOOLSETS) - {platform}
|
||||
for ts_key, ts_def in TOOLSETS.items():
|
||||
if ts_key in skip:
|
||||
continue
|
||||
if ts_def.get("includes"):
|
||||
continue
|
||||
ts_tools = set(resolve_toolset(ts_key))
|
||||
if not ts_tools or not ts_tools.issubset(platform_tool_universe):
|
||||
continue
|
||||
if ts_tools.issubset(configurable_tool_universe):
|
||||
continue
|
||||
if not ts_tools.issubset(claimed):
|
||||
enabled_toolsets.add(ts_key)
|
||||
claimed.update(ts_tools)
|
||||
|
||||
# Plugin toolsets: enabled by default unless explicitly disabled.
|
||||
# A plugin toolset is "known" for a platform once `hermes tools`
|
||||
# has been saved for that platform (tracked via known_plugin_toolsets).
|
||||
# Unknown plugins default to enabled; known-but-absent = disabled.
|
||||
plugin_ts_keys = _get_plugin_toolset_keys()
|
||||
if plugin_ts_keys:
|
||||
known_map = config.get("known_plugin_toolsets", {})
|
||||
known_for_platform = set(known_map.get(platform, []))
|
||||
@@ -609,7 +642,6 @@ def _get_platform_tools(
|
||||
|
||||
# Preserve any explicit non-configurable toolset entries (for example,
|
||||
# custom toolsets or MCP server names saved in platform_toolsets).
|
||||
platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
|
||||
explicit_passthrough = {
|
||||
ts
|
||||
for ts in toolset_names
|
||||
@@ -669,6 +701,7 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
|
||||
existing_toolsets = config.get("platform_toolsets", {}).get(platform, [])
|
||||
if not isinstance(existing_toolsets, list):
|
||||
existing_toolsets = []
|
||||
existing_toolsets = [str(ts) for ts in existing_toolsets]
|
||||
|
||||
# Preserve any entries that are NOT configurable toolsets and NOT platform
|
||||
# defaults (i.e. only MCP server names should be preserved)
|
||||
@@ -676,6 +709,8 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
|
||||
entry for entry in existing_toolsets
|
||||
if entry not in configurable_keys and entry not in platform_default_keys
|
||||
}
|
||||
if "no_mcp" not in enabled_toolset_keys:
|
||||
preserved_entries.discard("no_mcp")
|
||||
|
||||
# Merge preserved entries with new enabled toolsets
|
||||
config["platform_toolsets"][platform] = sorted(enabled_toolset_keys | preserved_entries)
|
||||
|
||||
@@ -0,0 +1,548 @@
|
||||
"""Process-wide voice recording + TTS API for the TUI gateway.
|
||||
|
||||
Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool``
|
||||
(text-to-speech) behind idempotent, stateful entry points that the gateway's
|
||||
``voice.record``, ``voice.toggle``, and ``voice.tts`` JSON-RPC handlers can
|
||||
call from a dedicated thread. The gateway imports this module lazily so that
|
||||
missing optional audio deps (sounddevice, faster-whisper, numpy) surface as
|
||||
an ``ImportError`` at call time, not at startup.
|
||||
|
||||
Two usage modes are exposed:
|
||||
|
||||
* **Push-to-talk** (``start_recording`` / ``stop_and_transcribe``) — single
|
||||
manually-bounded capture used when the caller drives the start/stop pair
|
||||
explicitly.
|
||||
* **Continuous (VAD)** (``start_continuous`` / ``stop_continuous``) — mirrors
|
||||
the classic CLI voice mode: recording auto-stops on silence, transcribes,
|
||||
hands the result to a callback, and then auto-restarts for the next turn.
|
||||
Three consecutive no-speech cycles stop the loop and fire
|
||||
``on_silent_limit`` so the UI can turn the mode off.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
from tools.voice_mode import (
|
||||
create_audio_recorder,
|
||||
is_whisper_hallucination,
|
||||
play_audio_file,
|
||||
transcribe_recording,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _debug(msg: str) -> None:
|
||||
"""Emit a debug breadcrumb when HERMES_VOICE_DEBUG=1.
|
||||
|
||||
Goes to stderr so the TUI gateway wraps it as a gateway.stderr event,
|
||||
which createGatewayEventHandler shows as an Activity line — exactly
|
||||
what we need to diagnose "why didn't the loop auto-restart?" in the
|
||||
user's real terminal without shipping a separate debug RPC.
|
||||
|
||||
Any OSError / BrokenPipeError is swallowed because this fires from
|
||||
background threads (silence callback, TTS daemon, beep) where a
|
||||
broken stderr pipe must not kill the whole gateway — the main
|
||||
command pipe (stdin+stdout) is what actually matters.
|
||||
"""
|
||||
if os.environ.get("HERMES_VOICE_DEBUG", "").strip() != "1":
|
||||
return
|
||||
try:
|
||||
print(f"[voice] {msg}", file=sys.stderr, flush=True)
|
||||
except (BrokenPipeError, OSError):
|
||||
pass
|
||||
|
||||
|
||||
def _beeps_enabled() -> bool:
|
||||
"""CLI parity: voice.beep_enabled in config.yaml (default True)."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
voice_cfg = load_config().get("voice", {})
|
||||
if isinstance(voice_cfg, dict):
|
||||
return bool(voice_cfg.get("beep_enabled", True))
|
||||
except Exception:
|
||||
pass
|
||||
return True
|
||||
|
||||
|
||||
def _play_beep(frequency: int, count: int = 1) -> None:
|
||||
"""Audible cue matching cli.py's record/stop beeps.
|
||||
|
||||
880 Hz single-beep on start (cli.py:_voice_start_recording line 7532),
|
||||
660 Hz double-beep on stop (cli.py:_voice_stop_and_transcribe line 7585).
|
||||
Best-effort — sounddevice failures are silently swallowed so the
|
||||
voice loop never breaks because a speaker was unavailable.
|
||||
"""
|
||||
if not _beeps_enabled():
|
||||
return
|
||||
try:
|
||||
from tools.voice_mode import play_beep
|
||||
|
||||
play_beep(frequency=frequency, count=count)
|
||||
except Exception as e:
|
||||
_debug(f"beep {frequency}Hz failed: {e}")
|
||||
|
||||
# ── Push-to-talk state ───────────────────────────────────────────────
|
||||
_recorder = None
|
||||
_recorder_lock = threading.Lock()
|
||||
|
||||
# ── Continuous (VAD) state ───────────────────────────────────────────
|
||||
_continuous_lock = threading.Lock()
|
||||
_continuous_active = False
|
||||
_continuous_recorder: Any = None
|
||||
|
||||
# ── TTS-vs-STT feedback guard ────────────────────────────────────────
|
||||
# When TTS plays the agent reply over the speakers, the live microphone
|
||||
# picks it up and transcribes the agent's own voice as user input — an
|
||||
# infinite loop the agent happily joins ("Ha, looks like we're in a loop").
|
||||
# This Event mirrors cli.py:_voice_tts_done: cleared while speak_text is
|
||||
# playing, set while silent. _continuous_on_silence waits on it before
|
||||
# re-arming the recorder, and speak_text itself cancels any live capture
|
||||
# before starting playback so the tail of the previous utterance doesn't
|
||||
# leak into the mic.
|
||||
_tts_playing = threading.Event()
|
||||
_tts_playing.set() # initially "not playing"
|
||||
_continuous_on_transcript: Optional[Callable[[str], None]] = None
|
||||
_continuous_on_status: Optional[Callable[[str], None]] = None
|
||||
_continuous_on_silent_limit: Optional[Callable[[], None]] = None
|
||||
_continuous_no_speech_count = 0
|
||||
_CONTINUOUS_NO_SPEECH_LIMIT = 3
|
||||
|
||||
|
||||
# ── Push-to-talk API ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
def start_recording() -> None:
|
||||
"""Begin capturing from the default input device (push-to-talk).
|
||||
|
||||
Idempotent — calling again while a recording is in progress is a no-op.
|
||||
"""
|
||||
global _recorder
|
||||
|
||||
with _recorder_lock:
|
||||
if _recorder is not None and getattr(_recorder, "is_recording", False):
|
||||
return
|
||||
rec = create_audio_recorder()
|
||||
rec.start()
|
||||
_recorder = rec
|
||||
|
||||
|
||||
def stop_and_transcribe() -> Optional[str]:
|
||||
"""Stop the active push-to-talk recording, transcribe, return text.
|
||||
|
||||
Returns ``None`` when no recording is active, when the microphone
|
||||
captured no speech, or when Whisper returned a known hallucination.
|
||||
"""
|
||||
global _recorder
|
||||
|
||||
with _recorder_lock:
|
||||
rec = _recorder
|
||||
_recorder = None
|
||||
|
||||
if rec is None:
|
||||
return None
|
||||
|
||||
wav_path = rec.stop()
|
||||
if not wav_path:
|
||||
return None
|
||||
|
||||
try:
|
||||
result = transcribe_recording(wav_path)
|
||||
except Exception as e:
|
||||
logger.warning("voice transcription failed: %s", e)
|
||||
return None
|
||||
finally:
|
||||
try:
|
||||
if os.path.isfile(wav_path):
|
||||
os.unlink(wav_path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# transcribe_recording returns {"success": bool, "transcript": str, ...}
|
||||
# — matches cli.py:_voice_stop_and_transcribe's result.get("transcript").
|
||||
if not result.get("success"):
|
||||
return None
|
||||
text = (result.get("transcript") or "").strip()
|
||||
if not text or is_whisper_hallucination(text):
|
||||
return None
|
||||
|
||||
return text
|
||||
|
||||
|
||||
# ── Continuous (VAD) API ─────────────────────────────────────────────
|
||||
|
||||
|
||||
def start_continuous(
|
||||
on_transcript: Callable[[str], None],
|
||||
on_status: Optional[Callable[[str], None]] = None,
|
||||
on_silent_limit: Optional[Callable[[], None]] = None,
|
||||
silence_threshold: int = 200,
|
||||
silence_duration: float = 3.0,
|
||||
) -> None:
|
||||
"""Start a VAD-driven continuous recording loop.
|
||||
|
||||
The loop calls ``on_transcript(text)`` each time speech is detected and
|
||||
transcribed successfully, then auto-restarts. After
|
||||
``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
|
||||
picked up at all) the loop stops itself and calls ``on_silent_limit``
|
||||
so the UI can reflect "voice off". Idempotent — calling while already
|
||||
active is a no-op.
|
||||
|
||||
``on_status`` is called with ``"listening"`` / ``"transcribing"`` /
|
||||
``"idle"`` so the UI can show a live indicator.
|
||||
"""
|
||||
global _continuous_active, _continuous_recorder
|
||||
global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
|
||||
global _continuous_no_speech_count
|
||||
|
||||
with _continuous_lock:
|
||||
if _continuous_active:
|
||||
_debug("start_continuous: already active — no-op")
|
||||
return
|
||||
_continuous_active = True
|
||||
_continuous_on_transcript = on_transcript
|
||||
_continuous_on_status = on_status
|
||||
_continuous_on_silent_limit = on_silent_limit
|
||||
_continuous_no_speech_count = 0
|
||||
|
||||
if _continuous_recorder is None:
|
||||
_continuous_recorder = create_audio_recorder()
|
||||
|
||||
_continuous_recorder._silence_threshold = silence_threshold
|
||||
_continuous_recorder._silence_duration = silence_duration
|
||||
rec = _continuous_recorder
|
||||
|
||||
_debug(
|
||||
f"start_continuous: begin (threshold={silence_threshold}, duration={silence_duration}s)"
|
||||
)
|
||||
|
||||
# CLI parity: single 880 Hz beep *before* opening the stream — placing
|
||||
# the beep after stream.start() on macOS triggers a CoreAudio conflict
|
||||
# (cli.py:7528 comment).
|
||||
_play_beep(frequency=880, count=1)
|
||||
|
||||
try:
|
||||
rec.start(on_silence_stop=_continuous_on_silence)
|
||||
except Exception as e:
|
||||
logger.error("failed to start continuous recording: %s", e)
|
||||
_debug(f"start_continuous: rec.start raised {type(e).__name__}: {e}")
|
||||
with _continuous_lock:
|
||||
_continuous_active = False
|
||||
raise
|
||||
|
||||
if on_status:
|
||||
try:
|
||||
on_status("listening")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def stop_continuous() -> None:
|
||||
"""Stop the active continuous loop and release the microphone.
|
||||
|
||||
Idempotent — calling while not active is a no-op. Any in-flight
|
||||
transcription completes but its result is discarded (the callback
|
||||
checks ``_continuous_active`` before firing).
|
||||
"""
|
||||
global _continuous_active, _continuous_on_transcript
|
||||
global _continuous_on_status, _continuous_on_silent_limit
|
||||
global _continuous_recorder, _continuous_no_speech_count
|
||||
|
||||
with _continuous_lock:
|
||||
if not _continuous_active:
|
||||
return
|
||||
_continuous_active = False
|
||||
rec = _continuous_recorder
|
||||
on_status = _continuous_on_status
|
||||
_continuous_on_transcript = None
|
||||
_continuous_on_status = None
|
||||
_continuous_on_silent_limit = None
|
||||
_continuous_no_speech_count = 0
|
||||
|
||||
if rec is not None:
|
||||
try:
|
||||
# cancel() (not stop()) discards buffered frames — the loop
|
||||
# is over, we don't want to transcribe a half-captured turn.
|
||||
rec.cancel()
|
||||
except Exception as e:
|
||||
logger.warning("failed to cancel recorder: %s", e)
|
||||
|
||||
# Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the
|
||||
# silence-auto-stop path plays).
|
||||
_play_beep(frequency=660, count=2)
|
||||
|
||||
if on_status:
|
||||
try:
|
||||
on_status("idle")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def is_continuous_active() -> bool:
|
||||
"""Whether a continuous voice loop is currently running."""
|
||||
with _continuous_lock:
|
||||
return _continuous_active
|
||||
|
||||
|
||||
def _continuous_on_silence() -> None:
|
||||
"""AudioRecorder silence callback — runs in a daemon thread.
|
||||
|
||||
Stops the current capture, transcribes, delivers the text via
|
||||
``on_transcript``, and — if the loop is still active — starts the
|
||||
next capture. Three consecutive silent cycles end the loop.
|
||||
"""
|
||||
global _continuous_active, _continuous_no_speech_count
|
||||
|
||||
_debug("_continuous_on_silence: fired")
|
||||
|
||||
with _continuous_lock:
|
||||
if not _continuous_active:
|
||||
_debug("_continuous_on_silence: loop inactive — abort")
|
||||
return
|
||||
rec = _continuous_recorder
|
||||
on_transcript = _continuous_on_transcript
|
||||
on_status = _continuous_on_status
|
||||
on_silent_limit = _continuous_on_silent_limit
|
||||
|
||||
if rec is None:
|
||||
_debug("_continuous_on_silence: no recorder — abort")
|
||||
return
|
||||
|
||||
if on_status:
|
||||
try:
|
||||
on_status("transcribing")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
wav_path = rec.stop()
|
||||
# Peak RMS is the critical diagnostic when stop() returns None despite
|
||||
# the VAD firing — tells us at a glance whether the mic was too quiet
|
||||
# for SILENCE_RMS_THRESHOLD (200) or the VAD + peak checks disagree.
|
||||
peak_rms = getattr(rec, "_peak_rms", -1)
|
||||
_debug(
|
||||
f"_continuous_on_silence: rec.stop -> {wav_path!r} (peak_rms={peak_rms})"
|
||||
)
|
||||
|
||||
# CLI parity: double 660 Hz beep after the stream stops (safe from the
|
||||
# CoreAudio conflict that blocks pre-start beeps).
|
||||
_play_beep(frequency=660, count=2)
|
||||
|
||||
transcript: Optional[str] = None
|
||||
|
||||
if wav_path:
|
||||
try:
|
||||
result = transcribe_recording(wav_path)
|
||||
# transcribe_recording returns {"success": bool, "transcript": str,
|
||||
# "error": str?} — NOT {"text": str}. Using the wrong key silently
|
||||
# produced empty transcripts even when Groq/local STT returned fine,
|
||||
# which masqueraded as "not hearing the user" to the caller.
|
||||
success = bool(result.get("success"))
|
||||
text = (result.get("transcript") or "").strip()
|
||||
err = result.get("error")
|
||||
_debug(
|
||||
f"_continuous_on_silence: transcribe -> success={success} "
|
||||
f"text={text!r} err={err!r}"
|
||||
)
|
||||
if success and text and not is_whisper_hallucination(text):
|
||||
transcript = text
|
||||
except Exception as e:
|
||||
logger.warning("continuous transcription failed: %s", e)
|
||||
_debug(f"_continuous_on_silence: transcribe raised {type(e).__name__}: {e}")
|
||||
finally:
|
||||
try:
|
||||
if os.path.isfile(wav_path):
|
||||
os.unlink(wav_path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
with _continuous_lock:
|
||||
if not _continuous_active:
|
||||
# User stopped us while we were transcribing — discard.
|
||||
_debug("_continuous_on_silence: stopped during transcribe — no restart")
|
||||
return
|
||||
if transcript:
|
||||
_continuous_no_speech_count = 0
|
||||
else:
|
||||
_continuous_no_speech_count += 1
|
||||
should_halt = _continuous_no_speech_count >= _CONTINUOUS_NO_SPEECH_LIMIT
|
||||
no_speech = _continuous_no_speech_count
|
||||
|
||||
if transcript and on_transcript:
|
||||
try:
|
||||
on_transcript(transcript)
|
||||
except Exception as e:
|
||||
logger.warning("on_transcript callback raised: %s", e)
|
||||
|
||||
if should_halt:
|
||||
_debug(f"_continuous_on_silence: {no_speech} silent cycles — halting")
|
||||
with _continuous_lock:
|
||||
_continuous_active = False
|
||||
_continuous_no_speech_count = 0
|
||||
if on_silent_limit:
|
||||
try:
|
||||
on_silent_limit()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
rec.cancel()
|
||||
except Exception:
|
||||
pass
|
||||
if on_status:
|
||||
try:
|
||||
on_status("idle")
|
||||
except Exception:
|
||||
pass
|
||||
return
|
||||
|
||||
# CLI parity (cli.py:10619-10621): wait for any in-flight TTS to
|
||||
# finish before re-arming the mic, then leave a small gap to avoid
|
||||
# catching the tail of the speaker output. Without this the voice
|
||||
# loop becomes a feedback loop — the agent's spoken reply lands
|
||||
# back in the mic and gets re-submitted.
|
||||
if not _tts_playing.is_set():
|
||||
_debug("_continuous_on_silence: waiting for TTS to finish")
|
||||
_tts_playing.wait(timeout=60)
|
||||
import time as _time
|
||||
_time.sleep(0.3)
|
||||
|
||||
# User may have stopped the loop during the wait.
|
||||
with _continuous_lock:
|
||||
if not _continuous_active:
|
||||
_debug("_continuous_on_silence: stopped while waiting for TTS")
|
||||
return
|
||||
|
||||
# Restart for the next turn.
|
||||
_debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
|
||||
_play_beep(frequency=880, count=1)
|
||||
try:
|
||||
rec.start(on_silence_stop=_continuous_on_silence)
|
||||
except Exception as e:
|
||||
logger.error("failed to restart continuous recording: %s", e)
|
||||
_debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
|
||||
with _continuous_lock:
|
||||
_continuous_active = False
|
||||
return
|
||||
|
||||
if on_status:
|
||||
try:
|
||||
on_status("listening")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# ── TTS API ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def speak_text(text: str) -> None:
|
||||
"""Synthesize ``text`` with the configured TTS provider and play it.
|
||||
|
||||
Mirrors cli.py:_voice_speak_response exactly — same markdown strip
|
||||
pipeline, same 4000-char cap, same explicit mp3 output path, same
|
||||
MP3-over-OGG playback choice (afplay misbehaves on OGG), same cleanup
|
||||
of both extensions. Keeping these in sync means a voice-mode TTS
|
||||
session in the TUI sounds identical to one in the classic CLI.
|
||||
|
||||
While playback is in flight the module-level _tts_playing Event is
|
||||
cleared so the continuous-recording loop knows to wait before
|
||||
re-arming the mic (otherwise the agent's spoken reply feedback-loops
|
||||
through the microphone and the agent ends up replying to itself).
|
||||
"""
|
||||
if not text or not text.strip():
|
||||
return
|
||||
|
||||
import re
|
||||
import tempfile
|
||||
import time
|
||||
|
||||
# Cancel any live capture before we open the speakers — otherwise the
|
||||
# last ~200ms of the user's turn tail + the first syllables of our TTS
|
||||
# both end up in the next recording window. The continuous loop will
|
||||
# re-arm itself after _tts_playing flips back (see _continuous_on_silence).
|
||||
paused_recording = False
|
||||
with _continuous_lock:
|
||||
if (
|
||||
_continuous_active
|
||||
and _continuous_recorder is not None
|
||||
and getattr(_continuous_recorder, "is_recording", False)
|
||||
):
|
||||
try:
|
||||
_continuous_recorder.cancel()
|
||||
paused_recording = True
|
||||
except Exception as e:
|
||||
logger.warning("failed to pause recorder for TTS: %s", e)
|
||||
|
||||
_tts_playing.clear()
|
||||
_debug(f"speak_text: TTS begin (paused_recording={paused_recording})")
|
||||
|
||||
try:
|
||||
from tools.tts_tool import text_to_speech_tool
|
||||
|
||||
tts_text = text[:4000] if len(text) > 4000 else text
|
||||
tts_text = re.sub(r'```[\s\S]*?```', ' ', tts_text) # fenced code blocks
|
||||
tts_text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', tts_text) # [text](url) → text
|
||||
tts_text = re.sub(r'https?://\S+', '', tts_text) # bare URLs
|
||||
tts_text = re.sub(r'\*\*(.+?)\*\*', r'\1', tts_text) # bold
|
||||
tts_text = re.sub(r'\*(.+?)\*', r'\1', tts_text) # italic
|
||||
tts_text = re.sub(r'`(.+?)`', r'\1', tts_text) # inline code
|
||||
tts_text = re.sub(r'^#+\s*', '', tts_text, flags=re.MULTILINE) # headers
|
||||
tts_text = re.sub(r'^\s*[-*]\s+', '', tts_text, flags=re.MULTILINE) # list bullets
|
||||
tts_text = re.sub(r'---+', '', tts_text) # horizontal rules
|
||||
tts_text = re.sub(r'\n{3,}', '\n\n', tts_text) # excess newlines
|
||||
tts_text = tts_text.strip()
|
||||
if not tts_text:
|
||||
return
|
||||
|
||||
# MP3 output path, pre-chosen so we can play the MP3 directly even
|
||||
# when text_to_speech_tool auto-converts to OGG for messaging
|
||||
# platforms. afplay's OGG support is flaky, MP3 always works.
|
||||
os.makedirs(os.path.join(tempfile.gettempdir(), "hermes_voice"), exist_ok=True)
|
||||
mp3_path = os.path.join(
|
||||
tempfile.gettempdir(),
|
||||
"hermes_voice",
|
||||
f"tts_{time.strftime('%Y%m%d_%H%M%S')}.mp3",
|
||||
)
|
||||
|
||||
_debug(f"speak_text: synthesizing {len(tts_text)} chars -> {mp3_path}")
|
||||
text_to_speech_tool(text=tts_text, output_path=mp3_path)
|
||||
|
||||
if os.path.isfile(mp3_path) and os.path.getsize(mp3_path) > 0:
|
||||
_debug(f"speak_text: playing {mp3_path} ({os.path.getsize(mp3_path)} bytes)")
|
||||
play_audio_file(mp3_path)
|
||||
try:
|
||||
os.unlink(mp3_path)
|
||||
ogg_path = mp3_path.rsplit(".", 1)[0] + ".ogg"
|
||||
if os.path.isfile(ogg_path):
|
||||
os.unlink(ogg_path)
|
||||
except OSError:
|
||||
pass
|
||||
else:
|
||||
_debug(f"speak_text: TTS tool produced no audio at {mp3_path}")
|
||||
except Exception as e:
|
||||
logger.warning("Voice TTS playback failed: %s", e)
|
||||
_debug(f"speak_text raised {type(e).__name__}: {e}")
|
||||
finally:
|
||||
_tts_playing.set()
|
||||
_debug("speak_text: TTS done")
|
||||
|
||||
# Re-arm the mic so the user can answer without pressing Ctrl+B.
|
||||
# Small delay lets the OS flush speaker output and afplay fully
|
||||
# release the audio device before sounddevice re-opens the input.
|
||||
if paused_recording:
|
||||
time.sleep(0.3)
|
||||
with _continuous_lock:
|
||||
if _continuous_active and _continuous_recorder is not None:
|
||||
try:
|
||||
_continuous_recorder.start(
|
||||
on_silence_stop=_continuous_on_silence
|
||||
)
|
||||
_debug("speak_text: recording resumed after TTS")
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"failed to resume recorder after TTS: %s", e
|
||||
)
|
||||
+292
-21
@@ -71,6 +71,7 @@ app = FastAPI(title="Hermes Agent", version=__version__)
|
||||
# Injected into the SPA HTML so only the legitimate web UI can use it.
|
||||
# ---------------------------------------------------------------------------
|
||||
_SESSION_TOKEN = secrets.token_urlsafe(32)
|
||||
_SESSION_HEADER_NAME = "X-Hermes-Session-Token"
|
||||
|
||||
# Simple rate limiter for the reveal endpoint
|
||||
_reveal_timestamps: List[float] = []
|
||||
@@ -104,14 +105,29 @@ _PUBLIC_API_PATHS: frozenset = frozenset({
|
||||
})
|
||||
|
||||
|
||||
def _require_token(request: Request) -> None:
|
||||
"""Validate the ephemeral session token. Raises 401 on mismatch.
|
||||
def _has_valid_session_token(request: Request) -> bool:
|
||||
"""True if the request carries a valid dashboard session token.
|
||||
|
||||
Uses ``hmac.compare_digest`` to prevent timing side-channels.
|
||||
The dedicated session header avoids collisions with reverse proxies that
|
||||
already use ``Authorization`` (for example Caddy ``basic_auth``). We still
|
||||
accept the legacy Bearer path for backward compatibility with older
|
||||
dashboard bundles.
|
||||
"""
|
||||
session_header = request.headers.get(_SESSION_HEADER_NAME, "")
|
||||
if session_header and hmac.compare_digest(
|
||||
session_header.encode(),
|
||||
_SESSION_TOKEN.encode(),
|
||||
):
|
||||
return True
|
||||
|
||||
auth = request.headers.get("authorization", "")
|
||||
expected = f"Bearer {_SESSION_TOKEN}"
|
||||
if not hmac.compare_digest(auth.encode(), expected.encode()):
|
||||
return hmac.compare_digest(auth.encode(), expected.encode())
|
||||
|
||||
|
||||
def _require_token(request: Request) -> None:
|
||||
"""Validate the ephemeral session token. Raises 401 on mismatch."""
|
||||
if not _has_valid_session_token(request):
|
||||
raise HTTPException(status_code=401, detail="Unauthorized")
|
||||
|
||||
|
||||
@@ -205,9 +221,7 @@ async def auth_middleware(request: Request, call_next):
|
||||
"""Require the session token on all /api/ routes except the public list."""
|
||||
path = request.url.path
|
||||
if path.startswith("/api/") and path not in _PUBLIC_API_PATHS and not path.startswith("/api/plugins/"):
|
||||
auth = request.headers.get("authorization", "")
|
||||
expected = f"Bearer {_SESSION_TOKEN}"
|
||||
if not hmac.compare_digest(auth.encode(), expected.encode()):
|
||||
if not _has_valid_session_token(request):
|
||||
return JSONResponse(
|
||||
status_code=401,
|
||||
content={"detail": "Unauthorized"},
|
||||
@@ -417,7 +431,14 @@ class EnvVarReveal(BaseModel):
|
||||
|
||||
|
||||
_GATEWAY_HEALTH_URL = os.getenv("GATEWAY_HEALTH_URL")
|
||||
_GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3"))
|
||||
try:
|
||||
_GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3"))
|
||||
except (ValueError, TypeError):
|
||||
_log.warning(
|
||||
"Invalid GATEWAY_HEALTH_TIMEOUT value %r — using default 3.0s",
|
||||
os.getenv("GATEWAY_HEALTH_TIMEOUT"),
|
||||
)
|
||||
_GATEWAY_HEALTH_TIMEOUT = 3.0
|
||||
|
||||
|
||||
def _probe_gateway_health() -> tuple[bool, dict | None]:
|
||||
@@ -2304,8 +2325,227 @@ _BUILTIN_DASHBOARD_THEMES = [
|
||||
]
|
||||
|
||||
|
||||
def _parse_theme_layer(value: Any, default_hex: str, default_alpha: float = 1.0) -> Optional[Dict[str, Any]]:
|
||||
"""Normalise a theme layer spec from YAML into `{hex, alpha}` form.
|
||||
|
||||
Accepts shorthand (a bare hex string) or full dict form. Returns
|
||||
``None`` on garbage input so the caller can fall back to a built-in
|
||||
default rather than blowing up.
|
||||
"""
|
||||
if value is None:
|
||||
return {"hex": default_hex, "alpha": default_alpha}
|
||||
if isinstance(value, str):
|
||||
return {"hex": value, "alpha": default_alpha}
|
||||
if isinstance(value, dict):
|
||||
hex_val = value.get("hex", default_hex)
|
||||
alpha_val = value.get("alpha", default_alpha)
|
||||
if not isinstance(hex_val, str):
|
||||
return None
|
||||
try:
|
||||
alpha_f = float(alpha_val)
|
||||
except (TypeError, ValueError):
|
||||
alpha_f = default_alpha
|
||||
return {"hex": hex_val, "alpha": max(0.0, min(1.0, alpha_f))}
|
||||
return None
|
||||
|
||||
|
||||
_THEME_DEFAULT_TYPOGRAPHY: Dict[str, str] = {
|
||||
"fontSans": 'system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif',
|
||||
"fontMono": 'ui-monospace, "SF Mono", "Cascadia Mono", Menlo, Consolas, monospace',
|
||||
"baseSize": "15px",
|
||||
"lineHeight": "1.55",
|
||||
"letterSpacing": "0",
|
||||
}
|
||||
|
||||
_THEME_DEFAULT_LAYOUT: Dict[str, str] = {
|
||||
"radius": "0.5rem",
|
||||
"density": "comfortable",
|
||||
}
|
||||
|
||||
_THEME_OVERRIDE_KEYS = {
|
||||
"card", "cardForeground", "popover", "popoverForeground",
|
||||
"primary", "primaryForeground", "secondary", "secondaryForeground",
|
||||
"muted", "mutedForeground", "accent", "accentForeground",
|
||||
"destructive", "destructiveForeground", "success", "warning",
|
||||
"border", "input", "ring",
|
||||
}
|
||||
|
||||
# Well-known named asset slots themes can populate. Any other keys under
|
||||
# ``assets.custom`` are exposed as ``--theme-asset-custom-<key>`` CSS vars
|
||||
# for plugin/shell use.
|
||||
_THEME_NAMED_ASSET_KEYS = {"bg", "hero", "logo", "crest", "sidebar", "header"}
|
||||
|
||||
# Component-style buckets themes can override. The value under each bucket
|
||||
# is a mapping from camelCase property name to CSS string; each pair emits
|
||||
# ``--component-<bucket>-<kebab-property>`` on :root. The frontend's shell
|
||||
# components (Card, App header, Backdrop, etc.) consume these vars so themes
|
||||
# can restyle chrome (clip-path, border-image, segmented progress, etc.)
|
||||
# without shipping their own CSS.
|
||||
_THEME_COMPONENT_BUCKETS = {
|
||||
"card", "header", "footer", "sidebar", "tab",
|
||||
"progress", "badge", "backdrop", "page",
|
||||
}
|
||||
|
||||
_THEME_LAYOUT_VARIANTS = {"standard", "cockpit", "tiled"}
|
||||
|
||||
# Cap on customCSS length so a malformed/oversized theme YAML can't blow up
|
||||
# the response payload or the <style> tag. 32 KiB is plenty for every
|
||||
# practical reskin (the Strike Freedom demo is ~2 KiB).
|
||||
_THEME_CUSTOM_CSS_MAX = 32 * 1024
|
||||
|
||||
|
||||
def _normalise_theme_definition(data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
"""Normalise a user theme YAML into the wire format `ThemeProvider`
|
||||
expects. Returns ``None`` if the theme is unusable.
|
||||
|
||||
Accepts both the full schema (palette/typography/layout) and a loose
|
||||
form with bare hex strings, so hand-written YAMLs stay friendly.
|
||||
"""
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
name = data.get("name")
|
||||
if not isinstance(name, str) or not name.strip():
|
||||
return None
|
||||
|
||||
# Palette
|
||||
palette_src = data.get("palette", {}) if isinstance(data.get("palette"), dict) else {}
|
||||
# Allow top-level `colors.background` as a shorthand too.
|
||||
colors_src = data.get("colors", {}) if isinstance(data.get("colors"), dict) else {}
|
||||
|
||||
def _layer(key: str, default_hex: str, default_alpha: float = 1.0) -> Dict[str, Any]:
|
||||
spec = palette_src.get(key, colors_src.get(key))
|
||||
parsed = _parse_theme_layer(spec, default_hex, default_alpha)
|
||||
return parsed if parsed is not None else {"hex": default_hex, "alpha": default_alpha}
|
||||
|
||||
palette = {
|
||||
"background": _layer("background", "#041c1c", 1.0),
|
||||
"midground": _layer("midground", "#ffe6cb", 1.0),
|
||||
"foreground": _layer("foreground", "#ffffff", 0.0),
|
||||
"warmGlow": palette_src.get("warmGlow") or data.get("warmGlow") or "rgba(255, 189, 56, 0.35)",
|
||||
"noiseOpacity": 1.0,
|
||||
}
|
||||
raw_noise = palette_src.get("noiseOpacity", data.get("noiseOpacity"))
|
||||
try:
|
||||
palette["noiseOpacity"] = float(raw_noise) if raw_noise is not None else 1.0
|
||||
except (TypeError, ValueError):
|
||||
palette["noiseOpacity"] = 1.0
|
||||
|
||||
# Typography
|
||||
typo_src = data.get("typography", {}) if isinstance(data.get("typography"), dict) else {}
|
||||
typography = dict(_THEME_DEFAULT_TYPOGRAPHY)
|
||||
for key in ("fontSans", "fontMono", "fontDisplay", "fontUrl", "baseSize", "lineHeight", "letterSpacing"):
|
||||
val = typo_src.get(key)
|
||||
if isinstance(val, str) and val.strip():
|
||||
typography[key] = val
|
||||
|
||||
# Layout
|
||||
layout_src = data.get("layout", {}) if isinstance(data.get("layout"), dict) else {}
|
||||
layout = dict(_THEME_DEFAULT_LAYOUT)
|
||||
radius = layout_src.get("radius")
|
||||
if isinstance(radius, str) and radius.strip():
|
||||
layout["radius"] = radius
|
||||
density = layout_src.get("density")
|
||||
if isinstance(density, str) and density in ("compact", "comfortable", "spacious"):
|
||||
layout["density"] = density
|
||||
|
||||
# Color overrides — keep only valid keys with string values.
|
||||
overrides_src = data.get("colorOverrides", {})
|
||||
color_overrides: Dict[str, str] = {}
|
||||
if isinstance(overrides_src, dict):
|
||||
for key, val in overrides_src.items():
|
||||
if key in _THEME_OVERRIDE_KEYS and isinstance(val, str) and val.strip():
|
||||
color_overrides[key] = val
|
||||
|
||||
# Assets — named slots + arbitrary user-defined keys. Values must be
|
||||
# strings (URLs or CSS ``url(...)``/``linear-gradient(...)`` expressions).
|
||||
# We don't fetch remote assets here; the frontend just injects them as
|
||||
# CSS vars. Empty values are dropped so a theme can explicitly clear a
|
||||
# slot by setting ``hero: ""``.
|
||||
assets_out: Dict[str, Any] = {}
|
||||
assets_src = data.get("assets", {}) if isinstance(data.get("assets"), dict) else {}
|
||||
for key in _THEME_NAMED_ASSET_KEYS:
|
||||
val = assets_src.get(key)
|
||||
if isinstance(val, str) and val.strip():
|
||||
assets_out[key] = val
|
||||
custom_assets_src = assets_src.get("custom")
|
||||
if isinstance(custom_assets_src, dict):
|
||||
custom_assets: Dict[str, str] = {}
|
||||
for key, val in custom_assets_src.items():
|
||||
if (
|
||||
isinstance(key, str)
|
||||
and key.replace("-", "").replace("_", "").isalnum()
|
||||
and isinstance(val, str)
|
||||
and val.strip()
|
||||
):
|
||||
custom_assets[key] = val
|
||||
if custom_assets:
|
||||
assets_out["custom"] = custom_assets
|
||||
|
||||
# Custom CSS — raw CSS text the frontend injects as a scoped <style>
|
||||
# tag on theme apply. Clipped to _THEME_CUSTOM_CSS_MAX to keep the
|
||||
# payload bounded. We intentionally do NOT parse/sanitise the CSS
|
||||
# here — the dashboard is localhost-only and themes are user-authored
|
||||
# YAML in ~/.hermes/, same trust level as the config file itself.
|
||||
custom_css_val = data.get("customCSS")
|
||||
custom_css: Optional[str] = None
|
||||
if isinstance(custom_css_val, str) and custom_css_val.strip():
|
||||
custom_css = custom_css_val[:_THEME_CUSTOM_CSS_MAX]
|
||||
|
||||
# Component style overrides — per-bucket dicts of camelCase CSS
|
||||
# property -> CSS string. The frontend converts these into CSS vars
|
||||
# that shell components (Card, App header, Backdrop) consume.
|
||||
component_styles_src = data.get("componentStyles", {})
|
||||
component_styles: Dict[str, Dict[str, str]] = {}
|
||||
if isinstance(component_styles_src, dict):
|
||||
for bucket, props in component_styles_src.items():
|
||||
if bucket not in _THEME_COMPONENT_BUCKETS or not isinstance(props, dict):
|
||||
continue
|
||||
clean: Dict[str, str] = {}
|
||||
for prop, value in props.items():
|
||||
if (
|
||||
isinstance(prop, str)
|
||||
and prop.replace("-", "").replace("_", "").isalnum()
|
||||
and isinstance(value, (str, int, float))
|
||||
and str(value).strip()
|
||||
):
|
||||
clean[prop] = str(value)
|
||||
if clean:
|
||||
component_styles[bucket] = clean
|
||||
|
||||
layout_variant_src = data.get("layoutVariant")
|
||||
layout_variant = (
|
||||
layout_variant_src
|
||||
if isinstance(layout_variant_src, str) and layout_variant_src in _THEME_LAYOUT_VARIANTS
|
||||
else "standard"
|
||||
)
|
||||
|
||||
result: Dict[str, Any] = {
|
||||
"name": name,
|
||||
"label": data.get("label") or name,
|
||||
"description": data.get("description", ""),
|
||||
"palette": palette,
|
||||
"typography": typography,
|
||||
"layout": layout,
|
||||
"layoutVariant": layout_variant,
|
||||
}
|
||||
if color_overrides:
|
||||
result["colorOverrides"] = color_overrides
|
||||
if assets_out:
|
||||
result["assets"] = assets_out
|
||||
if custom_css is not None:
|
||||
result["customCSS"] = custom_css
|
||||
if component_styles:
|
||||
result["componentStyles"] = component_styles
|
||||
return result
|
||||
|
||||
|
||||
def _discover_user_themes() -> list:
|
||||
"""Scan ~/.hermes/dashboard-themes/*.yaml for user-created themes."""
|
||||
"""Scan ~/.hermes/dashboard-themes/*.yaml for user-created themes.
|
||||
|
||||
Returns a list of fully-normalised theme definitions ready to ship
|
||||
to the frontend, so the client can apply them without a secondary
|
||||
round-trip or a built-in stub.
|
||||
"""
|
||||
themes_dir = get_hermes_home() / "dashboard-themes"
|
||||
if not themes_dir.is_dir():
|
||||
return []
|
||||
@@ -2313,33 +2553,42 @@ def _discover_user_themes() -> list:
|
||||
for f in sorted(themes_dir.glob("*.yaml")):
|
||||
try:
|
||||
data = yaml.safe_load(f.read_text(encoding="utf-8"))
|
||||
if isinstance(data, dict) and data.get("name"):
|
||||
result.append({
|
||||
"name": data["name"],
|
||||
"label": data.get("label", data["name"]),
|
||||
"description": data.get("description", ""),
|
||||
})
|
||||
except Exception:
|
||||
continue
|
||||
normalised = _normalise_theme_definition(data)
|
||||
if normalised is not None:
|
||||
result.append(normalised)
|
||||
return result
|
||||
|
||||
|
||||
@app.get("/api/dashboard/themes")
|
||||
async def get_dashboard_themes():
|
||||
"""Return available themes and the currently active one."""
|
||||
"""Return available themes and the currently active one.
|
||||
|
||||
Built-in entries ship name/label/description only (the frontend owns
|
||||
their full definitions in `web/src/themes/presets.ts`). User themes
|
||||
from `~/.hermes/dashboard-themes/*.yaml` ship with their full
|
||||
normalised definition under `definition`, so the client can apply
|
||||
them without a stub.
|
||||
"""
|
||||
config = load_config()
|
||||
active = config.get("dashboard", {}).get("theme", "default")
|
||||
user_themes = _discover_user_themes()
|
||||
# Merge built-in + user, user themes override built-in by name.
|
||||
seen = set()
|
||||
themes = []
|
||||
for t in _BUILTIN_DASHBOARD_THEMES:
|
||||
seen.add(t["name"])
|
||||
themes.append(t)
|
||||
for t in user_themes:
|
||||
if t["name"] not in seen:
|
||||
themes.append(t)
|
||||
seen.add(t["name"])
|
||||
if t["name"] in seen:
|
||||
continue
|
||||
themes.append({
|
||||
"name": t["name"],
|
||||
"label": t["label"],
|
||||
"description": t["description"],
|
||||
"definition": t,
|
||||
})
|
||||
seen.add(t["name"])
|
||||
return {"themes": themes, "active": active}
|
||||
|
||||
|
||||
@@ -2396,13 +2645,35 @@ def _discover_dashboard_plugins() -> list:
|
||||
if name in seen_names:
|
||||
continue
|
||||
seen_names.add(name)
|
||||
# Tab options: ``path`` + ``position`` for a new tab, optional
|
||||
# ``override`` to replace a built-in route, and ``hidden`` to
|
||||
# register the plugin component/slots without adding a tab
|
||||
# (useful for slot-only plugins like a header-crest injector).
|
||||
raw_tab = data.get("tab", {}) if isinstance(data.get("tab"), dict) else {}
|
||||
tab_info = {
|
||||
"path": raw_tab.get("path", f"/{name}"),
|
||||
"position": raw_tab.get("position", "end"),
|
||||
}
|
||||
override_path = raw_tab.get("override")
|
||||
if isinstance(override_path, str) and override_path.startswith("/"):
|
||||
tab_info["override"] = override_path
|
||||
if bool(raw_tab.get("hidden")):
|
||||
tab_info["hidden"] = True
|
||||
# Slots: list of named slot locations this plugin populates.
|
||||
# The frontend exposes ``registerSlot(pluginName, slotName, Component)``
|
||||
# on window; plugins with non-empty slots call it from their JS bundle.
|
||||
slots_src = data.get("slots")
|
||||
slots: List[str] = []
|
||||
if isinstance(slots_src, list):
|
||||
slots = [s for s in slots_src if isinstance(s, str) and s]
|
||||
plugins.append({
|
||||
"name": name,
|
||||
"label": data.get("label", name),
|
||||
"description": data.get("description", ""),
|
||||
"icon": data.get("icon", "Puzzle"),
|
||||
"version": data.get("version", "0.0.0"),
|
||||
"tab": data.get("tab", {"path": f"/{name}", "position": "end"}),
|
||||
"tab": tab_info,
|
||||
"slots": slots,
|
||||
"entry": data.get("entry", "dist/index.js"),
|
||||
"css": data.get("css"),
|
||||
"has_api": bool(data.get("api")),
|
||||
|
||||
+52
-23
@@ -288,30 +288,34 @@ def get_tool_definitions(
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic_schema}
|
||||
break
|
||||
|
||||
# Rebuild discord_server schema based on the bot's privileged intents
|
||||
# (detected from GET /applications/@me) and the user's action allowlist
|
||||
# in config. Hides actions the bot's intents don't support so the
|
||||
# model never attempts them, and annotates fetch_messages when the
|
||||
# Rebuild discord / discord_admin schemas based on the bot's privileged
|
||||
# intents (detected from GET /applications/@me) and the user's action
|
||||
# allowlist in config. Hides actions the bot's intents don't support so
|
||||
# the model never attempts them, and annotates fetch_messages when the
|
||||
# MESSAGE_CONTENT intent is missing.
|
||||
if "discord_server" in available_tool_names:
|
||||
try:
|
||||
from tools.discord_tool import get_dynamic_schema
|
||||
dynamic = get_dynamic_schema()
|
||||
except Exception: # pragma: no cover — defensive, fall back to static
|
||||
dynamic = None
|
||||
if dynamic is None:
|
||||
# Tool filtered out entirely (empty allowlist or detection disabled
|
||||
# the only remaining actions). Drop it from the schema list.
|
||||
filtered_tools = [
|
||||
t for t in filtered_tools
|
||||
if t.get("function", {}).get("name") != "discord_server"
|
||||
]
|
||||
available_tool_names.discard("discord_server")
|
||||
else:
|
||||
for i, td in enumerate(filtered_tools):
|
||||
if td.get("function", {}).get("name") == "discord_server":
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic}
|
||||
break
|
||||
_discord_schema_fns = {
|
||||
"discord": "get_dynamic_schema_core",
|
||||
"discord_admin": "get_dynamic_schema_admin",
|
||||
}
|
||||
for discord_tool_name in _discord_schema_fns:
|
||||
if discord_tool_name in available_tool_names:
|
||||
try:
|
||||
from tools import discord_tool as _dt
|
||||
schema_fn = getattr(_dt, _discord_schema_fns[discord_tool_name])
|
||||
dynamic = schema_fn()
|
||||
except Exception:
|
||||
dynamic = None
|
||||
if dynamic is None:
|
||||
filtered_tools = [
|
||||
t for t in filtered_tools
|
||||
if t.get("function", {}).get("name") != discord_tool_name
|
||||
]
|
||||
available_tool_names.discard(discord_tool_name)
|
||||
else:
|
||||
for i, td in enumerate(filtered_tools):
|
||||
if td.get("function", {}).get("name") == discord_tool_name:
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic}
|
||||
break
|
||||
|
||||
# Strip web tool cross-references from browser_navigate description when
|
||||
# web_search / web_extract are not available. The static schema says
|
||||
@@ -418,6 +422,31 @@ def _coerce_value(value: str, expected_type):
|
||||
return _coerce_number(value, integer_only=(expected_type == "integer"))
|
||||
if expected_type == "boolean":
|
||||
return _coerce_boolean(value)
|
||||
if expected_type == "array":
|
||||
return _coerce_json(value, list)
|
||||
if expected_type == "object":
|
||||
return _coerce_json(value, dict)
|
||||
return value
|
||||
|
||||
|
||||
def _coerce_json(value: str, expected_python_type: type):
|
||||
"""Parse *value* as JSON when the schema expects an array or object.
|
||||
|
||||
Handles model output drift where a complex oneOf/discriminated-union schema
|
||||
causes the LLM to emit the array/object as a JSON string instead of a native
|
||||
structure. Returns the original string if parsing fails or yields the wrong
|
||||
Python type.
|
||||
"""
|
||||
try:
|
||||
parsed = json.loads(value)
|
||||
except (ValueError, TypeError):
|
||||
return value
|
||||
if isinstance(parsed, expected_python_type):
|
||||
logger.debug(
|
||||
"coerce_tool_args: coerced string to %s via json.loads",
|
||||
expected_python_type.__name__,
|
||||
)
|
||||
return parsed
|
||||
return value
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,313 @@
|
||||
"""xAI image generation backend.
|
||||
|
||||
Exposes xAI's ``grok-imagine-image`` model as an
|
||||
:class:`ImageGenProvider` implementation.
|
||||
|
||||
Features:
|
||||
- Text-to-image generation
|
||||
- Multiple aspect ratios (1:1, 16:9, 9:16, etc.)
|
||||
- Multiple resolutions (1K, 2K)
|
||||
- Base64 output saved to cache
|
||||
|
||||
Selection precedence (first hit wins):
|
||||
1. ``XAI_IMAGE_MODEL`` env var
|
||||
2. ``image_gen.xai.model`` in ``config.yaml``
|
||||
3. :data:`DEFAULT_MODEL`
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
from agent.image_gen_provider import (
|
||||
DEFAULT_ASPECT_RATIO,
|
||||
ImageGenProvider,
|
||||
error_response,
|
||||
resolve_aspect_ratio,
|
||||
save_b64_image,
|
||||
success_response,
|
||||
)
|
||||
from tools.xai_http import hermes_xai_user_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model catalog
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
API_MODEL = "grok-imagine-image"
|
||||
|
||||
_MODELS: Dict[str, Dict[str, Any]] = {
|
||||
"grok-imagine-image": {
|
||||
"display": "Grok Imagine Image",
|
||||
"speed": "~5-10s",
|
||||
"strengths": "Fast, high-quality",
|
||||
},
|
||||
}
|
||||
|
||||
DEFAULT_MODEL = "grok-imagine-image"
|
||||
|
||||
# xAI aspect ratios (more options than FAL/OpenAI)
|
||||
_XAI_ASPECT_RATIOS = {
|
||||
"landscape": "16:9",
|
||||
"square": "1:1",
|
||||
"portrait": "9:16",
|
||||
"4:3": "4:3",
|
||||
"3:4": "3:4",
|
||||
"3:2": "3:2",
|
||||
"2:3": "2:3",
|
||||
}
|
||||
|
||||
# xAI resolutions
|
||||
_XAI_RESOLUTIONS = {
|
||||
"1k": "1024",
|
||||
"2k": "2048",
|
||||
}
|
||||
|
||||
DEFAULT_RESOLUTION = "1k"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _load_xai_config() -> Dict[str, Any]:
|
||||
"""Read ``image_gen.xai`` from config.yaml."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
cfg = load_config()
|
||||
section = cfg.get("image_gen") if isinstance(cfg, dict) else None
|
||||
xai_section = section.get("xai") if isinstance(section, dict) else None
|
||||
return xai_section if isinstance(xai_section, dict) else {}
|
||||
except Exception as exc:
|
||||
logger.debug("Could not load image_gen.xai config: %s", exc)
|
||||
return {}
|
||||
|
||||
|
||||
def _resolve_model() -> Tuple[str, Dict[str, Any]]:
|
||||
"""Decide which model to use and return ``(model_id, meta)``."""
|
||||
env_override = os.environ.get("XAI_IMAGE_MODEL")
|
||||
if env_override and env_override in _MODELS:
|
||||
return env_override, _MODELS[env_override]
|
||||
|
||||
cfg = _load_xai_config()
|
||||
candidate = cfg.get("model") if isinstance(cfg.get("model"), str) else None
|
||||
if candidate and candidate in _MODELS:
|
||||
return candidate, _MODELS[candidate]
|
||||
|
||||
return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL]
|
||||
|
||||
|
||||
def _resolve_resolution() -> str:
|
||||
"""Get configured resolution."""
|
||||
cfg = _load_xai_config()
|
||||
res = cfg.get("resolution") if isinstance(cfg.get("resolution"), str) else None
|
||||
if res and res in _XAI_RESOLUTIONS:
|
||||
return res
|
||||
return DEFAULT_RESOLUTION
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provider
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class XAIImageGenProvider(ImageGenProvider):
|
||||
"""xAI ``grok-imagine-image`` backend."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "xai"
|
||||
|
||||
@property
|
||||
def display_name(self) -> str:
|
||||
return "xAI (Grok)"
|
||||
|
||||
def is_available(self) -> bool:
|
||||
return bool(os.getenv("XAI_API_KEY"))
|
||||
|
||||
def list_models(self) -> List[Dict[str, Any]]:
|
||||
return [
|
||||
{
|
||||
"id": model_id,
|
||||
"display": meta.get("display", model_id),
|
||||
"speed": meta.get("speed", ""),
|
||||
"strengths": meta.get("strengths", ""),
|
||||
}
|
||||
for model_id, meta in _MODELS.items()
|
||||
]
|
||||
|
||||
def get_setup_schema(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"name": "xAI (Grok)",
|
||||
"badge": "paid",
|
||||
"tag": "Native xAI image generation via grok-imagine-image",
|
||||
"env_vars": [
|
||||
{
|
||||
"key": "XAI_API_KEY",
|
||||
"prompt": "xAI API key",
|
||||
"url": "https://console.x.ai/",
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
def generate(
|
||||
self,
|
||||
prompt: str,
|
||||
aspect_ratio: str = DEFAULT_ASPECT_RATIO,
|
||||
**kwargs: Any,
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate an image using xAI's grok-imagine-image."""
|
||||
api_key = os.getenv("XAI_API_KEY", "").strip()
|
||||
if not api_key:
|
||||
return error_response(
|
||||
error="XAI_API_KEY not set. Get one at https://console.x.ai/",
|
||||
error_type="missing_api_key",
|
||||
provider="xai",
|
||||
aspect_ratio=aspect_ratio,
|
||||
)
|
||||
|
||||
model_id, meta = _resolve_model()
|
||||
aspect = resolve_aspect_ratio(aspect_ratio)
|
||||
xai_ar = _XAI_ASPECT_RATIOS.get(aspect, "1:1")
|
||||
resolution = _resolve_resolution()
|
||||
xai_res = _XAI_RESOLUTIONS.get(resolution, "1024")
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"model": API_MODEL,
|
||||
"prompt": prompt,
|
||||
"aspect_ratio": xai_ar,
|
||||
"resolution": xai_res,
|
||||
}
|
||||
|
||||
headers = {
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"Content-Type": "application/json",
|
||||
"User-Agent": hermes_xai_user_agent(),
|
||||
}
|
||||
|
||||
base_url = (os.getenv("XAI_BASE_URL") or "https://api.x.ai/v1").strip().rstrip("/")
|
||||
|
||||
try:
|
||||
response = requests.post(
|
||||
f"{base_url}/images/generations",
|
||||
headers=headers,
|
||||
json=payload,
|
||||
timeout=120,
|
||||
)
|
||||
response.raise_for_status()
|
||||
except requests.HTTPError as exc:
|
||||
status = exc.response.status_code if exc.response else 0
|
||||
try:
|
||||
err_msg = exc.response.json().get("error", {}).get("message", exc.response.text[:300])
|
||||
except Exception:
|
||||
err_msg = exc.response.text[:300] if exc.response else str(exc)
|
||||
logger.error("xAI image gen failed (%d): %s", status, err_msg)
|
||||
return error_response(
|
||||
error=f"xAI image generation failed ({status}): {err_msg}",
|
||||
error_type="api_error",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
except requests.Timeout:
|
||||
return error_response(
|
||||
error="xAI image generation timed out (120s)",
|
||||
error_type="timeout",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
except requests.ConnectionError as exc:
|
||||
return error_response(
|
||||
error=f"xAI connection error: {exc}",
|
||||
error_type="connection_error",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
try:
|
||||
result = response.json()
|
||||
except Exception as exc:
|
||||
return error_response(
|
||||
error=f"xAI returned invalid JSON: {exc}",
|
||||
error_type="invalid_response",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
# Parse response — xAI returns data[0].b64_json or data[0].url
|
||||
data = result.get("data", [])
|
||||
if not data:
|
||||
return error_response(
|
||||
error="xAI returned no image data",
|
||||
error_type="empty_response",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
first = data[0]
|
||||
b64 = first.get("b64_json")
|
||||
url = first.get("url")
|
||||
|
||||
if b64:
|
||||
try:
|
||||
saved_path = save_b64_image(b64, prefix=f"xai_{model_id}")
|
||||
except Exception as exc:
|
||||
return error_response(
|
||||
error=f"Could not save image to cache: {exc}",
|
||||
error_type="io_error",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
image_ref = str(saved_path)
|
||||
elif url:
|
||||
image_ref = url
|
||||
else:
|
||||
return error_response(
|
||||
error="xAI response contained neither b64_json nor URL",
|
||||
error_type="empty_response",
|
||||
provider="xai",
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
)
|
||||
|
||||
extra: Dict[str, Any] = {
|
||||
"resolution": xai_res,
|
||||
}
|
||||
|
||||
return success_response(
|
||||
image=image_ref,
|
||||
model=model_id,
|
||||
prompt=prompt,
|
||||
aspect_ratio=aspect,
|
||||
provider="xai",
|
||||
extra=extra,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plugin registration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def register(ctx: Any) -> None:
|
||||
"""Register this provider with the image gen registry."""
|
||||
ctx.register_image_gen_provider(XAIImageGenProvider())
|
||||
@@ -0,0 +1,7 @@
|
||||
name: xai
|
||||
version: 1.0.0
|
||||
description: "xAI image generation backend (grok-imagine-image). Text-to-image."
|
||||
author: Julien Talbot
|
||||
kind: backend
|
||||
requires_env:
|
||||
- XAI_API_KEY
|
||||
@@ -0,0 +1,70 @@
|
||||
# Strike Freedom Cockpit — dashboard skin demo
|
||||
|
||||
Demonstrates how the dashboard skin+plugin system can be used to build a
|
||||
fully custom cockpit-style reskin without touching the core dashboard.
|
||||
|
||||
Two pieces:
|
||||
|
||||
- `theme/strike-freedom.yaml` — a dashboard theme YAML that paints the
|
||||
palette, typography, layout variant (`cockpit`), component chrome
|
||||
(notched card corners, scanlines, accent colors), and declares asset
|
||||
slots (`hero`, `crest`, `bg`).
|
||||
- `dashboard/` — a plugin that populates the `sidebar`, `header-left`,
|
||||
and `footer-right` slots reserved by the cockpit layout. The sidebar
|
||||
renders an MS-STATUS panel with segmented telemetry bars driven by
|
||||
real agent status; the header-left injects a COMPASS crest; the
|
||||
footer-right replaces the default org tagline.
|
||||
|
||||
## Install
|
||||
|
||||
1. **Theme** — copy the theme YAML into your Hermes home:
|
||||
|
||||
```
|
||||
cp theme/strike-freedom.yaml ~/.hermes/dashboard-themes/
|
||||
```
|
||||
|
||||
2. **Plugin** — the `dashboard/` directory gets auto-discovered because
|
||||
it lives under `plugins/` in the repo. On a user install, copy the
|
||||
whole plugin directory into `~/.hermes/plugins/`:
|
||||
|
||||
```
|
||||
cp -r . ~/.hermes/plugins/strike-freedom-cockpit
|
||||
```
|
||||
|
||||
3. Restart the web UI (or `GET /api/dashboard/plugins/rescan`), open it,
|
||||
pick **Strike Freedom** from the theme switcher.
|
||||
|
||||
## Customising the artwork
|
||||
|
||||
The sidebar plugin reads `--theme-asset-hero` and `--theme-asset-crest`
|
||||
from the active theme. Drop your own URLs into the theme YAML:
|
||||
|
||||
```yaml
|
||||
assets:
|
||||
hero: "/my-images/strike-freedom.png"
|
||||
crest: "/my-images/compass-crest.svg"
|
||||
bg: "/my-images/cosmic-era-bg.jpg"
|
||||
```
|
||||
|
||||
The plugin reads those at render time — no plugin code changes needed
|
||||
to swap artwork across themes.
|
||||
|
||||
## What this demo proves
|
||||
|
||||
The dashboard skin+plugin system supports (ref: `web/src/themes/types.ts`,
|
||||
`web/src/plugins/slots.ts`):
|
||||
|
||||
- Palette, typography, font URLs, density, radius — already present
|
||||
- **Asset URLs exposed as CSS vars** (bg / hero / crest / logo /
|
||||
sidebar / header + arbitrary `custom.*`)
|
||||
- **Raw `customCSS` blocks** injected as scoped `<style>` tags
|
||||
- **Per-component style overrides** (card / header / sidebar / backdrop /
|
||||
tab / progress / footer / badge / page) via CSS vars
|
||||
- **`layoutVariant`** — `standard`, `cockpit`, or `tiled`
|
||||
- **Plugin slots** — 10 named shell slots plugins can inject into
|
||||
(`backdrop`, `header-left/right/banner`, `sidebar`, `pre-main`,
|
||||
`post-main`, `footer-left/right`, `overlay`)
|
||||
- **Route overrides** — plugins can replace a built-in page entirely
|
||||
(`tab.override: "/"`) instead of just adding a tab
|
||||
- **Hidden plugins** — slot-only plugins that never show in the nav
|
||||
(`tab.hidden: true`) — as used here
|
||||
@@ -0,0 +1,309 @@
|
||||
/**
|
||||
* Strike Freedom Cockpit — dashboard plugin demo.
|
||||
*
|
||||
* A slot-only plugin (manifest sets tab.hidden: true) that populates
|
||||
* three shell slots when the user has the ``strike-freedom`` theme
|
||||
* selected (or any theme that picks layoutVariant: cockpit):
|
||||
*
|
||||
* - sidebar → MS-STATUS panel: ENERGY / SHIELD / POWER bars,
|
||||
* ZGMF-X20A identity line, pilot block, hero
|
||||
* render (from --theme-asset-hero when the theme
|
||||
* provides one).
|
||||
* - header-left → COMPASS faction crest (uses --theme-asset-crest
|
||||
* if provided, falls back to a geometric SVG).
|
||||
* - footer-right → COSMIC ERA tagline that replaces the default
|
||||
* footer org line.
|
||||
*
|
||||
* The plugin demonstrates every extension point added alongside the
|
||||
* slot system: registerSlot, tab.hidden, reading theme asset CSS vars
|
||||
* from plugin code, and rendering above the built-in route content.
|
||||
*/
|
||||
(function () {
|
||||
"use strict";
|
||||
|
||||
const SDK = window.__HERMES_PLUGIN_SDK__;
|
||||
const PLUGINS = window.__HERMES_PLUGINS__;
|
||||
if (!SDK || !PLUGINS || !PLUGINS.registerSlot) {
|
||||
// Old dashboard bundle without slot support — bail silently rather
|
||||
// than breaking the page.
|
||||
return;
|
||||
}
|
||||
|
||||
const { React } = SDK;
|
||||
const { useState, useEffect } = SDK.hooks;
|
||||
const { api } = SDK;
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
/** Read a CSS custom property from :root. Empty string when unset. */
|
||||
function cssVar(name) {
|
||||
if (typeof document === "undefined") return "";
|
||||
return getComputedStyle(document.documentElement).getPropertyValue(name).trim();
|
||||
}
|
||||
|
||||
/** Segmented chip progress bar — 10 cells filled proportionally to value. */
|
||||
function TelemetryBar(props) {
|
||||
const { label, value, color } = props;
|
||||
const cells = [];
|
||||
for (let i = 0; i < 10; i++) {
|
||||
const filled = Math.round(value / 10) > i;
|
||||
cells.push(
|
||||
React.createElement("span", {
|
||||
key: i,
|
||||
style: {
|
||||
flex: 1,
|
||||
height: 8,
|
||||
background: filled ? color : "rgba(255,255,255,0.06)",
|
||||
transition: "background 200ms",
|
||||
clipPath: "polygon(2px 0, 100% 0, calc(100% - 2px) 100%, 0 100%)",
|
||||
},
|
||||
}),
|
||||
);
|
||||
}
|
||||
return React.createElement(
|
||||
"div",
|
||||
{ style: { display: "flex", flexDirection: "column", gap: 4 } },
|
||||
React.createElement(
|
||||
"div",
|
||||
{
|
||||
style: {
|
||||
display: "flex",
|
||||
justifyContent: "space-between",
|
||||
fontSize: "0.65rem",
|
||||
letterSpacing: "0.12em",
|
||||
opacity: 0.75,
|
||||
},
|
||||
},
|
||||
React.createElement("span", null, label),
|
||||
React.createElement("span", { style: { color, fontWeight: 700 } }, value + "%"),
|
||||
),
|
||||
React.createElement(
|
||||
"div",
|
||||
{ style: { display: "flex", gap: 2 } },
|
||||
cells,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Sidebar: MS-STATUS panel
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
function SidebarSlot() {
|
||||
// Pull live-ish numbers from the status API so the plugin isn't just
|
||||
// a static decoration. Fall back to full bars if the API is slow /
|
||||
// unavailable.
|
||||
const [status, setStatus] = useState(null);
|
||||
useEffect(function () {
|
||||
let cancel = false;
|
||||
api.getStatus()
|
||||
.then(function (s) { if (!cancel) setStatus(s); })
|
||||
.catch(function () {});
|
||||
return function () { cancel = true; };
|
||||
}, []);
|
||||
|
||||
// Map real status signals to HUD telemetry. Energy/shield/power
|
||||
// aren't literal concepts on a software agent, so we read them from
|
||||
// adjacent signals: active sessions, gateway connected-platforms,
|
||||
// and agent-online health.
|
||||
const energy = status && status.gateway_online ? 92 : 18;
|
||||
const shield = status && status.connected_platforms
|
||||
? Math.min(100, 40 + (status.connected_platforms.length * 15))
|
||||
: 70;
|
||||
const power = status && status.active_sessions
|
||||
? Math.min(100, 55 + (status.active_sessions.length * 10))
|
||||
: 87;
|
||||
|
||||
const hero = cssVar("--theme-asset-hero");
|
||||
|
||||
return React.createElement(
|
||||
"div",
|
||||
{
|
||||
style: {
|
||||
padding: "1rem 0.75rem",
|
||||
display: "flex",
|
||||
flexDirection: "column",
|
||||
gap: "1rem",
|
||||
fontFamily: "var(--theme-font-display, sans-serif)",
|
||||
letterSpacing: "0.08em",
|
||||
textTransform: "uppercase",
|
||||
fontSize: "0.65rem",
|
||||
},
|
||||
},
|
||||
// Header line
|
||||
React.createElement(
|
||||
"div",
|
||||
{
|
||||
style: {
|
||||
borderBottom: "1px solid rgba(64,200,255,0.3)",
|
||||
paddingBottom: 8,
|
||||
display: "flex",
|
||||
flexDirection: "column",
|
||||
gap: 2,
|
||||
},
|
||||
},
|
||||
React.createElement("span", { style: { opacity: 0.6 } }, "ms status"),
|
||||
React.createElement("span", { style: { fontWeight: 700, fontSize: "0.85rem" } }, "zgmf-x20a"),
|
||||
React.createElement("span", { style: { opacity: 0.6, fontSize: "0.6rem" } }, "strike freedom"),
|
||||
),
|
||||
// Hero slot — only renders when the theme provides one.
|
||||
hero
|
||||
? React.createElement("div", {
|
||||
style: {
|
||||
width: "100%",
|
||||
aspectRatio: "3 / 4",
|
||||
backgroundImage: hero,
|
||||
backgroundSize: "contain",
|
||||
backgroundPosition: "center",
|
||||
backgroundRepeat: "no-repeat",
|
||||
opacity: 0.85,
|
||||
},
|
||||
"aria-hidden": true,
|
||||
})
|
||||
: React.createElement("div", {
|
||||
style: {
|
||||
width: "100%",
|
||||
aspectRatio: "3 / 4",
|
||||
border: "1px dashed rgba(64,200,255,0.25)",
|
||||
display: "flex",
|
||||
alignItems: "center",
|
||||
justifyContent: "center",
|
||||
fontSize: "0.55rem",
|
||||
opacity: 0.4,
|
||||
},
|
||||
}, "hero slot — set assets.hero in theme"),
|
||||
// Pilot block
|
||||
React.createElement(
|
||||
"div",
|
||||
{
|
||||
style: {
|
||||
borderTop: "1px solid rgba(64,200,255,0.18)",
|
||||
borderBottom: "1px solid rgba(64,200,255,0.18)",
|
||||
padding: "8px 0",
|
||||
display: "flex",
|
||||
flexDirection: "column",
|
||||
gap: 2,
|
||||
},
|
||||
},
|
||||
React.createElement("span", { style: { opacity: 0.5, fontSize: "0.55rem" } }, "pilot"),
|
||||
React.createElement("span", { style: { fontWeight: 700 } }, "hermes agent"),
|
||||
React.createElement("span", { style: { opacity: 0.5, fontSize: "0.55rem" } }, "compass"),
|
||||
),
|
||||
// Telemetry bars
|
||||
React.createElement(TelemetryBar, { label: "energy", value: energy, color: "#ffce3a" }),
|
||||
React.createElement(TelemetryBar, { label: "shield", value: shield, color: "#3fd3ff" }),
|
||||
React.createElement(TelemetryBar, { label: "power", value: power, color: "#ff3a5e" }),
|
||||
// System online
|
||||
React.createElement(
|
||||
"div",
|
||||
{
|
||||
style: {
|
||||
marginTop: 4,
|
||||
padding: "6px 8px",
|
||||
border: "1px solid rgba(74,222,128,0.4)",
|
||||
color: "#4ade80",
|
||||
textAlign: "center",
|
||||
fontWeight: 700,
|
||||
fontSize: "0.6rem",
|
||||
},
|
||||
},
|
||||
status && status.gateway_online ? "system online" : "system offline",
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Header-left: COMPASS crest
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
function HeaderCrestSlot() {
|
||||
const crest = cssVar("--theme-asset-crest");
|
||||
const inner = crest
|
||||
? React.createElement("div", {
|
||||
style: {
|
||||
width: 28,
|
||||
height: 28,
|
||||
backgroundImage: crest,
|
||||
backgroundSize: "contain",
|
||||
backgroundPosition: "center",
|
||||
backgroundRepeat: "no-repeat",
|
||||
},
|
||||
"aria-hidden": true,
|
||||
})
|
||||
: React.createElement(
|
||||
"svg",
|
||||
{
|
||||
width: 28,
|
||||
height: 28,
|
||||
viewBox: "0 0 28 28",
|
||||
fill: "none",
|
||||
stroke: "currentColor",
|
||||
strokeWidth: 1.5,
|
||||
"aria-hidden": true,
|
||||
},
|
||||
React.createElement("path", { d: "M14 2 L26 14 L14 26 L2 14 Z" }),
|
||||
React.createElement("path", { d: "M14 8 L20 14 L14 20 L8 14 Z" }),
|
||||
React.createElement("circle", { cx: 14, cy: 14, r: 2, fill: "currentColor" }),
|
||||
);
|
||||
return React.createElement(
|
||||
"div",
|
||||
{
|
||||
style: {
|
||||
display: "flex",
|
||||
alignItems: "center",
|
||||
paddingLeft: 12,
|
||||
paddingRight: 8,
|
||||
color: "var(--color-accent, #3fd3ff)",
|
||||
},
|
||||
},
|
||||
inner,
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Footer-right: COSMIC ERA tagline
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
function FooterTaglineSlot() {
|
||||
return React.createElement(
|
||||
"span",
|
||||
{
|
||||
style: {
|
||||
fontFamily: "var(--theme-font-display, sans-serif)",
|
||||
fontSize: "0.6rem",
|
||||
letterSpacing: "0.18em",
|
||||
textTransform: "uppercase",
|
||||
opacity: 0.75,
|
||||
mixBlendMode: "plus-lighter",
|
||||
},
|
||||
},
|
||||
"compass hermes systems / cosmic era 71",
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Hidden tab placeholder — tab.hidden=true means this never renders in
|
||||
// the nav, but we still register something sensible in case someone
|
||||
// manually navigates to /strike-freedom-cockpit (e.g. via a bookmark).
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
function HiddenPage() {
|
||||
return React.createElement(
|
||||
"div",
|
||||
{ style: { padding: "2rem", opacity: 0.6, fontSize: "0.8rem" } },
|
||||
"Strike Freedom cockpit is a slot-only plugin — it populates the sidebar, header, and footer instead of showing a tab page.",
|
||||
);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Registration
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
const NAME = "strike-freedom-cockpit";
|
||||
PLUGINS.register(NAME, HiddenPage);
|
||||
PLUGINS.registerSlot(NAME, "sidebar", SidebarSlot);
|
||||
PLUGINS.registerSlot(NAME, "header-left", HeaderCrestSlot);
|
||||
PLUGINS.registerSlot(NAME, "footer-right", FooterTaglineSlot);
|
||||
})();
|
||||
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"name": "strike-freedom-cockpit",
|
||||
"label": "Strike Freedom Cockpit",
|
||||
"description": "MS-STATUS sidebar + header crest for the Strike Freedom theme",
|
||||
"icon": "Shield",
|
||||
"version": "1.0.0",
|
||||
"tab": {
|
||||
"path": "/strike-freedom-cockpit",
|
||||
"position": "end",
|
||||
"hidden": true
|
||||
},
|
||||
"slots": ["sidebar", "header-left", "footer-right"],
|
||||
"entry": "dist/index.js"
|
||||
}
|
||||
@@ -0,0 +1,126 @@
|
||||
# Strike Freedom — Hermes dashboard theme demo
|
||||
#
|
||||
# Copy this file to ~/.hermes/dashboard-themes/strike-freedom.yaml and
|
||||
# restart the web UI (or hit `/api/dashboard/plugins/rescan`). Pair with
|
||||
# the `strike-freedom-cockpit` plugin (plugins/strike-freedom-cockpit/)
|
||||
# for the full cockpit experience — this theme paints the palette,
|
||||
# chrome, and layout; the plugin supplies the MS-STATUS sidebar + header
|
||||
# crest that the cockpit layout variant reserves space for.
|
||||
#
|
||||
# Demonstrates every theme extension point added alongside the plugin
|
||||
# slot system: palette, typography, layoutVariant, assets, customCSS,
|
||||
# componentStyles, colorOverrides.
|
||||
name: strike-freedom
|
||||
label: "Strike Freedom"
|
||||
description: "Cockpit HUD — deep navy + cyan + gold accents"
|
||||
|
||||
# ------- palette (3-layer) -------
|
||||
palette:
|
||||
background: "#05091a"
|
||||
midground: "#d8f0ff"
|
||||
foreground:
|
||||
hex: "#ffffff"
|
||||
alpha: 0
|
||||
warmGlow: "rgba(255, 199, 55, 0.24)"
|
||||
noiseOpacity: 0.7
|
||||
|
||||
# ------- typography -------
|
||||
typography:
|
||||
fontSans: '"Orbitron", "Eurostile", "Bank Gothic", "Impact", sans-serif'
|
||||
fontMono: '"Share Tech Mono", "JetBrains Mono", ui-monospace, monospace'
|
||||
fontDisplay: '"Orbitron", "Eurostile", "Impact", sans-serif'
|
||||
fontUrl: "https://fonts.googleapis.com/css2?family=Orbitron:wght@400;500;600;700;800&family=Share+Tech+Mono&display=swap"
|
||||
baseSize: "14px"
|
||||
lineHeight: "1.5"
|
||||
letterSpacing: "0.04em"
|
||||
|
||||
# ------- layout -------
|
||||
layout:
|
||||
radius: "0"
|
||||
density: "compact"
|
||||
|
||||
# ``cockpit`` reserves a 260px left rail that the shell renders when the
|
||||
# user is on this theme. A paired plugin populates the rail via the
|
||||
# ``sidebar`` slot; with no plugin the rail shows a placeholder.
|
||||
layoutVariant: cockpit
|
||||
|
||||
# ------- assets -------
|
||||
# Use any URL (https, data:, /dashboard-plugins/...) or a pre-wrapped
|
||||
# ``url(...)``/``linear-gradient(...)`` expression. The shell exposes
|
||||
# each as a CSS var so plugins can read the same imagery.
|
||||
assets:
|
||||
bg: "linear-gradient(140deg, #05091a 0%, #0a1530 55%, #102048 100%)"
|
||||
# Plugin reads --theme-asset-hero / --theme-asset-crest to populate
|
||||
# its sidebar hero render + header crest. Replace these URLs with your
|
||||
# own artwork (copy files into ~/.hermes/dashboard-themes/assets/ and
|
||||
# reference them as /dashboard-themes-assets/strike-freedom/hero.png
|
||||
# once that static route is wired up — for now use inline data URLs or
|
||||
# remote URLs).
|
||||
hero: ""
|
||||
crest: ""
|
||||
|
||||
# ------- component chrome -------
|
||||
# Each bucket's props become CSS vars (--component-<bucket>-<kebab>) that
|
||||
# built-in shell components (Card, header, sidebar, backdrop) consume.
|
||||
componentStyles:
|
||||
card:
|
||||
# Notched corners on the top-left + bottom-right — classic mecha UI.
|
||||
clipPath: "polygon(12px 0, 100% 0, 100% calc(100% - 12px), calc(100% - 12px) 100%, 0 100%, 0 12px)"
|
||||
background: "linear-gradient(180deg, rgba(10, 22, 52, 0.85) 0%, rgba(5, 9, 26, 0.92) 100%)"
|
||||
boxShadow: "inset 0 0 0 1px rgba(64, 200, 255, 0.28), 0 0 18px -6px rgba(64, 200, 255, 0.4)"
|
||||
header:
|
||||
background: "linear-gradient(180deg, rgba(16, 32, 72, 0.95) 0%, rgba(5, 9, 26, 0.9) 100%)"
|
||||
sidebar:
|
||||
background: "linear-gradient(180deg, rgba(8, 18, 42, 0.88) 0%, rgba(5, 9, 26, 0.85) 100%)"
|
||||
tab:
|
||||
clipPath: "polygon(6px 0, 100% 0, calc(100% - 6px) 100%, 0 100%)"
|
||||
backdrop:
|
||||
backgroundSize: "cover"
|
||||
backgroundPosition: "center"
|
||||
fillerOpacity: "1"
|
||||
fillerBlendMode: "normal"
|
||||
|
||||
# ------- color overrides -------
|
||||
colorOverrides:
|
||||
primary: "#ffce3a"
|
||||
primaryForeground: "#05091a"
|
||||
accent: "#3fd3ff"
|
||||
accentForeground: "#05091a"
|
||||
ring: "#3fd3ff"
|
||||
success: "#4ade80"
|
||||
warning: "#ffce3a"
|
||||
destructive: "#ff3a5e"
|
||||
border: "rgba(64, 200, 255, 0.28)"
|
||||
|
||||
# ------- customCSS -------
|
||||
# Raw CSS injected as a scoped <style> tag on theme apply. Use this for
|
||||
# selector-level tweaks componentStyles can't express (pseudo-elements,
|
||||
# animations, media queries). Bounded to 32 KiB per theme.
|
||||
customCSS: |
|
||||
/* Scanline overlay — subtle, only when theme is active. */
|
||||
:root[data-layout-variant="cockpit"] body::before {
|
||||
content: "";
|
||||
position: fixed;
|
||||
inset: 0;
|
||||
pointer-events: none;
|
||||
z-index: 100;
|
||||
background: repeating-linear-gradient(
|
||||
to bottom,
|
||||
transparent 0px,
|
||||
transparent 2px,
|
||||
rgba(64, 200, 255, 0.035) 3px,
|
||||
rgba(64, 200, 255, 0.035) 4px
|
||||
);
|
||||
mix-blend-mode: screen;
|
||||
}
|
||||
|
||||
/* Chevron pips on card corners. */
|
||||
[data-layout-variant="cockpit"] .border-border::before,
|
||||
[data-layout-variant="cockpit"] .border-border::after {
|
||||
content: "";
|
||||
position: absolute;
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
border: 1px solid rgba(64, 200, 255, 0.55);
|
||||
pointer-events: none;
|
||||
}
|
||||
+1
-1
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "hermes-agent"
|
||||
version = "0.10.0"
|
||||
version = "0.11.0"
|
||||
description = "The self-improving AI agent — creates skills from experience, improves them during use, and runs anywhere"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
|
||||
+35
-2
@@ -262,6 +262,7 @@ _MAX_TOOL_WORKERS = 8
|
||||
_DESTRUCTIVE_PATTERNS = re.compile(
|
||||
r"""(?:^|\s|&&|\|\||;|`)(?:
|
||||
rm\s|rmdir\s|
|
||||
cp\s|install\s|
|
||||
mv\s|
|
||||
sed\s+-i|
|
||||
truncate\s|
|
||||
@@ -1548,6 +1549,17 @@ class AIAgent:
|
||||
_agent_section = {}
|
||||
self._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
|
||||
|
||||
# App-level API retry count (wraps each model API call). Default 3,
|
||||
# overridable via agent.api_max_retries in config.yaml. See #11616.
|
||||
try:
|
||||
_raw_api_retries = _agent_section.get("api_max_retries", 3)
|
||||
_api_retries = int(_raw_api_retries)
|
||||
if _api_retries < 1:
|
||||
_api_retries = 1 # 1 = no retry (single attempt)
|
||||
except (TypeError, ValueError):
|
||||
_api_retries = 3
|
||||
self._api_max_retries = _api_retries
|
||||
|
||||
# Initialize context compressor for automatic context management
|
||||
# Compresses conversation when approaching model's context limit
|
||||
# Configuration via config.yaml (compression section)
|
||||
@@ -9259,7 +9271,7 @@ class AIAgent:
|
||||
|
||||
api_start_time = time.time()
|
||||
retry_count = 0
|
||||
max_retries = 3
|
||||
max_retries = self._api_max_retries
|
||||
primary_recovery_attempted = False
|
||||
max_compression_attempts = 3
|
||||
codex_auth_retry_attempted=False
|
||||
@@ -10563,9 +10575,30 @@ class AIAgent:
|
||||
# Error is about the INPUT being too large — reduce context_length.
|
||||
# Try to parse the actual limit from the error message
|
||||
parsed_limit = parse_context_limit_from_error(error_msg)
|
||||
_provider_lower = (getattr(self, "provider", "") or "").lower()
|
||||
_base_lower = (getattr(self, "base_url", "") or "").rstrip("/").lower()
|
||||
is_minimax_provider = (
|
||||
_provider_lower in {"minimax", "minimax-cn"}
|
||||
or _base_lower.startswith((
|
||||
"https://api.minimax.io/anthropic",
|
||||
"https://api.minimaxi.com/anthropic",
|
||||
))
|
||||
)
|
||||
minimax_delta_only_overflow = (
|
||||
is_minimax_provider
|
||||
and parsed_limit is None
|
||||
and "context window exceeds limit (" in error_msg
|
||||
)
|
||||
if parsed_limit and parsed_limit < old_ctx:
|
||||
new_ctx = parsed_limit
|
||||
self._vprint(f"{self.log_prefix}⚠️ Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True)
|
||||
self._vprint(f"{self.log_prefix}Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True)
|
||||
elif minimax_delta_only_overflow:
|
||||
new_ctx = old_ctx
|
||||
self._vprint(
|
||||
f"{self.log_prefix}Provider reported overflow amount only; "
|
||||
f"keeping context_length at {old_ctx:,} tokens and compressing.",
|
||||
force=True,
|
||||
)
|
||||
else:
|
||||
# Step down to the next probe tier
|
||||
new_ctx = get_next_probe_tier(old_ctx)
|
||||
|
||||
@@ -44,6 +44,9 @@ AUTHOR_MAP = {
|
||||
"teknium@nousresearch.com": "teknium1",
|
||||
"127238744+teknium1@users.noreply.github.com": "teknium1",
|
||||
"343873859@qq.com": "DrStrangerUJN",
|
||||
"jefferson@heimdallstrategy.com": "Mind-Dragon",
|
||||
"130918800+devorun@users.noreply.github.com": "devorun",
|
||||
"maks.mir@yahoo.com": "say8hi",
|
||||
# contributors (from noreply pattern)
|
||||
"david.vv@icloud.com": "davidvv",
|
||||
"wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
|
||||
@@ -164,7 +167,10 @@ AUTHOR_MAP = {
|
||||
"socrates1024@gmail.com": "socrates1024",
|
||||
"seanalt555@gmail.com": "Salt-555",
|
||||
"satelerd@gmail.com": "satelerd",
|
||||
"dan@danlynn.com": "danklynn",
|
||||
"mattmaximo@hotmail.com": "MattMaximo",
|
||||
"numman.ali@gmail.com": "nummanali",
|
||||
"rohithsaimidigudla@gmail.com": "whitehatjr1001",
|
||||
"0xNyk@users.noreply.github.com": "0xNyk",
|
||||
"0xnykcd@googlemail.com": "0xNyk",
|
||||
"buraysandro9@gmail.com": "buray",
|
||||
@@ -409,6 +415,36 @@ AUTHOR_MAP = {
|
||||
"caliberoviv@gmail.com": "vivganes",
|
||||
"michaelfackerell@gmail.com": "MikeFac",
|
||||
"18024642@qq.com": "GuyCui",
|
||||
"eumael.mkt@gmail.com": "maelrx",
|
||||
# v0.11.0 additions
|
||||
"benbarclay@gmail.com": "benbarclay",
|
||||
"lijiawen@umich.edu": "Jiawen-lee",
|
||||
"oleksiy@kovyrin.net": "kovyrin",
|
||||
"kovyrin.claw@gmail.com": "kovyrin",
|
||||
"kaiobarb@gmail.com": "liftaris",
|
||||
"me@arihantsethia.com": "arihantsethia",
|
||||
"zhuofengwang2003@gmail.com": "coekfung",
|
||||
"teknium@noreply.github.com": "teknium1",
|
||||
"2114364329@qq.com": "cuyua9",
|
||||
"2557058999@qq.com": "Disaster-Terminator",
|
||||
"cine.dreamer.one@gmail.com": "LeonSGP43",
|
||||
"leozeli@qq.com": "leozeli",
|
||||
"linlehao@cuhk.edu.cn": "LehaoLin",
|
||||
"liutong@isacas.ac.cn": "I3eg1nner",
|
||||
"peterberthelsen@Peters-MacBook-Air.local": "PeterBerthelsen",
|
||||
"root@debian.debian": "lengxii",
|
||||
"roque@priveperfumeshn.com": "priveperfumes",
|
||||
"shijianzhi@shijianzhideMacBook-Pro.local": "sjz-ks",
|
||||
"topcheer@me.com": "topcheer",
|
||||
"walli@tencent.com": "walli",
|
||||
"zhuofengwang@tencent.com": "Zhuofeng-Wang",
|
||||
# no-github-match — keep as display names
|
||||
"clio-agent@sisyphuslabs.ai": "Sisyphus",
|
||||
"marco@rutimka.de": "Marco Rutsch",
|
||||
"paul@gamma.app": "Paul Bergeron",
|
||||
"zhangxicen@example.com": "zhangxicen",
|
||||
"codex@openai.invalid": "teknium1",
|
||||
"screenmachine@gmail.com": "teknium1",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,196 @@
|
||||
---
|
||||
name: design-md
|
||||
description: Author, validate, diff, and export DESIGN.md files — Google's open-source format spec that gives coding agents a persistent, structured understanding of a design system (tokens + rationale in one file). Use when building a design system, porting style rules between projects, generating UI with consistent brand, or auditing accessibility/contrast.
|
||||
version: 1.0.0
|
||||
author: Hermes Agent
|
||||
license: MIT
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [design, design-system, tokens, ui, accessibility, wcag, tailwind, dtcg, google]
|
||||
related_skills: [popular-web-designs, excalidraw, architecture-diagram]
|
||||
---
|
||||
|
||||
# DESIGN.md Skill
|
||||
|
||||
DESIGN.md is Google's open spec (Apache-2.0, `google-labs-code/design.md`) for
|
||||
describing a visual identity to coding agents. One file combines:
|
||||
|
||||
- **YAML front matter** — machine-readable design tokens (normative values)
|
||||
- **Markdown body** — human-readable rationale, organized into canonical sections
|
||||
|
||||
Tokens give exact values. Prose tells agents *why* those values exist and how to
|
||||
apply them. The CLI (`npx @google/design.md`) lints structure + WCAG contrast,
|
||||
diffs versions for regressions, and exports to Tailwind or W3C DTCG JSON.
|
||||
|
||||
## When to use this skill
|
||||
|
||||
- User asks for a DESIGN.md file, design tokens, or a design system spec
|
||||
- User wants consistent UI/brand across multiple projects or tools
|
||||
- User pastes an existing DESIGN.md and asks to lint, diff, export, or extend it
|
||||
- User asks to port a style guide into a format agents can consume
|
||||
- User wants contrast / WCAG accessibility validation on their color palette
|
||||
|
||||
For purely visual inspiration or layout examples, use `popular-web-designs`
|
||||
instead. This skill is for the *formal spec file* itself.
|
||||
|
||||
## File anatomy
|
||||
|
||||
```md
|
||||
---
|
||||
version: alpha
|
||||
name: Heritage
|
||||
description: Architectural minimalism meets journalistic gravitas.
|
||||
colors:
|
||||
primary: "#1A1C1E"
|
||||
secondary: "#6C7278"
|
||||
tertiary: "#B8422E"
|
||||
neutral: "#F7F5F2"
|
||||
typography:
|
||||
h1:
|
||||
fontFamily: Public Sans
|
||||
fontSize: 3rem
|
||||
fontWeight: 700
|
||||
lineHeight: 1.1
|
||||
letterSpacing: "-0.02em"
|
||||
body-md:
|
||||
fontFamily: Public Sans
|
||||
fontSize: 1rem
|
||||
rounded:
|
||||
sm: 4px
|
||||
md: 8px
|
||||
lg: 16px
|
||||
spacing:
|
||||
sm: 8px
|
||||
md: 16px
|
||||
lg: 24px
|
||||
components:
|
||||
button-primary:
|
||||
backgroundColor: "{colors.tertiary}"
|
||||
textColor: "#FFFFFF"
|
||||
rounded: "{rounded.sm}"
|
||||
padding: 12px
|
||||
button-primary-hover:
|
||||
backgroundColor: "{colors.primary}"
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Architectural Minimalism meets Journalistic Gravitas...
|
||||
|
||||
## Colors
|
||||
|
||||
- **Primary (#1A1C1E):** Deep ink for headlines and core text.
|
||||
- **Tertiary (#B8422E):** "Boston Clay" — the sole driver for interaction.
|
||||
|
||||
## Typography
|
||||
|
||||
Public Sans for everything except small all-caps labels...
|
||||
|
||||
## Components
|
||||
|
||||
`button-primary` is the only high-emphasis action on a page...
|
||||
```
|
||||
|
||||
## Token types
|
||||
|
||||
| Type | Format | Example |
|
||||
|------|--------|---------|
|
||||
| Color | `#` + hex (sRGB) | `"#1A1C1E"` |
|
||||
| Dimension | number + unit (`px`, `em`, `rem`) | `48px`, `-0.02em` |
|
||||
| Token reference | `{path.to.token}` | `{colors.primary}` |
|
||||
| Typography | object with `fontFamily`, `fontSize`, `fontWeight`, `lineHeight`, `letterSpacing`, `fontFeature`, `fontVariation` | see above |
|
||||
|
||||
Component property whitelist: `backgroundColor`, `textColor`, `typography`,
|
||||
`rounded`, `padding`, `size`, `height`, `width`. Variants (hover, active,
|
||||
pressed) are **separate component entries** with related key names
|
||||
(`button-primary-hover`), not nested.
|
||||
|
||||
## Canonical section order
|
||||
|
||||
Sections are optional, but present ones MUST appear in this order. Duplicate
|
||||
headings reject the file.
|
||||
|
||||
1. Overview (alias: Brand & Style)
|
||||
2. Colors
|
||||
3. Typography
|
||||
4. Layout (alias: Layout & Spacing)
|
||||
5. Elevation & Depth (alias: Elevation)
|
||||
6. Shapes
|
||||
7. Components
|
||||
8. Do's and Don'ts
|
||||
|
||||
Unknown sections are preserved, not errored. Unknown token names are accepted
|
||||
if the value type is valid. Unknown component properties produce a warning.
|
||||
|
||||
## Workflow: authoring a new DESIGN.md
|
||||
|
||||
1. **Ask the user** (or infer) the brand tone, accent color, and typography
|
||||
direction. If they provided a site, image, or vibe, translate it to the
|
||||
token shape above.
|
||||
2. **Write `DESIGN.md`** in their project root using `write_file`. Always
|
||||
include `name:` and `colors:`; other sections optional but encouraged.
|
||||
3. **Use token references** (`{colors.primary}`) in the `components:` section
|
||||
instead of re-typing hex values. Keeps the palette single-source.
|
||||
4. **Lint it** (see below). Fix any broken references or WCAG failures
|
||||
before returning.
|
||||
5. **If the user has an existing project**, also write Tailwind or DTCG
|
||||
exports next to the file (`tailwind.theme.json`, `tokens.json`).
|
||||
|
||||
## Workflow: lint / diff / export
|
||||
|
||||
The CLI is `@google/design.md` (Node). Use `npx` — no global install needed.
|
||||
|
||||
```bash
|
||||
# Validate structure + token references + WCAG contrast
|
||||
npx -y @google/design.md lint DESIGN.md
|
||||
|
||||
# Compare two versions, fail on regression (exit 1 = regression)
|
||||
npx -y @google/design.md diff DESIGN.md DESIGN-v2.md
|
||||
|
||||
# Export to Tailwind theme JSON
|
||||
npx -y @google/design.md export --format tailwind DESIGN.md > tailwind.theme.json
|
||||
|
||||
# Export to W3C DTCG (Design Tokens Format Module) JSON
|
||||
npx -y @google/design.md export --format dtcg DESIGN.md > tokens.json
|
||||
|
||||
# Print the spec itself — useful when injecting into an agent prompt
|
||||
npx -y @google/design.md spec --rules-only --format json
|
||||
```
|
||||
|
||||
All commands accept `-` for stdin. `lint` returns exit 1 on errors. Use the
|
||||
`--format json` flag and parse the output if you need to report findings
|
||||
structurally.
|
||||
|
||||
### Lint rule reference (what the 7 rules catch)
|
||||
|
||||
- `broken-ref` (error) — `{colors.missing}` points at a non-existent token
|
||||
- `duplicate-section` (error) — same `## Heading` appears twice
|
||||
- `invalid-color`, `invalid-dimension`, `invalid-typography` (error)
|
||||
- `wcag-contrast` (warning/info) — component `textColor` vs `backgroundColor`
|
||||
ratio against WCAG AA (4.5:1) and AAA (7:1)
|
||||
- `unknown-component-property` (warning) — outside the whitelist above
|
||||
|
||||
When the user cares about accessibility, call this out explicitly in your
|
||||
summary — WCAG findings are the most load-bearing reason to use the CLI.
|
||||
|
||||
## Pitfalls
|
||||
|
||||
- **Don't nest component variants.** `button-primary.hover` is wrong;
|
||||
`button-primary-hover` as a sibling key is right.
|
||||
- **Hex colors must be quoted strings.** YAML will otherwise choke on `#` or
|
||||
truncate values like `#1A1C1E` oddly.
|
||||
- **Negative dimensions need quotes too.** `letterSpacing: -0.02em` parses as
|
||||
a YAML flow — write `letterSpacing: "-0.02em"`.
|
||||
- **Section order is enforced.** If the user gives you prose in a random order,
|
||||
reorder it to match the canonical list before saving.
|
||||
- **`version: alpha` is the current spec version** (as of Apr 2026). The spec
|
||||
is marked alpha — watch for breaking changes.
|
||||
- **Token references resolve by dotted path.** `{colors.primary}` works;
|
||||
`{primary}` does not.
|
||||
|
||||
## Spec source of truth
|
||||
|
||||
- Repo: https://github.com/google-labs-code/design.md (Apache-2.0)
|
||||
- CLI: `@google/design.md` on npm
|
||||
- License of generated DESIGN.md files: whatever the user's project uses;
|
||||
the spec itself is Apache-2.0.
|
||||
@@ -0,0 +1,99 @@
|
||||
---
|
||||
version: alpha
|
||||
name: MyBrand
|
||||
description: One-sentence description of the visual identity.
|
||||
colors:
|
||||
primary: "#0F172A"
|
||||
secondary: "#64748B"
|
||||
tertiary: "#2563EB"
|
||||
neutral: "#F8FAFC"
|
||||
on-primary: "#FFFFFF"
|
||||
on-tertiary: "#FFFFFF"
|
||||
typography:
|
||||
h1:
|
||||
fontFamily: Inter
|
||||
fontSize: 3rem
|
||||
fontWeight: 700
|
||||
lineHeight: 1.1
|
||||
letterSpacing: "-0.02em"
|
||||
h2:
|
||||
fontFamily: Inter
|
||||
fontSize: 2rem
|
||||
fontWeight: 600
|
||||
lineHeight: 1.2
|
||||
body-md:
|
||||
fontFamily: Inter
|
||||
fontSize: 1rem
|
||||
lineHeight: 1.5
|
||||
label-caps:
|
||||
fontFamily: Inter
|
||||
fontSize: 0.75rem
|
||||
fontWeight: 600
|
||||
letterSpacing: "0.08em"
|
||||
rounded:
|
||||
sm: 4px
|
||||
md: 8px
|
||||
lg: 16px
|
||||
full: 9999px
|
||||
spacing:
|
||||
xs: 4px
|
||||
sm: 8px
|
||||
md: 16px
|
||||
lg: 24px
|
||||
xl: 48px
|
||||
components:
|
||||
button-primary:
|
||||
backgroundColor: "{colors.tertiary}"
|
||||
textColor: "{colors.on-tertiary}"
|
||||
rounded: "{rounded.sm}"
|
||||
padding: 12px
|
||||
button-primary-hover:
|
||||
backgroundColor: "{colors.primary}"
|
||||
textColor: "{colors.on-primary}"
|
||||
card:
|
||||
backgroundColor: "{colors.neutral}"
|
||||
textColor: "{colors.primary}"
|
||||
rounded: "{rounded.md}"
|
||||
padding: 24px
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Describe the voice and feel of the brand in one or two paragraphs. What mood
|
||||
does it evoke? What emotional response should a user have on first impression?
|
||||
|
||||
## Colors
|
||||
|
||||
- **Primary ({colors.primary}):** Core text, headlines, high-emphasis surfaces.
|
||||
- **Secondary ({colors.secondary}):** Supporting text, borders, metadata.
|
||||
- **Tertiary ({colors.tertiary}):** Interaction driver — buttons, links,
|
||||
selected states. Use sparingly to preserve its signal.
|
||||
- **Neutral ({colors.neutral}):** Page background and surface fills.
|
||||
|
||||
## Typography
|
||||
|
||||
Inter for everything. Weight and size carry hierarchy, not font family. Tight
|
||||
letter-spacing on display sizes; default tracking on body.
|
||||
|
||||
## Layout
|
||||
|
||||
Spacing scale is a 4px baseline. Use `md` (16px) for intra-component gaps,
|
||||
`lg` (24px) for inter-component gaps, `xl` (48px) for section breaks.
|
||||
|
||||
## Shapes
|
||||
|
||||
Rounded corners are modest — `sm` on interactive elements, `md` on cards.
|
||||
`full` is reserved for avatars and pill badges.
|
||||
|
||||
## Components
|
||||
|
||||
- `button-primary` is the only high-emphasis action per screen.
|
||||
- `card` is the default surface for grouped content. No shadow by default.
|
||||
|
||||
## Do's and Don'ts
|
||||
|
||||
- **Do** use token references (`{colors.primary}`) instead of literal hex in
|
||||
component definitions.
|
||||
- **Don't** introduce colors outside the palette — extend the palette first.
|
||||
- **Don't** nest component variants. `button-primary-hover` is a sibling,
|
||||
not a child.
|
||||
@@ -447,6 +447,34 @@ class TestExplicitProviderRouting:
|
||||
adapter = client.chat.completions
|
||||
assert adapter._is_oauth is False
|
||||
|
||||
def test_explicit_openrouter_pool_exhausted_logs_precise_warning(self, monkeypatch, caplog):
|
||||
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
||||
with patch("agent.auxiliary_client._select_pool_entry", return_value=(True, None)):
|
||||
with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
|
||||
client, model = resolve_provider_client("openrouter")
|
||||
assert client is None
|
||||
assert model is None
|
||||
assert any(
|
||||
"credential pool has no usable entries" in record.message
|
||||
for record in caplog.records
|
||||
)
|
||||
assert not any(
|
||||
"OPENROUTER_API_KEY not set" in record.message
|
||||
for record in caplog.records
|
||||
)
|
||||
|
||||
def test_explicit_openrouter_missing_env_keeps_not_set_warning(self, monkeypatch, caplog):
|
||||
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
||||
with patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
|
||||
with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
|
||||
client, model = resolve_provider_client("openrouter")
|
||||
assert client is None
|
||||
assert model is None
|
||||
assert any(
|
||||
"OPENROUTER_API_KEY not set" in record.message
|
||||
for record in caplog.records
|
||||
)
|
||||
|
||||
class TestGetTextAuxiliaryClient:
|
||||
"""Test the full resolution chain for get_text_auxiliary_client."""
|
||||
|
||||
|
||||
@@ -245,7 +245,7 @@ class TestResolveVisionMainFirst:
|
||||
assert model == "xiaomi/mimo-v2-omni"
|
||||
|
||||
def test_exotic_provider_with_vision_override_preserved(self):
|
||||
"""xiaomi → mimo-v2-omni override still wins over main_model."""
|
||||
"""xiaomi → mimo-v2.5 override still wins over main_model."""
|
||||
with patch(
|
||||
"agent.auxiliary_client._read_main_provider", return_value="xiaomi",
|
||||
), patch(
|
||||
@@ -257,15 +257,15 @@ class TestResolveVisionMainFirst:
|
||||
"agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("auto", None, None, None, None),
|
||||
):
|
||||
mock_resolve.return_value = (MagicMock(), "mimo-v2-omni")
|
||||
mock_resolve.return_value = (MagicMock(), "mimo-v2.5")
|
||||
|
||||
from agent.auxiliary_client import resolve_vision_provider_client
|
||||
|
||||
provider, client, model = resolve_vision_provider_client()
|
||||
|
||||
assert provider == "xiaomi"
|
||||
# Should use mimo-v2-omni (vision override), not mimo-v2-pro (text main)
|
||||
assert mock_resolve.call_args.args[1] == "mimo-v2-omni"
|
||||
# Should use mimo-v2.5 (vision override), not mimo-v2-pro (text main)
|
||||
assert mock_resolve.call_args.args[1] == "mimo-v2.5"
|
||||
|
||||
def test_main_unavailable_vision_falls_through_to_aggregators(self):
|
||||
"""Main provider fails → fall back to OpenRouter/Nous strict backends."""
|
||||
|
||||
@@ -333,66 +333,6 @@ def test_mark_exhausted_and_rotate_persists_status(tmp_path, monkeypatch):
|
||||
assert persisted["last_error_code"] == 402
|
||||
|
||||
|
||||
def test_try_refresh_current_updates_only_current_entry(tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
_write_auth_store(
|
||||
tmp_path,
|
||||
{
|
||||
"version": 1,
|
||||
"credential_pool": {
|
||||
"openai-codex": [
|
||||
{
|
||||
"id": "cred-1",
|
||||
"label": "primary",
|
||||
"auth_type": "oauth",
|
||||
"priority": 0,
|
||||
"source": "device_code",
|
||||
"access_token": "access-old",
|
||||
"refresh_token": "refresh-old",
|
||||
"base_url": "https://chatgpt.com/backend-api/codex",
|
||||
},
|
||||
{
|
||||
"id": "cred-2",
|
||||
"label": "secondary",
|
||||
"auth_type": "oauth",
|
||||
"priority": 1,
|
||||
"source": "device_code",
|
||||
"access_token": "access-other",
|
||||
"refresh_token": "refresh-other",
|
||||
"base_url": "https://chatgpt.com/backend-api/codex",
|
||||
},
|
||||
]
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
from agent.credential_pool import load_pool
|
||||
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.auth.refresh_codex_oauth_pure",
|
||||
lambda access_token, refresh_token, timeout_seconds=20.0: {
|
||||
"access_token": "access-new",
|
||||
"refresh_token": "refresh-new",
|
||||
},
|
||||
)
|
||||
|
||||
pool = load_pool("openai-codex")
|
||||
current = pool.select()
|
||||
assert current.id == "cred-1"
|
||||
|
||||
refreshed = pool.try_refresh_current()
|
||||
|
||||
assert refreshed is not None
|
||||
assert refreshed.access_token == "access-new"
|
||||
|
||||
auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
|
||||
primary, secondary = auth_payload["credential_pool"]["openai-codex"]
|
||||
assert primary["access_token"] == "access-new"
|
||||
assert primary["refresh_token"] == "refresh-new"
|
||||
assert secondary["access_token"] == "access-other"
|
||||
assert secondary["refresh_token"] == "refresh-other"
|
||||
|
||||
|
||||
def test_load_pool_seeds_env_api_key(tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-seeded")
|
||||
|
||||
@@ -56,6 +56,7 @@ class TestFailoverReason:
|
||||
"overloaded", "server_error", "timeout",
|
||||
"context_overflow", "payload_too_large",
|
||||
"model_not_found", "format_error",
|
||||
"provider_policy_blocked",
|
||||
"thinking_signature", "long_context_tier", "unknown",
|
||||
}
|
||||
actual = {r.value for r in FailoverReason}
|
||||
@@ -308,6 +309,59 @@ class TestClassifyApiError:
|
||||
assert result.retryable is True
|
||||
assert result.should_fallback is False
|
||||
|
||||
# ── Provider policy-block (OpenRouter privacy/guardrail) ──
|
||||
|
||||
def test_404_openrouter_policy_blocked(self):
|
||||
# Real OpenRouter error when the user's account privacy setting
|
||||
# excludes the only endpoint serving a model (e.g. DeepSeek V4 Pro
|
||||
# which is hosted only by DeepSeek, and their endpoint may log
|
||||
# inputs). Must NOT classify as model_not_found — the model
|
||||
# exists, falling back won't help (same account setting applies),
|
||||
# and the error body already tells the user where to fix it.
|
||||
e = MockAPIError(
|
||||
"No endpoints available matching your guardrail restrictions "
|
||||
"and data policy. Configure: https://openrouter.ai/settings/privacy",
|
||||
status_code=404,
|
||||
)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.provider_policy_blocked
|
||||
assert result.retryable is False
|
||||
assert result.should_fallback is False
|
||||
|
||||
def test_400_openrouter_policy_blocked(self):
|
||||
# Defense-in-depth: if OpenRouter ever returns this as 400 instead
|
||||
# of 404, still classify it distinctly rather than as format_error
|
||||
# or model_not_found.
|
||||
e = MockAPIError(
|
||||
"No endpoints available matching your data policy",
|
||||
status_code=400,
|
||||
)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.provider_policy_blocked
|
||||
assert result.retryable is False
|
||||
assert result.should_fallback is False
|
||||
|
||||
def test_message_only_openrouter_policy_blocked(self):
|
||||
# No status code — classifier should still catch the fingerprint
|
||||
# via the message-pattern fallback.
|
||||
e = Exception(
|
||||
"No endpoints available matching your guardrail restrictions "
|
||||
"and data policy"
|
||||
)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.provider_policy_blocked
|
||||
|
||||
def test_404_model_not_found_still_works(self):
|
||||
# Regression guard: the new policy-block check must not swallow
|
||||
# genuine model_not_found 404s.
|
||||
e = MockAPIError(
|
||||
"openrouter/nonexistent-model is not a valid model ID",
|
||||
status_code=404,
|
||||
)
|
||||
result = classify_api_error(e)
|
||||
assert result.reason == FailoverReason.model_not_found
|
||||
assert result.should_fallback is True
|
||||
|
||||
# ── Payload too large ──
|
||||
|
||||
def test_413_payload_too_large(self):
|
||||
|
||||
@@ -200,6 +200,126 @@ class TestDefaultContextLengths:
|
||||
assert len(DEFAULT_CONTEXT_LENGTHS) >= 10
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Codex OAuth context-window resolution (provider="openai-codex")
|
||||
# =========================================================================
|
||||
|
||||
class TestCodexOAuthContextLength:
|
||||
"""ChatGPT Codex OAuth imposes lower context limits than the direct
|
||||
OpenAI API for the same slugs. Verified Apr 2026 via live probe of
|
||||
chatgpt.com/backend-api/codex/models: every model returns 272k, while
|
||||
models.dev reports 1.05M for gpt-5.5/gpt-5.4 and 400k for the rest.
|
||||
"""
|
||||
|
||||
def setup_method(self):
|
||||
import agent.model_metadata as mm
|
||||
mm._codex_oauth_context_cache = {}
|
||||
mm._codex_oauth_context_cache_time = 0.0
|
||||
|
||||
def test_fallback_table_used_without_token(self):
|
||||
"""With no access token, the hardcoded Codex fallback table wins
|
||||
over models.dev (which reports 1.05M for gpt-5.5 but Codex is 272k).
|
||||
"""
|
||||
from agent.model_metadata import get_model_context_length
|
||||
|
||||
with patch("agent.model_metadata.get_cached_context_length", return_value=None), \
|
||||
patch("agent.model_metadata.save_context_length"):
|
||||
for model in (
|
||||
"gpt-5.5",
|
||||
"gpt-5.4",
|
||||
"gpt-5.4-mini",
|
||||
"gpt-5.3-codex",
|
||||
"gpt-5.2-codex",
|
||||
"gpt-5.1-codex-max",
|
||||
"gpt-5.1-codex-mini",
|
||||
):
|
||||
ctx = get_model_context_length(
|
||||
model=model,
|
||||
base_url="https://chatgpt.com/backend-api/codex",
|
||||
api_key="",
|
||||
provider="openai-codex",
|
||||
)
|
||||
assert ctx == 272_000, (
|
||||
f"Codex {model}: expected 272000 fallback, got {ctx} "
|
||||
"(models.dev leakage?)"
|
||||
)
|
||||
|
||||
def test_live_probe_overrides_fallback(self):
|
||||
"""When a token is provided, the live /models probe is preferred
|
||||
and its context_window drives the result."""
|
||||
from agent.model_metadata import get_model_context_length
|
||||
|
||||
fake_response = MagicMock()
|
||||
fake_response.status_code = 200
|
||||
fake_response.json.return_value = {
|
||||
"models": [
|
||||
{"slug": "gpt-5.5", "context_window": 300_000},
|
||||
{"slug": "gpt-5.4", "context_window": 400_000},
|
||||
]
|
||||
}
|
||||
|
||||
with patch("agent.model_metadata.requests.get", return_value=fake_response), \
|
||||
patch("agent.model_metadata.get_cached_context_length", return_value=None), \
|
||||
patch("agent.model_metadata.save_context_length"):
|
||||
ctx_55 = get_model_context_length(
|
||||
model="gpt-5.5",
|
||||
base_url="https://chatgpt.com/backend-api/codex",
|
||||
api_key="fake-token",
|
||||
provider="openai-codex",
|
||||
)
|
||||
ctx_54 = get_model_context_length(
|
||||
model="gpt-5.4",
|
||||
base_url="https://chatgpt.com/backend-api/codex",
|
||||
api_key="fake-token",
|
||||
provider="openai-codex",
|
||||
)
|
||||
assert ctx_55 == 300_000
|
||||
assert ctx_54 == 400_000
|
||||
|
||||
def test_probe_failure_falls_back_to_hardcoded(self):
|
||||
"""If the probe fails (non-200 / network error), we still return
|
||||
the hardcoded 272k rather than leaking through to models.dev 1.05M."""
|
||||
from agent.model_metadata import get_model_context_length
|
||||
|
||||
fake_response = MagicMock()
|
||||
fake_response.status_code = 401
|
||||
fake_response.json.return_value = {}
|
||||
|
||||
with patch("agent.model_metadata.requests.get", return_value=fake_response), \
|
||||
patch("agent.model_metadata.get_cached_context_length", return_value=None), \
|
||||
patch("agent.model_metadata.save_context_length"):
|
||||
ctx = get_model_context_length(
|
||||
model="gpt-5.5",
|
||||
base_url="https://chatgpt.com/backend-api/codex",
|
||||
api_key="expired-token",
|
||||
provider="openai-codex",
|
||||
)
|
||||
assert ctx == 272_000
|
||||
|
||||
def test_non_codex_providers_unaffected(self):
|
||||
"""Resolving gpt-5.5 on non-Codex providers must NOT use the Codex
|
||||
272k override — OpenRouter / direct OpenAI API have different limits.
|
||||
"""
|
||||
from agent.model_metadata import get_model_context_length
|
||||
|
||||
# OpenRouter — should hit its own catalog path first; when mocked
|
||||
# empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (400k).
|
||||
with patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
|
||||
patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
|
||||
patch("agent.model_metadata.get_cached_context_length", return_value=None), \
|
||||
patch("agent.models_dev.lookup_models_dev_context", return_value=None):
|
||||
ctx = get_model_context_length(
|
||||
model="openai/gpt-5.5",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
api_key="",
|
||||
provider="openrouter",
|
||||
)
|
||||
assert ctx == 400_000, (
|
||||
f"Non-Codex gpt-5.5 resolved to {ctx}; Codex 272k override "
|
||||
"leaked outside openai-codex provider"
|
||||
)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# get_model_context_length — resolution order
|
||||
# =========================================================================
|
||||
@@ -621,6 +741,10 @@ class TestParseContextLimitFromError:
|
||||
msg = "Error: context window of 4096 tokens exceeded"
|
||||
assert parse_context_limit_from_error(msg) == 4096
|
||||
|
||||
def test_minimax_delta_only_message_returns_none(self):
|
||||
msg = "invalid params, context window exceeds limit (2013)"
|
||||
assert parse_context_limit_from_error(msg) is None
|
||||
|
||||
def test_completely_unrelated_error(self):
|
||||
assert parse_context_limit_from_error("Invalid API key") is None
|
||||
|
||||
|
||||
@@ -0,0 +1,254 @@
|
||||
"""Tests for Moonshot/Kimi flavored-JSON-Schema sanitizer.
|
||||
|
||||
Moonshot's tool-parameter validator rejects several shapes that the rest of
|
||||
the JSON Schema ecosystem accepts:
|
||||
|
||||
1. Properties without ``type`` — Moonshot requires ``type`` on every node.
|
||||
2. ``type`` at the parent of ``anyOf`` — Moonshot requires it only inside
|
||||
``anyOf`` children.
|
||||
|
||||
These tests cover the repairs applied by ``agent/moonshot_schema.py``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.moonshot_schema import (
|
||||
is_moonshot_model,
|
||||
sanitize_moonshot_tool_parameters,
|
||||
sanitize_moonshot_tools,
|
||||
)
|
||||
|
||||
|
||||
class TestMoonshotModelDetection:
|
||||
"""is_moonshot_model() must match across aggregator prefixes."""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model",
|
||||
[
|
||||
"kimi-k2.6",
|
||||
"kimi-k2-thinking",
|
||||
"moonshotai/Kimi-K2.6",
|
||||
"moonshotai/kimi-k2.6",
|
||||
"nous/moonshotai/kimi-k2.6",
|
||||
"openrouter/moonshotai/kimi-k2-thinking",
|
||||
"MOONSHOTAI/KIMI-K2.6",
|
||||
],
|
||||
)
|
||||
def test_positive_matches(self, model):
|
||||
assert is_moonshot_model(model) is True
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model",
|
||||
[
|
||||
"",
|
||||
None,
|
||||
"anthropic/claude-sonnet-4.6",
|
||||
"openai/gpt-5.4",
|
||||
"google/gemini-3-flash-preview",
|
||||
"deepseek-chat",
|
||||
],
|
||||
)
|
||||
def test_negative_matches(self, model):
|
||||
assert is_moonshot_model(model) is False
|
||||
|
||||
|
||||
class TestMissingTypeFilled:
|
||||
"""Rule 1: every property must carry a type."""
|
||||
|
||||
def test_property_without_type_gets_string(self):
|
||||
params = {
|
||||
"type": "object",
|
||||
"properties": {"query": {"description": "a bare property"}},
|
||||
}
|
||||
out = sanitize_moonshot_tool_parameters(params)
|
||||
assert out["properties"]["query"]["type"] == "string"
|
||||
|
||||
def test_property_with_enum_infers_type_from_first_value(self):
|
||||
params = {
|
||||
"type": "object",
|
||||
"properties": {"flag": {"enum": [True, False]}},
|
||||
}
|
||||
out = sanitize_moonshot_tool_parameters(params)
|
||||
assert out["properties"]["flag"]["type"] == "boolean"
|
||||
|
||||
def test_nested_properties_are_repaired(self):
|
||||
params = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"filter": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"field": {"description": "no type"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
out = sanitize_moonshot_tool_parameters(params)
|
||||
assert out["properties"]["filter"]["properties"]["field"]["type"] == "string"
|
||||
|
||||
def test_array_items_without_type_get_repaired(self):
|
||||
params = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"tags": {
|
||||
"type": "array",
|
||||
"items": {"description": "tag entry"},
|
||||
},
|
||||
},
|
||||
}
|
||||
out = sanitize_moonshot_tool_parameters(params)
|
||||
assert out["properties"]["tags"]["items"]["type"] == "string"
|
||||
|
||||
def test_ref_node_is_not_given_synthetic_type(self):
|
||||
"""$ref nodes should NOT get a synthetic type — the referenced
|
||||
definition supplies it, and Moonshot would reject the conflict."""
|
||||
params = {
|
||||
"type": "object",
|
||||
"properties": {"payload": {"$ref": "#/$defs/Payload"}},
|
||||
"$defs": {"Payload": {"type": "object", "properties": {}}},
|
||||
}
|
||||
out = sanitize_moonshot_tool_parameters(params)
|
||||
assert "type" not in out["properties"]["payload"]
|
||||
assert out["properties"]["payload"]["$ref"] == "#/$defs/Payload"
|
||||
|
||||
|
||||
class TestAnyOfParentType:
|
||||
"""Rule 2: type must not appear at the anyOf parent level."""
|
||||
|
||||
def test_parent_type_stripped_when_anyof_present(self):
|
||||
params = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"from_format": {
|
||||
"type": "string",
|
||||
"anyOf": [
|
||||
{"type": "string"},
|
||||
{"type": "null"},
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
out = sanitize_moonshot_tool_parameters(params)
|
||||
from_format = out["properties"]["from_format"]
|
||||
assert "type" not in from_format
|
||||
assert "anyOf" in from_format
|
||||
|
||||
def test_anyof_children_missing_type_get_filled(self):
|
||||
params = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"value": {
|
||||
"anyOf": [
|
||||
{"type": "string"},
|
||||
{"description": "A typeless option"},
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
out = sanitize_moonshot_tool_parameters(params)
|
||||
children = out["properties"]["value"]["anyOf"]
|
||||
assert children[0]["type"] == "string"
|
||||
assert "type" in children[1]
|
||||
|
||||
|
||||
class TestTopLevelGuarantees:
|
||||
"""The returned top-level schema is always a well-formed object."""
|
||||
|
||||
def test_non_dict_input_returns_empty_object(self):
|
||||
assert sanitize_moonshot_tool_parameters(None) == {"type": "object", "properties": {}}
|
||||
assert sanitize_moonshot_tool_parameters("garbage") == {"type": "object", "properties": {}}
|
||||
assert sanitize_moonshot_tool_parameters([]) == {"type": "object", "properties": {}}
|
||||
|
||||
def test_non_object_top_level_coerced(self):
|
||||
params = {"type": "string"}
|
||||
out = sanitize_moonshot_tool_parameters(params)
|
||||
assert out["type"] == "object"
|
||||
assert "properties" in out
|
||||
|
||||
def test_does_not_mutate_input(self):
|
||||
params = {
|
||||
"type": "object",
|
||||
"properties": {"q": {"description": "no type"}},
|
||||
}
|
||||
snapshot = {
|
||||
"type": params["type"],
|
||||
"properties": {"q": dict(params["properties"]["q"])},
|
||||
}
|
||||
sanitize_moonshot_tool_parameters(params)
|
||||
assert params["type"] == snapshot["type"]
|
||||
assert "type" not in params["properties"]["q"]
|
||||
|
||||
|
||||
class TestToolListSanitizer:
|
||||
"""sanitize_moonshot_tools() walks an OpenAI-format tool list."""
|
||||
|
||||
def test_applies_per_tool(self):
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "search",
|
||||
"description": "Search",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {"q": {"description": "query"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "noop",
|
||||
"description": "Does nothing",
|
||||
"parameters": {"type": "object", "properties": {}},
|
||||
},
|
||||
},
|
||||
]
|
||||
out = sanitize_moonshot_tools(tools)
|
||||
assert out[0]["function"]["parameters"]["properties"]["q"]["type"] == "string"
|
||||
# Second tool already clean — should be structurally equivalent
|
||||
assert out[1]["function"]["parameters"] == {"type": "object", "properties": {}}
|
||||
|
||||
def test_empty_list_is_passthrough(self):
|
||||
assert sanitize_moonshot_tools([]) == []
|
||||
assert sanitize_moonshot_tools(None) is None
|
||||
|
||||
def test_skips_malformed_entries(self):
|
||||
"""Entries without a function dict are passed through untouched."""
|
||||
tools = [{"type": "function"}, {"not": "a tool"}]
|
||||
out = sanitize_moonshot_tools(tools)
|
||||
assert out == tools
|
||||
|
||||
|
||||
class TestRealWorldMCPShape:
|
||||
"""End-to-end: a realistic MCP-style schema that used to 400 on Moonshot."""
|
||||
|
||||
def test_combined_rewrites(self):
|
||||
# Shape: missing type on a property, anyOf with parent type, array
|
||||
# items without type — all in one tool.
|
||||
params = {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {"description": "search text"},
|
||||
"filter": {
|
||||
"type": "string",
|
||||
"anyOf": [
|
||||
{"type": "string"},
|
||||
{"type": "null"},
|
||||
],
|
||||
},
|
||||
"tags": {
|
||||
"type": "array",
|
||||
"items": {"description": "tag"},
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
}
|
||||
out = sanitize_moonshot_tool_parameters(params)
|
||||
assert out["properties"]["query"]["type"] == "string"
|
||||
assert "type" not in out["properties"]["filter"]
|
||||
assert out["properties"]["filter"]["anyOf"][0]["type"] == "string"
|
||||
assert out["properties"]["tags"]["items"]["type"] == "string"
|
||||
assert out["required"] == ["query"]
|
||||
@@ -38,6 +38,18 @@ description: Description for {name}.
|
||||
return skill_dir
|
||||
|
||||
|
||||
def _symlink_category(skills_dir: Path, linked_root: Path, category: str) -> Path:
|
||||
"""Create a category symlink under skills_dir pointing outside the tree."""
|
||||
external_category = linked_root / category
|
||||
external_category.mkdir(parents=True, exist_ok=True)
|
||||
symlink_path = skills_dir / category
|
||||
try:
|
||||
symlink_path.symlink_to(external_category, target_is_directory=True)
|
||||
except (OSError, NotImplementedError) as exc:
|
||||
pytest.skip(f"symlinks unavailable in test environment: {exc}")
|
||||
return external_category
|
||||
|
||||
|
||||
class TestScanSkillCommands:
|
||||
def test_finds_skills(self, tmp_path):
|
||||
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
|
||||
@@ -101,6 +113,20 @@ class TestScanSkillCommands:
|
||||
assert "/enabled-skill" in result
|
||||
assert "/disabled-skill" not in result
|
||||
|
||||
def test_finds_skills_in_symlinked_category_dir(self, tmp_path):
|
||||
external_root = tmp_path / "repo"
|
||||
skills_root = tmp_path / "skills"
|
||||
skills_root.mkdir()
|
||||
|
||||
external_category = _symlink_category(skills_root, external_root, "linked")
|
||||
_make_skill(external_category.parent, "knowledge-brain", category="linked")
|
||||
|
||||
with patch("tools.skills_tool.SKILLS_DIR", skills_root):
|
||||
result = scan_skill_commands()
|
||||
|
||||
assert "/knowledge-brain" in result
|
||||
assert result["/knowledge-brain"]["name"] == "knowledge-brain"
|
||||
|
||||
|
||||
def test_special_chars_stripped_from_cmd_key(self, tmp_path):
|
||||
"""Skill names with +, /, or other special chars produce clean cmd keys."""
|
||||
|
||||
@@ -238,6 +238,56 @@ class TestChatCompletionsKimi:
|
||||
)
|
||||
assert kw["extra_body"]["thinking"] == {"type": "disabled"}
|
||||
|
||||
def test_moonshot_tool_schemas_are_sanitized_by_model_name(self, transport):
|
||||
"""Aggregator routes (Nous, OpenRouter) hit Moonshot by model name, not base URL."""
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "search",
|
||||
"description": "Search",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"q": {"description": "query"}, # missing type
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
||||
kw = transport.build_kwargs(
|
||||
model="moonshotai/kimi-k2.6",
|
||||
messages=[{"role": "user", "content": "Hi"}],
|
||||
tools=tools,
|
||||
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
||||
)
|
||||
assert kw["tools"][0]["function"]["parameters"]["properties"]["q"]["type"] == "string"
|
||||
|
||||
def test_non_moonshot_tools_are_not_mutated(self, transport):
|
||||
"""Other models don't go through the Moonshot sanitizer."""
|
||||
original_params = {
|
||||
"type": "object",
|
||||
"properties": {"q": {"description": "query"}}, # missing type
|
||||
}
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "search",
|
||||
"description": "Search",
|
||||
"parameters": original_params,
|
||||
},
|
||||
},
|
||||
]
|
||||
kw = transport.build_kwargs(
|
||||
model="anthropic/claude-sonnet-4.6",
|
||||
messages=[{"role": "user", "content": "Hi"}],
|
||||
tools=tools,
|
||||
max_tokens_param_fn=lambda n: {"max_tokens": n},
|
||||
)
|
||||
# The parameters dict is passed through untouched (no synthetic type)
|
||||
assert "type" not in kw["tools"][0]["function"]["parameters"]["properties"]["q"]
|
||||
|
||||
|
||||
class TestChatCompletionsValidate:
|
||||
|
||||
|
||||
@@ -200,6 +200,35 @@ class TestToolCallBackwardCompat:
|
||||
tc_no_pd = ToolCall(id="1", name="fn", arguments="{}")
|
||||
assert getattr(tc_no_pd, "call_id", None) is None
|
||||
|
||||
def test_extra_content_from_provider_data(self):
|
||||
"""Gemini thought_signature stored in provider_data is exposed via property."""
|
||||
ec = {"google": {"thought_signature": "SIG_ABC123"}}
|
||||
tc = ToolCall(id="1", name="fn", arguments="{}", provider_data={"extra_content": ec})
|
||||
assert tc.extra_content == ec
|
||||
|
||||
def test_extra_content_none_when_no_provider_data(self):
|
||||
tc = ToolCall(id="1", name="fn", arguments="{}", provider_data=None)
|
||||
assert tc.extra_content is None
|
||||
|
||||
def test_extra_content_none_when_key_absent(self):
|
||||
tc = ToolCall(id="1", name="fn", arguments="{}", provider_data={"call_id": "c1"})
|
||||
assert tc.extra_content is None
|
||||
|
||||
def test_extra_content_getattr_pattern(self):
|
||||
"""_build_assistant_message uses getattr(tc, 'extra_content', None).
|
||||
|
||||
This is the exact pattern that was broken before the extra_content
|
||||
property was added — ToolCall lacked the property so getattr always
|
||||
returned None, silently dropping the Gemini thought_signature and
|
||||
causing HTTP 400 on subsequent turns (issue #14488).
|
||||
"""
|
||||
ec = {"google": {"thought_signature": "SIG_ABC123"}}
|
||||
tc = ToolCall(id="1", name="fn", arguments="{}", provider_data={"extra_content": ec})
|
||||
assert getattr(tc, "extra_content", None) == ec
|
||||
|
||||
tc_no_extra = ToolCall(id="1", name="fn", arguments="{}")
|
||||
assert getattr(tc_no_extra, "extra_content", None) is None
|
||||
|
||||
|
||||
class TestNormalizedResponseBackwardCompat:
|
||||
"""Test properties that replaced _nr_to_assistant_message() shim."""
|
||||
|
||||
@@ -566,6 +566,35 @@ class TestGetDueJobs:
|
||||
assert get_job("oneshot-stale")["next_run_at"] is None
|
||||
|
||||
|
||||
class TestEnabledToolsets:
|
||||
def test_enabled_toolsets_stored(self, tmp_cron_dir):
|
||||
job = create_job(prompt="monitor", schedule="every 1h", enabled_toolsets=["web", "terminal"])
|
||||
assert job["enabled_toolsets"] == ["web", "terminal"]
|
||||
|
||||
def test_enabled_toolsets_persisted(self, tmp_cron_dir):
|
||||
job = create_job(prompt="monitor", schedule="every 1h", enabled_toolsets=["web", "file"])
|
||||
fetched = get_job(job["id"])
|
||||
assert fetched["enabled_toolsets"] == ["web", "file"]
|
||||
|
||||
def test_enabled_toolsets_none_when_omitted(self, tmp_cron_dir):
|
||||
job = create_job(prompt="monitor", schedule="every 1h")
|
||||
assert job["enabled_toolsets"] is None
|
||||
|
||||
def test_enabled_toolsets_empty_list_normalizes_to_none(self, tmp_cron_dir):
|
||||
job = create_job(prompt="monitor", schedule="every 1h", enabled_toolsets=[])
|
||||
assert job["enabled_toolsets"] is None
|
||||
|
||||
def test_enabled_toolsets_whitespace_entries_stripped(self, tmp_cron_dir):
|
||||
job = create_job(prompt="monitor", schedule="every 1h", enabled_toolsets=["web", " ", "file"])
|
||||
assert job["enabled_toolsets"] == ["web", "file"]
|
||||
|
||||
def test_enabled_toolsets_updated_via_update_job(self, tmp_cron_dir):
|
||||
job = create_job(prompt="monitor", schedule="every 1h")
|
||||
update_job(job["id"], {"enabled_toolsets": ["web", "delegation"]})
|
||||
fetched = get_job(job["id"])
|
||||
assert fetched["enabled_toolsets"] == ["web", "delegation"]
|
||||
|
||||
|
||||
class TestSaveJobOutput:
|
||||
def test_creates_output_file(self, tmp_cron_dir):
|
||||
output_file = save_job_output("test123", "# Results\nEverything ok.")
|
||||
|
||||
@@ -673,6 +673,100 @@ class TestRunJobSessionPersistence:
|
||||
assert call_args[0][1] == "cron_complete"
|
||||
fake_db.close.assert_called_once()
|
||||
|
||||
def _make_run_job_patches(self, tmp_path):
|
||||
"""Common patches for run_job tests."""
|
||||
fake_db = MagicMock()
|
||||
return fake_db, [
|
||||
patch("cron.scheduler._hermes_home", tmp_path),
|
||||
patch("cron.scheduler._resolve_origin", return_value=None),
|
||||
patch("dotenv.load_dotenv"),
|
||||
patch("hermes_state.SessionDB", return_value=fake_db),
|
||||
patch(
|
||||
"hermes_cli.runtime_provider.resolve_runtime_provider",
|
||||
return_value={
|
||||
"api_key": "test-key",
|
||||
"base_url": "https://example.invalid/v1",
|
||||
"provider": "openrouter",
|
||||
"api_mode": "chat_completions",
|
||||
},
|
||||
),
|
||||
]
|
||||
|
||||
def test_run_job_passes_enabled_toolsets_to_agent(self, tmp_path):
|
||||
job = {
|
||||
"id": "toolset-job",
|
||||
"name": "test",
|
||||
"prompt": "hello",
|
||||
"enabled_toolsets": ["web", "terminal", "file"],
|
||||
}
|
||||
fake_db, patches = self._make_run_job_patches(tmp_path)
|
||||
with patches[0], patches[1], patches[2], patches[3], patches[4], \
|
||||
patch("run_agent.AIAgent") as mock_agent_cls:
|
||||
mock_agent = MagicMock()
|
||||
mock_agent.run_conversation.return_value = {"final_response": "ok"}
|
||||
mock_agent_cls.return_value = mock_agent
|
||||
run_job(job)
|
||||
|
||||
kwargs = mock_agent_cls.call_args.kwargs
|
||||
assert kwargs["enabled_toolsets"] == ["web", "terminal", "file"]
|
||||
|
||||
def test_run_job_enabled_toolsets_resolves_from_platform_config_when_not_set(self, tmp_path):
|
||||
"""When a job has no explicit enabled_toolsets, the scheduler now
|
||||
resolves them from ``hermes tools`` platform config for ``cron``
|
||||
(PR #14xxx — blanket fix for Norbert's surprise ``moa`` run).
|
||||
|
||||
The legacy "pass None → AIAgent loads full default" path is still
|
||||
reachable, but only when ``_get_platform_tools`` raises (safety net
|
||||
for any unexpected config shape).
|
||||
"""
|
||||
job = {
|
||||
"id": "no-toolset-job",
|
||||
"name": "test",
|
||||
"prompt": "hello",
|
||||
}
|
||||
fake_db, patches = self._make_run_job_patches(tmp_path)
|
||||
with patches[0], patches[1], patches[2], patches[3], patches[4], \
|
||||
patch("run_agent.AIAgent") as mock_agent_cls:
|
||||
mock_agent = MagicMock()
|
||||
mock_agent.run_conversation.return_value = {"final_response": "ok"}
|
||||
mock_agent_cls.return_value = mock_agent
|
||||
run_job(job)
|
||||
|
||||
kwargs = mock_agent_cls.call_args.kwargs
|
||||
# Resolution happened — not None, is a list.
|
||||
assert isinstance(kwargs["enabled_toolsets"], list)
|
||||
# The cron default is _HERMES_CORE_TOOLS with _DEFAULT_OFF_TOOLSETS
|
||||
# (``moa``, ``homeassistant``, ``rl``) removed. The most important
|
||||
# invariant: ``moa`` is NOT in the default cron toolset, so a cron
|
||||
# run cannot accidentally spin up frontier models.
|
||||
assert "moa" not in kwargs["enabled_toolsets"]
|
||||
|
||||
def test_run_job_per_job_toolsets_win_over_platform_config(self, tmp_path):
|
||||
"""Per-job enabled_toolsets (via cronjob tool) always take precedence
|
||||
over the platform-level ``hermes tools`` config."""
|
||||
job = {
|
||||
"id": "override-job",
|
||||
"name": "test",
|
||||
"prompt": "hello",
|
||||
"enabled_toolsets": ["terminal"],
|
||||
}
|
||||
fake_db, patches = self._make_run_job_patches(tmp_path)
|
||||
# Even if the user has ``hermes tools`` configured to enable web+file
|
||||
# for cron, the per-job override wins.
|
||||
with patches[0], patches[1], patches[2], patches[3], patches[4], \
|
||||
patch("run_agent.AIAgent") as mock_agent_cls, \
|
||||
patch(
|
||||
"hermes_cli.tools_config._get_platform_tools",
|
||||
return_value={"web", "file"},
|
||||
):
|
||||
mock_agent = MagicMock()
|
||||
mock_agent.run_conversation.return_value = {"final_response": "ok"}
|
||||
mock_agent_cls.return_value = mock_agent
|
||||
run_job(job)
|
||||
|
||||
kwargs = mock_agent_cls.call_args.kwargs
|
||||
assert kwargs["enabled_toolsets"] == ["terminal"]
|
||||
|
||||
def test_run_job_empty_response_returns_empty_not_placeholder(self, tmp_path):
|
||||
"""Empty final_response should stay empty for delivery logic (issue #2234).
|
||||
|
||||
|
||||
@@ -95,6 +95,7 @@ class TestBusySessionAck:
|
||||
async def test_sends_ack_when_agent_running(self):
|
||||
"""First message during busy session should get a status ack."""
|
||||
runner, sentinel = _make_runner()
|
||||
runner._busy_input_mode = "interrupt"
|
||||
adapter = _make_adapter()
|
||||
|
||||
event = _make_event(text="Are you working?")
|
||||
@@ -127,16 +128,42 @@ class TestBusySessionAck:
|
||||
assert "Interrupting" in content or "respond" in content
|
||||
assert "/stop" not in content # no need — we ARE interrupting
|
||||
|
||||
# Verify message was queued in adapter pending
|
||||
assert sk in adapter._pending_messages
|
||||
|
||||
# Verify agent interrupt was called
|
||||
agent.interrupt.assert_called_once_with("Are you working?")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_queue_mode_suppresses_interrupt_and_updates_ack(self):
|
||||
"""When busy_input_mode is 'queue', message is queued WITHOUT interrupt."""
|
||||
runner, sentinel = _make_runner()
|
||||
runner._busy_input_mode = "queue"
|
||||
adapter = _make_adapter()
|
||||
|
||||
event = _make_event(text="Add this to queue")
|
||||
sk = build_session_key(event.source)
|
||||
runner.adapters[event.source.platform] = adapter
|
||||
|
||||
agent = MagicMock()
|
||||
runner._running_agents[sk] = agent
|
||||
|
||||
with patch("gateway.run.merge_pending_message_event"):
|
||||
await runner._handle_active_session_busy_message(event, sk)
|
||||
|
||||
# VERIFY: Agent was NOT interrupted
|
||||
agent.interrupt.assert_not_called()
|
||||
|
||||
# VERIFY: Ack sent with queue-specific wording
|
||||
adapter._send_with_retry.assert_called_once()
|
||||
call_kwargs = adapter._send_with_retry.call_args
|
||||
content = call_kwargs.kwargs.get("content") or call_kwargs[1].get("content", "")
|
||||
assert "Queued for the next turn" in content
|
||||
assert "respond once the current task finishes" in content
|
||||
assert "Interrupting" not in content
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_debounce_suppresses_rapid_acks(self):
|
||||
"""Second message within 30s should NOT send another ack."""
|
||||
runner, sentinel = _make_runner()
|
||||
runner._busy_input_mode = "interrupt"
|
||||
adapter = _make_adapter()
|
||||
|
||||
event1 = _make_event(text="hello?")
|
||||
@@ -172,13 +199,14 @@ class TestBusySessionAck:
|
||||
assert result2 is True
|
||||
assert adapter._send_with_retry.call_count == 1 # still 1, no new ack
|
||||
|
||||
# But interrupt should still be called for both
|
||||
# But interrupt should still be called for both (since we are in interrupt mode)
|
||||
assert agent.interrupt.call_count == 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_ack_after_cooldown_expires(self):
|
||||
"""After 30s cooldown, a new message should send a fresh ack."""
|
||||
runner, sentinel = _make_runner()
|
||||
runner._busy_input_mode = "interrupt"
|
||||
adapter = _make_adapter()
|
||||
|
||||
event = _make_event(text="hello?")
|
||||
@@ -212,6 +240,7 @@ class TestBusySessionAck:
|
||||
async def test_includes_status_detail(self):
|
||||
"""Ack message should include iteration and tool info when available."""
|
||||
runner, sentinel = _make_runner()
|
||||
runner._busy_input_mode = "interrupt"
|
||||
adapter = _make_adapter()
|
||||
|
||||
event = _make_event(text="yo")
|
||||
@@ -243,6 +272,7 @@ class TestBusySessionAck:
|
||||
"""Draining case should still produce the drain-specific message."""
|
||||
runner, sentinel = _make_runner()
|
||||
runner._draining = True
|
||||
runner._busy_input_mode = "interrupt"
|
||||
adapter = _make_adapter()
|
||||
|
||||
event = _make_event(text="hello")
|
||||
@@ -264,6 +294,7 @@ class TestBusySessionAck:
|
||||
async def test_pending_sentinel_no_interrupt(self):
|
||||
"""When agent is PENDING_SENTINEL, don't call interrupt (it has no method)."""
|
||||
runner, sentinel = _make_runner()
|
||||
runner._busy_input_mode = "interrupt"
|
||||
adapter = _make_adapter()
|
||||
|
||||
event = _make_event(text="hey")
|
||||
|
||||
@@ -1,22 +1,28 @@
|
||||
"""Regression tests for the TUI gateway's `complete.path` handler.
|
||||
|
||||
Reported during the TUI v2 blitz retest: typing `@folder:` (and `@folder`
|
||||
with no colon yet) still surfaced files alongside directories in the
|
||||
TUI composer, because the gateway-side completion lives in
|
||||
`tui_gateway/server.py` and was never touched by the earlier fix to
|
||||
`hermes_cli/commands.py`.
|
||||
Reported during the TUI v2 blitz retest:
|
||||
- typing `@folder:` (and `@folder` with no colon yet) surfaced files
|
||||
alongside directories — the gateway-side completion lives in
|
||||
`tui_gateway/server.py` and was never touched by the earlier fix to
|
||||
`hermes_cli/commands.py`.
|
||||
- typing `@appChrome` required the full `@ui-tui/src/components/app…`
|
||||
path to find the file — users expect Cmd-P-style fuzzy basename
|
||||
matching across the repo, not a strict directory prefix filter.
|
||||
|
||||
Covers:
|
||||
- `@folder:` only yields directories
|
||||
- `@file:` only yields regular files
|
||||
- Bare `@folder` / `@file` (no colon) lists cwd directly
|
||||
- Explicit prefix is preserved in the completion text
|
||||
- `@<name>` with no slash fuzzy-matches basenames anywhere in the tree
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from tui_gateway import server
|
||||
|
||||
|
||||
@@ -33,6 +39,15 @@ def _items(word: str):
|
||||
return [(it["text"], it["display"], it.get("meta", "")) for it in resp["result"]["items"]]
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _reset_fuzzy_cache(monkeypatch):
|
||||
# Each test walks a fresh tmp dir; clear the cached listing so prior
|
||||
# roots can't leak through the TTL window.
|
||||
server._fuzzy_cache.clear()
|
||||
yield
|
||||
server._fuzzy_cache.clear()
|
||||
|
||||
|
||||
def test_at_folder_colon_only_dirs(tmp_path, monkeypatch):
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_fixture(tmp_path)
|
||||
@@ -89,3 +104,176 @@ def test_bare_at_still_shows_static_refs(tmp_path, monkeypatch):
|
||||
|
||||
for expected in ("@diff", "@staged", "@file:", "@folder:", "@url:", "@git:"):
|
||||
assert expected in texts, f"missing static ref {expected!r} in {texts!r}"
|
||||
|
||||
|
||||
# ── Fuzzy basename matching ──────────────────────────────────────────────
|
||||
# Users shouldn't have to know the full path — typing `@appChrome` should
|
||||
# find `ui-tui/src/components/appChrome.tsx`.
|
||||
|
||||
|
||||
def _nested_fixture(tmp_path: Path):
|
||||
(tmp_path / "readme.md").write_text("x")
|
||||
(tmp_path / ".env").write_text("x")
|
||||
(tmp_path / "ui-tui/src/components").mkdir(parents=True)
|
||||
(tmp_path / "ui-tui/src/components/appChrome.tsx").write_text("x")
|
||||
(tmp_path / "ui-tui/src/components/appLayout.tsx").write_text("x")
|
||||
(tmp_path / "ui-tui/src/components/thinking.tsx").write_text("x")
|
||||
(tmp_path / "ui-tui/src/hooks").mkdir(parents=True)
|
||||
(tmp_path / "ui-tui/src/hooks/useCompletion.ts").write_text("x")
|
||||
(tmp_path / "tui_gateway").mkdir()
|
||||
(tmp_path / "tui_gateway/server.py").write_text("x")
|
||||
|
||||
|
||||
def test_fuzzy_at_finds_file_without_directory_prefix(tmp_path, monkeypatch):
|
||||
"""`@appChrome` — with no slash — should surface the nested file."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_nested_fixture(tmp_path)
|
||||
|
||||
entries = _items("@appChrome")
|
||||
texts = [t for t, _, _ in entries]
|
||||
|
||||
assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts
|
||||
|
||||
# Display is the basename, meta is the containing directory, so the
|
||||
# picker can show `appChrome.tsx ui-tui/src/components` on one row.
|
||||
row = next(r for r in entries if r[0] == "@file:ui-tui/src/components/appChrome.tsx")
|
||||
assert row[1] == "appChrome.tsx"
|
||||
assert row[2] == "ui-tui/src/components"
|
||||
|
||||
|
||||
def test_fuzzy_ranks_exact_before_prefix_before_subseq(tmp_path, monkeypatch):
|
||||
"""Better matches sort before weaker matches regardless of path depth."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_nested_fixture(tmp_path)
|
||||
(tmp_path / "server.py").write_text("x") # exact basename match at root
|
||||
|
||||
texts = [t for t, _, _ in _items("@server")]
|
||||
|
||||
# Exact `server.py` beats `tui_gateway/server.py` (prefix match) — both
|
||||
# rank 1 on basename but exact basename wins on the sort key; shorter
|
||||
# rel path breaks ties.
|
||||
assert texts[0] == "@file:server.py", texts
|
||||
assert "@file:tui_gateway/server.py" in texts
|
||||
|
||||
|
||||
def test_fuzzy_camelcase_word_boundary(tmp_path, monkeypatch):
|
||||
"""Mid-basename camelCase pieces match without substring scanning."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_nested_fixture(tmp_path)
|
||||
|
||||
texts = [t for t, _, _ in _items("@Chrome")]
|
||||
|
||||
# `Chrome` starts a camelCase word inside `appChrome.tsx`.
|
||||
assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts
|
||||
|
||||
|
||||
def test_fuzzy_subsequence_catches_sparse_queries(tmp_path, monkeypatch):
|
||||
"""`@uCo` → `useCompletion.ts` via subsequence, last-resort tier."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_nested_fixture(tmp_path)
|
||||
|
||||
texts = [t for t, _, _ in _items("@uCo")]
|
||||
|
||||
assert "@file:ui-tui/src/hooks/useCompletion.ts" in texts, texts
|
||||
|
||||
|
||||
def test_fuzzy_at_file_prefix_preserved(tmp_path, monkeypatch):
|
||||
"""Explicit `@file:` prefix still wins the completion tag."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_nested_fixture(tmp_path)
|
||||
|
||||
texts = [t for t, _, _ in _items("@file:appChrome")]
|
||||
|
||||
assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts
|
||||
|
||||
|
||||
def test_fuzzy_skipped_when_path_has_slash(tmp_path, monkeypatch):
|
||||
"""Any `/` in the query = user is navigating; keep directory listing."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_nested_fixture(tmp_path)
|
||||
|
||||
texts = [t for t, _, _ in _items("@ui-tui/src/components/app")]
|
||||
|
||||
# Directory-listing mode prefixes with `@file:` / `@folder:` per entry.
|
||||
# It should only surface direct children of the named dir — not the
|
||||
# nested `useCompletion.ts`.
|
||||
assert any("appChrome.tsx" in t for t in texts), texts
|
||||
assert not any("useCompletion.ts" in t for t in texts), texts
|
||||
|
||||
|
||||
def test_fuzzy_skipped_when_folder_tag(tmp_path, monkeypatch):
|
||||
"""`@folder:<name>` still lists directories — fuzzy scanner only walks
|
||||
files (git-tracked + untracked), so defer to the dir-listing path."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_nested_fixture(tmp_path)
|
||||
|
||||
texts = [t for t, _, _ in _items("@folder:ui")]
|
||||
|
||||
# Root has `ui-tui/` as a directory; the listing branch should surface it.
|
||||
assert any(t.startswith("@folder:ui-tui") for t in texts), texts
|
||||
|
||||
|
||||
def test_fuzzy_hides_dotfiles_unless_asked(tmp_path, monkeypatch):
|
||||
"""`.env` doesn't leak into `@env` but does show for `@.env`."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_nested_fixture(tmp_path)
|
||||
|
||||
assert not any(".env" in t for t, _, _ in _items("@env"))
|
||||
assert any(t.endswith(".env") for t, _, _ in _items("@.env"))
|
||||
|
||||
|
||||
def test_fuzzy_caps_results(tmp_path, monkeypatch):
|
||||
"""The 30-item cap survives a big tree."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
for i in range(60):
|
||||
(tmp_path / f"mod_{i:03d}.py").write_text("x")
|
||||
|
||||
items = _items("@mod")
|
||||
|
||||
assert len(items) == 30
|
||||
|
||||
|
||||
def test_fuzzy_paths_relative_to_cwd_inside_subdir(tmp_path, monkeypatch):
|
||||
"""When the gateway runs from a subdirectory of a git repo, fuzzy
|
||||
completion paths must resolve under that cwd — not under the repo root.
|
||||
|
||||
Without this, `@appChrome` from inside `apps/web/` would suggest
|
||||
`@file:apps/web/src/foo.tsx` but the agent (resolving from cwd) would
|
||||
look for `apps/web/apps/web/src/foo.tsx` and fail. We translate every
|
||||
`git ls-files` result back to a `relpath(root)` and drop anything
|
||||
outside `root` so the completion contract stays "paths are cwd-relative".
|
||||
"""
|
||||
import subprocess
|
||||
|
||||
subprocess.run(["git", "init", "-q"], cwd=tmp_path, check=True)
|
||||
subprocess.run(["git", "config", "user.email", "test@example.com"], cwd=tmp_path, check=True)
|
||||
subprocess.run(["git", "config", "user.name", "test"], cwd=tmp_path, check=True)
|
||||
|
||||
(tmp_path / "apps" / "web" / "src").mkdir(parents=True)
|
||||
(tmp_path / "apps" / "web" / "src" / "appChrome.tsx").write_text("x")
|
||||
(tmp_path / "apps" / "api" / "src").mkdir(parents=True)
|
||||
(tmp_path / "apps" / "api" / "src" / "server.ts").write_text("x")
|
||||
(tmp_path / "README.md").write_text("x")
|
||||
|
||||
subprocess.run(["git", "add", "."], cwd=tmp_path, check=True)
|
||||
subprocess.run(["git", "commit", "-q", "-m", "init"], cwd=tmp_path, check=True)
|
||||
|
||||
# Run from `apps/web/` — completions should be relative to here, and
|
||||
# files outside this subtree (apps/api, README.md at root) shouldn't
|
||||
# appear at all.
|
||||
monkeypatch.chdir(tmp_path / "apps" / "web")
|
||||
|
||||
texts = [t for t, _, _ in _items("@appChrome")]
|
||||
|
||||
assert "@file:src/appChrome.tsx" in texts, texts
|
||||
assert not any("apps/web/" in t for t in texts), texts
|
||||
|
||||
server._fuzzy_cache.clear()
|
||||
other_texts = [t for t, _, _ in _items("@server")]
|
||||
|
||||
assert not any("server.ts" in t for t in other_texts), other_texts
|
||||
|
||||
server._fuzzy_cache.clear()
|
||||
readme_texts = [t for t, _, _ in _items("@README")]
|
||||
|
||||
assert not any("README.md" in t for t in readme_texts), readme_texts
|
||||
|
||||
@@ -73,18 +73,29 @@ from gateway.platforms.discord import DiscordAdapter # noqa: E402
|
||||
class FakeTree:
|
||||
def __init__(self):
|
||||
self.sync = AsyncMock(return_value=[])
|
||||
self.fetch_commands = AsyncMock(return_value=[])
|
||||
self._commands = []
|
||||
|
||||
def command(self, *args, **kwargs):
|
||||
return lambda fn: fn
|
||||
|
||||
def get_commands(self, *args, **kwargs):
|
||||
return list(self._commands)
|
||||
|
||||
|
||||
class FakeBot:
|
||||
def __init__(self, *, intents, proxy=None, allowed_mentions=None, **_):
|
||||
self.intents = intents
|
||||
self.allowed_mentions = allowed_mentions
|
||||
self.application_id = 999
|
||||
self.user = SimpleNamespace(id=999, name="Hermes")
|
||||
self._events = {}
|
||||
self.tree = FakeTree()
|
||||
self.http = SimpleNamespace(
|
||||
upsert_global_command=AsyncMock(),
|
||||
edit_global_command=AsyncMock(),
|
||||
delete_global_command=AsyncMock(),
|
||||
)
|
||||
|
||||
def event(self, fn):
|
||||
self._events[fn.__name__] = fn
|
||||
@@ -199,6 +210,7 @@ async def test_connect_releases_token_lock_on_timeout(monkeypatch):
|
||||
async def test_connect_does_not_wait_for_slash_sync(monkeypatch):
|
||||
adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
|
||||
|
||||
monkeypatch.setenv("DISCORD_COMMAND_SYNC_POLICY", "bulk")
|
||||
monkeypatch.setattr("gateway.status.acquire_scoped_lock", lambda scope, identity, metadata=None: (True, None))
|
||||
monkeypatch.setattr("gateway.status.release_scoped_lock", lambda scope, identity: None)
|
||||
|
||||
@@ -226,3 +238,420 @@ async def test_connect_does_not_wait_for_slash_sync(monkeypatch):
|
||||
created["bot"].tree.allow_finish.set()
|
||||
await asyncio.sleep(0)
|
||||
await adapter.disconnect()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_connect_respects_slash_commands_opt_out(monkeypatch):
|
||||
adapter = DiscordAdapter(
|
||||
PlatformConfig(enabled=True, token="test-token", extra={"slash_commands": False})
|
||||
)
|
||||
|
||||
monkeypatch.setenv("DISCORD_COMMAND_SYNC_POLICY", "off")
|
||||
monkeypatch.setattr("gateway.status.acquire_scoped_lock", lambda scope, identity, metadata=None: (True, None))
|
||||
monkeypatch.setattr("gateway.status.release_scoped_lock", lambda scope, identity: None)
|
||||
|
||||
intents = SimpleNamespace(message_content=False, dm_messages=False, guild_messages=False, members=False, voice_states=False)
|
||||
monkeypatch.setattr(discord_platform.Intents, "default", lambda: intents)
|
||||
monkeypatch.setattr(
|
||||
discord_platform.commands,
|
||||
"Bot",
|
||||
lambda **kwargs: FakeBot(
|
||||
intents=kwargs["intents"],
|
||||
proxy=kwargs.get("proxy"),
|
||||
allowed_mentions=kwargs.get("allowed_mentions"),
|
||||
),
|
||||
)
|
||||
register_mock = MagicMock()
|
||||
monkeypatch.setattr(adapter, "_register_slash_commands", register_mock)
|
||||
monkeypatch.setattr(adapter, "_resolve_allowed_usernames", AsyncMock())
|
||||
|
||||
ok = await adapter.connect()
|
||||
|
||||
assert ok is True
|
||||
register_mock.assert_not_called()
|
||||
|
||||
await adapter.disconnect()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_safe_sync_slash_commands_only_mutates_diffs():
|
||||
adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
|
||||
|
||||
class _DesiredCommand:
|
||||
def __init__(self, payload):
|
||||
self._payload = payload
|
||||
|
||||
def to_dict(self, tree):
|
||||
assert tree is not None
|
||||
return dict(self._payload)
|
||||
|
||||
class _ExistingCommand:
|
||||
def __init__(self, command_id, payload):
|
||||
self.id = command_id
|
||||
self.name = payload["name"]
|
||||
self.type = SimpleNamespace(value=payload["type"])
|
||||
self._payload = payload
|
||||
|
||||
def to_dict(self):
|
||||
return {
|
||||
"id": self.id,
|
||||
"application_id": 999,
|
||||
**self._payload,
|
||||
"name_localizations": {},
|
||||
"description_localizations": {},
|
||||
}
|
||||
|
||||
desired_same = {
|
||||
"name": "status",
|
||||
"description": "Show Hermes session status",
|
||||
"type": 1,
|
||||
"options": [],
|
||||
"nsfw": False,
|
||||
"dm_permission": True,
|
||||
"default_member_permissions": None,
|
||||
}
|
||||
desired_updated = {
|
||||
"name": "help",
|
||||
"description": "Show available commands",
|
||||
"type": 1,
|
||||
"options": [],
|
||||
"nsfw": False,
|
||||
"dm_permission": True,
|
||||
"default_member_permissions": None,
|
||||
}
|
||||
desired_created = {
|
||||
"name": "metricas",
|
||||
"description": "Show Colmeio metrics dashboard",
|
||||
"type": 1,
|
||||
"options": [],
|
||||
"nsfw": False,
|
||||
"dm_permission": True,
|
||||
"default_member_permissions": None,
|
||||
}
|
||||
existing_same = _ExistingCommand(11, desired_same)
|
||||
existing_updated = _ExistingCommand(
|
||||
12,
|
||||
{
|
||||
**desired_updated,
|
||||
"description": "Old help text",
|
||||
},
|
||||
)
|
||||
existing_deleted = _ExistingCommand(
|
||||
13,
|
||||
{
|
||||
"name": "old-command",
|
||||
"description": "To be deleted",
|
||||
"type": 1,
|
||||
"options": [],
|
||||
"nsfw": False,
|
||||
"dm_permission": True,
|
||||
"default_member_permissions": None,
|
||||
},
|
||||
)
|
||||
|
||||
fake_tree = SimpleNamespace(
|
||||
get_commands=lambda: [
|
||||
_DesiredCommand(desired_same),
|
||||
_DesiredCommand(desired_updated),
|
||||
_DesiredCommand(desired_created),
|
||||
],
|
||||
fetch_commands=AsyncMock(return_value=[existing_same, existing_updated, existing_deleted]),
|
||||
)
|
||||
fake_http = SimpleNamespace(
|
||||
upsert_global_command=AsyncMock(),
|
||||
edit_global_command=AsyncMock(),
|
||||
delete_global_command=AsyncMock(),
|
||||
)
|
||||
adapter._client = SimpleNamespace(
|
||||
tree=fake_tree,
|
||||
http=fake_http,
|
||||
application_id=999,
|
||||
user=SimpleNamespace(id=999),
|
||||
)
|
||||
|
||||
summary = await adapter._safe_sync_slash_commands()
|
||||
|
||||
assert summary == {
|
||||
"total": 3,
|
||||
"unchanged": 1,
|
||||
"updated": 1,
|
||||
"recreated": 0,
|
||||
"created": 1,
|
||||
"deleted": 1,
|
||||
}
|
||||
fake_http.edit_global_command.assert_awaited_once_with(999, 12, desired_updated)
|
||||
fake_http.upsert_global_command.assert_awaited_once_with(999, desired_created)
|
||||
fake_http.delete_global_command.assert_awaited_once_with(999, 13)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_safe_sync_slash_commands_recreates_metadata_only_diffs():
|
||||
adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
|
||||
|
||||
class _DesiredCommand:
|
||||
def __init__(self, payload):
|
||||
self._payload = payload
|
||||
|
||||
def to_dict(self, tree):
|
||||
assert tree is not None
|
||||
return dict(self._payload)
|
||||
|
||||
class _ExistingCommand:
|
||||
def __init__(self, command_id, payload):
|
||||
self.id = command_id
|
||||
self.name = payload["name"]
|
||||
self.type = SimpleNamespace(value=payload["type"])
|
||||
self._payload = payload
|
||||
|
||||
def to_dict(self):
|
||||
return {
|
||||
"id": self.id,
|
||||
"application_id": 999,
|
||||
**self._payload,
|
||||
"name_localizations": {},
|
||||
"description_localizations": {},
|
||||
}
|
||||
|
||||
desired = {
|
||||
"name": "help",
|
||||
"description": "Show available commands",
|
||||
"type": 1,
|
||||
"options": [],
|
||||
"nsfw": False,
|
||||
"dm_permission": True,
|
||||
"default_member_permissions": "8",
|
||||
}
|
||||
existing = _ExistingCommand(
|
||||
12,
|
||||
{
|
||||
**desired,
|
||||
"default_member_permissions": None,
|
||||
},
|
||||
)
|
||||
|
||||
fake_tree = SimpleNamespace(
|
||||
get_commands=lambda: [_DesiredCommand(desired)],
|
||||
fetch_commands=AsyncMock(return_value=[existing]),
|
||||
)
|
||||
fake_http = SimpleNamespace(
|
||||
upsert_global_command=AsyncMock(),
|
||||
edit_global_command=AsyncMock(),
|
||||
delete_global_command=AsyncMock(),
|
||||
)
|
||||
adapter._client = SimpleNamespace(
|
||||
tree=fake_tree,
|
||||
http=fake_http,
|
||||
application_id=999,
|
||||
user=SimpleNamespace(id=999),
|
||||
)
|
||||
|
||||
summary = await adapter._safe_sync_slash_commands()
|
||||
|
||||
assert summary == {
|
||||
"total": 1,
|
||||
"unchanged": 0,
|
||||
"updated": 0,
|
||||
"recreated": 1,
|
||||
"created": 0,
|
||||
"deleted": 0,
|
||||
}
|
||||
fake_http.edit_global_command.assert_not_awaited()
|
||||
fake_http.delete_global_command.assert_awaited_once_with(999, 12)
|
||||
fake_http.upsert_global_command.assert_awaited_once_with(999, desired)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_post_connect_initialization_skips_sync_when_policy_off(monkeypatch):
|
||||
adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
|
||||
monkeypatch.setenv("DISCORD_COMMAND_SYNC_POLICY", "off")
|
||||
|
||||
fake_tree = SimpleNamespace(sync=AsyncMock())
|
||||
adapter._client = SimpleNamespace(tree=fake_tree)
|
||||
|
||||
await adapter._run_post_connect_initialization()
|
||||
|
||||
fake_tree.sync.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_safe_sync_reads_permission_attrs_from_existing_command():
|
||||
"""Regression: AppCommand.to_dict() in discord.py does NOT include
|
||||
nsfw, dm_permission, or default_member_permissions — they live only
|
||||
on the attributes. Without reading those attrs, any command with
|
||||
non-default permissions false-diffs on every startup.
|
||||
"""
|
||||
adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
|
||||
|
||||
class _DesiredCommand:
|
||||
def __init__(self, payload):
|
||||
self._payload = payload
|
||||
|
||||
def to_dict(self, tree):
|
||||
return dict(self._payload)
|
||||
|
||||
class _ExistingCommand:
|
||||
"""Mirrors discord.py's AppCommand — to_dict() omits nsfw/dm/perms."""
|
||||
|
||||
def __init__(self, command_id, name, description, *, nsfw, guild_only, default_permissions):
|
||||
self.id = command_id
|
||||
self.name = name
|
||||
self.description = description
|
||||
self.type = SimpleNamespace(value=1)
|
||||
self.nsfw = nsfw
|
||||
self.guild_only = guild_only
|
||||
self.default_member_permissions = (
|
||||
SimpleNamespace(value=default_permissions)
|
||||
if default_permissions is not None
|
||||
else None
|
||||
)
|
||||
|
||||
def to_dict(self):
|
||||
# Match real AppCommand.to_dict() — no nsfw/dm_permission/default_member_permissions
|
||||
return {
|
||||
"id": self.id,
|
||||
"type": 1,
|
||||
"application_id": 999,
|
||||
"name": self.name,
|
||||
"description": self.description,
|
||||
"name_localizations": {},
|
||||
"description_localizations": {},
|
||||
"options": [],
|
||||
}
|
||||
|
||||
desired = {
|
||||
"name": "admin",
|
||||
"description": "Admin-only command",
|
||||
"type": 1,
|
||||
"options": [],
|
||||
"nsfw": True,
|
||||
"dm_permission": False,
|
||||
"default_member_permissions": "8",
|
||||
}
|
||||
# Existing command has matching attrs — should report unchanged, NOT falsely diff.
|
||||
existing = _ExistingCommand(
|
||||
42,
|
||||
"admin",
|
||||
"Admin-only command",
|
||||
nsfw=True,
|
||||
guild_only=True,
|
||||
default_permissions=8,
|
||||
)
|
||||
|
||||
fake_tree = SimpleNamespace(
|
||||
get_commands=lambda: [_DesiredCommand(desired)],
|
||||
fetch_commands=AsyncMock(return_value=[existing]),
|
||||
)
|
||||
fake_http = SimpleNamespace(
|
||||
upsert_global_command=AsyncMock(),
|
||||
edit_global_command=AsyncMock(),
|
||||
delete_global_command=AsyncMock(),
|
||||
)
|
||||
adapter._client = SimpleNamespace(
|
||||
tree=fake_tree,
|
||||
http=fake_http,
|
||||
application_id=999,
|
||||
user=SimpleNamespace(id=999),
|
||||
)
|
||||
|
||||
summary = await adapter._safe_sync_slash_commands()
|
||||
|
||||
# Without the fix, this would be unchanged=0, recreated=1 (false diff).
|
||||
assert summary == {
|
||||
"total": 1,
|
||||
"unchanged": 1,
|
||||
"updated": 0,
|
||||
"recreated": 0,
|
||||
"created": 0,
|
||||
"deleted": 0,
|
||||
}
|
||||
fake_http.edit_global_command.assert_not_awaited()
|
||||
fake_http.delete_global_command.assert_not_awaited()
|
||||
fake_http.upsert_global_command.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_safe_sync_detects_contexts_drift():
|
||||
"""Regression: contexts and integration_types must be canonicalized
|
||||
so drift in those fields triggers reconciliation. Without this, the
|
||||
diff silently reports 'unchanged' and never reconciles.
|
||||
"""
|
||||
adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
|
||||
|
||||
class _DesiredCommand:
|
||||
def __init__(self, payload):
|
||||
self._payload = payload
|
||||
|
||||
def to_dict(self, tree):
|
||||
return dict(self._payload)
|
||||
|
||||
class _ExistingCommand:
|
||||
def __init__(self, command_id, payload):
|
||||
self.id = command_id
|
||||
self.name = payload["name"]
|
||||
self.description = payload["description"]
|
||||
self.type = SimpleNamespace(value=1)
|
||||
self.nsfw = payload.get("nsfw", False)
|
||||
self.guild_only = not payload.get("dm_permission", True)
|
||||
self.default_member_permissions = None
|
||||
self._payload = payload
|
||||
|
||||
def to_dict(self):
|
||||
return {
|
||||
"id": self.id,
|
||||
"type": 1,
|
||||
"application_id": 999,
|
||||
"name": self.name,
|
||||
"description": self.description,
|
||||
"name_localizations": {},
|
||||
"description_localizations": {},
|
||||
"options": [],
|
||||
"contexts": self._payload.get("contexts"),
|
||||
"integration_types": self._payload.get("integration_types"),
|
||||
}
|
||||
|
||||
desired = {
|
||||
"name": "help",
|
||||
"description": "Show available commands",
|
||||
"type": 1,
|
||||
"options": [],
|
||||
"nsfw": False,
|
||||
"dm_permission": True,
|
||||
"default_member_permissions": None,
|
||||
"contexts": [0, 1, 2],
|
||||
"integration_types": [0, 1],
|
||||
}
|
||||
existing = _ExistingCommand(
|
||||
77,
|
||||
{
|
||||
**desired,
|
||||
"contexts": [0], # server-side only
|
||||
"integration_types": [0],
|
||||
},
|
||||
)
|
||||
|
||||
fake_tree = SimpleNamespace(
|
||||
get_commands=lambda: [_DesiredCommand(desired)],
|
||||
fetch_commands=AsyncMock(return_value=[existing]),
|
||||
)
|
||||
fake_http = SimpleNamespace(
|
||||
upsert_global_command=AsyncMock(),
|
||||
edit_global_command=AsyncMock(),
|
||||
delete_global_command=AsyncMock(),
|
||||
)
|
||||
adapter._client = SimpleNamespace(
|
||||
tree=fake_tree,
|
||||
http=fake_http,
|
||||
application_id=999,
|
||||
user=SimpleNamespace(id=999),
|
||||
)
|
||||
|
||||
summary = await adapter._safe_sync_slash_commands()
|
||||
|
||||
# contexts and integration_types are not patchable by
|
||||
# edit_global_command, so the command must be recreated.
|
||||
assert summary["unchanged"] == 0
|
||||
assert summary["recreated"] == 1
|
||||
assert summary["updated"] == 0
|
||||
fake_http.edit_global_command.assert_not_awaited()
|
||||
fake_http.delete_global_command.assert_awaited_once_with(999, 77)
|
||||
fake_http.upsert_global_command.assert_awaited_once_with(999, desired)
|
||||
|
||||
@@ -145,3 +145,86 @@ async def test_drain_active_agents_throttles_status_updates():
|
||||
# Start, one count-change update, and final update. Allow one extra update
|
||||
# if the loop observes the zero-agent state before exiting.
|
||||
assert 3 <= runner._update_runtime_status.call_count <= 4
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_gateway_stop_kills_tool_subprocesses_before_adapter_disconnect_on_timeout(monkeypatch):
|
||||
"""On drain timeout, tool subprocesses must be killed BEFORE adapter
|
||||
disconnect so systemd's TimeoutStopSec doesn't SIGKILL the cgroup with
|
||||
bash/sleep children still attached (#8202)."""
|
||||
runner, adapter = make_restart_runner()
|
||||
runner._restart_drain_timeout = 0.01 # force timeout path
|
||||
|
||||
call_order: list[str] = []
|
||||
|
||||
def _fake_kill_all(task_id=None):
|
||||
call_order.append("kill_all")
|
||||
return 2
|
||||
|
||||
def _fake_cleanup_envs():
|
||||
call_order.append("cleanup_environments")
|
||||
|
||||
def _fake_cleanup_browsers():
|
||||
call_order.append("cleanup_browsers")
|
||||
|
||||
async def _disconnect():
|
||||
call_order.append("disconnect")
|
||||
|
||||
# Patch the module-level names the stop() helper imports lazily.
|
||||
import tools.process_registry as _pr
|
||||
import tools.terminal_tool as _tt
|
||||
import tools.browser_tool as _bt
|
||||
monkeypatch.setattr(_pr.process_registry, "kill_all", _fake_kill_all)
|
||||
monkeypatch.setattr(_tt, "cleanup_all_environments", _fake_cleanup_envs)
|
||||
monkeypatch.setattr(_bt, "cleanup_all_browsers", _fake_cleanup_browsers)
|
||||
|
||||
adapter.disconnect = _disconnect
|
||||
|
||||
runner._running_agents = {"session": MagicMock()}
|
||||
|
||||
with patch("gateway.status.remove_pid_file"), patch("gateway.status.write_runtime_status"):
|
||||
await runner.stop()
|
||||
|
||||
# First kill_all must precede the first disconnect. (Both the eager
|
||||
# post-interrupt cleanup and the final catch-all call _kill_tool_
|
||||
# subprocesses, so we expect kill_all to appear twice total.)
|
||||
assert "kill_all" in call_order
|
||||
assert "disconnect" in call_order
|
||||
first_kill = call_order.index("kill_all")
|
||||
first_disconnect = call_order.index("disconnect")
|
||||
assert first_kill < first_disconnect, (
|
||||
f"Tool subprocesses must be killed before adapter disconnect on "
|
||||
f"drain timeout, got order: {call_order}"
|
||||
)
|
||||
# Defense-in-depth final cleanup still runs.
|
||||
assert call_order.count("kill_all") >= 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_gateway_stop_kills_tool_subprocesses_on_graceful_path(monkeypatch):
|
||||
"""Graceful shutdown (no drain timeout) must still kill tool subprocesses
|
||||
exactly once via the final catch-all — regression guard against
|
||||
accidentally removing that call when refactoring."""
|
||||
runner, adapter = make_restart_runner()
|
||||
adapter.disconnect = AsyncMock()
|
||||
|
||||
kill_count = 0
|
||||
|
||||
def _fake_kill_all(task_id=None):
|
||||
nonlocal kill_count
|
||||
kill_count += 1
|
||||
return 0
|
||||
|
||||
import tools.process_registry as _pr
|
||||
import tools.terminal_tool as _tt
|
||||
import tools.browser_tool as _bt
|
||||
monkeypatch.setattr(_pr.process_registry, "kill_all", _fake_kill_all)
|
||||
monkeypatch.setattr(_tt, "cleanup_all_environments", lambda: None)
|
||||
monkeypatch.setattr(_bt, "cleanup_all_browsers", lambda: None)
|
||||
|
||||
# No running agents → drain returns immediately, no timeout, no eager cleanup.
|
||||
with patch("gateway.status.remove_pid_file"), patch("gateway.status.write_runtime_status"):
|
||||
await runner.stop()
|
||||
|
||||
# Only the final catch-all fires on the graceful path.
|
||||
assert kill_count == 1
|
||||
|
||||
@@ -193,7 +193,10 @@ async def test_start_gateway_replace_force_uses_terminate_pid(monkeypatch, tmp_p
|
||||
_pid_state["alive"] = False
|
||||
monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid)
|
||||
monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file)
|
||||
monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0)
|
||||
monkeypatch.setattr(
|
||||
"gateway.status.release_all_scoped_locks",
|
||||
lambda **kwargs: 0,
|
||||
)
|
||||
monkeypatch.setattr("gateway.status.terminate_pid", lambda pid, force=False: calls.append((pid, force)))
|
||||
monkeypatch.setattr("gateway.run.os.getpid", lambda: 100)
|
||||
monkeypatch.setattr("gateway.run.os.kill", lambda pid, sig: None)
|
||||
@@ -267,7 +270,10 @@ async def test_start_gateway_replace_writes_takeover_marker_before_sigterm(
|
||||
_pid_state["alive"] = False
|
||||
monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid)
|
||||
monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file)
|
||||
monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0)
|
||||
monkeypatch.setattr(
|
||||
"gateway.status.release_all_scoped_locks",
|
||||
lambda **kwargs: 0,
|
||||
)
|
||||
monkeypatch.setattr("gateway.status.write_takeover_marker", record_write_marker)
|
||||
monkeypatch.setattr("gateway.status.terminate_pid", record_terminate)
|
||||
monkeypatch.setattr("gateway.run.os.getpid", lambda: 100)
|
||||
|
||||
@@ -404,6 +404,53 @@ class TestScopedLocks:
|
||||
status.release_scoped_lock("telegram-bot-token", "secret")
|
||||
assert not lock_path.exists()
|
||||
|
||||
def test_release_all_scoped_locks_can_target_single_owner(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
|
||||
lock_dir = tmp_path / "locks"
|
||||
lock_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
target_lock = lock_dir / "telegram-bot-token-target.lock"
|
||||
other_lock = lock_dir / "slack-app-token-other.lock"
|
||||
target_lock.write_text(json.dumps({
|
||||
"pid": 111,
|
||||
"start_time": 222,
|
||||
"kind": "hermes-gateway",
|
||||
}))
|
||||
other_lock.write_text(json.dumps({
|
||||
"pid": 999,
|
||||
"start_time": 333,
|
||||
"kind": "hermes-gateway",
|
||||
}))
|
||||
|
||||
removed = status.release_all_scoped_locks(
|
||||
owner_pid=111,
|
||||
owner_start_time=222,
|
||||
)
|
||||
|
||||
assert removed == 1
|
||||
assert not target_lock.exists()
|
||||
assert other_lock.exists()
|
||||
|
||||
def test_release_all_scoped_locks_skips_pid_reuse_mismatch(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
|
||||
lock_dir = tmp_path / "locks"
|
||||
lock_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
reused_pid_lock = lock_dir / "telegram-bot-token-reused.lock"
|
||||
reused_pid_lock.write_text(json.dumps({
|
||||
"pid": 111,
|
||||
"start_time": 999,
|
||||
"kind": "hermes-gateway",
|
||||
}))
|
||||
|
||||
removed = status.release_all_scoped_locks(
|
||||
owner_pid=111,
|
||||
owner_start_time=222,
|
||||
)
|
||||
|
||||
assert removed == 0
|
||||
assert reused_pid_lock.exists()
|
||||
|
||||
|
||||
class TestTakeoverMarker:
|
||||
"""Tests for the --replace takeover marker.
|
||||
|
||||
@@ -68,3 +68,68 @@ def test_build_welcome_banner_uses_normalized_toolset_names():
|
||||
assert "homeassistant_tools:" not in output
|
||||
assert "honcho_tools:" not in output
|
||||
assert "web_tools:" not in output
|
||||
|
||||
|
||||
def test_build_welcome_banner_title_is_hyperlinked_to_release():
|
||||
"""Panel title (version label) is wrapped in an OSC-8 hyperlink to the GitHub release."""
|
||||
import io
|
||||
from unittest.mock import patch as _patch
|
||||
import hermes_cli.banner as _banner
|
||||
import model_tools as _mt
|
||||
import tools.mcp_tool as _mcp
|
||||
|
||||
_banner._latest_release_cache = None
|
||||
tag_url = ("v2026.4.23", "https://github.com/NousResearch/hermes-agent/releases/tag/v2026.4.23")
|
||||
|
||||
buf = io.StringIO()
|
||||
with (
|
||||
_patch.object(_mt, "check_tool_availability", return_value=(["web"], [])),
|
||||
_patch.object(_banner, "get_available_skills", return_value={}),
|
||||
_patch.object(_banner, "get_update_result", return_value=None),
|
||||
_patch.object(_mcp, "get_mcp_status", return_value=[]),
|
||||
_patch.object(_banner, "get_latest_release_tag", return_value=tag_url),
|
||||
):
|
||||
console = Console(file=buf, force_terminal=True, color_system="truecolor", width=160)
|
||||
_banner.build_welcome_banner(
|
||||
console=console, model="x", cwd="/tmp",
|
||||
session_id="abc123",
|
||||
tools=[{"function": {"name": "read_file"}}],
|
||||
get_toolset_for_tool=lambda n: "file",
|
||||
)
|
||||
|
||||
raw = buf.getvalue()
|
||||
# The existing version label must still be present in the title
|
||||
assert "Hermes Agent v" in raw, "Version label missing from title"
|
||||
# OSC-8 hyperlink escape sequence present with the release URL
|
||||
assert "\x1b]8;" in raw, "OSC-8 hyperlink not emitted"
|
||||
assert "releases/tag/v2026.4.23" in raw, "Release URL missing from banner output"
|
||||
|
||||
|
||||
def test_build_welcome_banner_title_falls_back_when_no_tag():
|
||||
"""Without a resolvable tag, the panel title renders as plain text (no hyperlink escape)."""
|
||||
import io
|
||||
from unittest.mock import patch as _patch
|
||||
import hermes_cli.banner as _banner
|
||||
import model_tools as _mt
|
||||
import tools.mcp_tool as _mcp
|
||||
|
||||
_banner._latest_release_cache = None
|
||||
buf = io.StringIO()
|
||||
with (
|
||||
_patch.object(_mt, "check_tool_availability", return_value=(["web"], [])),
|
||||
_patch.object(_banner, "get_available_skills", return_value={}),
|
||||
_patch.object(_banner, "get_update_result", return_value=None),
|
||||
_patch.object(_mcp, "get_mcp_status", return_value=[]),
|
||||
_patch.object(_banner, "get_latest_release_tag", return_value=None),
|
||||
):
|
||||
console = Console(file=buf, force_terminal=True, color_system="truecolor", width=160)
|
||||
_banner.build_welcome_banner(
|
||||
console=console, model="x", cwd="/tmp",
|
||||
session_id="abc123",
|
||||
tools=[{"function": {"name": "read_file"}}],
|
||||
get_toolset_for_tool=lambda n: "file",
|
||||
)
|
||||
|
||||
raw = buf.getvalue()
|
||||
assert "Hermes Agent v" in raw, "Version label missing from title"
|
||||
assert "\x1b]8;" not in raw, "OSC-8 hyperlink should not be emitted without a tag"
|
||||
|
||||
@@ -95,7 +95,10 @@ class TestGeneratedSystemdUnits:
|
||||
assert "ExecStop=" not in unit
|
||||
assert "ExecReload=/bin/kill -USR1 $MAINPID" in unit
|
||||
assert f"RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}" in unit
|
||||
assert "TimeoutStopSec=60" in unit
|
||||
# TimeoutStopSec must exceed the default drain_timeout (60s) so
|
||||
# systemd doesn't SIGKILL the cgroup before post-interrupt cleanup
|
||||
# (tool subprocess kill, adapter disconnect) runs — issue #8202.
|
||||
assert "TimeoutStopSec=90" in unit
|
||||
|
||||
def test_user_unit_includes_resolved_node_directory_in_path(self, monkeypatch):
|
||||
monkeypatch.setattr(gateway_cli.shutil, "which", lambda cmd: "/home/test/.nvm/versions/node/v24.14.0/bin/node" if cmd == "node" else None)
|
||||
@@ -111,7 +114,10 @@ class TestGeneratedSystemdUnits:
|
||||
assert "ExecStop=" not in unit
|
||||
assert "ExecReload=/bin/kill -USR1 $MAINPID" in unit
|
||||
assert f"RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}" in unit
|
||||
assert "TimeoutStopSec=60" in unit
|
||||
# TimeoutStopSec must exceed the default drain_timeout (60s) so
|
||||
# systemd doesn't SIGKILL the cgroup before post-interrupt cleanup
|
||||
# (tool subprocess kill, adapter disconnect) runs — issue #8202.
|
||||
assert "TimeoutStopSec=90" in unit
|
||||
assert "WantedBy=multi-user.target" in unit
|
||||
|
||||
|
||||
|
||||
@@ -463,7 +463,7 @@ class TestPlatformToolsetConsistency:
|
||||
|
||||
gateway_includes = set(TOOLSETS["hermes-gateway"]["includes"])
|
||||
# Exclude non-messaging platforms from the check
|
||||
non_messaging = {"cli", "api_server"}
|
||||
non_messaging = {"cli", "api_server", "cron"}
|
||||
for platform, meta in PLATFORMS.items():
|
||||
if platform in non_messaging:
|
||||
continue
|
||||
@@ -601,3 +601,122 @@ class TestImagegenModelPicker:
|
||||
_configure_imagegen_model("fal", config)
|
||||
assert isinstance(config["image_gen"], dict)
|
||||
assert config["image_gen"]["model"] == "fal-ai/flux-2/klein/9b"
|
||||
|
||||
|
||||
def test_get_platform_tools_recovers_non_configurable_toolsets_from_composite():
|
||||
"""Non-configurable toolsets whose tools are in the composite but not in
|
||||
CONFIGURABLE_TOOLSETS should still appear in the result.
|
||||
"""
|
||||
from toolsets import TOOLSETS
|
||||
from hermes_cli.tools_config import PLATFORMS
|
||||
from unittest.mock import patch as mock_patch
|
||||
|
||||
fake_toolsets = dict(TOOLSETS)
|
||||
fake_toolsets["_test_platform_tool"] = {
|
||||
"description": "test",
|
||||
"tools": ["_test_special_tool"],
|
||||
"includes": [],
|
||||
}
|
||||
fake_toolsets["hermes-_test_platform"] = {
|
||||
"description": "test composite",
|
||||
"tools": ["web_search", "web_extract", "terminal", "process", "_test_special_tool"],
|
||||
"includes": [],
|
||||
}
|
||||
|
||||
test_platforms = {
|
||||
"_test_platform": {"label": "Test", "default_toolset": "hermes-_test_platform"},
|
||||
}
|
||||
|
||||
with mock_patch("hermes_cli.tools_config.PLATFORMS", {**PLATFORMS, **test_platforms}):
|
||||
with mock_patch("toolsets.TOOLSETS", fake_toolsets):
|
||||
enabled = _get_platform_tools({}, "_test_platform")
|
||||
|
||||
assert "_test_platform_tool" in enabled
|
||||
assert "web" in enabled
|
||||
assert "terminal" in enabled
|
||||
|
||||
|
||||
def test_get_platform_tools_second_pass_skips_fully_claimed_toolsets():
|
||||
"""Toolsets whose tools are fully covered by configurable keys should NOT
|
||||
be added by the second pass (prevents 'search', 'hermes-acp' noise).
|
||||
"""
|
||||
enabled = _get_platform_tools({}, "cli")
|
||||
|
||||
assert "search" not in enabled
|
||||
|
||||
|
||||
def test_get_platform_tools_discord_includes_discord_not_admin():
|
||||
enabled = _get_platform_tools({}, "discord")
|
||||
assert "discord" in enabled
|
||||
assert "discord_admin" not in enabled
|
||||
|
||||
|
||||
def test_discord_admin_in_configurable_toolsets():
|
||||
assert any(ts_key == "discord_admin" for ts_key, _, _ in CONFIGURABLE_TOOLSETS)
|
||||
|
||||
|
||||
def test_discord_admin_in_default_off():
|
||||
assert "discord_admin" in _DEFAULT_OFF_TOOLSETS
|
||||
|
||||
|
||||
def test_get_platform_tools_feishu_includes_doc_and_drive():
|
||||
enabled = _get_platform_tools({}, "feishu")
|
||||
assert "feishu_doc" in enabled
|
||||
assert "feishu_drive" in enabled
|
||||
|
||||
|
||||
def test_get_platform_tools_feishu_tools_not_on_other_platforms():
|
||||
for plat in ["cli", "telegram", "discord"]:
|
||||
enabled = _get_platform_tools({}, plat)
|
||||
assert "feishu_doc" not in enabled, f"feishu_doc leaked onto {plat}"
|
||||
assert "feishu_drive" not in enabled, f"feishu_drive leaked onto {plat}"
|
||||
|
||||
|
||||
def test_save_platform_tools_normalizes_numeric_entries():
|
||||
"""YAML may parse bare numeric toolset names as int. They should be
|
||||
normalized to str so they survive the save round-trip.
|
||||
"""
|
||||
config = {
|
||||
"platform_toolsets": {
|
||||
"cli": ["web", "terminal", 12306, "custom-mcp"]
|
||||
}
|
||||
}
|
||||
|
||||
with patch("hermes_cli.tools_config.save_config"):
|
||||
_save_platform_tools(config, "cli", {"web", "browser"})
|
||||
|
||||
saved = config["platform_toolsets"]["cli"]
|
||||
assert "12306" in saved
|
||||
assert 12306 not in saved
|
||||
|
||||
|
||||
def test_save_platform_tools_clears_stale_no_mcp():
|
||||
"""When the new selection doesn't include no_mcp, the sentinel should
|
||||
be stripped from preserved entries so MCP servers are re-enabled.
|
||||
"""
|
||||
config = {
|
||||
"platform_toolsets": {
|
||||
"cli": ["web", "terminal", "no_mcp"]
|
||||
}
|
||||
}
|
||||
|
||||
with patch("hermes_cli.tools_config.save_config"):
|
||||
_save_platform_tools(config, "cli", {"web", "browser"})
|
||||
|
||||
saved = config["platform_toolsets"]["cli"]
|
||||
assert "no_mcp" not in saved
|
||||
|
||||
|
||||
def test_save_platform_tools_preserves_explicit_no_mcp():
|
||||
"""When the new selection explicitly includes no_mcp, it should be kept."""
|
||||
config = {
|
||||
"platform_toolsets": {
|
||||
"cli": ["web", "no_mcp"]
|
||||
}
|
||||
}
|
||||
|
||||
with patch("hermes_cli.tools_config.save_config"):
|
||||
_save_platform_tools(config, "cli", {"web", "no_mcp"})
|
||||
|
||||
saved = config["platform_toolsets"]["cli"]
|
||||
assert "no_mcp" in saved
|
||||
|
||||
@@ -422,6 +422,152 @@ class TestCmdUpdateLaunchdRestart:
|
||||
]
|
||||
assert len(restart_calls) == 1
|
||||
|
||||
@patch("shutil.which", return_value=None)
|
||||
@patch("subprocess.run")
|
||||
def test_update_prefers_sigusr1_over_systemctl_restart_when_mainpid_known(
|
||||
self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
|
||||
):
|
||||
"""Drain-aware update: when systemctl show reports a MainPID, the
|
||||
update path sends SIGUSR1 and waits for graceful exit + respawn,
|
||||
instead of ``systemctl restart`` (which SIGKILLs in-flight agents).
|
||||
"""
|
||||
monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
|
||||
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
|
||||
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
|
||||
|
||||
# Track state: before kill → "active" (old PID),
|
||||
# after kill + exit → briefly inactive, then "active" again (new PID).
|
||||
state = {"killed": False}
|
||||
|
||||
def side_effect(cmd, **kwargs):
|
||||
joined = " ".join(str(c) for c in cmd)
|
||||
|
||||
if "rev-parse" in joined and "--abbrev-ref" in joined:
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="main\n", stderr="")
|
||||
if "rev-parse" in joined and "--verify" in joined:
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
|
||||
if "rev-list" in joined:
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="3\n", stderr="")
|
||||
|
||||
# Only expose a user-scope service.
|
||||
if "systemctl" in joined and "list-units" in joined:
|
||||
if "--user" in joined:
|
||||
return subprocess.CompletedProcess(
|
||||
cmd, 0,
|
||||
stdout="hermes-gateway.service loaded active running\n",
|
||||
stderr="",
|
||||
)
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
|
||||
|
||||
if "systemctl" in joined and "is-active" in joined:
|
||||
# Pre-kill: active. Post-kill: active again (respawned by
|
||||
# Restart=on-failure). The drain loop verifies liveness
|
||||
# separately via os.kill(pid, 0).
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="")
|
||||
|
||||
# The new code path.
|
||||
if "systemctl" in joined and "show" in joined and "MainPID" in joined:
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="4242\n", stderr="")
|
||||
|
||||
# If systemctl restart is called, this test fails its intent —
|
||||
# but still let it succeed so we can assert it was NOT called.
|
||||
if "systemctl" in joined and "restart" in joined:
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
|
||||
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
|
||||
|
||||
mock_run.side_effect = side_effect
|
||||
|
||||
# Track SIGUSR1 delivery and simulate the gateway draining + exiting.
|
||||
sigusr1_sent = {"value": False}
|
||||
|
||||
def fake_kill(pid, sig):
|
||||
import signal as _s
|
||||
if pid == 4242 and sig == _s.SIGUSR1:
|
||||
sigusr1_sent["value"] = True
|
||||
state["killed"] = True
|
||||
return
|
||||
if pid == 4242 and sig == 0:
|
||||
# Liveness probe — report dead once SIGUSR1 has been sent.
|
||||
if state["killed"]:
|
||||
raise ProcessLookupError()
|
||||
return
|
||||
# For any other PID/sig combination, succeed silently.
|
||||
return
|
||||
|
||||
monkeypatch.setattr("os.kill", fake_kill)
|
||||
|
||||
with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
|
||||
cmd_update(mock_args)
|
||||
|
||||
# SIGUSR1 must have been delivered to the gateway MainPID.
|
||||
assert sigusr1_sent["value"], "Expected SIGUSR1 to be sent to MainPID"
|
||||
|
||||
# And `systemctl restart` must NOT have been used (that's the
|
||||
# non-draining kill-everything path we're moving away from).
|
||||
restart_calls = [
|
||||
c for c in mock_run.call_args_list
|
||||
if "systemctl" in " ".join(str(a) for a in c.args[0])
|
||||
and "restart" in " ".join(str(a) for a in c.args[0])
|
||||
]
|
||||
assert restart_calls == [], (
|
||||
"Graceful SIGUSR1 succeeded; `systemctl restart` should not "
|
||||
f"have been called. Got: {restart_calls}"
|
||||
)
|
||||
|
||||
captured = capsys.readouterr().out
|
||||
assert "draining" in captured.lower()
|
||||
assert "Restarted hermes-gateway" in captured
|
||||
|
||||
@patch("shutil.which", return_value=None)
|
||||
@patch("subprocess.run")
|
||||
def test_update_falls_back_to_systemctl_restart_when_sigusr1_times_out(
|
||||
self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
|
||||
):
|
||||
"""If the gateway doesn't exit within the drain budget (e.g. old unit
|
||||
missing ``Restart=on-failure`` or an agent ignoring SIGUSR1), the
|
||||
update path falls back to ``systemctl restart``.
|
||||
"""
|
||||
monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
|
||||
monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
|
||||
monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
|
||||
|
||||
mock_run.side_effect = _make_run_side_effect(
|
||||
commit_count="3",
|
||||
systemd_active=True,
|
||||
)
|
||||
|
||||
# Patch systemctl show to report MainPID=4242 so cmd_update attempts
|
||||
# the graceful path.
|
||||
orig = mock_run.side_effect
|
||||
def wrapped(cmd, **kwargs):
|
||||
joined = " ".join(str(c) for c in cmd)
|
||||
if "systemctl" in joined and "show" in joined and "MainPID" in joined:
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="4242\n", stderr="")
|
||||
return orig(cmd, **kwargs)
|
||||
mock_run.side_effect = wrapped
|
||||
|
||||
# Simulate the drain helper failing to confirm a clean exit — either
|
||||
# because the gateway ignored SIGUSR1 or the drain budget was
|
||||
# exceeded. cmd_update() should detect this and escalate.
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.gateway._graceful_restart_via_sigusr1",
|
||||
lambda pid, drain_timeout: False,
|
||||
)
|
||||
|
||||
with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
|
||||
cmd_update(mock_args)
|
||||
|
||||
# Fallback kicked in → systemctl restart was called.
|
||||
restart_calls = [
|
||||
c for c in mock_run.call_args_list
|
||||
if "systemctl" in " ".join(str(a) for a in c.args[0])
|
||||
and "restart" in " ".join(str(a) for a in c.args[0])
|
||||
]
|
||||
assert len(restart_calls) >= 1, (
|
||||
"Drain path failed; expected fallback `systemctl restart`."
|
||||
)
|
||||
|
||||
@patch("shutil.which", return_value=None)
|
||||
@patch("subprocess.run")
|
||||
def test_update_no_gateway_running_skips_restart(
|
||||
|
||||
@@ -0,0 +1,255 @@
|
||||
"""Tests for ``hermes_cli.voice`` — the TUI gateway's voice wrapper.
|
||||
|
||||
The module is imported *lazily* by ``tui_gateway/server.py`` so that a
|
||||
box with missing audio deps fails at call time (returning a clean RPC
|
||||
error) rather than at gateway startup. These tests therefore only
|
||||
assert the public contract the gateway depends on: the three symbols
|
||||
exist, ``stop_and_transcribe`` is a no-op when nothing is recording,
|
||||
and ``speak_text`` tolerates empty input without touching the provider
|
||||
stack.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
|
||||
class TestPublicAPI:
|
||||
def test_gateway_symbols_importable(self):
|
||||
"""Match the exact import shape tui_gateway/server.py uses."""
|
||||
from hermes_cli.voice import (
|
||||
speak_text,
|
||||
start_recording,
|
||||
stop_and_transcribe,
|
||||
)
|
||||
|
||||
assert callable(start_recording)
|
||||
assert callable(stop_and_transcribe)
|
||||
assert callable(speak_text)
|
||||
|
||||
|
||||
class TestStopWithoutStart:
|
||||
def test_returns_none_when_no_recording_active(self, monkeypatch):
|
||||
"""Idempotent no-op: stop before start must not raise or touch state."""
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
monkeypatch.setattr(voice, "_recorder", None)
|
||||
|
||||
assert voice.stop_and_transcribe() is None
|
||||
|
||||
|
||||
class TestSpeakTextGuards:
|
||||
@pytest.mark.parametrize("text", ["", " ", "\n\t "])
|
||||
def test_empty_text_is_noop(self, text):
|
||||
"""Empty / whitespace-only text must return without importing tts_tool
|
||||
(the gateway spawns a thread per call, so a no-op on empty input
|
||||
keeps the thread pool from churning on trivial inputs)."""
|
||||
from hermes_cli.voice import speak_text
|
||||
|
||||
# Should simply return None without raising.
|
||||
assert speak_text(text) is None
|
||||
|
||||
|
||||
class TestContinuousAPI:
|
||||
"""Continuous (VAD) mode API — CLI-parity loop entry points."""
|
||||
|
||||
def test_continuous_exports(self):
|
||||
from hermes_cli.voice import (
|
||||
is_continuous_active,
|
||||
start_continuous,
|
||||
stop_continuous,
|
||||
)
|
||||
|
||||
assert callable(start_continuous)
|
||||
assert callable(stop_continuous)
|
||||
assert callable(is_continuous_active)
|
||||
|
||||
def test_not_active_by_default(self, monkeypatch):
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
# Isolate from any state left behind by other tests in the session.
|
||||
monkeypatch.setattr(voice, "_continuous_active", False)
|
||||
monkeypatch.setattr(voice, "_continuous_recorder", None)
|
||||
|
||||
assert voice.is_continuous_active() is False
|
||||
|
||||
def test_stop_continuous_idempotent_when_inactive(self, monkeypatch):
|
||||
"""stop_continuous must not raise when no loop is active — the
|
||||
gateway's voice.toggle off path calls it unconditionally."""
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
monkeypatch.setattr(voice, "_continuous_active", False)
|
||||
monkeypatch.setattr(voice, "_continuous_recorder", None)
|
||||
|
||||
# Should return cleanly without exceptions
|
||||
assert voice.stop_continuous() is None
|
||||
assert voice.is_continuous_active() is False
|
||||
|
||||
def test_double_start_is_idempotent(self, monkeypatch):
|
||||
"""A second start_continuous while already active is a no-op — prevents
|
||||
two overlapping capture threads fighting over the microphone when the
|
||||
UI double-fires (e.g. both /voice on and Ctrl+B within the same tick)."""
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
monkeypatch.setattr(voice, "_continuous_active", True)
|
||||
called = {"n": 0}
|
||||
|
||||
class FakeRecorder:
|
||||
def start(self, on_silence_stop=None):
|
||||
called["n"] += 1
|
||||
|
||||
def cancel(self):
|
||||
pass
|
||||
|
||||
monkeypatch.setattr(voice, "_continuous_recorder", FakeRecorder())
|
||||
|
||||
voice.start_continuous(on_transcript=lambda _t: None)
|
||||
|
||||
# The guard inside start_continuous short-circuits before rec.start()
|
||||
assert called["n"] == 0
|
||||
|
||||
|
||||
class TestContinuousLoopSimulation:
|
||||
"""End-to-end simulation of the VAD loop with a fake recorder.
|
||||
|
||||
Proves auto-restart works: the silence callback must trigger transcribe →
|
||||
on_transcript → re-call rec.start(on_silence_stop=same_cb). Also covers
|
||||
the 3-strikes no-speech halt.
|
||||
"""
|
||||
|
||||
@pytest.fixture
|
||||
def fake_recorder(self, monkeypatch):
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
# Reset module state between tests.
|
||||
monkeypatch.setattr(voice, "_continuous_active", False)
|
||||
monkeypatch.setattr(voice, "_continuous_recorder", None)
|
||||
monkeypatch.setattr(voice, "_continuous_no_speech_count", 0)
|
||||
monkeypatch.setattr(voice, "_continuous_on_transcript", None)
|
||||
monkeypatch.setattr(voice, "_continuous_on_status", None)
|
||||
monkeypatch.setattr(voice, "_continuous_on_silent_limit", None)
|
||||
|
||||
class FakeRecorder:
|
||||
_silence_threshold = 200
|
||||
_silence_duration = 3.0
|
||||
is_recording = False
|
||||
|
||||
def __init__(self):
|
||||
self.start_calls = 0
|
||||
self.last_callback = None
|
||||
self.stopped = 0
|
||||
self.cancelled = 0
|
||||
# Preset WAV path returned by stop()
|
||||
self.next_stop_wav = "/tmp/fake.wav"
|
||||
|
||||
def start(self, on_silence_stop=None):
|
||||
self.start_calls += 1
|
||||
self.last_callback = on_silence_stop
|
||||
self.is_recording = True
|
||||
|
||||
def stop(self):
|
||||
self.stopped += 1
|
||||
self.is_recording = False
|
||||
return self.next_stop_wav
|
||||
|
||||
def cancel(self):
|
||||
self.cancelled += 1
|
||||
self.is_recording = False
|
||||
|
||||
rec = FakeRecorder()
|
||||
monkeypatch.setattr(voice, "create_audio_recorder", lambda: rec)
|
||||
# Skip real file ops in the silence callback.
|
||||
monkeypatch.setattr(voice.os.path, "isfile", lambda _p: False)
|
||||
return rec
|
||||
|
||||
def test_loop_auto_restarts_after_transcript(self, fake_recorder, monkeypatch):
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
monkeypatch.setattr(
|
||||
voice,
|
||||
"transcribe_recording",
|
||||
lambda _p: {"success": True, "transcript": "hello world"},
|
||||
)
|
||||
monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
|
||||
|
||||
transcripts = []
|
||||
statuses = []
|
||||
|
||||
voice.start_continuous(
|
||||
on_transcript=lambda t: transcripts.append(t),
|
||||
on_status=lambda s: statuses.append(s),
|
||||
)
|
||||
|
||||
assert fake_recorder.start_calls == 1
|
||||
assert statuses == ["listening"]
|
||||
|
||||
# Simulate AudioRecorder's silence detector firing.
|
||||
fake_recorder.last_callback()
|
||||
|
||||
assert transcripts == ["hello world"]
|
||||
assert fake_recorder.start_calls == 2 # auto-restarted
|
||||
assert statuses == ["listening", "transcribing", "listening"]
|
||||
assert voice.is_continuous_active() is True
|
||||
|
||||
voice.stop_continuous()
|
||||
|
||||
def test_silent_limit_halts_loop_after_three_strikes(self, fake_recorder, monkeypatch):
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
# Transcription returns no speech — fake_recorder.stop() returns the
|
||||
# path, but transcribe returns empty text, counting as silence.
|
||||
monkeypatch.setattr(
|
||||
voice,
|
||||
"transcribe_recording",
|
||||
lambda _p: {"success": True, "transcript": ""},
|
||||
)
|
||||
monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
|
||||
|
||||
transcripts = []
|
||||
silent_limit_fired = []
|
||||
|
||||
voice.start_continuous(
|
||||
on_transcript=lambda t: transcripts.append(t),
|
||||
on_silent_limit=lambda: silent_limit_fired.append(True),
|
||||
)
|
||||
|
||||
# Fire silence callback 3 times
|
||||
for _ in range(3):
|
||||
fake_recorder.last_callback()
|
||||
|
||||
assert transcripts == []
|
||||
assert silent_limit_fired == [True]
|
||||
assert voice.is_continuous_active() is False
|
||||
assert fake_recorder.cancelled >= 1
|
||||
|
||||
def test_stop_during_transcription_discards_restart(self, fake_recorder, monkeypatch):
|
||||
"""User hits Ctrl+B mid-transcription: the in-flight transcript must
|
||||
still fire (it's a real utterance), but the loop must NOT restart."""
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
stop_triggered = {"flag": False}
|
||||
|
||||
def late_transcribe(_p):
|
||||
# Simulate stop_continuous arriving while we're inside transcribe
|
||||
voice.stop_continuous()
|
||||
stop_triggered["flag"] = True
|
||||
return {"success": True, "transcript": "final word"}
|
||||
|
||||
monkeypatch.setattr(voice, "transcribe_recording", late_transcribe)
|
||||
monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
|
||||
|
||||
transcripts = []
|
||||
voice.start_continuous(on_transcript=lambda t: transcripts.append(t))
|
||||
|
||||
initial_starts = fake_recorder.start_calls # 1
|
||||
fake_recorder.last_callback()
|
||||
|
||||
assert stop_triggered["flag"] is True
|
||||
# Loop is stopped — no auto-restart
|
||||
assert fake_recorder.start_calls == initial_starts
|
||||
# The in-flight transcript was suppressed because we stopped mid-flight
|
||||
assert transcripts == []
|
||||
assert voice.is_continuous_active() is False
|
||||
@@ -110,12 +110,12 @@ class TestWebServerEndpoints:
|
||||
|
||||
import hermes_state
|
||||
from hermes_constants import get_hermes_home
|
||||
from hermes_cli.web_server import app, _SESSION_TOKEN
|
||||
from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN
|
||||
|
||||
monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", get_hermes_home() / "state.db")
|
||||
|
||||
self.client = TestClient(app)
|
||||
self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}"
|
||||
self.client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN
|
||||
|
||||
def test_get_status(self):
|
||||
resp = self.client.get("/api/status")
|
||||
@@ -221,12 +221,12 @@ class TestWebServerEndpoints:
|
||||
def test_reveal_env_var(self, tmp_path):
|
||||
"""POST /api/env/reveal should return the real unredacted value."""
|
||||
from hermes_cli.config import save_env_value
|
||||
from hermes_cli.web_server import _SESSION_TOKEN
|
||||
from hermes_cli.web_server import _SESSION_HEADER_NAME, _SESSION_TOKEN
|
||||
save_env_value("TEST_REVEAL_KEY", "super-secret-value-12345")
|
||||
resp = self.client.post(
|
||||
"/api/env/reveal",
|
||||
json={"key": "TEST_REVEAL_KEY"},
|
||||
headers={"Authorization": f"Bearer {_SESSION_TOKEN}"},
|
||||
headers={_SESSION_HEADER_NAME: _SESSION_TOKEN},
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
@@ -235,11 +235,11 @@ class TestWebServerEndpoints:
|
||||
|
||||
def test_reveal_env_var_not_found(self):
|
||||
"""POST /api/env/reveal should 404 for unknown keys."""
|
||||
from hermes_cli.web_server import _SESSION_TOKEN
|
||||
from hermes_cli.web_server import _SESSION_HEADER_NAME, _SESSION_TOKEN
|
||||
resp = self.client.post(
|
||||
"/api/env/reveal",
|
||||
json={"key": "NONEXISTENT_KEY_XYZ"},
|
||||
headers={"Authorization": f"Bearer {_SESSION_TOKEN}"},
|
||||
headers={_SESSION_HEADER_NAME: _SESSION_TOKEN},
|
||||
)
|
||||
assert resp.status_code == 404
|
||||
|
||||
@@ -249,7 +249,7 @@ class TestWebServerEndpoints:
|
||||
from hermes_cli.web_server import app
|
||||
from hermes_cli.config import save_env_value
|
||||
save_env_value("TEST_REVEAL_NOAUTH", "secret-value")
|
||||
# Use a fresh client WITHOUT the Authorization header
|
||||
# Use a fresh client WITHOUT the dashboard session header
|
||||
unauth_client = TestClient(app)
|
||||
resp = unauth_client.post(
|
||||
"/api/env/reveal",
|
||||
@@ -260,14 +260,47 @@ class TestWebServerEndpoints:
|
||||
def test_reveal_env_var_bad_token(self, tmp_path):
|
||||
"""POST /api/env/reveal with wrong token should return 401."""
|
||||
from hermes_cli.config import save_env_value
|
||||
from hermes_cli.web_server import _SESSION_HEADER_NAME
|
||||
save_env_value("TEST_REVEAL_BADAUTH", "secret-value")
|
||||
resp = self.client.post(
|
||||
"/api/env/reveal",
|
||||
json={"key": "TEST_REVEAL_BADAUTH"},
|
||||
headers={"Authorization": "Bearer wrong-token-here"},
|
||||
headers={_SESSION_HEADER_NAME: "wrong-token-here"},
|
||||
)
|
||||
assert resp.status_code == 401
|
||||
|
||||
def test_reveal_env_var_custom_session_header_ignores_proxy_authorization(self, tmp_path):
|
||||
"""A valid dashboard session header should coexist with proxy auth."""
|
||||
from hermes_cli.config import save_env_value
|
||||
from hermes_cli.web_server import _SESSION_HEADER_NAME, _SESSION_TOKEN
|
||||
|
||||
save_env_value("TEST_REVEAL_PROXY_AUTH", "secret-value")
|
||||
resp = self.client.post(
|
||||
"/api/env/reveal",
|
||||
json={"key": "TEST_REVEAL_PROXY_AUTH"},
|
||||
headers={
|
||||
_SESSION_HEADER_NAME: _SESSION_TOKEN,
|
||||
"Authorization": "Basic dXNlcjpwYXNz",
|
||||
},
|
||||
)
|
||||
|
||||
assert resp.status_code == 200
|
||||
assert resp.json()["value"] == "secret-value"
|
||||
|
||||
def test_reveal_env_var_legacy_authorization_header_still_works(self, tmp_path):
|
||||
"""Keep old dashboard bundles working while the new header rolls out."""
|
||||
from hermes_cli.config import save_env_value
|
||||
from hermes_cli.web_server import _SESSION_TOKEN
|
||||
|
||||
save_env_value("TEST_REVEAL_LEGACY_AUTH", "secret-value")
|
||||
resp = self.client.post(
|
||||
"/api/env/reveal",
|
||||
json={"key": "TEST_REVEAL_LEGACY_AUTH"},
|
||||
headers={"Authorization": f"Bearer {_SESSION_TOKEN}"},
|
||||
)
|
||||
|
||||
assert resp.status_code == 200
|
||||
|
||||
def test_session_token_endpoint_removed(self):
|
||||
"""GET /api/auth/session-token should no longer exist (token injected via HTML)."""
|
||||
resp = self.client.get("/api/auth/session-token")
|
||||
@@ -285,7 +318,7 @@ class TestWebServerEndpoints:
|
||||
"""API requests without the session token should be rejected."""
|
||||
from starlette.testclient import TestClient
|
||||
from hermes_cli.web_server import app
|
||||
# Create a client WITHOUT the Authorization header
|
||||
# Create a client WITHOUT the dashboard session header
|
||||
unauth_client = TestClient(app)
|
||||
resp = unauth_client.get("/api/env")
|
||||
assert resp.status_code == 401
|
||||
@@ -388,9 +421,9 @@ class TestConfigRoundTrip:
|
||||
from starlette.testclient import TestClient
|
||||
except ImportError:
|
||||
pytest.skip("fastapi/starlette not installed")
|
||||
from hermes_cli.web_server import app, _SESSION_TOKEN
|
||||
from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN
|
||||
self.client = TestClient(app)
|
||||
self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}"
|
||||
self.client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN
|
||||
|
||||
def test_get_config_no_internal_keys(self):
|
||||
"""GET /api/config should not expose _config_version or _model_meta."""
|
||||
@@ -524,12 +557,12 @@ class TestNewEndpoints:
|
||||
|
||||
import hermes_state
|
||||
from hermes_constants import get_hermes_home
|
||||
from hermes_cli.web_server import app, _SESSION_TOKEN
|
||||
from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN
|
||||
|
||||
monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", get_hermes_home() / "state.db")
|
||||
|
||||
self.client = TestClient(app)
|
||||
self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}"
|
||||
self.client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN
|
||||
|
||||
def test_get_logs_default(self):
|
||||
resp = self.client.get("/api/logs")
|
||||
@@ -1176,9 +1209,9 @@ class TestStatusRemoteGateway:
|
||||
except ImportError:
|
||||
pytest.skip("fastapi/starlette not installed")
|
||||
|
||||
from hermes_cli.web_server import app, _SESSION_TOKEN
|
||||
from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN
|
||||
self.client = TestClient(app)
|
||||
self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}"
|
||||
self.client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN
|
||||
|
||||
def test_status_falls_back_to_remote_probe(self, monkeypatch):
|
||||
"""When local PID check fails and remote probe succeeds, gateway shows running."""
|
||||
@@ -1256,3 +1289,391 @@ class TestStatusRemoteGateway:
|
||||
assert data["gateway_running"] is True
|
||||
assert data["gateway_pid"] is None
|
||||
assert data["gateway_state"] == "running"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dashboard theme normaliser tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNormaliseThemeDefinition:
|
||||
"""Tests for _normalise_theme_definition() — parses YAML theme files."""
|
||||
|
||||
def test_rejects_missing_name(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
assert _normalise_theme_definition({}) is None
|
||||
assert _normalise_theme_definition({"name": ""}) is None
|
||||
assert _normalise_theme_definition({"name": " "}) is None
|
||||
|
||||
def test_rejects_non_dict(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
assert _normalise_theme_definition("string") is None
|
||||
assert _normalise_theme_definition(None) is None
|
||||
assert _normalise_theme_definition([1, 2, 3]) is None
|
||||
|
||||
def test_loose_colors_shorthand(self):
|
||||
"""Bare hex strings under `colors` parse as {hex, alpha=1.0}."""
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
result = _normalise_theme_definition({
|
||||
"name": "loose",
|
||||
"colors": {"background": "#000000", "midground": "#ffffff"},
|
||||
})
|
||||
assert result is not None
|
||||
assert result["palette"]["background"] == {"hex": "#000000", "alpha": 1.0}
|
||||
assert result["palette"]["midground"] == {"hex": "#ffffff", "alpha": 1.0}
|
||||
# foreground falls back to default (transparent white)
|
||||
assert result["palette"]["foreground"]["hex"] == "#ffffff"
|
||||
assert result["palette"]["foreground"]["alpha"] == 0.0
|
||||
|
||||
def test_full_palette_form(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
result = _normalise_theme_definition({
|
||||
"name": "full",
|
||||
"palette": {
|
||||
"background": {"hex": "#0a1628", "alpha": 1.0},
|
||||
"midground": {"hex": "#a8d0ff", "alpha": 0.9},
|
||||
"warmGlow": "rgba(255, 0, 0, 0.5)",
|
||||
"noiseOpacity": 0.5,
|
||||
},
|
||||
})
|
||||
assert result["palette"]["background"]["hex"] == "#0a1628"
|
||||
assert result["palette"]["midground"]["alpha"] == 0.9
|
||||
assert result["palette"]["warmGlow"] == "rgba(255, 0, 0, 0.5)"
|
||||
assert result["palette"]["noiseOpacity"] == 0.5
|
||||
|
||||
def test_default_typography_applied_when_missing(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
result = _normalise_theme_definition({"name": "minimal"})
|
||||
typo = result["typography"]
|
||||
assert "fontSans" in typo
|
||||
assert "fontMono" in typo
|
||||
assert typo["baseSize"] == "15px"
|
||||
assert typo["lineHeight"] == "1.55"
|
||||
assert typo["letterSpacing"] == "0"
|
||||
|
||||
def test_partial_typography_merges_with_defaults(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
result = _normalise_theme_definition({
|
||||
"name": "partial",
|
||||
"typography": {
|
||||
"fontSans": "MyFont, sans-serif",
|
||||
"baseSize": "12px",
|
||||
},
|
||||
})
|
||||
assert result["typography"]["fontSans"] == "MyFont, sans-serif"
|
||||
assert result["typography"]["baseSize"] == "12px"
|
||||
# fontMono defaulted
|
||||
assert "monospace" in result["typography"]["fontMono"]
|
||||
|
||||
def test_layout_defaults(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
result = _normalise_theme_definition({"name": "minimal"})
|
||||
assert result["layout"]["radius"] == "0.5rem"
|
||||
assert result["layout"]["density"] == "comfortable"
|
||||
|
||||
def test_invalid_density_falls_back(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
result = _normalise_theme_definition({
|
||||
"name": "bad",
|
||||
"layout": {"density": "ultra-spacious"},
|
||||
})
|
||||
assert result["layout"]["density"] == "comfortable"
|
||||
|
||||
def test_valid_densities_accepted(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
for d in ("compact", "comfortable", "spacious"):
|
||||
r = _normalise_theme_definition({"name": "x", "layout": {"density": d}})
|
||||
assert r["layout"]["density"] == d
|
||||
|
||||
def test_color_overrides_filter_unknown_keys(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
result = _normalise_theme_definition({
|
||||
"name": "o",
|
||||
"colorOverrides": {
|
||||
"card": "#123456",
|
||||
"fakeToken": "#abcdef",
|
||||
"primary": 42, # non-string rejected
|
||||
"destructive": "#ff0000",
|
||||
},
|
||||
})
|
||||
assert result["colorOverrides"] == {
|
||||
"card": "#123456",
|
||||
"destructive": "#ff0000",
|
||||
}
|
||||
|
||||
def test_color_overrides_omitted_when_empty(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
result = _normalise_theme_definition({"name": "x"})
|
||||
assert "colorOverrides" not in result
|
||||
|
||||
def test_alpha_clamped_to_unit_range(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
r = _normalise_theme_definition({
|
||||
"name": "c",
|
||||
"palette": {"background": {"hex": "#000", "alpha": 99.5}},
|
||||
})
|
||||
assert r["palette"]["background"]["alpha"] == 1.0
|
||||
r2 = _normalise_theme_definition({
|
||||
"name": "c",
|
||||
"palette": {"background": {"hex": "#000", "alpha": -5}},
|
||||
})
|
||||
assert r2["palette"]["background"]["alpha"] == 0.0
|
||||
|
||||
def test_invalid_alpha_uses_default(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
r = _normalise_theme_definition({
|
||||
"name": "c",
|
||||
"palette": {"background": {"hex": "#000", "alpha": "not a number"}},
|
||||
})
|
||||
assert r["palette"]["background"]["alpha"] == 1.0
|
||||
|
||||
|
||||
class TestDiscoverUserThemes:
|
||||
"""Tests for _discover_user_themes() — scans ~/.hermes/dashboard-themes/."""
|
||||
|
||||
def test_returns_empty_when_dir_missing(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
from hermes_cli import web_server
|
||||
assert web_server._discover_user_themes() == []
|
||||
|
||||
def test_loads_and_normalises_yaml(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
themes_dir = tmp_path / "dashboard-themes"
|
||||
themes_dir.mkdir()
|
||||
(themes_dir / "ocean.yaml").write_text(
|
||||
"name: ocean\n"
|
||||
"label: Ocean\n"
|
||||
"palette:\n"
|
||||
" background:\n"
|
||||
" hex: \"#0a1628\"\n"
|
||||
" alpha: 1.0\n"
|
||||
"layout:\n"
|
||||
" density: spacious\n"
|
||||
)
|
||||
from hermes_cli import web_server
|
||||
results = web_server._discover_user_themes()
|
||||
assert len(results) == 1
|
||||
assert results[0]["name"] == "ocean"
|
||||
assert results[0]["label"] == "Ocean"
|
||||
assert results[0]["palette"]["background"]["hex"] == "#0a1628"
|
||||
assert results[0]["layout"]["density"] == "spacious"
|
||||
# defaults filled in
|
||||
assert "fontSans" in results[0]["typography"]
|
||||
|
||||
def test_malformed_yaml_skipped(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
themes_dir = tmp_path / "dashboard-themes"
|
||||
themes_dir.mkdir()
|
||||
(themes_dir / "bad.yaml").write_text("::: not valid yaml :::\n\tindent wrong")
|
||||
(themes_dir / "nameless.yaml").write_text("label: No Name Here\n")
|
||||
(themes_dir / "ok.yaml").write_text("name: ok\n")
|
||||
from hermes_cli import web_server
|
||||
results = web_server._discover_user_themes()
|
||||
names = [r["name"] for r in results]
|
||||
assert "ok" in names
|
||||
assert "bad" not in names # malformed YAML
|
||||
assert len(results) == 1 # only the valid one
|
||||
|
||||
|
||||
class TestNormaliseThemeExtensions:
|
||||
"""Tests for the extended normaliser fields (assets, customCSS,
|
||||
componentStyles, layoutVariant) — the surfaces themes use to reskin
|
||||
the dashboard without shipping code."""
|
||||
|
||||
def test_layout_variant_defaults_to_standard(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
result = _normalise_theme_definition({"name": "t"})
|
||||
assert result["layoutVariant"] == "standard"
|
||||
|
||||
def test_layout_variant_accepts_known_values(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
for variant in ("standard", "cockpit", "tiled"):
|
||||
r = _normalise_theme_definition({"name": "t", "layoutVariant": variant})
|
||||
assert r["layoutVariant"] == variant
|
||||
|
||||
def test_layout_variant_rejects_unknown(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
r = _normalise_theme_definition({"name": "t", "layoutVariant": "warship"})
|
||||
assert r["layoutVariant"] == "standard"
|
||||
r2 = _normalise_theme_definition({"name": "t", "layoutVariant": 12})
|
||||
assert r2["layoutVariant"] == "standard"
|
||||
|
||||
def test_assets_named_slots_passthrough(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
r = _normalise_theme_definition({
|
||||
"name": "t",
|
||||
"assets": {
|
||||
"bg": "https://example.com/bg.jpg",
|
||||
"hero": "linear-gradient(180deg, red, blue)",
|
||||
"crest": "/ds-assets/crest.svg",
|
||||
"logo": " ", # whitespace-only — dropped
|
||||
"notAKnownKey": "ignored",
|
||||
},
|
||||
})
|
||||
assert r["assets"]["bg"] == "https://example.com/bg.jpg"
|
||||
assert r["assets"]["hero"].startswith("linear-gradient")
|
||||
assert r["assets"]["crest"] == "/ds-assets/crest.svg"
|
||||
assert "logo" not in r["assets"] # whitespace-only rejected
|
||||
assert "notAKnownKey" not in r["assets"] # unknown slot ignored
|
||||
|
||||
def test_assets_custom_block(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
r = _normalise_theme_definition({
|
||||
"name": "t",
|
||||
"assets": {
|
||||
"custom": {
|
||||
"scan-lines": "/img/scan.png",
|
||||
"my_overlay": "/img/ov.png",
|
||||
"bad key!": "x", # non-alnum key — rejected
|
||||
"empty": "", # empty value — rejected
|
||||
},
|
||||
},
|
||||
})
|
||||
assert r["assets"]["custom"] == {
|
||||
"scan-lines": "/img/scan.png",
|
||||
"my_overlay": "/img/ov.png",
|
||||
}
|
||||
|
||||
def test_assets_absent_means_no_field(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
r = _normalise_theme_definition({"name": "t"})
|
||||
assert "assets" not in r
|
||||
|
||||
def test_custom_css_passthrough_and_capped(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
# Small CSS passes through verbatim.
|
||||
r = _normalise_theme_definition({
|
||||
"name": "t",
|
||||
"customCSS": "body { color: red; }",
|
||||
})
|
||||
assert r["customCSS"] == "body { color: red; }"
|
||||
|
||||
# 40 KiB of CSS gets clipped to the 32 KiB cap.
|
||||
huge = "/* x */ " * (40 * 1024 // 8 + 10)
|
||||
r2 = _normalise_theme_definition({"name": "t", "customCSS": huge})
|
||||
assert len(r2["customCSS"]) <= 32 * 1024
|
||||
|
||||
def test_custom_css_empty_dropped(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
for val in ("", " \n\t", None):
|
||||
r = _normalise_theme_definition({"name": "t", "customCSS": val})
|
||||
assert "customCSS" not in r
|
||||
|
||||
def test_component_styles_per_bucket(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
r = _normalise_theme_definition({
|
||||
"name": "t",
|
||||
"componentStyles": {
|
||||
"card": {
|
||||
"clipPath": "polygon(0 0, 100% 0, 100% 100%, 0 100%)",
|
||||
"boxShadow": "inset 0 0 0 1px red",
|
||||
"bad prop!": "ignored", # non-alnum prop rejected
|
||||
},
|
||||
"header": {"background": "linear-gradient(red, blue)"},
|
||||
"rogueBucket": {"foo": "bar"}, # not a known bucket — rejected
|
||||
},
|
||||
})
|
||||
assert r["componentStyles"]["card"] == {
|
||||
"clipPath": "polygon(0 0, 100% 0, 100% 100%, 0 100%)",
|
||||
"boxShadow": "inset 0 0 0 1px red",
|
||||
}
|
||||
assert r["componentStyles"]["header"]["background"].startswith("linear-gradient")
|
||||
assert "rogueBucket" not in r["componentStyles"]
|
||||
|
||||
def test_component_styles_empty_buckets_dropped(self):
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
r = _normalise_theme_definition({
|
||||
"name": "t",
|
||||
"componentStyles": {
|
||||
"card": {}, # empty — dropped entirely
|
||||
"header": {"bad prop!": "ignored"}, # all props rejected — bucket dropped
|
||||
"footer": {"background": "black"},
|
||||
},
|
||||
})
|
||||
assert "card" not in r.get("componentStyles", {})
|
||||
assert "header" not in r.get("componentStyles", {})
|
||||
assert r["componentStyles"]["footer"]["background"] == "black"
|
||||
|
||||
def test_component_styles_accepts_numeric_values(self):
|
||||
"""Numeric values (e.g. opacity: 0.8) are coerced to strings."""
|
||||
from hermes_cli.web_server import _normalise_theme_definition
|
||||
r = _normalise_theme_definition({
|
||||
"name": "t",
|
||||
"componentStyles": {"card": {"opacity": 0.8, "zIndex": 5}},
|
||||
})
|
||||
assert r["componentStyles"]["card"] == {"opacity": "0.8", "zIndex": "5"}
|
||||
|
||||
|
||||
class TestDashboardPluginManifestExtensions:
|
||||
"""Tests for the extended plugin manifest fields (tab.override,
|
||||
tab.hidden, slots) read by _discover_dashboard_plugins()."""
|
||||
|
||||
def _write_plugin(self, tmp_path, name, manifest):
|
||||
import json
|
||||
plug_dir = tmp_path / "plugins" / name / "dashboard"
|
||||
plug_dir.mkdir(parents=True)
|
||||
(plug_dir / "manifest.json").write_text(json.dumps(manifest))
|
||||
return plug_dir
|
||||
|
||||
def test_override_and_hidden_carried_through(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
self._write_plugin(tmp_path, "skin-home", {
|
||||
"name": "skin-home",
|
||||
"label": "Skin Home",
|
||||
"tab": {"path": "/skin-home", "override": "/", "hidden": True},
|
||||
"slots": ["sidebar", "header-left"],
|
||||
"entry": "dist/index.js",
|
||||
})
|
||||
from hermes_cli import web_server
|
||||
# Bust the process-level cache so the test plugin is picked up.
|
||||
web_server._dashboard_plugins_cache = None
|
||||
plugins = web_server._get_dashboard_plugins(force_rescan=True)
|
||||
entry = next(p for p in plugins if p["name"] == "skin-home")
|
||||
assert entry["tab"]["override"] == "/"
|
||||
assert entry["tab"]["hidden"] is True
|
||||
assert entry["slots"] == ["sidebar", "header-left"]
|
||||
|
||||
def test_override_requires_leading_slash(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
self._write_plugin(tmp_path, "bad-override", {
|
||||
"name": "bad-override",
|
||||
"label": "Bad",
|
||||
"tab": {"path": "/bad", "override": "no-leading-slash"},
|
||||
"entry": "dist/index.js",
|
||||
})
|
||||
from hermes_cli import web_server
|
||||
web_server._dashboard_plugins_cache = None
|
||||
plugins = web_server._get_dashboard_plugins(force_rescan=True)
|
||||
entry = next(p for p in plugins if p["name"] == "bad-override")
|
||||
assert "override" not in entry["tab"]
|
||||
|
||||
def test_slots_default_empty(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
self._write_plugin(tmp_path, "no-slots", {
|
||||
"name": "no-slots",
|
||||
"label": "No Slots",
|
||||
"tab": {"path": "/no-slots"},
|
||||
"entry": "dist/index.js",
|
||||
})
|
||||
from hermes_cli import web_server
|
||||
web_server._dashboard_plugins_cache = None
|
||||
plugins = web_server._get_dashboard_plugins(force_rescan=True)
|
||||
entry = next(p for p in plugins if p["name"] == "no-slots")
|
||||
assert entry["slots"] == []
|
||||
assert "hidden" not in entry["tab"]
|
||||
assert "override" not in entry["tab"]
|
||||
|
||||
def test_slots_filters_non_string_entries(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
self._write_plugin(tmp_path, "mixed-slots", {
|
||||
"name": "mixed-slots",
|
||||
"label": "Mixed",
|
||||
"tab": {"path": "/mixed-slots"},
|
||||
"slots": ["sidebar", "", 42, None, "header-right"],
|
||||
"entry": "dist/index.js",
|
||||
})
|
||||
from hermes_cli import web_server
|
||||
web_server._dashboard_plugins_cache = None
|
||||
plugins = web_server._get_dashboard_plugins(force_rescan=True)
|
||||
entry = next(p for p in plugins if p["name"] == "mixed-slots")
|
||||
assert entry["slots"] == ["sidebar", "header-right"]
|
||||
|
||||
@@ -287,10 +287,10 @@ class TestXiaomiAuxiliary:
|
||||
assert "xiaomi" not in _API_KEY_PROVIDER_AUX_MODELS
|
||||
|
||||
def test_vision_model_override(self):
|
||||
"""Xiaomi vision tasks should use mimo-v2-omni (multimodal), not the main model."""
|
||||
"""Xiaomi vision tasks should use mimo-v2.5 (multimodal), not the main model."""
|
||||
from agent.auxiliary_client import _PROVIDER_VISION_MODELS
|
||||
assert "xiaomi" in _PROVIDER_VISION_MODELS
|
||||
assert _PROVIDER_VISION_MODELS["xiaomi"] == "mimo-v2-omni"
|
||||
assert _PROVIDER_VISION_MODELS["xiaomi"] == "mimo-v2.5"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
|
||||
@@ -0,0 +1,236 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Tests for xAI image generation provider."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _fake_api_key(monkeypatch):
|
||||
"""Ensure XAI_API_KEY is set for all tests."""
|
||||
monkeypatch.setenv("XAI_API_KEY", "test-key-12345")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provider class tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestXAIImageGenProvider:
|
||||
def test_name(self):
|
||||
from plugins.image_gen.xai import XAIImageGenProvider
|
||||
|
||||
provider = XAIImageGenProvider()
|
||||
assert provider.name == "xai"
|
||||
|
||||
def test_display_name(self):
|
||||
from plugins.image_gen.xai import XAIImageGenProvider
|
||||
|
||||
provider = XAIImageGenProvider()
|
||||
assert provider.display_name == "xAI (Grok)"
|
||||
|
||||
def test_is_available_with_key(self, monkeypatch):
|
||||
monkeypatch.setenv("XAI_API_KEY", "sk-xxx")
|
||||
from plugins.image_gen.xai import XAIImageGenProvider
|
||||
|
||||
provider = XAIImageGenProvider()
|
||||
assert provider.is_available() is True
|
||||
|
||||
def test_is_available_without_key(self, monkeypatch):
|
||||
monkeypatch.delenv("XAI_API_KEY", raising=False)
|
||||
from plugins.image_gen.xai import XAIImageGenProvider
|
||||
|
||||
provider = XAIImageGenProvider()
|
||||
assert provider.is_available() is False
|
||||
|
||||
def test_list_models(self):
|
||||
from plugins.image_gen.xai import XAIImageGenProvider
|
||||
|
||||
provider = XAIImageGenProvider()
|
||||
models = provider.list_models()
|
||||
assert len(models) >= 1
|
||||
assert models[0]["id"] == "grok-imagine-image"
|
||||
|
||||
def test_default_model(self):
|
||||
from plugins.image_gen.xai import XAIImageGenProvider
|
||||
|
||||
provider = XAIImageGenProvider()
|
||||
assert provider.default_model() == "grok-imagine-image"
|
||||
|
||||
def test_get_setup_schema(self):
|
||||
from plugins.image_gen.xai import XAIImageGenProvider
|
||||
|
||||
provider = XAIImageGenProvider()
|
||||
schema = provider.get_setup_schema()
|
||||
assert schema["name"] == "xAI (Grok)"
|
||||
assert schema["badge"] == "paid"
|
||||
assert len(schema["env_vars"]) == 1
|
||||
assert schema["env_vars"][0]["key"] == "XAI_API_KEY"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestConfig:
|
||||
def test_default_model(self):
|
||||
from plugins.image_gen.xai import _resolve_model
|
||||
|
||||
model_id, meta = _resolve_model()
|
||||
assert model_id == "grok-imagine-image"
|
||||
|
||||
def test_default_resolution(self):
|
||||
from plugins.image_gen.xai import _resolve_resolution
|
||||
|
||||
assert _resolve_resolution() == "1k"
|
||||
|
||||
def test_custom_model(self, monkeypatch):
|
||||
monkeypatch.setenv("XAI_IMAGE_MODEL", "grok-imagine-image")
|
||||
from plugins.image_gen.xai import _resolve_model
|
||||
|
||||
model_id, _ = _resolve_model()
|
||||
assert model_id == "grok-imagine-image"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Generate tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGenerate:
|
||||
def test_missing_api_key(self, monkeypatch):
|
||||
monkeypatch.delenv("XAI_API_KEY", raising=False)
|
||||
from plugins.image_gen.xai import XAIImageGenProvider
|
||||
|
||||
provider = XAIImageGenProvider()
|
||||
result = provider.generate(prompt="test")
|
||||
assert result["success"] is False
|
||||
assert "XAI_API_KEY" in result["error"]
|
||||
|
||||
def test_successful_generation(self):
|
||||
from plugins.image_gen.xai import XAIImageGenProvider
|
||||
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.status_code = 200
|
||||
mock_resp.raise_for_status = MagicMock()
|
||||
mock_resp.json.return_value = {
|
||||
"data": [{"b64_json": "dGVzdC1pbWFnZS1kYXRh"}], # base64 "test-image-data"
|
||||
}
|
||||
|
||||
with patch("plugins.image_gen.xai.requests.post", return_value=mock_resp):
|
||||
with patch("plugins.image_gen.xai.save_b64_image", return_value="/tmp/test.png"):
|
||||
provider = XAIImageGenProvider()
|
||||
result = provider.generate(prompt="A cat playing piano")
|
||||
|
||||
assert result["success"] is True
|
||||
assert result["image"] == "/tmp/test.png"
|
||||
assert result["provider"] == "xai"
|
||||
assert result["model"] == "grok-imagine-image"
|
||||
|
||||
def test_successful_url_response(self):
|
||||
from plugins.image_gen.xai import XAIImageGenProvider
|
||||
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.status_code = 200
|
||||
mock_resp.raise_for_status = MagicMock()
|
||||
mock_resp.json.return_value = {
|
||||
"data": [{"url": "https://xai.image/result.png"}],
|
||||
}
|
||||
|
||||
with patch("plugins.image_gen.xai.requests.post", return_value=mock_resp):
|
||||
provider = XAIImageGenProvider()
|
||||
result = provider.generate(prompt="A cat playing piano")
|
||||
|
||||
assert result["success"] is True
|
||||
assert result["image"] == "https://xai.image/result.png"
|
||||
|
||||
def test_api_error(self):
|
||||
import requests as req_lib
|
||||
from plugins.image_gen.xai import XAIImageGenProvider
|
||||
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.status_code = 401
|
||||
mock_resp.text = "Unauthorized"
|
||||
mock_resp.json.return_value = {"error": {"message": "Invalid API key"}}
|
||||
mock_resp.raise_for_status.side_effect = req_lib.HTTPError(response=mock_resp)
|
||||
|
||||
with patch("plugins.image_gen.xai.requests.post", return_value=mock_resp):
|
||||
provider = XAIImageGenProvider()
|
||||
result = provider.generate(prompt="test")
|
||||
|
||||
assert result["success"] is False
|
||||
assert result["error_type"] == "api_error"
|
||||
|
||||
def test_timeout(self):
|
||||
import requests as req_lib
|
||||
|
||||
from plugins.image_gen.xai import XAIImageGenProvider
|
||||
|
||||
with patch("plugins.image_gen.xai.requests.post", side_effect=req_lib.Timeout()):
|
||||
provider = XAIImageGenProvider()
|
||||
result = provider.generate(prompt="test")
|
||||
|
||||
assert result["success"] is False
|
||||
assert result["error_type"] == "timeout"
|
||||
|
||||
def test_empty_response(self):
|
||||
from plugins.image_gen.xai import XAIImageGenProvider
|
||||
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.status_code = 200
|
||||
mock_resp.raise_for_status = MagicMock()
|
||||
mock_resp.json.return_value = {"data": []}
|
||||
|
||||
with patch("plugins.image_gen.xai.requests.post", return_value=mock_resp):
|
||||
provider = XAIImageGenProvider()
|
||||
result = provider.generate(prompt="test")
|
||||
|
||||
assert result["success"] is False
|
||||
assert result["error_type"] == "empty_response"
|
||||
|
||||
def test_auth_header(self):
|
||||
from plugins.image_gen.xai import XAIImageGenProvider
|
||||
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.status_code = 200
|
||||
mock_resp.raise_for_status = MagicMock()
|
||||
mock_resp.json.return_value = {
|
||||
"data": [{"url": "https://xai.image/test.png"}],
|
||||
}
|
||||
|
||||
with patch("plugins.image_gen.xai.requests.post", return_value=mock_resp) as mock_post:
|
||||
provider = XAIImageGenProvider()
|
||||
provider.generate(prompt="test")
|
||||
|
||||
call_args = mock_post.call_args
|
||||
headers = call_args.kwargs.get("headers") or call_args[1].get("headers")
|
||||
assert "Bearer test-key-12345" in headers["Authorization"]
|
||||
assert "Hermes-Agent" in headers["User-Agent"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Registration test
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRegistration:
|
||||
def test_register(self):
|
||||
from plugins.image_gen.xai import XAIImageGenProvider, register
|
||||
|
||||
mock_ctx = MagicMock()
|
||||
register(mock_ctx)
|
||||
mock_ctx.register_image_gen_provider.assert_called_once()
|
||||
provider = mock_ctx.register_image_gen_provider.call_args[0][0]
|
||||
assert isinstance(provider, XAIImageGenProvider)
|
||||
assert provider.name == "xai"
|
||||
@@ -0,0 +1,65 @@
|
||||
"""Tests for agent.api_max_retries config surface.
|
||||
|
||||
Closes #11616 — make the hardcoded ``max_retries = 3`` in the agent's API
|
||||
retry loop user-configurable so fallback-provider setups can fail over
|
||||
faster on flaky primaries instead of burning ~3x180s on the same stall.
|
||||
"""
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from run_agent import AIAgent
|
||||
|
||||
|
||||
def _make_agent(api_max_retries=None):
|
||||
"""Build an AIAgent with a mocked config.load_config that returns a
|
||||
config tree containing the given agent.api_max_retries (or default)."""
|
||||
cfg = {"agent": {}}
|
||||
if api_max_retries is not None:
|
||||
cfg["agent"]["api_max_retries"] = api_max_retries
|
||||
|
||||
with patch("run_agent.OpenAI"), \
|
||||
patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
return AIAgent(
|
||||
api_key="test-key",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
model="test/model",
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
)
|
||||
|
||||
|
||||
def test_default_api_max_retries_is_three():
|
||||
"""No config override → legacy default of 3 retries preserved."""
|
||||
agent = _make_agent()
|
||||
assert agent._api_max_retries == 3
|
||||
|
||||
|
||||
def test_api_max_retries_honors_config_override():
|
||||
"""Setting agent.api_max_retries in config propagates to the agent."""
|
||||
agent = _make_agent(api_max_retries=1)
|
||||
assert agent._api_max_retries == 1
|
||||
|
||||
agent2 = _make_agent(api_max_retries=5)
|
||||
assert agent2._api_max_retries == 5
|
||||
|
||||
|
||||
def test_api_max_retries_clamps_below_one_to_one():
|
||||
"""0 or negative values would disable the retry loop entirely
|
||||
(the ``while retry_count < max_retries`` guard would never execute),
|
||||
so clamp to 1 = single attempt, no retry."""
|
||||
agent = _make_agent(api_max_retries=0)
|
||||
assert agent._api_max_retries == 1
|
||||
|
||||
agent2 = _make_agent(api_max_retries=-3)
|
||||
assert agent2._api_max_retries == 1
|
||||
|
||||
|
||||
def test_api_max_retries_falls_back_on_invalid_value():
|
||||
"""Garbage values in config don't crash agent init — fall back to 3."""
|
||||
agent = _make_agent(api_max_retries="not-a-number")
|
||||
assert agent._api_max_retries == 3
|
||||
|
||||
agent2 = _make_agent(api_max_retries=None)
|
||||
# None with dict.get default fires → default(3), then int(None) raises
|
||||
# TypeError → except branch sets to 3.
|
||||
assert agent2._api_max_retries == 3
|
||||
@@ -44,6 +44,14 @@ def _make_tool_defs(*names: str) -> list:
|
||||
]
|
||||
|
||||
|
||||
def test_is_destructive_command_treats_cp_as_mutating():
|
||||
assert run_agent._is_destructive_command("cp .env.local .env") is True
|
||||
|
||||
|
||||
def test_is_destructive_command_treats_install_as_mutating():
|
||||
assert run_agent._is_destructive_command("install template.env .env") is True
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def agent():
|
||||
"""Minimal AIAgent with mocked OpenAI client and tool loading."""
|
||||
@@ -2567,6 +2575,89 @@ class TestRunConversation:
|
||||
assert result["final_response"] == "Recovered after compression"
|
||||
assert result["completed"] is True
|
||||
|
||||
def test_minimax_delta_overflow_keeps_known_context_length(self, agent):
|
||||
"""MiniMax reports overflow deltas like 'limit (2013)' without the real window.
|
||||
|
||||
Keep the known 204,800-token window and compress instead of probing down
|
||||
to the generic 128K fallback tier.
|
||||
"""
|
||||
self._setup_agent(agent)
|
||||
agent.provider = "minimax"
|
||||
agent.model = "MiniMax-M2.7-highspeed"
|
||||
agent.base_url = "https://api.minimax.io/anthropic"
|
||||
agent.context_compressor.context_length = 204_800
|
||||
agent.context_compressor.threshold_tokens = int(
|
||||
agent.context_compressor.context_length * agent.context_compressor.threshold_percent
|
||||
)
|
||||
|
||||
err_400 = Exception(
|
||||
"HTTP 400: invalid params, context window exceeds limit (2013)"
|
||||
)
|
||||
err_400.status_code = 400
|
||||
ok_resp = _mock_response(content="Recovered after compression", finish_reason="stop")
|
||||
agent.client.chat.completions.create.side_effect = [err_400, ok_resp]
|
||||
prefill = [
|
||||
{"role": "user", "content": "previous question"},
|
||||
{"role": "assistant", "content": "previous answer"},
|
||||
]
|
||||
|
||||
with (
|
||||
patch.object(agent, "_compress_context") as mock_compress,
|
||||
patch.object(agent, "_persist_session"),
|
||||
patch.object(agent, "_save_trajectory"),
|
||||
patch.object(agent, "_cleanup_task_resources"),
|
||||
):
|
||||
mock_compress.return_value = (
|
||||
[{"role": "user", "content": "hello"}],
|
||||
"compressed system prompt",
|
||||
)
|
||||
result = agent.run_conversation("hello", conversation_history=prefill)
|
||||
|
||||
mock_compress.assert_called_once()
|
||||
assert agent.context_compressor.context_length == 204_800
|
||||
assert agent.context_compressor._context_probed is False
|
||||
assert result["final_response"] == "Recovered after compression"
|
||||
assert result["completed"] is True
|
||||
|
||||
def test_non_minimax_delta_overflow_still_probes_down(self, agent):
|
||||
"""Non-MiniMax providers should keep the generic probe-down behavior."""
|
||||
self._setup_agent(agent)
|
||||
agent.provider = "openrouter"
|
||||
agent.model = "some/unknown-model"
|
||||
agent.base_url = "https://openrouter.ai/api/v1"
|
||||
agent.context_compressor.context_length = 200_000
|
||||
agent.context_compressor.threshold_tokens = int(
|
||||
agent.context_compressor.context_length * agent.context_compressor.threshold_percent
|
||||
)
|
||||
|
||||
err_400 = Exception(
|
||||
"HTTP 400: invalid params, context window exceeds limit (2013)"
|
||||
)
|
||||
err_400.status_code = 400
|
||||
ok_resp = _mock_response(content="Recovered after compression", finish_reason="stop")
|
||||
agent.client.chat.completions.create.side_effect = [err_400, ok_resp]
|
||||
prefill = [
|
||||
{"role": "user", "content": "previous question"},
|
||||
{"role": "assistant", "content": "previous answer"},
|
||||
]
|
||||
|
||||
with (
|
||||
patch.object(agent, "_compress_context") as mock_compress,
|
||||
patch.object(agent, "_persist_session"),
|
||||
patch.object(agent, "_save_trajectory"),
|
||||
patch.object(agent, "_cleanup_task_resources"),
|
||||
):
|
||||
mock_compress.return_value = (
|
||||
[{"role": "user", "content": "hello"}],
|
||||
"compressed system prompt",
|
||||
)
|
||||
result = agent.run_conversation("hello", conversation_history=prefill)
|
||||
|
||||
mock_compress.assert_called_once()
|
||||
assert agent.context_compressor.context_length == 128_000
|
||||
assert result["final_response"] == "Recovered after compression"
|
||||
assert result["completed"] is True
|
||||
|
||||
def test_length_finish_reason_requests_continuation(self, agent):
|
||||
"""Normal truncation (partial real content) triggers continuation."""
|
||||
self._setup_agent(agent)
|
||||
|
||||
@@ -134,6 +134,31 @@ class TestCoerceValue:
|
||||
"""A non-numeric string in [number, string] should stay a string."""
|
||||
assert _coerce_value("hello", ["number", "string"]) == "hello"
|
||||
|
||||
def test_array_type_parsed_from_json_string(self):
|
||||
"""Stringified JSON arrays are parsed into native lists."""
|
||||
assert _coerce_value('["a", "b"]', "array") == ["a", "b"]
|
||||
assert _coerce_value("[1, 2, 3]", "array") == [1, 2, 3]
|
||||
|
||||
def test_object_type_parsed_from_json_string(self):
|
||||
"""Stringified JSON objects are parsed into native dicts."""
|
||||
assert _coerce_value('{"k": "v"}', "object") == {"k": "v"}
|
||||
assert _coerce_value('{"n": 1}', "object") == {"n": 1}
|
||||
|
||||
def test_array_invalid_json_preserved(self):
|
||||
"""Unparseable strings are returned unchanged."""
|
||||
assert _coerce_value("not-json", "array") == "not-json"
|
||||
|
||||
def test_object_invalid_json_preserved(self):
|
||||
assert _coerce_value("not-json", "object") == "not-json"
|
||||
|
||||
def test_array_type_wrong_shape_preserved(self):
|
||||
"""A JSON object passed for an 'array' slot is preserved as a string."""
|
||||
assert _coerce_value('{"k": "v"}', "array") == '{"k": "v"}'
|
||||
|
||||
def test_object_type_wrong_shape_preserved(self):
|
||||
"""A JSON array passed for an 'object' slot is preserved as a string."""
|
||||
assert _coerce_value('["a"]', "object") == '["a"]'
|
||||
|
||||
|
||||
# ── Full coerce_tool_args with registry ───────────────────────────────────
|
||||
|
||||
@@ -212,6 +237,32 @@ class TestCoerceToolArgs:
|
||||
assert result["items"] == [1, 2, 3]
|
||||
assert result["config"] == {"key": "val"}
|
||||
|
||||
def test_coerces_stringified_array_arg(self):
|
||||
"""Regression for #3947 — MCP servers using z.array() expect lists, not strings."""
|
||||
schema = self._mock_schema({
|
||||
"messageIds": {"type": "array", "items": {"type": "string"}},
|
||||
})
|
||||
with patch("model_tools.registry.get_schema", return_value=schema):
|
||||
args = {"messageIds": '["abc", "def"]'}
|
||||
result = coerce_tool_args("test_tool", args)
|
||||
assert result["messageIds"] == ["abc", "def"]
|
||||
|
||||
def test_coerces_stringified_object_arg(self):
|
||||
"""Stringified JSON objects get parsed into dicts."""
|
||||
schema = self._mock_schema({"config": {"type": "object"}})
|
||||
with patch("model_tools.registry.get_schema", return_value=schema):
|
||||
args = {"config": '{"max": 50}'}
|
||||
result = coerce_tool_args("test_tool", args)
|
||||
assert result["config"] == {"max": 50}
|
||||
|
||||
def test_invalid_json_array_preserved_as_string(self):
|
||||
"""If the string isn't valid JSON, pass it through — let the tool decide."""
|
||||
schema = self._mock_schema({"items": {"type": "array"}})
|
||||
with patch("model_tools.registry.get_schema", return_value=schema):
|
||||
args = {"items": "not-json"}
|
||||
result = coerce_tool_args("test_tool", args)
|
||||
assert result["items"] == "not-json"
|
||||
|
||||
def test_extra_args_without_schema_left_alone(self):
|
||||
"""Args not in the schema properties are not touched."""
|
||||
schema = self._mock_schema({"limit": {"type": "integer"}})
|
||||
|
||||
@@ -200,8 +200,8 @@ class TestToolsetConsistency:
|
||||
def test_hermes_platforms_share_core_tools(self):
|
||||
"""All hermes-* platform toolsets share the same core tools.
|
||||
|
||||
Platform-specific additions (e.g. ``discord_server`` on
|
||||
hermes-discord, gated on DISCORD_BOT_TOKEN) are allowed on top —
|
||||
Platform-specific additions (e.g. ``discord`` / ``discord_admin``
|
||||
on hermes-discord, gated on DISCORD_BOT_TOKEN) are allowed on top —
|
||||
the invariant is that the core set is identical across platforms.
|
||||
"""
|
||||
platforms = ["hermes-cli", "hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-homeassistant"]
|
||||
|
||||
@@ -434,6 +434,76 @@ class TestSensitiveRedirectPattern:
|
||||
assert dangerous is False
|
||||
assert key is None
|
||||
|
||||
def test_redirect_to_local_dotenv_requires_approval(self):
|
||||
dangerous, key, desc = detect_dangerous_command("echo TOKEN=x > .env")
|
||||
assert dangerous is True
|
||||
assert key is not None
|
||||
assert "project env/config" in desc.lower()
|
||||
|
||||
def test_redirect_to_nested_config_yaml_requires_approval(self):
|
||||
dangerous, key, desc = detect_dangerous_command("echo mode: prod > deploy/config.yaml")
|
||||
assert dangerous is True
|
||||
assert key is not None
|
||||
assert "project env/config" in desc.lower()
|
||||
|
||||
def test_redirect_from_local_dotenv_source_is_safe(self):
|
||||
dangerous, key, desc = detect_dangerous_command("cat .env > backup.txt")
|
||||
assert dangerous is False
|
||||
assert key is None
|
||||
assert desc is None
|
||||
|
||||
|
||||
class TestProjectSensitiveCopyPattern:
|
||||
def test_cp_to_local_dotenv_requires_approval(self):
|
||||
dangerous, key, desc = detect_dangerous_command("cp .env.local .env")
|
||||
assert dangerous is True
|
||||
assert key is not None
|
||||
assert "project env/config" in desc.lower()
|
||||
|
||||
def test_cp_absolute_path_to_dotenv_requires_approval(self):
|
||||
# Regression: the real-world bug report was `cp /opt/data/.env.local /opt/data/.env`.
|
||||
# The regex must cover absolute paths, not just `./` / bare relative paths.
|
||||
dangerous, key, desc = detect_dangerous_command(
|
||||
"cp /opt/data/.env.local /opt/data/.env"
|
||||
)
|
||||
assert dangerous is True
|
||||
assert key is not None
|
||||
assert "project env/config" in desc.lower()
|
||||
|
||||
def test_redirect_absolute_path_to_dotenv_requires_approval(self):
|
||||
dangerous, key, desc = detect_dangerous_command(
|
||||
"cat /opt/data/.env.local > /opt/data/.env"
|
||||
)
|
||||
assert dangerous is True
|
||||
assert key is not None
|
||||
assert "project env/config" in desc.lower()
|
||||
|
||||
def test_mv_to_nested_config_yaml_requires_approval(self):
|
||||
dangerous, key, desc = detect_dangerous_command("mv tmp/generated.yaml config/config.yaml")
|
||||
assert dangerous is True
|
||||
assert key is not None
|
||||
assert "project env/config" in desc.lower()
|
||||
|
||||
def test_install_to_dotenv_requires_approval(self):
|
||||
dangerous, key, desc = detect_dangerous_command("install -m 600 template.env .env.production")
|
||||
assert dangerous is True
|
||||
assert key is not None
|
||||
assert "project env/config" in desc.lower()
|
||||
|
||||
def test_cp_from_config_yaml_source_is_safe(self):
|
||||
dangerous, key, desc = detect_dangerous_command("cp config.yaml backup.yaml")
|
||||
assert dangerous is False
|
||||
assert key is None
|
||||
assert desc is None
|
||||
|
||||
|
||||
class TestProjectSensitiveTeePattern:
|
||||
def test_tee_to_local_dotenv_requires_approval(self):
|
||||
dangerous, key, desc = detect_dangerous_command("printenv | tee .env.local")
|
||||
assert dangerous is True
|
||||
assert key is not None
|
||||
assert "project env/config" in desc.lower()
|
||||
|
||||
|
||||
class TestPatternKeyUniqueness:
|
||||
"""Bug: pattern_key is derived by splitting on \\b and taking [1], so
|
||||
@@ -836,4 +906,3 @@ class TestChmodExecuteCombo:
|
||||
cmd = "chmod +x script.sh"
|
||||
dangerous, _, _ = detect_dangerous_command(cmd)
|
||||
assert dangerous is False
|
||||
|
||||
|
||||
@@ -0,0 +1,563 @@
|
||||
"""Integration tests for tools.browser_supervisor.
|
||||
|
||||
Exercises the supervisor end-to-end against a real local Chrome
|
||||
(``--remote-debugging-port``). Skipped when Chrome is not installed
|
||||
— these are the tests that actually verify the CDP wire protocol
|
||||
works, since mock-CDP unit tests can only prove the happy paths we
|
||||
thought to model.
|
||||
|
||||
Run manually:
|
||||
scripts/run_tests.sh tests/tools/test_browser_supervisor.py
|
||||
|
||||
Automated: skipped in CI unless ``HERMES_E2E_BROWSER=1`` is set.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
pytestmark = pytest.mark.skipif(
|
||||
not shutil.which("google-chrome") and not shutil.which("chromium"),
|
||||
reason="Chrome/Chromium not installed",
|
||||
)
|
||||
|
||||
|
||||
def _find_chrome() -> str:
|
||||
for candidate in ("google-chrome", "chromium", "chromium-browser"):
|
||||
path = shutil.which(candidate)
|
||||
if path:
|
||||
return path
|
||||
pytest.skip("no Chrome binary found")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def chrome_cdp(worker_id):
|
||||
"""Start a headless Chrome with --remote-debugging-port, yield its WS URL.
|
||||
|
||||
Uses a unique port per xdist worker to avoid cross-worker collisions.
|
||||
Always launches with ``--site-per-process`` so cross-origin iframes
|
||||
become real OOPIFs (needed by the iframe interaction tests).
|
||||
"""
|
||||
import socket
|
||||
|
||||
# xdist worker_id is "master" in single-process mode or "gw0".."gwN" otherwise.
|
||||
if worker_id == "master":
|
||||
port_offset = 0
|
||||
else:
|
||||
port_offset = int(worker_id.lstrip("gw"))
|
||||
port = 9225 + port_offset
|
||||
profile = tempfile.mkdtemp(prefix="hermes-supervisor-test-")
|
||||
proc = subprocess.Popen(
|
||||
[
|
||||
_find_chrome(),
|
||||
f"--remote-debugging-port={port}",
|
||||
f"--user-data-dir={profile}",
|
||||
"--no-first-run",
|
||||
"--no-default-browser-check",
|
||||
"--headless=new",
|
||||
"--disable-gpu",
|
||||
"--site-per-process", # force OOPIFs for cross-origin iframes
|
||||
],
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
)
|
||||
|
||||
ws_url = None
|
||||
deadline = time.monotonic() + 15
|
||||
while time.monotonic() < deadline:
|
||||
try:
|
||||
import urllib.request
|
||||
with urllib.request.urlopen(
|
||||
f"http://127.0.0.1:{port}/json/version", timeout=1
|
||||
) as r:
|
||||
info = json.loads(r.read().decode())
|
||||
ws_url = info["webSocketDebuggerUrl"]
|
||||
break
|
||||
except Exception:
|
||||
time.sleep(0.25)
|
||||
if ws_url is None:
|
||||
proc.terminate()
|
||||
proc.wait(timeout=5)
|
||||
shutil.rmtree(profile, ignore_errors=True)
|
||||
pytest.skip("Chrome didn't expose CDP in time")
|
||||
|
||||
yield ws_url, port
|
||||
|
||||
proc.terminate()
|
||||
try:
|
||||
proc.wait(timeout=3)
|
||||
except Exception:
|
||||
proc.kill()
|
||||
shutil.rmtree(profile, ignore_errors=True)
|
||||
|
||||
|
||||
def _test_page_url() -> str:
|
||||
html = """<!doctype html>
|
||||
<html><head><title>Supervisor pytest</title></head><body>
|
||||
<h1>Supervisor pytest</h1>
|
||||
<iframe id="inner" srcdoc="<body><h2>frame-marker</h2></body>" width="400" height="100"></iframe>
|
||||
</body></html>"""
|
||||
return "data:text/html;base64," + base64.b64encode(html.encode()).decode()
|
||||
|
||||
|
||||
def _fire_on_page(cdp_url: str, expression: str) -> None:
|
||||
"""Navigate the first page target to a data URL and fire `expression`."""
|
||||
import asyncio
|
||||
import websockets as _ws_mod
|
||||
|
||||
async def run():
|
||||
async with _ws_mod.connect(cdp_url, max_size=50 * 1024 * 1024) as ws:
|
||||
next_id = [1]
|
||||
|
||||
async def call(method, params=None, session_id=None):
|
||||
cid = next_id[0]
|
||||
next_id[0] += 1
|
||||
p = {"id": cid, "method": method}
|
||||
if params:
|
||||
p["params"] = params
|
||||
if session_id:
|
||||
p["sessionId"] = session_id
|
||||
await ws.send(json.dumps(p))
|
||||
async for raw in ws:
|
||||
m = json.loads(raw)
|
||||
if m.get("id") == cid:
|
||||
return m
|
||||
|
||||
targets = (await call("Target.getTargets"))["result"]["targetInfos"]
|
||||
page = next(t for t in targets if t.get("type") == "page")
|
||||
attach = await call(
|
||||
"Target.attachToTarget", {"targetId": page["targetId"], "flatten": True}
|
||||
)
|
||||
sid = attach["result"]["sessionId"]
|
||||
await call("Page.navigate", {"url": _test_page_url()}, session_id=sid)
|
||||
await asyncio.sleep(1.5) # let the page load
|
||||
await call(
|
||||
"Runtime.evaluate",
|
||||
{"expression": expression, "returnByValue": True},
|
||||
session_id=sid,
|
||||
)
|
||||
|
||||
asyncio.run(run())
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def supervisor_registry():
|
||||
"""Yield the global registry and tear down any supervisors after the test."""
|
||||
from tools.browser_supervisor import SUPERVISOR_REGISTRY
|
||||
|
||||
yield SUPERVISOR_REGISTRY
|
||||
SUPERVISOR_REGISTRY.stop_all()
|
||||
|
||||
|
||||
def _wait_for_dialog(supervisor, timeout: float = 5.0):
|
||||
deadline = time.monotonic() + timeout
|
||||
while time.monotonic() < deadline:
|
||||
snap = supervisor.snapshot()
|
||||
if snap.pending_dialogs:
|
||||
return snap.pending_dialogs
|
||||
time.sleep(0.1)
|
||||
return ()
|
||||
|
||||
|
||||
def test_supervisor_start_and_snapshot(chrome_cdp, supervisor_registry):
|
||||
"""Supervisor attaches, exposes an active snapshot with a top frame."""
|
||||
cdp_url, _port = chrome_cdp
|
||||
supervisor = supervisor_registry.get_or_start(task_id="pytest-1", cdp_url=cdp_url)
|
||||
|
||||
# Navigate so the frame tree populates.
|
||||
_fire_on_page(cdp_url, "/* no dialog */ void 0")
|
||||
|
||||
# Give a moment for frame events to propagate
|
||||
time.sleep(1.0)
|
||||
snap = supervisor.snapshot()
|
||||
assert snap.active is True
|
||||
assert snap.task_id == "pytest-1"
|
||||
assert snap.pending_dialogs == ()
|
||||
# At minimum a top frame should exist after the navigate.
|
||||
assert snap.frame_tree.get("top") is not None
|
||||
|
||||
|
||||
def test_main_frame_alert_detection_and_dismiss(chrome_cdp, supervisor_registry):
|
||||
"""alert() in the main frame surfaces and can be dismissed via the sync API."""
|
||||
cdp_url, _port = chrome_cdp
|
||||
supervisor = supervisor_registry.get_or_start(task_id="pytest-2", cdp_url=cdp_url)
|
||||
|
||||
_fire_on_page(cdp_url, "setTimeout(() => alert('PYTEST-MAIN-ALERT'), 50)")
|
||||
dialogs = _wait_for_dialog(supervisor)
|
||||
assert dialogs, "no dialog detected"
|
||||
d = dialogs[0]
|
||||
assert d.type == "alert"
|
||||
assert "PYTEST-MAIN-ALERT" in d.message
|
||||
|
||||
result = supervisor.respond_to_dialog("dismiss")
|
||||
assert result["ok"] is True
|
||||
# State cleared after dismiss
|
||||
time.sleep(0.3)
|
||||
assert supervisor.snapshot().pending_dialogs == ()
|
||||
|
||||
|
||||
def test_iframe_contentwindow_alert(chrome_cdp, supervisor_registry):
|
||||
"""alert() fired from inside a same-origin iframe surfaces too."""
|
||||
cdp_url, _port = chrome_cdp
|
||||
supervisor = supervisor_registry.get_or_start(task_id="pytest-3", cdp_url=cdp_url)
|
||||
|
||||
_fire_on_page(
|
||||
cdp_url,
|
||||
"setTimeout(() => document.querySelector('#inner').contentWindow.alert('PYTEST-IFRAME'), 50)",
|
||||
)
|
||||
dialogs = _wait_for_dialog(supervisor)
|
||||
assert dialogs, "no iframe dialog detected"
|
||||
assert any("PYTEST-IFRAME" in d.message for d in dialogs)
|
||||
|
||||
result = supervisor.respond_to_dialog("accept")
|
||||
assert result["ok"] is True
|
||||
|
||||
|
||||
def test_prompt_dialog_with_response_text(chrome_cdp, supervisor_registry):
|
||||
"""prompt() gets our prompt_text back inside the page."""
|
||||
cdp_url, _port = chrome_cdp
|
||||
supervisor = supervisor_registry.get_or_start(task_id="pytest-4", cdp_url=cdp_url)
|
||||
|
||||
# Fire a prompt and stash the answer on window
|
||||
_fire_on_page(
|
||||
cdp_url,
|
||||
"setTimeout(() => { window.__promptResult = prompt('give me a token', 'default-x'); }, 50)",
|
||||
)
|
||||
dialogs = _wait_for_dialog(supervisor)
|
||||
assert dialogs
|
||||
d = dialogs[0]
|
||||
assert d.type == "prompt"
|
||||
assert d.default_prompt == "default-x"
|
||||
|
||||
result = supervisor.respond_to_dialog("accept", prompt_text="PYTEST-PROMPT-REPLY")
|
||||
assert result["ok"] is True
|
||||
|
||||
|
||||
def test_respond_with_no_pending_dialog_errors_cleanly(chrome_cdp, supervisor_registry):
|
||||
"""Calling respond_to_dialog when nothing is pending returns a clean error, not an exception."""
|
||||
cdp_url, _port = chrome_cdp
|
||||
supervisor = supervisor_registry.get_or_start(task_id="pytest-5", cdp_url=cdp_url)
|
||||
|
||||
result = supervisor.respond_to_dialog("accept")
|
||||
assert result["ok"] is False
|
||||
assert "no dialog" in result["error"].lower()
|
||||
|
||||
|
||||
def test_auto_dismiss_policy(chrome_cdp, supervisor_registry):
|
||||
"""auto_dismiss policy clears dialogs without the agent responding."""
|
||||
from tools.browser_supervisor import DIALOG_POLICY_AUTO_DISMISS
|
||||
|
||||
cdp_url, _port = chrome_cdp
|
||||
supervisor = supervisor_registry.get_or_start(
|
||||
task_id="pytest-6",
|
||||
cdp_url=cdp_url,
|
||||
dialog_policy=DIALOG_POLICY_AUTO_DISMISS,
|
||||
)
|
||||
|
||||
_fire_on_page(cdp_url, "setTimeout(() => alert('PYTEST-AUTO-DISMISS'), 50)")
|
||||
# Give the supervisor a moment to see + auto-dismiss
|
||||
time.sleep(2.0)
|
||||
snap = supervisor.snapshot()
|
||||
# Nothing pending because auto-dismiss cleared it immediately
|
||||
assert snap.pending_dialogs == ()
|
||||
|
||||
|
||||
def test_registry_idempotent_get_or_start(chrome_cdp, supervisor_registry):
|
||||
"""Calling get_or_start twice with the same (task, url) returns the same instance."""
|
||||
cdp_url, _port = chrome_cdp
|
||||
a = supervisor_registry.get_or_start(task_id="pytest-idem", cdp_url=cdp_url)
|
||||
b = supervisor_registry.get_or_start(task_id="pytest-idem", cdp_url=cdp_url)
|
||||
assert a is b
|
||||
|
||||
|
||||
def test_registry_stop(chrome_cdp, supervisor_registry):
|
||||
"""stop() tears down the supervisor and snapshot reports inactive."""
|
||||
cdp_url, _port = chrome_cdp
|
||||
supervisor = supervisor_registry.get_or_start(task_id="pytest-stop", cdp_url=cdp_url)
|
||||
assert supervisor.snapshot().active is True
|
||||
supervisor_registry.stop("pytest-stop")
|
||||
# Post-stop snapshot reports inactive; supervisor obj may still exist
|
||||
assert supervisor.snapshot().active is False
|
||||
|
||||
|
||||
def test_browser_dialog_tool_no_supervisor():
|
||||
"""browser_dialog returns a clear error when no supervisor is attached."""
|
||||
from tools.browser_dialog_tool import browser_dialog
|
||||
|
||||
r = json.loads(browser_dialog(action="accept", task_id="nonexistent-task"))
|
||||
assert r["success"] is False
|
||||
assert "No CDP supervisor" in r["error"]
|
||||
|
||||
|
||||
def test_browser_dialog_invalid_action(chrome_cdp, supervisor_registry):
|
||||
"""browser_dialog rejects actions that aren't accept/dismiss."""
|
||||
from tools.browser_dialog_tool import browser_dialog
|
||||
|
||||
cdp_url, _port = chrome_cdp
|
||||
supervisor_registry.get_or_start(task_id="pytest-bad-action", cdp_url=cdp_url)
|
||||
|
||||
r = json.loads(browser_dialog(action="eat", task_id="pytest-bad-action"))
|
||||
assert r["success"] is False
|
||||
assert "accept" in r["error"] and "dismiss" in r["error"]
|
||||
|
||||
|
||||
def test_recent_dialogs_ring_buffer(chrome_cdp, supervisor_registry):
|
||||
"""Closed dialogs show up in recent_dialogs with a closed_by tag."""
|
||||
from tools.browser_supervisor import DIALOG_POLICY_AUTO_DISMISS
|
||||
|
||||
cdp_url, _port = chrome_cdp
|
||||
sv = supervisor_registry.get_or_start(
|
||||
task_id="pytest-recent",
|
||||
cdp_url=cdp_url,
|
||||
dialog_policy=DIALOG_POLICY_AUTO_DISMISS,
|
||||
)
|
||||
|
||||
_fire_on_page(cdp_url, "setTimeout(() => alert('PYTEST-RECENT'), 50)")
|
||||
# Wait for auto-dismiss to cycle the dialog through
|
||||
deadline = time.time() + 5
|
||||
while time.time() < deadline:
|
||||
recent = sv.snapshot().recent_dialogs
|
||||
if recent and any("PYTEST-RECENT" in r.message for r in recent):
|
||||
break
|
||||
time.sleep(0.1)
|
||||
|
||||
recent = sv.snapshot().recent_dialogs
|
||||
assert recent, "recent_dialogs should contain the auto-dismissed dialog"
|
||||
match = next((r for r in recent if "PYTEST-RECENT" in r.message), None)
|
||||
assert match is not None
|
||||
assert match.type == "alert"
|
||||
assert match.closed_by == "auto_policy"
|
||||
assert match.closed_at >= match.opened_at
|
||||
|
||||
|
||||
def test_browser_dialog_tool_end_to_end(chrome_cdp, supervisor_registry):
|
||||
"""Full agent-path check: fire an alert, call the tool handler directly."""
|
||||
from tools.browser_dialog_tool import browser_dialog
|
||||
|
||||
cdp_url, _port = chrome_cdp
|
||||
supervisor = supervisor_registry.get_or_start(task_id="pytest-tool", cdp_url=cdp_url)
|
||||
|
||||
_fire_on_page(cdp_url, "setTimeout(() => alert('PYTEST-TOOL-END2END'), 50)")
|
||||
assert _wait_for_dialog(supervisor), "no dialog detected via wait_for_dialog"
|
||||
|
||||
r = json.loads(browser_dialog(action="dismiss", task_id="pytest-tool"))
|
||||
assert r["success"] is True
|
||||
assert r["action"] == "dismiss"
|
||||
assert "PYTEST-TOOL-END2END" in r["dialog"]["message"]
|
||||
|
||||
|
||||
def test_browser_cdp_frame_id_routes_via_supervisor(chrome_cdp, supervisor_registry, monkeypatch):
|
||||
"""browser_cdp(frame_id=...) routes Runtime.evaluate through supervisor.
|
||||
|
||||
Mocks the supervisor with a known frame and verifies browser_cdp sends
|
||||
the call via the supervisor's loop rather than opening a stateless
|
||||
WebSocket. This is the path that makes cross-origin iframe eval work
|
||||
on Browserbase.
|
||||
"""
|
||||
cdp_url, _port = chrome_cdp
|
||||
sv = supervisor_registry.get_or_start(task_id="frame-id-test", cdp_url=cdp_url)
|
||||
assert sv.snapshot().active
|
||||
|
||||
# Inject a fake OOPIF frame pointing at the SUPERVISOR's own page session
|
||||
# so we can verify routing. We fake is_oopif=True so the code path
|
||||
# treats it as an OOPIF child.
|
||||
import tools.browser_supervisor as _bs
|
||||
with sv._state_lock:
|
||||
fake_frame_id = "FAKE-FRAME-001"
|
||||
sv._frames[fake_frame_id] = _bs.FrameInfo(
|
||||
frame_id=fake_frame_id,
|
||||
url="fake://",
|
||||
origin="",
|
||||
parent_frame_id=None,
|
||||
is_oopif=True,
|
||||
cdp_session_id=sv._page_session_id, # route at page scope
|
||||
)
|
||||
|
||||
# Route the tool through the supervisor. Should succeed and return
|
||||
# something that clearly came from CDP.
|
||||
from tools.browser_cdp_tool import browser_cdp
|
||||
result = browser_cdp(
|
||||
method="Runtime.evaluate",
|
||||
params={"expression": "1 + 1", "returnByValue": True},
|
||||
frame_id=fake_frame_id,
|
||||
task_id="frame-id-test",
|
||||
)
|
||||
r = json.loads(result)
|
||||
assert r.get("success") is True, f"expected success, got: {r}"
|
||||
assert r.get("frame_id") == fake_frame_id
|
||||
assert r.get("session_id") == sv._page_session_id
|
||||
value = r.get("result", {}).get("result", {}).get("value")
|
||||
assert value == 2, f"expected 2, got {value!r}"
|
||||
|
||||
|
||||
def test_browser_cdp_frame_id_real_oopif_smoke_documented():
|
||||
"""Document that real-OOPIF E2E was manually verified — see PR #14540.
|
||||
|
||||
A pytest version of this hits an asyncio version-quirk in the venv
|
||||
(3.11) that doesn't show up in standalone scripts (3.13 + system
|
||||
websockets). The mechanism IS verified end-to-end by two separate
|
||||
smoke scripts in /tmp/dialog-iframe-test/:
|
||||
|
||||
* smoke_local_oopif.py — local Chrome + 2 http servers on
|
||||
different hostnames + --site-per-process. Outer page on
|
||||
localhost:18905, iframe src=http://127.0.0.1:18906. Calls
|
||||
browser_cdp(method='Runtime.evaluate', frame_id=<OOPIF>) and
|
||||
verifies inner page's title comes back from the OOPIF session.
|
||||
PASSED on 2026-04-23: iframe document.title = 'INNER-FRAME-XYZ'
|
||||
|
||||
* smoke_bb_iframe_agent_path.py — Browserbase + real cross-origin
|
||||
iframe (src=https://example.com/). Same browser_cdp(frame_id=)
|
||||
path. PASSED on 2026-04-23: iframe document.title =
|
||||
'Example Domain'
|
||||
|
||||
The test_browser_cdp_frame_id_routes_via_supervisor pytest covers
|
||||
the supervisor-routing plumbing with a fake injected OOPIF.
|
||||
"""
|
||||
pytest.skip(
|
||||
"Real-OOPIF E2E verified manually with smoke_local_oopif.py and "
|
||||
"smoke_bb_iframe_agent_path.py — pytest version hits an asyncio "
|
||||
"version quirk between venv (3.11) and standalone (3.13). "
|
||||
"Smoke logs preserved in /tmp/dialog-iframe-test/."
|
||||
)
|
||||
|
||||
|
||||
def test_browser_cdp_frame_id_missing_supervisor():
|
||||
"""browser_cdp(frame_id=...) errors cleanly when no supervisor is attached."""
|
||||
from tools.browser_cdp_tool import browser_cdp
|
||||
result = browser_cdp(
|
||||
method="Runtime.evaluate",
|
||||
params={"expression": "1"},
|
||||
frame_id="any-frame-id",
|
||||
task_id="no-such-task",
|
||||
)
|
||||
r = json.loads(result)
|
||||
assert r.get("success") is not True
|
||||
assert "supervisor" in (r.get("error") or "").lower()
|
||||
|
||||
|
||||
def test_browser_cdp_frame_id_not_in_frame_tree(chrome_cdp, supervisor_registry):
|
||||
"""browser_cdp(frame_id=...) errors when the frame_id isn't known."""
|
||||
cdp_url, _port = chrome_cdp
|
||||
sv = supervisor_registry.get_or_start(task_id="bad-frame-test", cdp_url=cdp_url)
|
||||
assert sv.snapshot().active
|
||||
|
||||
from tools.browser_cdp_tool import browser_cdp
|
||||
result = browser_cdp(
|
||||
method="Runtime.evaluate",
|
||||
params={"expression": "1"},
|
||||
frame_id="nonexistent-frame",
|
||||
task_id="bad-frame-test",
|
||||
)
|
||||
r = json.loads(result)
|
||||
assert r.get("success") is not True
|
||||
assert "not found" in (r.get("error") or "").lower()
|
||||
|
||||
|
||||
def test_bridge_captures_prompt_and_returns_reply_text(chrome_cdp, supervisor_registry):
|
||||
"""End-to-end: agent's prompt_text round-trips INTO the page's JS.
|
||||
|
||||
Proves the bridge isn't just catching dialogs — it's properly round-
|
||||
tripping our reply back into the page via Fetch.fulfillRequest, so
|
||||
``prompt()`` actually returns the agent-supplied string to the page.
|
||||
"""
|
||||
import base64 as _b64
|
||||
|
||||
cdp_url, _port = chrome_cdp
|
||||
sv = supervisor_registry.get_or_start(task_id="pytest-bridge-prompt", cdp_url=cdp_url)
|
||||
|
||||
# Page fires prompt and stashes the return value on window.
|
||||
html = """<!doctype html><html><body><script>
|
||||
window.__ret = null;
|
||||
setTimeout(() => { window.__ret = prompt('PROMPT-MSG', 'default'); }, 50);
|
||||
</script></body></html>"""
|
||||
url = "data:text/html;base64," + _b64.b64encode(html.encode()).decode()
|
||||
|
||||
import asyncio as _asyncio
|
||||
import websockets as _ws_mod
|
||||
|
||||
async def nav_and_read():
|
||||
async with _ws_mod.connect(cdp_url, max_size=50 * 1024 * 1024) as ws:
|
||||
nid = [1]
|
||||
pending: dict = {}
|
||||
|
||||
async def reader_fn():
|
||||
try:
|
||||
async for raw in ws:
|
||||
m = json.loads(raw)
|
||||
if "id" in m:
|
||||
fut = pending.pop(m["id"], None)
|
||||
if fut and not fut.done():
|
||||
fut.set_result(m)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
rd = _asyncio.create_task(reader_fn())
|
||||
|
||||
async def call(method, params=None, sid=None):
|
||||
c = nid[0]; nid[0] += 1
|
||||
p = {"id": c, "method": method}
|
||||
if params: p["params"] = params
|
||||
if sid: p["sessionId"] = sid
|
||||
fut = _asyncio.get_event_loop().create_future()
|
||||
pending[c] = fut
|
||||
await ws.send(json.dumps(p))
|
||||
return await _asyncio.wait_for(fut, timeout=20)
|
||||
|
||||
try:
|
||||
t = (await call("Target.getTargets"))["result"]["targetInfos"]
|
||||
pg = next(x for x in t if x.get("type") == "page")
|
||||
a = await call("Target.attachToTarget", {"targetId": pg["targetId"], "flatten": True})
|
||||
sid = a["result"]["sessionId"]
|
||||
|
||||
# Fire navigate but don't await — prompt() blocks the page
|
||||
nav_id = nid[0]; nid[0] += 1
|
||||
nav_fut = _asyncio.get_event_loop().create_future()
|
||||
pending[nav_id] = nav_fut
|
||||
await ws.send(json.dumps({"id": nav_id, "method": "Page.navigate", "params": {"url": url}, "sessionId": sid}))
|
||||
|
||||
# Wait for supervisor to see the prompt
|
||||
deadline = time.monotonic() + 10
|
||||
dialog = None
|
||||
while time.monotonic() < deadline:
|
||||
snap = sv.snapshot()
|
||||
if snap.pending_dialogs:
|
||||
dialog = snap.pending_dialogs[0]
|
||||
break
|
||||
await _asyncio.sleep(0.05)
|
||||
assert dialog is not None, "no dialog captured"
|
||||
assert dialog.bridge_request_id is not None, "expected bridge path"
|
||||
assert dialog.type == "prompt"
|
||||
|
||||
# Agent responds
|
||||
resp = sv.respond_to_dialog("accept", prompt_text="AGENT-SUPPLIED-REPLY")
|
||||
assert resp["ok"] is True
|
||||
|
||||
# Wait for nav to complete + read back
|
||||
try:
|
||||
await _asyncio.wait_for(nav_fut, timeout=10)
|
||||
except Exception:
|
||||
pass
|
||||
await _asyncio.sleep(0.5)
|
||||
r = await call(
|
||||
"Runtime.evaluate",
|
||||
{"expression": "window.__ret", "returnByValue": True},
|
||||
sid=sid,
|
||||
)
|
||||
return r.get("result", {}).get("result", {}).get("value")
|
||||
finally:
|
||||
rd.cancel()
|
||||
try: await rd
|
||||
except BaseException: pass
|
||||
|
||||
value = asyncio.run(nav_and_read())
|
||||
assert value == "AGENT-SUPPLIED-REPLY", f"expected AGENT-SUPPLIED-REPLY, got {value!r}"
|
||||
@@ -69,7 +69,10 @@ class TestDelegateRequirements(unittest.TestCase):
|
||||
self.assertIn("tasks", props)
|
||||
self.assertIn("context", props)
|
||||
self.assertIn("toolsets", props)
|
||||
self.assertIn("max_iterations", props)
|
||||
# max_iterations is intentionally NOT exposed to the model — it's
|
||||
# config-authoritative via delegation.max_iterations so users get
|
||||
# predictable budgets.
|
||||
self.assertNotIn("max_iterations", props)
|
||||
self.assertNotIn("maxItems", props["tasks"]) # removed — limit is now runtime-configurable
|
||||
|
||||
|
||||
|
||||
@@ -11,6 +11,8 @@ import pytest
|
||||
from tools.discord_tool import (
|
||||
DiscordAPIError,
|
||||
_ACTIONS,
|
||||
_ADMIN_ACTIONS,
|
||||
_CORE_ACTIONS,
|
||||
_available_actions,
|
||||
_build_schema,
|
||||
_channel_type_name,
|
||||
@@ -21,8 +23,11 @@ from tools.discord_tool import (
|
||||
_load_allowed_actions_config,
|
||||
_reset_capability_cache,
|
||||
check_discord_tool_requirements,
|
||||
discord_server,
|
||||
discord_admin_handler,
|
||||
discord_core,
|
||||
get_dynamic_schema,
|
||||
get_dynamic_schema_admin,
|
||||
get_dynamic_schema_core,
|
||||
)
|
||||
|
||||
|
||||
@@ -147,32 +152,32 @@ class TestDiscordRequest:
|
||||
class TestDiscordServerValidation:
|
||||
def test_no_token(self, monkeypatch):
|
||||
monkeypatch.delenv("DISCORD_BOT_TOKEN", raising=False)
|
||||
result = json.loads(discord_server(action="list_guilds"))
|
||||
result = json.loads(discord_admin_handler(action="list_guilds"))
|
||||
assert "error" in result
|
||||
assert "DISCORD_BOT_TOKEN" in result["error"]
|
||||
|
||||
def test_unknown_action(self, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
result = json.loads(discord_server(action="bad_action"))
|
||||
result = json.loads(discord_core(action="bad_action"))
|
||||
assert "error" in result
|
||||
assert "Unknown action" in result["error"]
|
||||
assert "available_actions" in result
|
||||
|
||||
def test_missing_required_guild_id(self, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
result = json.loads(discord_server(action="list_channels"))
|
||||
result = json.loads(discord_admin_handler(action="list_channels"))
|
||||
assert "error" in result
|
||||
assert "guild_id" in result["error"]
|
||||
|
||||
def test_missing_required_channel_id(self, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
result = json.loads(discord_server(action="fetch_messages"))
|
||||
result = json.loads(discord_core(action="fetch_messages"))
|
||||
assert "error" in result
|
||||
assert "channel_id" in result["error"]
|
||||
|
||||
def test_missing_multiple_params(self, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
result = json.loads(discord_server(action="add_role"))
|
||||
result = json.loads(discord_admin_handler(action="add_role"))
|
||||
assert "error" in result
|
||||
assert "guild_id" in result["error"]
|
||||
assert "user_id" in result["error"]
|
||||
@@ -191,7 +196,7 @@ class TestListGuilds:
|
||||
{"id": "111", "name": "Test Server", "icon": "abc", "owner": True, "permissions": "123"},
|
||||
{"id": "222", "name": "Other Server", "icon": None, "owner": False, "permissions": "456"},
|
||||
]
|
||||
result = json.loads(discord_server(action="list_guilds"))
|
||||
result = json.loads(discord_admin_handler(action="list_guilds"))
|
||||
assert result["count"] == 2
|
||||
assert result["guilds"][0]["name"] == "Test Server"
|
||||
assert result["guilds"][1]["id"] == "222"
|
||||
@@ -219,7 +224,7 @@ class TestServerInfo:
|
||||
"premium_subscription_count": 5,
|
||||
"verification_level": 1,
|
||||
}
|
||||
result = json.loads(discord_server(action="server_info", guild_id="111"))
|
||||
result = json.loads(discord_admin_handler(action="server_info", guild_id="111"))
|
||||
assert result["name"] == "My Server"
|
||||
assert result["member_count"] == 42
|
||||
assert result["online_count"] == 10
|
||||
@@ -242,7 +247,7 @@ class TestListChannels:
|
||||
{"id": "12", "name": "voice", "type": 2, "position": 1, "parent_id": "10", "topic": None, "nsfw": False},
|
||||
{"id": "13", "name": "no-category", "type": 0, "position": 0, "parent_id": None, "topic": None, "nsfw": False},
|
||||
]
|
||||
result = json.loads(discord_server(action="list_channels", guild_id="111"))
|
||||
result = json.loads(discord_admin_handler(action="list_channels", guild_id="111"))
|
||||
assert result["total_channels"] == 3 # excludes the category itself
|
||||
groups = result["channel_groups"]
|
||||
# Uncategorized first
|
||||
@@ -257,7 +262,7 @@ class TestListChannels:
|
||||
def test_empty_guild(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = []
|
||||
result = json.loads(discord_server(action="list_channels", guild_id="111"))
|
||||
result = json.loads(discord_admin_handler(action="list_channels", guild_id="111"))
|
||||
assert result["total_channels"] == 0
|
||||
|
||||
|
||||
@@ -274,7 +279,7 @@ class TestChannelInfo:
|
||||
"topic": "Welcome!", "nsfw": False, "position": 0,
|
||||
"parent_id": "10", "rate_limit_per_user": 0, "last_message_id": "999",
|
||||
}
|
||||
result = json.loads(discord_server(action="channel_info", channel_id="11"))
|
||||
result = json.loads(discord_admin_handler(action="channel_info", channel_id="11"))
|
||||
assert result["name"] == "general"
|
||||
assert result["type"] == "text"
|
||||
assert result["guild_id"] == "111"
|
||||
@@ -293,7 +298,7 @@ class TestListRoles:
|
||||
{"id": "2", "name": "Admin", "position": 2, "color": 16711680, "mentionable": True, "managed": False, "hoist": True},
|
||||
{"id": "3", "name": "Mod", "position": 1, "color": 255, "mentionable": True, "managed": False, "hoist": True},
|
||||
]
|
||||
result = json.loads(discord_server(action="list_roles", guild_id="111"))
|
||||
result = json.loads(discord_admin_handler(action="list_roles", guild_id="111"))
|
||||
assert result["count"] == 3
|
||||
# Should be sorted by position descending
|
||||
assert result["roles"][0]["name"] == "Admin"
|
||||
@@ -317,7 +322,7 @@ class TestMemberInfo:
|
||||
"joined_at": "2024-01-01T00:00:00Z",
|
||||
"premium_since": None,
|
||||
}
|
||||
result = json.loads(discord_server(action="member_info", guild_id="111", user_id="42"))
|
||||
result = json.loads(discord_admin_handler(action="member_info", guild_id="111", user_id="42"))
|
||||
assert result["username"] == "testuser"
|
||||
assert result["nickname"] == "Testy"
|
||||
assert result["roles"] == ["2", "3"]
|
||||
@@ -334,7 +339,7 @@ class TestSearchMembers:
|
||||
mock_req.return_value = [
|
||||
{"user": {"id": "42", "username": "testuser", "global_name": "Test", "bot": False}, "nick": None, "roles": []},
|
||||
]
|
||||
result = json.loads(discord_server(action="search_members", guild_id="111", query="test"))
|
||||
result = json.loads(discord_core(action="search_members", guild_id="111", query="test"))
|
||||
assert result["count"] == 1
|
||||
assert result["members"][0]["username"] == "testuser"
|
||||
mock_req.assert_called_once_with(
|
||||
@@ -346,7 +351,7 @@ class TestSearchMembers:
|
||||
def test_search_members_limit_capped(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = []
|
||||
discord_server(action="search_members", guild_id="111", query="x", limit=200)
|
||||
discord_core(action="search_members", guild_id="111", query="x", limit=200)
|
||||
call_params = mock_req.call_args[1]["params"]
|
||||
assert call_params["limit"] == "100" # Capped at 100
|
||||
|
||||
@@ -370,7 +375,7 @@ class TestFetchMessages:
|
||||
"pinned": False,
|
||||
},
|
||||
]
|
||||
result = json.loads(discord_server(action="fetch_messages", channel_id="11"))
|
||||
result = json.loads(discord_core(action="fetch_messages", channel_id="11"))
|
||||
assert result["count"] == 1
|
||||
assert result["messages"][0]["content"] == "Hello world"
|
||||
assert result["messages"][0]["author"]["username"] == "user1"
|
||||
@@ -379,7 +384,7 @@ class TestFetchMessages:
|
||||
def test_fetch_messages_with_pagination(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = []
|
||||
discord_server(action="fetch_messages", channel_id="11", before="999", limit=10)
|
||||
discord_core(action="fetch_messages", channel_id="11", before="999", limit=10)
|
||||
call_params = mock_req.call_args[1]["params"]
|
||||
assert call_params["before"] == "999"
|
||||
assert call_params["limit"] == "10"
|
||||
@@ -396,7 +401,7 @@ class TestListPins:
|
||||
mock_req.return_value = [
|
||||
{"id": "500", "content": "Important announcement", "author": {"username": "admin"}, "timestamp": "2024-01-01T00:00:00Z"},
|
||||
]
|
||||
result = json.loads(discord_server(action="list_pins", channel_id="11"))
|
||||
result = json.loads(discord_admin_handler(action="list_pins", channel_id="11"))
|
||||
assert result["count"] == 1
|
||||
assert result["pinned_messages"][0]["content"] == "Important announcement"
|
||||
|
||||
@@ -410,7 +415,7 @@ class TestPinUnpin:
|
||||
def test_pin_message(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = None # 204
|
||||
result = json.loads(discord_server(action="pin_message", channel_id="11", message_id="500"))
|
||||
result = json.loads(discord_admin_handler(action="pin_message", channel_id="11", message_id="500"))
|
||||
assert result["success"] is True
|
||||
mock_req.assert_called_once_with("PUT", "/channels/11/pins/500", "test-token")
|
||||
|
||||
@@ -418,7 +423,7 @@ class TestPinUnpin:
|
||||
def test_unpin_message(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = None
|
||||
result = json.loads(discord_server(action="unpin_message", channel_id="11", message_id="500"))
|
||||
result = json.loads(discord_admin_handler(action="unpin_message", channel_id="11", message_id="500"))
|
||||
assert result["success"] is True
|
||||
|
||||
|
||||
@@ -431,7 +436,7 @@ class TestCreateThread:
|
||||
def test_create_standalone_thread(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = {"id": "800", "name": "New Thread"}
|
||||
result = json.loads(discord_server(action="create_thread", channel_id="11", name="New Thread"))
|
||||
result = json.loads(discord_core(action="create_thread", channel_id="11", name="New Thread"))
|
||||
assert result["success"] is True
|
||||
assert result["thread_id"] == "800"
|
||||
# Verify the API call
|
||||
@@ -444,7 +449,7 @@ class TestCreateThread:
|
||||
def test_create_thread_from_message(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = {"id": "801", "name": "Discussion"}
|
||||
result = json.loads(discord_server(
|
||||
result = json.loads(discord_core(
|
||||
action="create_thread", channel_id="11", name="Discussion", message_id="1001",
|
||||
))
|
||||
assert result["success"] is True
|
||||
@@ -463,7 +468,7 @@ class TestRoleManagement:
|
||||
def test_add_role(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = None
|
||||
result = json.loads(discord_server(
|
||||
result = json.loads(discord_admin_handler(
|
||||
action="add_role", guild_id="111", user_id="42", role_id="2",
|
||||
))
|
||||
assert result["success"] is True
|
||||
@@ -475,7 +480,7 @@ class TestRoleManagement:
|
||||
def test_remove_role(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = None
|
||||
result = json.loads(discord_server(
|
||||
result = json.loads(discord_admin_handler(
|
||||
action="remove_role", guild_id="111", user_id="42", role_id="2",
|
||||
))
|
||||
assert result["success"] is True
|
||||
@@ -490,15 +495,23 @@ class TestErrorHandling:
|
||||
def test_api_error_handled(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.side_effect = DiscordAPIError(403, '{"message": "Missing Access"}')
|
||||
result = json.loads(discord_server(action="list_guilds"))
|
||||
result = json.loads(discord_admin_handler(action="list_guilds"))
|
||||
assert "error" in result
|
||||
assert "403" in result["error"]
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_unexpected_error_handled(self, mock_req, monkeypatch):
|
||||
def test_unexpected_error_handled_admin(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.side_effect = RuntimeError("something broke")
|
||||
result = json.loads(discord_server(action="list_guilds"))
|
||||
result = json.loads(discord_admin_handler(action="list_guilds"))
|
||||
assert "error" in result
|
||||
assert "something broke" in result["error"]
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_unexpected_error_handled_core(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.side_effect = RuntimeError("something broke")
|
||||
result = json.loads(discord_core(action="fetch_messages", channel_id="11"))
|
||||
assert "error" in result
|
||||
assert "something broke" in result["error"]
|
||||
|
||||
@@ -508,79 +521,109 @@ class TestErrorHandling:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestRegistration:
|
||||
def test_tool_registered(self):
|
||||
def test_core_tool_registered(self):
|
||||
from tools.registry import registry
|
||||
entry = registry._tools.get("discord_server")
|
||||
entry = registry._tools.get("discord")
|
||||
assert entry is not None
|
||||
assert entry.schema["name"] == "discord_server"
|
||||
assert entry.schema["name"] == "discord"
|
||||
assert entry.toolset == "discord"
|
||||
assert entry.check_fn is not None
|
||||
assert entry.requires_env == ["DISCORD_BOT_TOKEN"]
|
||||
|
||||
def test_schema_actions(self):
|
||||
"""Static schema should list all actions (the model_tools post-processing
|
||||
narrows this per-session; static registration is the superset)."""
|
||||
def test_admin_tool_registered(self):
|
||||
from tools.registry import registry
|
||||
entry = registry._tools["discord_server"]
|
||||
actions = entry.schema["parameters"]["properties"]["action"]["enum"]
|
||||
expected = [
|
||||
"list_guilds", "server_info", "list_channels", "channel_info",
|
||||
"list_roles", "member_info", "search_members", "fetch_messages",
|
||||
"list_pins", "pin_message", "unpin_message", "create_thread",
|
||||
"add_role", "remove_role",
|
||||
]
|
||||
assert set(actions) == set(expected)
|
||||
assert set(_ACTIONS.keys()) == set(expected)
|
||||
entry = registry._tools.get("discord_admin")
|
||||
assert entry is not None
|
||||
assert entry.schema["name"] == "discord_admin"
|
||||
assert entry.toolset == "discord_admin"
|
||||
assert entry.check_fn is not None
|
||||
assert entry.requires_env == ["DISCORD_BOT_TOKEN"]
|
||||
|
||||
def test_core_schema_actions(self):
|
||||
"""Core static schema should list only core actions."""
|
||||
from tools.registry import registry
|
||||
entry = registry._tools["discord"]
|
||||
actions = set(entry.schema["parameters"]["properties"]["action"]["enum"])
|
||||
assert actions == {"fetch_messages", "search_members", "create_thread"}
|
||||
|
||||
def test_admin_schema_actions(self):
|
||||
"""Admin static schema should list only admin actions."""
|
||||
from tools.registry import registry
|
||||
entry = registry._tools["discord_admin"]
|
||||
actions = set(entry.schema["parameters"]["properties"]["action"]["enum"])
|
||||
expected_admin = set(_ACTIONS.keys()) - {"fetch_messages", "search_members", "create_thread"}
|
||||
assert actions == expected_admin
|
||||
|
||||
def test_all_actions_covered(self):
|
||||
"""Core + admin actions should cover all known actions."""
|
||||
assert set(_CORE_ACTIONS.keys()) | set(_ADMIN_ACTIONS.keys()) == set(_ACTIONS.keys())
|
||||
assert set(_CORE_ACTIONS.keys()) & set(_ADMIN_ACTIONS.keys()) == set()
|
||||
|
||||
def test_schema_parameter_bounds(self):
|
||||
from tools.registry import registry
|
||||
entry = registry._tools["discord_server"]
|
||||
entry = registry._tools["discord"]
|
||||
props = entry.schema["parameters"]["properties"]
|
||||
assert props["limit"]["minimum"] == 1
|
||||
assert props["limit"]["maximum"] == 100
|
||||
assert props["auto_archive_duration"]["enum"] == [60, 1440, 4320, 10080]
|
||||
|
||||
def test_schema_description_is_action_manifest(self):
|
||||
"""The top-level description should include the action manifest
|
||||
(one-line signatures per action) so the model can find required
|
||||
params without re-reading every parameter description."""
|
||||
def test_core_schema_description(self):
|
||||
"""Core schema description should mention core actions."""
|
||||
from tools.registry import registry
|
||||
entry = registry._tools["discord_server"]
|
||||
entry = registry._tools["discord"]
|
||||
desc = entry.schema["description"]
|
||||
# Spot-check a few entries
|
||||
assert "list_guilds()" in desc
|
||||
assert "fetch_messages(channel_id)" in desc
|
||||
assert "search_members(guild_id, query)" in desc
|
||||
assert "create_thread(channel_id, name)" in desc
|
||||
# Admin actions should NOT be in core description
|
||||
assert "list_guilds()" not in desc
|
||||
assert "add_role(" not in desc
|
||||
|
||||
def test_admin_schema_description(self):
|
||||
"""Admin schema description should mention admin actions."""
|
||||
from tools.registry import registry
|
||||
entry = registry._tools["discord_admin"]
|
||||
desc = entry.schema["description"]
|
||||
assert "list_guilds()" in desc
|
||||
assert "add_role(guild_id, user_id, role_id)" in desc
|
||||
# Core actions should NOT be in admin description
|
||||
assert "fetch_messages(" not in desc
|
||||
assert "create_thread(" not in desc
|
||||
|
||||
def test_handler_callable(self):
|
||||
from tools.registry import registry
|
||||
entry = registry._tools["discord_server"]
|
||||
entry = registry._tools["discord"]
|
||||
assert callable(entry.handler)
|
||||
entry_admin = registry._tools["discord_admin"]
|
||||
assert callable(entry_admin.handler)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Toolset: discord_server only in hermes-discord
|
||||
# Toolset: discord / discord_admin only in hermes-discord
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestToolsetInclusion:
|
||||
def test_discord_server_in_hermes_discord_toolset(self):
|
||||
def test_discord_tools_in_hermes_discord_toolset(self):
|
||||
from toolsets import TOOLSETS
|
||||
assert "discord_server" in TOOLSETS["hermes-discord"]["tools"]
|
||||
assert "discord" in TOOLSETS["hermes-discord"]["tools"]
|
||||
assert "discord_admin" in TOOLSETS["hermes-discord"]["tools"]
|
||||
|
||||
def test_discord_server_not_in_core_tools(self):
|
||||
def test_discord_tools_not_in_core_tools(self):
|
||||
from toolsets import _HERMES_CORE_TOOLS
|
||||
assert "discord_server" not in _HERMES_CORE_TOOLS
|
||||
assert "discord" not in _HERMES_CORE_TOOLS
|
||||
assert "discord_admin" not in _HERMES_CORE_TOOLS
|
||||
|
||||
def test_discord_server_not_in_other_toolsets(self):
|
||||
def test_discord_tools_not_in_other_toolsets(self):
|
||||
from toolsets import TOOLSETS
|
||||
for name, ts in TOOLSETS.items():
|
||||
if name == "hermes-discord":
|
||||
if name in ("hermes-discord", "hermes-gateway", "discord", "discord_admin"):
|
||||
continue
|
||||
# The gateway toolset might include it if it unions all platform tools
|
||||
if name == "hermes-gateway":
|
||||
continue
|
||||
assert "discord_server" not in ts.get("tools", []), (
|
||||
f"discord_server should not be in toolset '{name}'"
|
||||
tools = ts.get("tools", [])
|
||||
assert "discord" not in tools or name == "discord", (
|
||||
f"discord tool should not be in toolset '{name}'"
|
||||
)
|
||||
assert "discord_admin" not in tools or name == "discord_admin", (
|
||||
f"discord_admin tool should not be in toolset '{name}'"
|
||||
)
|
||||
|
||||
|
||||
@@ -798,40 +841,69 @@ class TestDynamicSchema:
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_no_token_returns_none(self, mock_req, monkeypatch):
|
||||
monkeypatch.delenv("DISCORD_BOT_TOKEN", raising=False)
|
||||
assert get_dynamic_schema() is None
|
||||
assert get_dynamic_schema_core() is None
|
||||
assert get_dynamic_schema_admin() is None
|
||||
mock_req.assert_not_called()
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_full_intents_full_schema(self, mock_req, monkeypatch):
|
||||
def test_full_intents_core_schema(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"discord": {"server_actions": ""}},
|
||||
)
|
||||
mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
|
||||
schema = get_dynamic_schema()
|
||||
actions = schema["parameters"]["properties"]["action"]["enum"]
|
||||
assert set(actions) == set(_ACTIONS.keys())
|
||||
# No content warning
|
||||
schema = get_dynamic_schema_core()
|
||||
actions = set(schema["parameters"]["properties"]["action"]["enum"])
|
||||
assert actions == set(_CORE_ACTIONS.keys())
|
||||
assert schema["name"] == "discord"
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_full_intents_admin_schema(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"discord": {"server_actions": ""}},
|
||||
)
|
||||
mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
|
||||
schema = get_dynamic_schema_admin()
|
||||
actions = set(schema["parameters"]["properties"]["action"]["enum"])
|
||||
assert actions == set(_ADMIN_ACTIONS.keys())
|
||||
assert schema["name"] == "discord_admin"
|
||||
# No content warning when MESSAGE_CONTENT is enabled
|
||||
assert "MESSAGE_CONTENT" not in schema["description"]
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_no_members_intent_removes_member_actions_from_schema(
|
||||
def test_no_members_intent_removes_member_actions_from_admin_schema(
|
||||
self, mock_req, monkeypatch,
|
||||
):
|
||||
"""member_info is an admin action; it should be hidden when
|
||||
GUILD_MEMBERS intent is missing."""
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"discord": {"server_actions": ""}},
|
||||
)
|
||||
mock_req.return_value = {"flags": 1 << 18} # only MESSAGE_CONTENT
|
||||
schema = get_dynamic_schema()
|
||||
schema = get_dynamic_schema_admin()
|
||||
actions = schema["parameters"]["properties"]["action"]["enum"]
|
||||
assert "member_info" not in actions
|
||||
assert "member_info" not in schema["description"]
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_no_members_intent_hides_search_members_from_core(
|
||||
self, mock_req, monkeypatch,
|
||||
):
|
||||
"""search_members is a core action gated by GUILD_MEMBERS intent."""
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"discord": {"server_actions": ""}},
|
||||
)
|
||||
mock_req.return_value = {"flags": 1 << 18} # only MESSAGE_CONTENT
|
||||
schema = get_dynamic_schema_core()
|
||||
actions = schema["parameters"]["properties"]["action"]["enum"]
|
||||
assert "search_members" not in actions
|
||||
assert "member_info" not in actions
|
||||
# Manifest description should also not advertise them
|
||||
assert "search_members" not in schema["description"]
|
||||
assert "member_info" not in schema["description"]
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_no_message_content_adds_warning_note(self, mock_req, monkeypatch):
|
||||
@@ -841,41 +913,53 @@ class TestDynamicSchema:
|
||||
lambda: {"discord": {"server_actions": ""}},
|
||||
)
|
||||
mock_req.return_value = {"flags": 1 << 14} # only GUILD_MEMBERS
|
||||
schema = get_dynamic_schema()
|
||||
schema = get_dynamic_schema_core()
|
||||
assert "MESSAGE_CONTENT" in schema["description"]
|
||||
# But fetch_messages is still available
|
||||
actions = schema["parameters"]["properties"]["action"]["enum"]
|
||||
assert "fetch_messages" in actions
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_config_allowlist_narrows_schema(self, mock_req, monkeypatch):
|
||||
def test_config_allowlist_narrows_admin_schema(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"discord": {"server_actions": "list_guilds,list_channels"}},
|
||||
)
|
||||
mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
|
||||
schema = get_dynamic_schema()
|
||||
schema = get_dynamic_schema_admin()
|
||||
actions = schema["parameters"]["properties"]["action"]["enum"]
|
||||
assert actions == ["list_guilds", "list_channels"]
|
||||
# Manifest description should only show allowed ones (check for
|
||||
# the signature marker, which is specific to manifest lines)
|
||||
assert "list_guilds()" in schema["description"]
|
||||
assert "add_role(" not in schema["description"]
|
||||
assert "create_thread(" not in schema["description"]
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_empty_allowlist_with_valid_values_hides_tool(self, mock_req, monkeypatch):
|
||||
def test_empty_allowlist_with_valid_values_hides_tools(self, mock_req, monkeypatch):
|
||||
"""If the allowlist resolves to zero valid actions (e.g. all names
|
||||
were typos), get_dynamic_schema returns None so the tool is dropped
|
||||
entirely rather than showing an empty enum."""
|
||||
were typos), get_dynamic_schema returns None so the tool is dropped."""
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"discord": {"server_actions": "typo_one,typo_two"}},
|
||||
)
|
||||
mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
|
||||
assert get_dynamic_schema() is None
|
||||
assert get_dynamic_schema_core() is None
|
||||
assert get_dynamic_schema_admin() is None
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_backward_compat_wrapper(self, mock_req, monkeypatch):
|
||||
"""get_dynamic_schema() should delegate to get_dynamic_schema_core()."""
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"discord": {"server_actions": ""}},
|
||||
)
|
||||
mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
|
||||
schema = get_dynamic_schema()
|
||||
assert schema is not None
|
||||
assert schema["name"] == "discord"
|
||||
actions = set(schema["parameters"]["properties"]["action"]["enum"])
|
||||
assert actions == set(_CORE_ACTIONS.keys())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -890,7 +974,7 @@ class TestRuntimeAllowlistEnforcement:
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"discord": {"server_actions": "list_guilds"}},
|
||||
)
|
||||
result = json.loads(discord_server(action="add_role", guild_id="1", user_id="2", role_id="3"))
|
||||
result = json.loads(discord_admin_handler(action="add_role", guild_id="1", user_id="2", role_id="3"))
|
||||
assert "error" in result
|
||||
assert "disabled by config" in result["error"]
|
||||
mock_req.assert_not_called()
|
||||
@@ -903,7 +987,7 @@ class TestRuntimeAllowlistEnforcement:
|
||||
lambda: {"discord": {"server_actions": "list_guilds"}},
|
||||
)
|
||||
mock_req.return_value = []
|
||||
result = json.loads(discord_server(action="list_guilds"))
|
||||
result = json.loads(discord_admin_handler(action="list_guilds"))
|
||||
assert "guilds" in result
|
||||
|
||||
|
||||
@@ -930,7 +1014,7 @@ class Test403Enrichment:
|
||||
lambda: {"discord": {"server_actions": ""}},
|
||||
)
|
||||
mock_req.side_effect = DiscordAPIError(403, '{"message":"Missing Permissions"}')
|
||||
result = json.loads(discord_server(
|
||||
result = json.loads(discord_admin_handler(
|
||||
action="add_role", guild_id="1", user_id="2", role_id="3",
|
||||
))
|
||||
assert "error" in result
|
||||
@@ -944,7 +1028,7 @@ class Test403Enrichment:
|
||||
lambda: {"discord": {"server_actions": ""}},
|
||||
)
|
||||
mock_req.side_effect = DiscordAPIError(500, "server error")
|
||||
result = json.loads(discord_server(action="list_guilds"))
|
||||
result = json.loads(discord_admin_handler(action="list_guilds"))
|
||||
assert "500" in result["error"]
|
||||
assert "MANAGE_ROLES" not in result["error"]
|
||||
|
||||
@@ -961,10 +1045,10 @@ class TestModelToolsIntegration:
|
||||
_reset_capability_cache()
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_discord_server_schema_rebuilt_by_get_tool_definitions(
|
||||
def test_discord_admin_schema_rebuilt_by_get_tool_definitions(
|
||||
self, mock_req, monkeypatch,
|
||||
):
|
||||
"""When model_tools.get_tool_definitions runs with discord_server
|
||||
"""When model_tools.get_tool_definitions runs with discord_admin
|
||||
available, it should replace the static schema with the dynamic one."""
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
monkeypatch.setattr(
|
||||
@@ -976,16 +1060,16 @@ class TestModelToolsIntegration:
|
||||
|
||||
from model_tools import get_tool_definitions
|
||||
tools = get_tool_definitions(enabled_toolsets=["hermes-discord"], quiet_mode=True)
|
||||
discord_tool = next(
|
||||
(t for t in tools if t.get("function", {}).get("name") == "discord_server"),
|
||||
discord_admin_tool = next(
|
||||
(t for t in tools if t.get("function", {}).get("name") == "discord_admin"),
|
||||
None,
|
||||
)
|
||||
assert discord_tool is not None, "discord_server should be in the schema"
|
||||
actions = discord_tool["function"]["parameters"]["properties"]["action"]["enum"]
|
||||
assert discord_admin_tool is not None, "discord_admin should be in the schema"
|
||||
actions = discord_admin_tool["function"]["parameters"]["properties"]["action"]["enum"]
|
||||
assert actions == ["list_guilds", "server_info"]
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_discord_server_dropped_when_allowlist_empties_it(
|
||||
def test_discord_tools_dropped_when_allowlist_empties_them(
|
||||
self, mock_req, monkeypatch,
|
||||
):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
@@ -998,4 +1082,6 @@ class TestModelToolsIntegration:
|
||||
from model_tools import get_tool_definitions
|
||||
tools = get_tool_definitions(enabled_toolsets=["hermes-discord"], quiet_mode=True)
|
||||
names = [t.get("function", {}).get("name") for t in tools]
|
||||
assert "discord" not in names
|
||||
assert "discord_admin" not in names
|
||||
assert "discord_server" not in names
|
||||
|
||||
@@ -13,8 +13,10 @@ import os
|
||||
import tempfile
|
||||
import time
|
||||
import unittest
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from tools import file_state
|
||||
from tools.file_tools import (
|
||||
read_file_tool,
|
||||
write_file_tool,
|
||||
@@ -76,6 +78,7 @@ class TestStalenessCheck(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
_read_tracker.clear()
|
||||
file_state.get_registry().clear()
|
||||
self._tmpdir = tempfile.mkdtemp()
|
||||
self._tmpfile = os.path.join(self._tmpdir, "stale_test.txt")
|
||||
with open(self._tmpfile, "w") as f:
|
||||
@@ -83,6 +86,7 @@ class TestStalenessCheck(unittest.TestCase):
|
||||
|
||||
def tearDown(self):
|
||||
_read_tracker.clear()
|
||||
file_state.get_registry().clear()
|
||||
try:
|
||||
os.unlink(self._tmpfile)
|
||||
os.rmdir(self._tmpdir)
|
||||
@@ -145,6 +149,53 @@ class TestStalenessCheck(unittest.TestCase):
|
||||
result = json.loads(write_file_tool(self._tmpfile, "new", task_id="task_b"))
|
||||
self.assertNotIn("_warning", result)
|
||||
|
||||
@patch("tools.file_tools._get_file_ops")
|
||||
def test_relative_path_uses_live_cwd_for_staleness_tracking(self, mock_ops):
|
||||
"""Relative-path stale tracking must follow the live terminal cwd."""
|
||||
start_dir = os.path.join(self._tmpdir, "start")
|
||||
live_dir = os.path.join(self._tmpdir, "worktree")
|
||||
os.makedirs(start_dir, exist_ok=True)
|
||||
os.makedirs(live_dir, exist_ok=True)
|
||||
|
||||
start_file = os.path.join(start_dir, "shared.txt")
|
||||
live_file = os.path.join(live_dir, "shared.txt")
|
||||
with open(start_file, "w") as f:
|
||||
f.write("start copy\n")
|
||||
with open(live_file, "w") as f:
|
||||
f.write("live copy\n")
|
||||
|
||||
fake_ops = _make_fake_ops("live copy\n", 10)
|
||||
fake_ops.env = SimpleNamespace(cwd=live_dir)
|
||||
fake_ops.cwd = start_dir
|
||||
mock_ops.return_value = fake_ops
|
||||
|
||||
from tools import file_tools
|
||||
|
||||
with file_tools._file_ops_lock:
|
||||
previous = file_tools._file_ops_cache.get("live_task")
|
||||
file_tools._file_ops_cache["live_task"] = fake_ops
|
||||
|
||||
try:
|
||||
with patch.dict(os.environ, {"TERMINAL_CWD": start_dir}, clear=False):
|
||||
read_file_tool("shared.txt", task_id="live_task")
|
||||
|
||||
time.sleep(0.05)
|
||||
with open(live_file, "w") as f:
|
||||
f.write("live copy modified elsewhere\n")
|
||||
|
||||
result = json.loads(
|
||||
write_file_tool("shared.txt", "replacement", task_id="live_task")
|
||||
)
|
||||
finally:
|
||||
with file_tools._file_ops_lock:
|
||||
if previous is None:
|
||||
file_tools._file_ops_cache.pop("live_task", None)
|
||||
else:
|
||||
file_tools._file_ops_cache["live_task"] = previous
|
||||
|
||||
self.assertIn("_warning", result)
|
||||
self.assertIn("modified since you last read", result["_warning"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Staleness in patch
|
||||
@@ -154,6 +205,7 @@ class TestPatchStaleness(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
_read_tracker.clear()
|
||||
file_state.get_registry().clear()
|
||||
self._tmpdir = tempfile.mkdtemp()
|
||||
self._tmpfile = os.path.join(self._tmpdir, "patch_test.txt")
|
||||
with open(self._tmpfile, "w") as f:
|
||||
@@ -161,6 +213,7 @@ class TestPatchStaleness(unittest.TestCase):
|
||||
|
||||
def tearDown(self):
|
||||
_read_tracker.clear()
|
||||
file_state.get_registry().clear()
|
||||
try:
|
||||
os.unlink(self._tmpfile)
|
||||
os.rmdir(self._tmpdir)
|
||||
@@ -207,9 +260,11 @@ class TestCheckFileStalenessHelper(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
_read_tracker.clear()
|
||||
file_state.get_registry().clear()
|
||||
|
||||
def tearDown(self):
|
||||
_read_tracker.clear()
|
||||
file_state.get_registry().clear()
|
||||
|
||||
def test_returns_none_for_unknown_task(self):
|
||||
self.assertIsNone(_check_file_staleness("/tmp/x.py", "nonexistent"))
|
||||
|
||||
@@ -77,7 +77,7 @@ class TestStdioPidTracking:
|
||||
from tools.mcp_tool import _stdio_pids, _lock
|
||||
with _lock:
|
||||
# Might have residual state from other tests, just check type
|
||||
assert isinstance(_stdio_pids, set)
|
||||
assert isinstance(_stdio_pids, dict)
|
||||
|
||||
def test_kill_orphaned_noop_when_empty(self):
|
||||
"""_kill_orphaned_mcp_children does nothing when no PIDs tracked."""
|
||||
@@ -96,7 +96,7 @@ class TestStdioPidTracking:
|
||||
# Use a PID that definitely doesn't exist
|
||||
fake_pid = 999999999
|
||||
with _lock:
|
||||
_stdio_pids.add(fake_pid)
|
||||
_stdio_pids[fake_pid] = "test"
|
||||
|
||||
# Should not raise (ProcessLookupError is caught)
|
||||
_kill_orphaned_mcp_children()
|
||||
@@ -105,40 +105,49 @@ class TestStdioPidTracking:
|
||||
assert fake_pid not in _stdio_pids
|
||||
|
||||
def test_kill_orphaned_uses_sigkill_when_available(self, monkeypatch):
|
||||
"""Unix-like platforms should keep using SIGKILL for orphan cleanup."""
|
||||
"""SIGTERM-first then SIGKILL after 2s for orphan cleanup."""
|
||||
from tools.mcp_tool import _kill_orphaned_mcp_children, _stdio_pids, _lock
|
||||
|
||||
fake_pid = 424242
|
||||
with _lock:
|
||||
_stdio_pids.clear()
|
||||
_stdio_pids.add(fake_pid)
|
||||
_stdio_pids[fake_pid] = "test"
|
||||
|
||||
fake_sigkill = 9
|
||||
monkeypatch.setattr(signal, "SIGKILL", fake_sigkill, raising=False)
|
||||
|
||||
with patch("tools.mcp_tool.os.kill") as mock_kill:
|
||||
with patch("tools.mcp_tool.os.kill") as mock_kill, \
|
||||
patch("time.sleep") as mock_sleep:
|
||||
_kill_orphaned_mcp_children()
|
||||
|
||||
mock_kill.assert_called_once_with(fake_pid, fake_sigkill)
|
||||
# SIGTERM, then alive-check (signal 0), then SIGKILL
|
||||
mock_kill.assert_any_call(fake_pid, signal.SIGTERM)
|
||||
mock_kill.assert_any_call(fake_pid, 0) # alive check
|
||||
mock_kill.assert_any_call(fake_pid, fake_sigkill)
|
||||
assert mock_kill.call_count == 3
|
||||
mock_sleep.assert_called_once_with(2)
|
||||
|
||||
with _lock:
|
||||
assert fake_pid not in _stdio_pids
|
||||
|
||||
def test_kill_orphaned_falls_back_without_sigkill(self, monkeypatch):
|
||||
"""Windows-like signal modules without SIGKILL should fall back to SIGTERM."""
|
||||
"""Without SIGKILL, SIGTERM is used for both phases."""
|
||||
from tools.mcp_tool import _kill_orphaned_mcp_children, _stdio_pids, _lock
|
||||
|
||||
fake_pid = 434343
|
||||
with _lock:
|
||||
_stdio_pids.clear()
|
||||
_stdio_pids.add(fake_pid)
|
||||
_stdio_pids[fake_pid] = "test"
|
||||
|
||||
monkeypatch.delattr(signal, "SIGKILL", raising=False)
|
||||
|
||||
with patch("tools.mcp_tool.os.kill") as mock_kill:
|
||||
with patch("tools.mcp_tool.os.kill") as mock_kill, \
|
||||
patch("time.sleep") as mock_sleep:
|
||||
_kill_orphaned_mcp_children()
|
||||
|
||||
mock_kill.assert_called_once_with(fake_pid, signal.SIGTERM)
|
||||
# SIGTERM phase, alive check raises (process gone), no escalation
|
||||
mock_kill.assert_any_call(fake_pid, signal.SIGTERM)
|
||||
assert mock_sleep.called
|
||||
|
||||
with _lock:
|
||||
assert fake_pid not in _stdio_pids
|
||||
|
||||
@@ -120,6 +120,177 @@ class TestSchemaConversion:
|
||||
|
||||
assert schema["parameters"] == {"type": "object", "properties": {}}
|
||||
|
||||
def test_definitions_refs_are_rewritten_to_defs(self):
|
||||
from tools.mcp_tool import _convert_mcp_schema
|
||||
|
||||
mcp_tool = _make_mcp_tool(
|
||||
name="submit",
|
||||
description="Submit a payload",
|
||||
input_schema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input": {"$ref": "#/definitions/Payload"},
|
||||
},
|
||||
"required": ["input"],
|
||||
"definitions": {
|
||||
"Payload": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {"type": "string"},
|
||||
},
|
||||
"required": ["query"],
|
||||
}
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
schema = _convert_mcp_schema("forms", mcp_tool)
|
||||
|
||||
assert schema["parameters"]["properties"]["input"]["$ref"] == "#/$defs/Payload"
|
||||
assert "$defs" in schema["parameters"]
|
||||
assert "definitions" not in schema["parameters"]
|
||||
|
||||
def test_nested_definition_refs_are_rewritten_recursively(self):
|
||||
from tools.mcp_tool import _convert_mcp_schema
|
||||
|
||||
mcp_tool = _make_mcp_tool(
|
||||
name="nested",
|
||||
description="Nested schema",
|
||||
input_schema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {"$ref": "#/definitions/Entry"},
|
||||
},
|
||||
},
|
||||
"definitions": {
|
||||
"Entry": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"child": {"$ref": "#/definitions/Child"},
|
||||
},
|
||||
},
|
||||
"Child": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"value": {"type": "string"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
schema = _convert_mcp_schema("forms", mcp_tool)
|
||||
|
||||
assert schema["parameters"]["properties"]["items"]["items"]["$ref"] == "#/$defs/Entry"
|
||||
assert schema["parameters"]["$defs"]["Entry"]["properties"]["child"]["$ref"] == "#/$defs/Child"
|
||||
|
||||
def test_missing_type_on_object_is_coerced(self):
|
||||
"""Schemas that describe an object but omit ``type`` get type='object'."""
|
||||
from tools.mcp_tool import _normalize_mcp_input_schema
|
||||
|
||||
schema = _normalize_mcp_input_schema({
|
||||
"properties": {"q": {"type": "string"}},
|
||||
"required": ["q"],
|
||||
})
|
||||
|
||||
assert schema["type"] == "object"
|
||||
assert schema["properties"]["q"]["type"] == "string"
|
||||
assert schema["required"] == ["q"]
|
||||
|
||||
def test_null_type_on_object_is_coerced(self):
|
||||
"""type: None should be treated like missing type (common MCP server bug)."""
|
||||
from tools.mcp_tool import _normalize_mcp_input_schema
|
||||
|
||||
schema = _normalize_mcp_input_schema({
|
||||
"type": None,
|
||||
"properties": {"x": {"type": "integer"}},
|
||||
})
|
||||
|
||||
assert schema["type"] == "object"
|
||||
|
||||
def test_required_pruned_when_property_missing(self):
|
||||
"""Gemini 400s on required names that don't exist in properties."""
|
||||
from tools.mcp_tool import _normalize_mcp_input_schema
|
||||
|
||||
schema = _normalize_mcp_input_schema({
|
||||
"type": "object",
|
||||
"properties": {"a": {"type": "string"}},
|
||||
"required": ["a", "ghost", "phantom"],
|
||||
})
|
||||
|
||||
assert schema["required"] == ["a"]
|
||||
|
||||
def test_required_removed_when_all_names_dangle(self):
|
||||
from tools.mcp_tool import _normalize_mcp_input_schema
|
||||
|
||||
schema = _normalize_mcp_input_schema({
|
||||
"type": "object",
|
||||
"properties": {},
|
||||
"required": ["ghost"],
|
||||
})
|
||||
|
||||
assert "required" not in schema
|
||||
|
||||
def test_required_pruning_applies_recursively_inside_nested_objects(self):
|
||||
"""Nested object schemas also get required pruning."""
|
||||
from tools.mcp_tool import _normalize_mcp_input_schema
|
||||
|
||||
schema = _normalize_mcp_input_schema({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"filter": {
|
||||
"type": "object",
|
||||
"properties": {"field": {"type": "string"}},
|
||||
"required": ["field", "missing"],
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
assert schema["properties"]["filter"]["required"] == ["field"]
|
||||
|
||||
def test_object_in_array_items_gets_properties_filled(self):
|
||||
"""Array-item object schemas without properties get an empty dict."""
|
||||
from tools.mcp_tool import _normalize_mcp_input_schema
|
||||
|
||||
schema = _normalize_mcp_input_schema({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {"type": "object"},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
assert schema["properties"]["items"]["items"]["properties"] == {}
|
||||
|
||||
def test_convert_mcp_schema_survives_missing_inputschema_attribute(self):
|
||||
"""A Tool object without .inputSchema must not crash registration."""
|
||||
import types
|
||||
|
||||
from tools.mcp_tool import _convert_mcp_schema
|
||||
|
||||
bare_tool = types.SimpleNamespace(name="probe", description="Probe")
|
||||
schema = _convert_mcp_schema("srv", bare_tool)
|
||||
|
||||
assert schema["name"] == "mcp_srv_probe"
|
||||
assert schema["parameters"] == {"type": "object", "properties": {}}
|
||||
|
||||
def test_convert_mcp_schema_with_none_inputschema(self):
|
||||
"""Tool with inputSchema=None produces a valid empty object schema."""
|
||||
import types
|
||||
|
||||
from tools.mcp_tool import _convert_mcp_schema
|
||||
|
||||
# Note: _make_mcp_tool(input_schema=None) falls back to a default —
|
||||
# build the namespace directly so .inputSchema really is None.
|
||||
mcp_tool = types.SimpleNamespace(name="probe", description="Probe", inputSchema=None)
|
||||
schema = _convert_mcp_schema("srv", mcp_tool)
|
||||
|
||||
assert schema["parameters"] == {"type": "object", "properties": {}}
|
||||
|
||||
def test_tool_name_prefix_format(self):
|
||||
from tools.mcp_tool import _convert_mcp_schema
|
||||
|
||||
@@ -1029,6 +1200,92 @@ class TestHTTPConfig:
|
||||
|
||||
asyncio.run(_test())
|
||||
|
||||
def test_http_seeds_initial_protocol_header(self):
|
||||
from tools.mcp_tool import LATEST_PROTOCOL_VERSION, MCPServerTask
|
||||
|
||||
server = MCPServerTask("remote")
|
||||
captured = {}
|
||||
|
||||
class DummyAsyncClient:
|
||||
def __init__(self, **kwargs):
|
||||
captured.update(kwargs)
|
||||
|
||||
async def __aenter__(self):
|
||||
return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
class DummyTransportCtx:
|
||||
async def __aenter__(self):
|
||||
return MagicMock(), MagicMock(), (lambda: None)
|
||||
|
||||
async def __aexit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
class DummySession:
|
||||
def __init__(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
async def __aenter__(self):
|
||||
return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
async def initialize(self):
|
||||
return None
|
||||
|
||||
class DummyLegacyTransportCtx:
|
||||
def __init__(self, **kwargs):
|
||||
captured["legacy_headers"] = kwargs.get("headers")
|
||||
|
||||
async def __aenter__(self):
|
||||
return MagicMock(), MagicMock(), (lambda: None)
|
||||
|
||||
async def __aexit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
async def _discover_tools(self):
|
||||
self._shutdown_event.set()
|
||||
|
||||
async def _run(config, *, new_http):
|
||||
captured.clear()
|
||||
with patch("tools.mcp_tool._MCP_HTTP_AVAILABLE", True), \
|
||||
patch("tools.mcp_tool._MCP_NEW_HTTP", new_http), \
|
||||
patch("httpx.AsyncClient", DummyAsyncClient), \
|
||||
patch("tools.mcp_tool.streamable_http_client", return_value=DummyTransportCtx()), \
|
||||
patch("tools.mcp_tool.streamablehttp_client", side_effect=lambda url, **kwargs: DummyLegacyTransportCtx(**kwargs)), \
|
||||
patch("tools.mcp_tool.ClientSession", DummySession), \
|
||||
patch.object(MCPServerTask, "_discover_tools", _discover_tools):
|
||||
await server._run_http(config)
|
||||
|
||||
asyncio.run(_run({"url": "https://example.com/mcp"}, new_http=True))
|
||||
assert captured["headers"]["mcp-protocol-version"] == LATEST_PROTOCOL_VERSION
|
||||
|
||||
asyncio.run(_run({
|
||||
"url": "https://example.com/mcp",
|
||||
"headers": {"mcp-protocol-version": "custom-version"},
|
||||
}, new_http=True))
|
||||
assert captured["headers"]["mcp-protocol-version"] == "custom-version"
|
||||
|
||||
asyncio.run(_run({
|
||||
"url": "https://example.com/mcp",
|
||||
"headers": {"MCP-Protocol-Version": "custom-version"},
|
||||
}, new_http=True))
|
||||
assert captured["headers"]["MCP-Protocol-Version"] == "custom-version"
|
||||
assert "mcp-protocol-version" not in captured["headers"]
|
||||
|
||||
asyncio.run(_run({"url": "https://example.com/mcp"}, new_http=False))
|
||||
assert captured["legacy_headers"]["mcp-protocol-version"] == LATEST_PROTOCOL_VERSION
|
||||
|
||||
asyncio.run(_run({
|
||||
"url": "https://example.com/mcp",
|
||||
"headers": {"MCP-Protocol-Version": "custom-version"},
|
||||
}, new_http=False))
|
||||
assert captured["legacy_headers"]["MCP-Protocol-Version"] == "custom-version"
|
||||
assert "mcp-protocol-version" not in captured["legacy_headers"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Reconnection logic
|
||||
|
||||
@@ -8,14 +8,17 @@ import pytest
|
||||
moa = importlib.import_module("tools.mixture_of_agents_tool")
|
||||
|
||||
|
||||
def test_moa_defaults_track_current_openrouter_frontier_models():
|
||||
assert moa.REFERENCE_MODELS == [
|
||||
"anthropic/claude-opus-4.6",
|
||||
"google/gemini-3-pro-preview",
|
||||
"openai/gpt-5.4-pro",
|
||||
"deepseek/deepseek-v3.2",
|
||||
]
|
||||
assert moa.AGGREGATOR_MODEL == "anthropic/claude-opus-4.6"
|
||||
def test_moa_defaults_are_well_formed():
|
||||
# Invariants, not a catalog snapshot: the exact model list churns with
|
||||
# OpenRouter availability (see PR #6636 where gemini-3-pro-preview was
|
||||
# removed upstream). What we care about is that the defaults are present
|
||||
# and valid vendor/model slugs.
|
||||
assert isinstance(moa.REFERENCE_MODELS, list)
|
||||
assert len(moa.REFERENCE_MODELS) >= 1
|
||||
for m in moa.REFERENCE_MODELS:
|
||||
assert isinstance(m, str) and "/" in m and not m.startswith("/")
|
||||
assert isinstance(moa.AGGREGATOR_MODEL, str)
|
||||
assert "/" in moa.AGGREGATOR_MODEL
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -22,8 +23,9 @@ class TestResolvePath:
|
||||
monkeypatch.setenv("TERMINAL_CWD", str(tmp_path))
|
||||
from tools.file_tools import _resolve_path
|
||||
|
||||
result = _resolve_path("/etc/hosts")
|
||||
assert result == Path("/etc/hosts")
|
||||
absolute = (tmp_path / "already-absolute.txt").resolve()
|
||||
result = _resolve_path(str(absolute))
|
||||
assert result == absolute
|
||||
|
||||
def test_falls_back_to_cwd_without_terminal_cwd(self, monkeypatch):
|
||||
"""Without TERMINAL_CWD, falls back to os.getcwd()."""
|
||||
@@ -50,3 +52,34 @@ class TestResolvePath:
|
||||
result = _resolve_path("a/../b/file.txt")
|
||||
assert ".." not in str(result)
|
||||
assert result == (tmp_path / "b" / "file.txt")
|
||||
|
||||
def test_relative_path_prefers_live_file_ops_cwd(self, monkeypatch, tmp_path):
|
||||
"""Live env.cwd must win after the terminal session changes directory."""
|
||||
start_dir = tmp_path / "start"
|
||||
live_dir = tmp_path / "worktree"
|
||||
start_dir.mkdir()
|
||||
live_dir.mkdir()
|
||||
monkeypatch.setenv("TERMINAL_CWD", str(start_dir))
|
||||
|
||||
from tools import file_tools
|
||||
|
||||
task_id = "live-cwd"
|
||||
fake_ops = SimpleNamespace(
|
||||
env=SimpleNamespace(cwd=str(live_dir)),
|
||||
cwd=str(start_dir),
|
||||
)
|
||||
|
||||
with file_tools._file_ops_lock:
|
||||
previous = file_tools._file_ops_cache.get(task_id)
|
||||
file_tools._file_ops_cache[task_id] = fake_ops
|
||||
|
||||
try:
|
||||
result = file_tools._resolve_path("nested/file.txt", task_id=task_id)
|
||||
finally:
|
||||
with file_tools._file_ops_lock:
|
||||
if previous is None:
|
||||
file_tools._file_ops_cache.pop(task_id, None)
|
||||
else:
|
||||
file_tools._file_ops_cache[task_id] = previous
|
||||
|
||||
assert result == live_dir / "nested" / "file.txt"
|
||||
|
||||
@@ -44,6 +44,18 @@ description: Description for {name}.
|
||||
return skill_dir
|
||||
|
||||
|
||||
def _symlink_category(skills_dir: Path, linked_root: Path, category: str) -> Path:
|
||||
"""Create a category symlink under skills_dir pointing outside the tree."""
|
||||
external_category = linked_root / category
|
||||
external_category.mkdir(parents=True, exist_ok=True)
|
||||
symlink_path = skills_dir / category
|
||||
try:
|
||||
symlink_path.symlink_to(external_category, target_is_directory=True)
|
||||
except (OSError, NotImplementedError) as exc:
|
||||
pytest.skip(f"symlinks unavailable in test environment: {exc}")
|
||||
return external_category
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _parse_frontmatter
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -255,6 +267,20 @@ class TestFindAllSkills:
|
||||
assert len(skills) == 1
|
||||
assert skills[0]["name"] == "real-skill"
|
||||
|
||||
def test_finds_skills_in_symlinked_category_dir(self, tmp_path):
|
||||
external_root = tmp_path / "repo"
|
||||
skills_root = tmp_path / "skills"
|
||||
skills_root.mkdir()
|
||||
|
||||
external_category = _symlink_category(skills_root, external_root, "linked")
|
||||
_make_skill(external_category.parent, "knowledge-brain", category="linked")
|
||||
|
||||
with patch("tools.skills_tool.SKILLS_DIR", skills_root):
|
||||
skills = _find_all_skills()
|
||||
|
||||
assert [s["name"] for s in skills] == ["knowledge-brain"]
|
||||
assert skills[0]["category"] == "linked"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# skills_list
|
||||
@@ -288,6 +314,23 @@ class TestSkillsList:
|
||||
assert result["count"] == 1
|
||||
assert result["skills"][0]["name"] == "skill-a"
|
||||
|
||||
def test_category_filter_finds_symlinked_category(self, tmp_path):
|
||||
external_root = tmp_path / "repo"
|
||||
skills_root = tmp_path / "skills"
|
||||
skills_root.mkdir()
|
||||
|
||||
external_category = _symlink_category(skills_root, external_root, "linked")
|
||||
_make_skill(external_category.parent, "knowledge-brain", category="linked")
|
||||
|
||||
with patch("tools.skills_tool.SKILLS_DIR", skills_root):
|
||||
raw = skills_list(category="linked")
|
||||
|
||||
result = json.loads(raw)
|
||||
assert result["success"] is True
|
||||
assert result["count"] == 1
|
||||
assert result["categories"] == ["linked"]
|
||||
assert result["skills"][0]["name"] == "knowledge-brain"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# skill_view
|
||||
@@ -389,6 +432,35 @@ class TestSkillView:
|
||||
result = json.loads(raw)
|
||||
assert result["success"] is True
|
||||
|
||||
def test_view_finds_skill_in_symlinked_category_dir(self, tmp_path):
|
||||
external_root = tmp_path / "repo"
|
||||
skills_root = tmp_path / "skills"
|
||||
skills_root.mkdir()
|
||||
|
||||
external_category = _symlink_category(skills_root, external_root, "linked")
|
||||
_make_skill(external_category.parent, "knowledge-brain", category="linked")
|
||||
|
||||
with patch("tools.skills_tool.SKILLS_DIR", skills_root):
|
||||
raw = skill_view("knowledge-brain")
|
||||
|
||||
result = json.loads(raw)
|
||||
assert result["success"] is True
|
||||
assert result["name"] == "knowledge-brain"
|
||||
|
||||
def test_not_found_hint_uses_same_order_as_skills_list(self, tmp_path):
|
||||
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
|
||||
_make_skill(tmp_path, "zeta", category="z-cat")
|
||||
_make_skill(tmp_path, "alpha", category="a-cat")
|
||||
_make_skill(tmp_path, "beta", category="a-cat")
|
||||
|
||||
list_result = json.loads(skills_list())
|
||||
view_result = json.loads(skill_view("missing-skill"))
|
||||
|
||||
assert view_result["success"] is False
|
||||
assert view_result["available_skills"] == [
|
||||
skill["name"] for skill in list_result["skills"]
|
||||
]
|
||||
|
||||
|
||||
class TestSkillViewSecureSetupOnLoad:
|
||||
def test_requests_missing_required_env_and_continues(self, tmp_path, monkeypatch):
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
"""Tests for tools.tool_output_limits.
|
||||
|
||||
Covers:
|
||||
1. Default values when no config is provided.
|
||||
2. Config override picks up user-supplied max_bytes / max_lines /
|
||||
max_line_length.
|
||||
3. Malformed values (None, negative, wrong type) fall back to defaults
|
||||
rather than raising.
|
||||
4. Integration: the helpers return what the terminal_tool and
|
||||
file_operations call paths will actually consume.
|
||||
|
||||
Port-tracking: anomalyco/opencode PR #23770
|
||||
(feat(truncate): allow configuring tool output truncation limits).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from tools import tool_output_limits as tol
|
||||
|
||||
|
||||
class TestDefaults:
|
||||
def test_defaults_match_previous_hardcoded_values(self):
|
||||
assert tol.DEFAULT_MAX_BYTES == 50_000
|
||||
assert tol.DEFAULT_MAX_LINES == 2000
|
||||
assert tol.DEFAULT_MAX_LINE_LENGTH == 2000
|
||||
|
||||
def test_get_limits_returns_defaults_when_config_missing(self):
|
||||
with patch("hermes_cli.config.load_config", return_value={}):
|
||||
limits = tol.get_tool_output_limits()
|
||||
assert limits == {
|
||||
"max_bytes": tol.DEFAULT_MAX_BYTES,
|
||||
"max_lines": tol.DEFAULT_MAX_LINES,
|
||||
"max_line_length": tol.DEFAULT_MAX_LINE_LENGTH,
|
||||
}
|
||||
|
||||
def test_get_limits_returns_defaults_when_config_not_a_dict(self):
|
||||
# load_config should always return a dict but be defensive anyway.
|
||||
with patch("hermes_cli.config.load_config", return_value="not a dict"):
|
||||
limits = tol.get_tool_output_limits()
|
||||
assert limits["max_bytes"] == tol.DEFAULT_MAX_BYTES
|
||||
|
||||
def test_get_limits_returns_defaults_when_load_config_raises(self):
|
||||
def _boom():
|
||||
raise RuntimeError("boom")
|
||||
|
||||
with patch("hermes_cli.config.load_config", side_effect=_boom):
|
||||
limits = tol.get_tool_output_limits()
|
||||
assert limits["max_lines"] == tol.DEFAULT_MAX_LINES
|
||||
|
||||
|
||||
class TestOverrides:
|
||||
def test_user_config_overrides_all_three(self):
|
||||
cfg = {
|
||||
"tool_output": {
|
||||
"max_bytes": 100_000,
|
||||
"max_lines": 5000,
|
||||
"max_line_length": 4096,
|
||||
}
|
||||
}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
limits = tol.get_tool_output_limits()
|
||||
assert limits == {
|
||||
"max_bytes": 100_000,
|
||||
"max_lines": 5000,
|
||||
"max_line_length": 4096,
|
||||
}
|
||||
|
||||
def test_partial_override_preserves_other_defaults(self):
|
||||
cfg = {"tool_output": {"max_bytes": 200_000}}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
limits = tol.get_tool_output_limits()
|
||||
assert limits["max_bytes"] == 200_000
|
||||
assert limits["max_lines"] == tol.DEFAULT_MAX_LINES
|
||||
assert limits["max_line_length"] == tol.DEFAULT_MAX_LINE_LENGTH
|
||||
|
||||
def test_section_not_a_dict_falls_back(self):
|
||||
cfg = {"tool_output": "nonsense"}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
limits = tol.get_tool_output_limits()
|
||||
assert limits["max_bytes"] == tol.DEFAULT_MAX_BYTES
|
||||
|
||||
|
||||
class TestCoercion:
|
||||
@pytest.mark.parametrize("bad", [None, "not a number", -1, 0, [], {}])
|
||||
def test_invalid_values_fall_back_to_defaults(self, bad):
|
||||
cfg = {"tool_output": {"max_bytes": bad, "max_lines": bad, "max_line_length": bad}}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
limits = tol.get_tool_output_limits()
|
||||
assert limits["max_bytes"] == tol.DEFAULT_MAX_BYTES
|
||||
assert limits["max_lines"] == tol.DEFAULT_MAX_LINES
|
||||
assert limits["max_line_length"] == tol.DEFAULT_MAX_LINE_LENGTH
|
||||
|
||||
def test_string_integer_is_coerced(self):
|
||||
cfg = {"tool_output": {"max_bytes": "75000"}}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
limits = tol.get_tool_output_limits()
|
||||
assert limits["max_bytes"] == 75_000
|
||||
|
||||
|
||||
class TestShortcuts:
|
||||
def test_individual_accessors_delegate_to_get_tool_output_limits(self):
|
||||
cfg = {
|
||||
"tool_output": {
|
||||
"max_bytes": 111,
|
||||
"max_lines": 222,
|
||||
"max_line_length": 333,
|
||||
}
|
||||
}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
assert tol.get_max_bytes() == 111
|
||||
assert tol.get_max_lines() == 222
|
||||
assert tol.get_max_line_length() == 333
|
||||
|
||||
|
||||
class TestDefaultConfigHasSection:
|
||||
"""The DEFAULT_CONFIG in hermes_cli.config must expose tool_output so
|
||||
that ``hermes setup`` and default installs stay in sync with the
|
||||
helpers here."""
|
||||
|
||||
def test_default_config_contains_tool_output_section(self):
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
assert "tool_output" in DEFAULT_CONFIG
|
||||
section = DEFAULT_CONFIG["tool_output"]
|
||||
assert isinstance(section, dict)
|
||||
assert section["max_bytes"] == tol.DEFAULT_MAX_BYTES
|
||||
assert section["max_lines"] == tol.DEFAULT_MAX_LINES
|
||||
assert section["max_line_length"] == tol.DEFAULT_MAX_LINE_LENGTH
|
||||
|
||||
|
||||
class TestIntegrationReadPagination:
|
||||
"""normalize_read_pagination uses get_max_lines() — verify the plumbing."""
|
||||
|
||||
def test_pagination_limit_clamped_by_config_value(self):
|
||||
from tools.file_operations import normalize_read_pagination
|
||||
cfg = {"tool_output": {"max_lines": 50}}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
offset, limit = normalize_read_pagination(offset=1, limit=1000)
|
||||
# limit should have been clamped to 50 (the configured max_lines)
|
||||
assert limit == 50
|
||||
assert offset == 1
|
||||
|
||||
def test_pagination_default_when_config_missing(self):
|
||||
from tools.file_operations import normalize_read_pagination
|
||||
with patch("hermes_cli.config.load_config", return_value={}):
|
||||
offset, limit = normalize_read_pagination(offset=10, limit=100000)
|
||||
# Clamped to default MAX_LINES (2000).
|
||||
assert limit == tol.DEFAULT_MAX_LINES
|
||||
assert offset == 10
|
||||
@@ -63,11 +63,15 @@ _HERMES_ENV_PATH = (
|
||||
r'(?:\$hermes_home|\$\{hermes_home\})/)'
|
||||
r'\.env\b'
|
||||
)
|
||||
_PROJECT_ENV_PATH = r'(?:(?:/|\.{1,2}/)?(?:[^\s/"\'`]+/)*\.env(?:\.[^/\s"\'`]+)*)'
|
||||
_PROJECT_CONFIG_PATH = r'(?:(?:/|\.{1,2}/)?(?:[^\s/"\'`]+/)*config\.yaml)'
|
||||
_SENSITIVE_WRITE_TARGET = (
|
||||
r'(?:/etc/|/dev/sd|'
|
||||
rf'{_SSH_SENSITIVE_PATH}|'
|
||||
rf'{_HERMES_ENV_PATH})'
|
||||
)
|
||||
_PROJECT_SENSITIVE_WRITE_TARGET = rf'(?:{_PROJECT_ENV_PATH}|{_PROJECT_CONFIG_PATH})'
|
||||
_COMMAND_TAIL = r'(?:\s*(?:&&|\|\||;).*)?$'
|
||||
|
||||
# =========================================================================
|
||||
# Dangerous command patterns
|
||||
@@ -99,6 +103,8 @@ DANGEROUS_PATTERNS = [
|
||||
(r'\b(bash|sh|zsh|ksh)\s+<\s*<?\s*\(\s*(curl|wget)\b', "execute remote script via process substitution"),
|
||||
(rf'\btee\b.*["\']?{_SENSITIVE_WRITE_TARGET}', "overwrite system file via tee"),
|
||||
(rf'>>?\s*["\']?{_SENSITIVE_WRITE_TARGET}', "overwrite system file via redirection"),
|
||||
(rf'\btee\b.*["\']?{_PROJECT_SENSITIVE_WRITE_TARGET}["\']?{_COMMAND_TAIL}', "overwrite project env/config via tee"),
|
||||
(rf'>>?\s*["\']?{_PROJECT_SENSITIVE_WRITE_TARGET}["\']?{_COMMAND_TAIL}', "overwrite project env/config via redirection"),
|
||||
(r'\bxargs\s+.*\brm\b', "xargs with rm"),
|
||||
(r'\bfind\b.*-exec\s+(/\S*/)?rm\b', "find -exec rm"),
|
||||
(r'\bfind\b.*-delete\b', "find -delete"),
|
||||
@@ -120,6 +126,7 @@ DANGEROUS_PATTERNS = [
|
||||
(r'\bkill\b.*`\s*pgrep\b', "kill process via backtick pgrep expansion (self-termination)"),
|
||||
# File copy/move/edit into sensitive system paths
|
||||
(r'\b(cp|mv|install)\b.*\s/etc/', "copy/move file into /etc/"),
|
||||
(rf'\b(cp|mv|install)\b.*\s["\']?{_PROJECT_SENSITIVE_WRITE_TARGET}["\']?{_COMMAND_TAIL}', "overwrite project env/config file"),
|
||||
(r'\bsed\s+-[^\s]*i.*\s/etc/', "in-place edit of system config"),
|
||||
(r'\bsed\s+--in-place\b.*\s/etc/', "in-place edit of system config (long flag)"),
|
||||
# Script execution via heredoc — bypasses the -e/-c flag patterns above.
|
||||
|
||||
+157
-10
@@ -188,10 +188,116 @@ async def _cdp_call(
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _browser_cdp_via_supervisor(
|
||||
task_id: str,
|
||||
frame_id: str,
|
||||
method: str,
|
||||
params: Optional[Dict[str, Any]],
|
||||
timeout: float,
|
||||
) -> str:
|
||||
"""Route a CDP call through the live supervisor session for an OOPIF frame.
|
||||
|
||||
Looks up the frame in the supervisor's snapshot, extracts its child
|
||||
``cdp_session_id``, and dispatches ``method`` with that sessionId via
|
||||
the supervisor's already-connected WebSocket (using
|
||||
``asyncio.run_coroutine_threadsafe`` onto the supervisor loop).
|
||||
"""
|
||||
try:
|
||||
from tools.browser_supervisor import SUPERVISOR_REGISTRY # type: ignore[import-not-found]
|
||||
except Exception as exc: # pragma: no cover — defensive
|
||||
return tool_error(
|
||||
f"CDP supervisor is not available: {exc}. frame_id routing requires "
|
||||
f"a running supervisor attached via /browser connect or an active "
|
||||
f"Browserbase session."
|
||||
)
|
||||
|
||||
supervisor = SUPERVISOR_REGISTRY.get(task_id)
|
||||
if supervisor is None:
|
||||
return tool_error(
|
||||
f"No CDP supervisor is attached for task={task_id!r}. Call "
|
||||
f"browser_navigate or /browser connect first so the supervisor "
|
||||
f"can attach. Once attached, browser_snapshot will populate "
|
||||
f"frame_tree with frame_ids you can pass here."
|
||||
)
|
||||
|
||||
snap = supervisor.snapshot()
|
||||
# Search both the top frame and the children for the requested id.
|
||||
top = snap.frame_tree.get("top")
|
||||
frame_info: Optional[Dict[str, Any]] = None
|
||||
if top and top.get("frame_id") == frame_id:
|
||||
frame_info = top
|
||||
else:
|
||||
for child in snap.frame_tree.get("children", []) or []:
|
||||
if child.get("frame_id") == frame_id:
|
||||
frame_info = child
|
||||
break
|
||||
if frame_info is None:
|
||||
# Check the raw frames dict too (frame_tree is capped at 30 entries)
|
||||
with supervisor._state_lock: # type: ignore[attr-defined]
|
||||
raw = supervisor._frames.get(frame_id) # type: ignore[attr-defined]
|
||||
if raw is not None:
|
||||
frame_info = raw.to_dict()
|
||||
|
||||
if frame_info is None:
|
||||
return tool_error(
|
||||
f"frame_id {frame_id!r} not found in supervisor state. "
|
||||
f"Call browser_snapshot to see current frame_tree."
|
||||
)
|
||||
|
||||
child_sid = frame_info.get("session_id")
|
||||
if not child_sid:
|
||||
# Not an OOPIF — fall back to top-level session (evaluating at page
|
||||
# scope). Same-origin iframes don't get their own sessionId; the
|
||||
# agent can still use contentWindow/contentDocument from the parent.
|
||||
return tool_error(
|
||||
f"frame_id {frame_id!r} is not an out-of-process iframe (no "
|
||||
f"dedicated CDP session). For same-origin iframes, use "
|
||||
f"`browser_cdp(method='Runtime.evaluate', params={{'expression': "
|
||||
f"\"document.querySelector('iframe').contentDocument.title\"}})` "
|
||||
f"at the top-level page instead."
|
||||
)
|
||||
|
||||
# Dispatch onto the supervisor's loop.
|
||||
import asyncio as _asyncio
|
||||
loop = supervisor._loop # type: ignore[attr-defined]
|
||||
if loop is None or not loop.is_running():
|
||||
return tool_error(
|
||||
"CDP supervisor loop is not running. Try reconnecting with "
|
||||
"/browser connect."
|
||||
)
|
||||
|
||||
async def _do_cdp():
|
||||
return await supervisor._cdp( # type: ignore[attr-defined]
|
||||
method,
|
||||
params or {},
|
||||
session_id=child_sid,
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
try:
|
||||
fut = _asyncio.run_coroutine_threadsafe(_do_cdp(), loop)
|
||||
result_msg = fut.result(timeout=timeout + 2)
|
||||
except Exception as exc:
|
||||
return tool_error(
|
||||
f"CDP call via supervisor failed: {type(exc).__name__}: {exc}",
|
||||
cdp_docs=CDP_DOCS_URL,
|
||||
)
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"success": True,
|
||||
"method": method,
|
||||
"frame_id": frame_id,
|
||||
"session_id": child_sid,
|
||||
"result": result_msg.get("result", {}),
|
||||
}
|
||||
return json.dumps(payload, ensure_ascii=False)
|
||||
|
||||
|
||||
def browser_cdp(
|
||||
method: str,
|
||||
params: Optional[Dict[str, Any]] = None,
|
||||
target_id: Optional[str] = None,
|
||||
frame_id: Optional[str] = None,
|
||||
timeout: float = 30.0,
|
||||
task_id: Optional[str] = None,
|
||||
) -> str:
|
||||
@@ -202,16 +308,34 @@ def browser_cdp(
|
||||
params: Method-specific parameters; defaults to ``{}``.
|
||||
target_id: Optional target/tab ID for page-level methods. When set,
|
||||
we first attach to the target (``flatten=True``) and send
|
||||
``method`` with the resulting ``sessionId``.
|
||||
``method`` with the resulting ``sessionId``. Uses a fresh
|
||||
stateless CDP connection.
|
||||
frame_id: Optional cross-origin (OOPIF) iframe ``frame_id`` from
|
||||
``browser_snapshot.frame_tree.children[]``. When set (and the
|
||||
frame is an OOPIF with a live session tracked by the CDP
|
||||
supervisor), routes the call through the supervisor's existing
|
||||
WebSocket — which is how you Runtime.evaluate *inside* an
|
||||
iframe on backends where per-call fresh CDP connections would
|
||||
hit signed-URL expiry (Browserbase) or expensive reattach.
|
||||
timeout: Seconds to wait for the call to complete.
|
||||
task_id: Unused (tool is stateless) — accepted for uniformity with
|
||||
other browser tools.
|
||||
task_id: Task identifier for supervisor lookup. When ``frame_id``
|
||||
is set, this identifies which task's supervisor to use; the
|
||||
handler will default to ``"default"`` otherwise.
|
||||
|
||||
Returns:
|
||||
JSON string ``{"success": True, "method": ..., "result": {...}}`` on
|
||||
success, or ``{"error": "..."}`` on failure.
|
||||
"""
|
||||
del task_id # unused — stateless
|
||||
# --- Route iframe-scoped calls through the supervisor ---------------
|
||||
if frame_id:
|
||||
return _browser_cdp_via_supervisor(
|
||||
task_id=task_id or "default",
|
||||
frame_id=frame_id,
|
||||
method=method,
|
||||
params=params,
|
||||
timeout=timeout,
|
||||
)
|
||||
del task_id # stateless path below
|
||||
|
||||
if not method or not isinstance(method, str):
|
||||
return tool_error(
|
||||
@@ -324,12 +448,18 @@ BROWSER_CDP_SCHEMA: Dict[str, Any] = {
|
||||
"'mobile': false}, target_id=<tabId>\n\n"
|
||||
"**Usage rules:**\n"
|
||||
"- Browser-level methods (Target.*, Browser.*, Storage.*): omit "
|
||||
"target_id.\n"
|
||||
"target_id and frame_id.\n"
|
||||
"- Page-level methods (Page.*, Runtime.*, DOM.*, Emulation.*, "
|
||||
"Network.* scoped to a tab): pass target_id from Target.getTargets.\n"
|
||||
"- Each call is independent — sessions and event subscriptions do "
|
||||
"not persist between calls. For stateful workflows, prefer the "
|
||||
"dedicated browser tools."
|
||||
"- **Cross-origin iframe scope** (Runtime.evaluate inside an OOPIF, "
|
||||
"Page.* targeting a frame target, etc.): pass frame_id from the "
|
||||
"browser_snapshot frame_tree output. This routes through the CDP "
|
||||
"supervisor's live connection — the only reliable way on "
|
||||
"Browserbase where stateless CDP calls hit signed-URL expiry.\n"
|
||||
"- Each stateless call (without frame_id) is independent — sessions "
|
||||
"and event subscriptions do not persist between calls. For stateful "
|
||||
"workflows, prefer the dedicated browser tools or use frame_id "
|
||||
"routing."
|
||||
),
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
@@ -353,8 +483,24 @@ BROWSER_CDP_SCHEMA: Dict[str, Any] = {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Optional. Target/tab ID from Target.getTargets result "
|
||||
"(each entry's 'targetId'). Required for page-level "
|
||||
"methods; must be omitted for browser-level methods."
|
||||
"(each entry's 'targetId'). Use for page-level methods "
|
||||
"at the top-level tab scope. Mutually exclusive with "
|
||||
"frame_id."
|
||||
),
|
||||
},
|
||||
"frame_id": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Optional. Out-of-process iframe (OOPIF) frame_id from "
|
||||
"browser_snapshot.frame_tree.children[] where "
|
||||
"is_oopif=true. When set, routes the call through the "
|
||||
"CDP supervisor's live session for that iframe. "
|
||||
"Essential for Runtime.evaluate inside cross-origin "
|
||||
"iframes, especially on Browserbase where fresh "
|
||||
"per-call CDP connections can't keep up with signed "
|
||||
"URL rotation. For same-origin iframes, use parent "
|
||||
"contentWindow/contentDocument from Runtime.evaluate "
|
||||
"at the top-level page instead."
|
||||
),
|
||||
},
|
||||
"timeout": {
|
||||
@@ -408,6 +554,7 @@ registry.register(
|
||||
method=args.get("method", ""),
|
||||
params=args.get("params"),
|
||||
target_id=args.get("target_id"),
|
||||
frame_id=args.get("frame_id"),
|
||||
timeout=args.get("timeout", 30.0),
|
||||
task_id=kw.get("task_id"),
|
||||
),
|
||||
|
||||
@@ -0,0 +1,148 @@
|
||||
"""Agent-facing tool: respond to a native JS dialog captured by the CDP supervisor.
|
||||
|
||||
This tool is response-only — the agent first reads ``pending_dialogs`` from
|
||||
``browser_snapshot`` output, then calls ``browser_dialog(action=...)`` to
|
||||
accept or dismiss.
|
||||
|
||||
Gated on the same ``_browser_cdp_check`` as ``browser_cdp`` so it only
|
||||
appears when a CDP endpoint is reachable (Browserbase with a
|
||||
``connectUrl``, local Chrome via ``/browser connect``, or
|
||||
``browser.cdp_url`` set in config).
|
||||
|
||||
See ``website/docs/developer-guide/browser-supervisor.md`` for the full
|
||||
design.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from tools.browser_supervisor import SUPERVISOR_REGISTRY
|
||||
from tools.registry import registry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
BROWSER_DIALOG_SCHEMA: Dict[str, Any] = {
|
||||
"name": "browser_dialog",
|
||||
"description": (
|
||||
"Respond to a native JavaScript dialog (alert / confirm / prompt / "
|
||||
"beforeunload) that is currently blocking the page.\n\n"
|
||||
"**Workflow:** call ``browser_snapshot`` first — if a dialog is open, "
|
||||
"it appears in the ``pending_dialogs`` field with ``id``, ``type``, "
|
||||
"and ``message``. Then call this tool with ``action='accept'`` or "
|
||||
"``action='dismiss'``.\n\n"
|
||||
"**Prompt dialogs:** pass ``prompt_text`` to supply the response "
|
||||
"string. Ignored for alert/confirm/beforeunload.\n\n"
|
||||
"**Multiple dialogs:** if more than one dialog is queued (rare — "
|
||||
"happens when a second dialog fires while the first is still open), "
|
||||
"pass ``dialog_id`` from the snapshot to disambiguate.\n\n"
|
||||
"**Availability:** only present when a CDP-capable backend is "
|
||||
"attached — Browserbase sessions, local Chrome via "
|
||||
"``/browser connect``, or ``browser.cdp_url`` in config.yaml. "
|
||||
"Not available on Camofox (REST-only) or the default Playwright "
|
||||
"local browser (CDP port is hidden)."
|
||||
),
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"action": {
|
||||
"type": "string",
|
||||
"enum": ["accept", "dismiss"],
|
||||
"description": (
|
||||
"'accept' clicks OK / returns the prompt text. "
|
||||
"'dismiss' clicks Cancel / returns null from prompt(). "
|
||||
"For ``beforeunload`` dialogs: 'accept' allows the "
|
||||
"navigation, 'dismiss' keeps the page."
|
||||
),
|
||||
},
|
||||
"prompt_text": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Response string for a ``prompt()`` dialog. Ignored for "
|
||||
"other dialog types. Defaults to empty string."
|
||||
),
|
||||
},
|
||||
"dialog_id": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Specific dialog to respond to, from "
|
||||
"``browser_snapshot.pending_dialogs[].id``. Required "
|
||||
"only when multiple dialogs are queued."
|
||||
),
|
||||
},
|
||||
},
|
||||
"required": ["action"],
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def browser_dialog(
|
||||
action: str,
|
||||
prompt_text: Optional[str] = None,
|
||||
dialog_id: Optional[str] = None,
|
||||
task_id: Optional[str] = None,
|
||||
) -> str:
|
||||
"""Respond to a pending dialog on the active task's CDP supervisor."""
|
||||
effective_task_id = task_id or "default"
|
||||
supervisor = SUPERVISOR_REGISTRY.get(effective_task_id)
|
||||
if supervisor is None:
|
||||
return json.dumps(
|
||||
{
|
||||
"success": False,
|
||||
"error": (
|
||||
"No CDP supervisor is attached to this task. Either the "
|
||||
"browser backend doesn't expose CDP (Camofox, default "
|
||||
"Playwright) or no browser session has been started yet. "
|
||||
"Call browser_navigate or /browser connect first."
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
result = supervisor.respond_to_dialog(
|
||||
action=action,
|
||||
prompt_text=prompt_text,
|
||||
dialog_id=dialog_id,
|
||||
)
|
||||
if result.get("ok"):
|
||||
return json.dumps(
|
||||
{
|
||||
"success": True,
|
||||
"action": action,
|
||||
"dialog": result.get("dialog", {}),
|
||||
}
|
||||
)
|
||||
return json.dumps({"success": False, "error": result.get("error", "unknown error")})
|
||||
|
||||
|
||||
def _browser_dialog_check() -> bool:
|
||||
"""Gate: same as ``browser_cdp`` — only offered when CDP is reachable.
|
||||
|
||||
Kept identical so the two tools appear and disappear together. The
|
||||
supervisor itself is started lazily by ``browser_navigate`` /
|
||||
``/browser connect`` / Browserbase session creation, so a reachable
|
||||
CDP URL is enough to commit to showing the tool.
|
||||
"""
|
||||
try:
|
||||
from tools.browser_cdp_tool import _browser_cdp_check # type: ignore[import-not-found]
|
||||
except Exception as exc: # pragma: no cover — defensive
|
||||
logger.debug("browser_dialog check: browser_cdp_tool import failed: %s", exc)
|
||||
return False
|
||||
return _browser_cdp_check()
|
||||
|
||||
|
||||
registry.register(
|
||||
name="browser_dialog",
|
||||
toolset="browser-cdp",
|
||||
schema=BROWSER_DIALOG_SCHEMA,
|
||||
handler=lambda args, **kw: browser_dialog(
|
||||
action=args.get("action", ""),
|
||||
prompt_text=args.get("prompt_text"),
|
||||
dialog_id=args.get("dialog_id"),
|
||||
task_id=kw.get("task_id"),
|
||||
),
|
||||
check_fn=_browser_dialog_check,
|
||||
emoji="💬",
|
||||
)
|
||||
File diff suppressed because it is too large
Load Diff
+128
-5
@@ -63,7 +63,7 @@ import tempfile
|
||||
import threading
|
||||
import time
|
||||
import requests
|
||||
from typing import Dict, Any, Optional, List
|
||||
from typing import Dict, Any, Optional, List, Tuple
|
||||
from pathlib import Path
|
||||
from agent.auxiliary_client import call_llm
|
||||
from hermes_constants import get_hermes_home
|
||||
@@ -287,6 +287,100 @@ def _get_cdp_override() -> str:
|
||||
return ""
|
||||
|
||||
|
||||
def _get_dialog_policy_config() -> Tuple[str, float]:
|
||||
"""Read ``browser.dialog_policy`` + ``browser.dialog_timeout_s`` from config.
|
||||
|
||||
Returns a ``(policy, timeout_s)`` tuple, falling back to the supervisor's
|
||||
defaults when keys are absent or invalid.
|
||||
"""
|
||||
# Defer imports so browser_tool can be imported in minimal environments.
|
||||
from tools.browser_supervisor import (
|
||||
DEFAULT_DIALOG_POLICY,
|
||||
DEFAULT_DIALOG_TIMEOUT_S,
|
||||
_VALID_POLICIES,
|
||||
)
|
||||
|
||||
try:
|
||||
from hermes_cli.config import read_raw_config
|
||||
|
||||
cfg = read_raw_config()
|
||||
browser_cfg = cfg.get("browser", {}) if isinstance(cfg, dict) else {}
|
||||
if not isinstance(browser_cfg, dict):
|
||||
return DEFAULT_DIALOG_POLICY, DEFAULT_DIALOG_TIMEOUT_S
|
||||
policy = str(browser_cfg.get("dialog_policy") or DEFAULT_DIALOG_POLICY)
|
||||
if policy not in _VALID_POLICIES:
|
||||
logger.debug("Invalid browser.dialog_policy=%r; using default", policy)
|
||||
policy = DEFAULT_DIALOG_POLICY
|
||||
timeout_raw = browser_cfg.get("dialog_timeout_s")
|
||||
try:
|
||||
timeout_s = float(timeout_raw) if timeout_raw is not None else DEFAULT_DIALOG_TIMEOUT_S
|
||||
if timeout_s <= 0:
|
||||
timeout_s = DEFAULT_DIALOG_TIMEOUT_S
|
||||
except (TypeError, ValueError):
|
||||
timeout_s = DEFAULT_DIALOG_TIMEOUT_S
|
||||
return policy, timeout_s
|
||||
except Exception:
|
||||
return DEFAULT_DIALOG_POLICY, DEFAULT_DIALOG_TIMEOUT_S
|
||||
|
||||
|
||||
def _ensure_cdp_supervisor(task_id: str) -> None:
|
||||
"""Start a CDP supervisor for ``task_id`` if an endpoint is reachable.
|
||||
|
||||
Idempotent — delegates to ``SupervisorRegistry.get_or_start`` which skips
|
||||
when a supervisor for this ``(task_id, cdp_url)`` already exists and
|
||||
tears down + restarts on URL change. Safe to call on every
|
||||
``browser_navigate`` / ``/browser connect`` without worrying about
|
||||
double-attach.
|
||||
|
||||
Resolves the CDP URL in this order:
|
||||
1. ``BROWSER_CDP_URL`` / ``browser.cdp_url`` — covers ``/browser connect``
|
||||
and config-set overrides.
|
||||
2. ``_active_sessions[task_id]["cdp_url"]`` — covers Browserbase + any
|
||||
other cloud provider whose ``create_session`` returns a raw CDP URL.
|
||||
|
||||
Swallows all errors — failing to attach the supervisor must not break
|
||||
the browser session itself. The agent simply won't see
|
||||
``pending_dialogs`` / ``frame_tree`` fields in snapshots.
|
||||
"""
|
||||
cdp_url = _get_cdp_override()
|
||||
if not cdp_url:
|
||||
# Fallback: active session may carry a per-session CDP URL from a
|
||||
# cloud provider (Browserbase sets this).
|
||||
with _cleanup_lock:
|
||||
session_info = _active_sessions.get(task_id, {})
|
||||
maybe = str(session_info.get("cdp_url") or "")
|
||||
if maybe:
|
||||
cdp_url = _resolve_cdp_override(maybe)
|
||||
if not cdp_url:
|
||||
return
|
||||
try:
|
||||
from tools.browser_supervisor import SUPERVISOR_REGISTRY # type: ignore[import-not-found]
|
||||
|
||||
policy, timeout_s = _get_dialog_policy_config()
|
||||
SUPERVISOR_REGISTRY.get_or_start(
|
||||
task_id=task_id,
|
||||
cdp_url=cdp_url,
|
||||
dialog_policy=policy,
|
||||
dialog_timeout_s=timeout_s,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"CDP supervisor attach for task=%s failed (non-fatal): %s",
|
||||
task_id,
|
||||
exc,
|
||||
)
|
||||
|
||||
|
||||
def _stop_cdp_supervisor(task_id: str) -> None:
|
||||
"""Stop the CDP supervisor for ``task_id`` if one exists. No-op otherwise."""
|
||||
try:
|
||||
from tools.browser_supervisor import SUPERVISOR_REGISTRY # type: ignore[import-not-found]
|
||||
|
||||
SUPERVISOR_REGISTRY.stop(task_id)
|
||||
except Exception as exc:
|
||||
logger.debug("CDP supervisor stop for task=%s failed (non-fatal): %s", task_id, exc)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Cloud Provider Registry
|
||||
# ============================================================================
|
||||
@@ -995,7 +1089,12 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
|
||||
if task_id in _active_sessions:
|
||||
return _active_sessions[task_id]
|
||||
_active_sessions[task_id] = session_info
|
||||
|
||||
|
||||
# Lazy-start the CDP supervisor now that the session exists (if the
|
||||
# backend surfaces a CDP URL via override or session_info["cdp_url"]).
|
||||
# Idempotent; swallows errors. See _ensure_cdp_supervisor for details.
|
||||
_ensure_cdp_supervisor(task_id)
|
||||
|
||||
return session_info
|
||||
|
||||
|
||||
@@ -1455,7 +1554,7 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
|
||||
if is_first_nav:
|
||||
session_info["_first_nav"] = False
|
||||
_maybe_start_recording(effective_task_id)
|
||||
|
||||
|
||||
result = _run_browser_command(effective_task_id, "open", [url], timeout=max(_get_command_timeout(), 60))
|
||||
|
||||
if result.get("success"):
|
||||
@@ -1578,7 +1677,20 @@ def browser_snapshot(
|
||||
"snapshot": snapshot_text,
|
||||
"element_count": len(refs) if refs else 0
|
||||
}
|
||||
|
||||
|
||||
# Merge supervisor state (pending dialogs + frame tree) when a CDP
|
||||
# supervisor is attached to this task. No-op otherwise. See
|
||||
# website/docs/developer-guide/browser-supervisor.md.
|
||||
try:
|
||||
from tools.browser_supervisor import SUPERVISOR_REGISTRY # type: ignore[import-not-found]
|
||||
_supervisor = SUPERVISOR_REGISTRY.get(effective_task_id)
|
||||
if _supervisor is not None:
|
||||
_sv_snap = _supervisor.snapshot()
|
||||
if _sv_snap.active:
|
||||
response.update(_sv_snap.to_dict())
|
||||
except Exception as _sv_exc:
|
||||
logger.debug("supervisor snapshot merge failed: %s", _sv_exc)
|
||||
|
||||
return json.dumps(response, ensure_ascii=False)
|
||||
else:
|
||||
return json.dumps({
|
||||
@@ -2248,7 +2360,11 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
|
||||
"""
|
||||
if task_id is None:
|
||||
task_id = "default"
|
||||
|
||||
|
||||
# Stop the CDP supervisor for this task FIRST so we close our WebSocket
|
||||
# before the backend tears down the underlying CDP endpoint.
|
||||
_stop_cdp_supervisor(task_id)
|
||||
|
||||
# Also clean up Camofox session if running in Camofox mode.
|
||||
# Skip full close when managed persistence is enabled — the browser
|
||||
# profile (and its session cookies) must survive across agent tasks.
|
||||
@@ -2329,6 +2445,13 @@ def cleanup_all_browsers() -> None:
|
||||
for task_id in task_ids:
|
||||
cleanup_browser(task_id)
|
||||
|
||||
# Tear down CDP supervisors for all tasks so background threads exit.
|
||||
try:
|
||||
from tools.browser_supervisor import SUPERVISOR_REGISTRY # type: ignore[import-not-found]
|
||||
SUPERVISOR_REGISTRY.stop_all()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Reset cached lookups so they are re-evaluated on next use.
|
||||
global _cached_agent_browser, _agent_browser_resolved
|
||||
global _cached_command_timeout, _command_timeout_resolved
|
||||
|
||||
@@ -215,6 +215,8 @@ def _format_job(job: Dict[str, Any]) -> Dict[str, Any]:
|
||||
}
|
||||
if job.get("script"):
|
||||
result["script"] = job["script"]
|
||||
if job.get("enabled_toolsets"):
|
||||
result["enabled_toolsets"] = job["enabled_toolsets"]
|
||||
return result
|
||||
|
||||
|
||||
@@ -234,6 +236,7 @@ def cronjob(
|
||||
base_url: Optional[str] = None,
|
||||
reason: Optional[str] = None,
|
||||
script: Optional[str] = None,
|
||||
enabled_toolsets: Optional[List[str]] = None,
|
||||
task_id: str = None,
|
||||
) -> str:
|
||||
"""Unified cron job management tool."""
|
||||
@@ -271,6 +274,7 @@ def cronjob(
|
||||
provider=_normalize_optional_job_value(provider),
|
||||
base_url=_normalize_optional_job_value(base_url, strip_trailing_slash=True),
|
||||
script=_normalize_optional_job_value(script),
|
||||
enabled_toolsets=enabled_toolsets or None,
|
||||
)
|
||||
return json.dumps(
|
||||
{
|
||||
@@ -360,6 +364,8 @@ def cronjob(
|
||||
if script_error:
|
||||
return tool_error(script_error, success=False)
|
||||
updates["script"] = _normalize_optional_job_value(script) if script else None
|
||||
if enabled_toolsets is not None:
|
||||
updates["enabled_toolsets"] = enabled_toolsets or None
|
||||
if repeat is not None:
|
||||
# Normalize: treat 0 or negative as None (infinite)
|
||||
normalized_repeat = None if repeat <= 0 else repeat
|
||||
@@ -459,6 +465,11 @@ Important safety rule: cron-run sessions should not recursively schedule more cr
|
||||
"type": "string",
|
||||
"description": f"Optional path to a Python script that runs before each cron job execution. Its stdout is injected into the prompt as context. Use for data collection and change detection. Relative paths resolve under {display_hermes_home()}/scripts/. On update, pass empty string to clear."
|
||||
},
|
||||
"enabled_toolsets": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
"description": "Optional list of toolset names to restrict the job's agent to (e.g. [\"web\", \"terminal\", \"file\", \"delegation\"]). When set, only tools from these toolsets are loaded, significantly reducing input token overhead. When omitted, all default tools are loaded. Infer from the job's prompt — e.g. use \"web\" if it calls web_search, \"terminal\" if it runs scripts, \"file\" if it reads files, \"delegation\" if it calls delegate_task. On update, pass an empty array to clear."
|
||||
},
|
||||
},
|
||||
"required": ["action"]
|
||||
}
|
||||
@@ -503,6 +514,7 @@ registry.register(
|
||||
base_url=args.get("base_url"),
|
||||
reason=args.get("reason"),
|
||||
script=args.get("script"),
|
||||
enabled_toolsets=args.get("enabled_toolsets"),
|
||||
task_id=kw.get("task_id"),
|
||||
))(),
|
||||
check_fn=check_cronjob_requirements,
|
||||
|
||||
+14
-10
@@ -298,7 +298,7 @@ def _get_child_timeout() -> float:
|
||||
"""Read delegation.child_timeout_seconds from config.
|
||||
|
||||
Returns the number of seconds a single child agent is allowed to run
|
||||
before being considered stuck. Default: 300 s (5 minutes).
|
||||
before being considered stuck. Default: 600 s (10 minutes).
|
||||
"""
|
||||
cfg = _load_config()
|
||||
val = cfg.get("child_timeout_seconds")
|
||||
@@ -409,7 +409,7 @@ def _preserve_parent_mcp_toolsets(
|
||||
|
||||
|
||||
DEFAULT_MAX_ITERATIONS = 50
|
||||
DEFAULT_CHILD_TIMEOUT = 300 # seconds before a child agent is considered stuck
|
||||
DEFAULT_CHILD_TIMEOUT = 600 # seconds before a child agent is considered stuck
|
||||
_HEARTBEAT_INTERVAL = 30 # seconds between parent activity heartbeats during delegation
|
||||
_HEARTBEAT_STALE_CYCLES = (
|
||||
5 # mark child stale after this many heartbeats with no iteration progress
|
||||
@@ -1558,7 +1558,18 @@ def delegate_task(
|
||||
# Load config
|
||||
cfg = _load_config()
|
||||
default_max_iter = cfg.get("max_iterations", DEFAULT_MAX_ITERATIONS)
|
||||
effective_max_iter = max_iterations or default_max_iter
|
||||
# Model-supplied max_iterations is ignored — the config value is authoritative
|
||||
# so users get predictable budgets. The kwarg is retained for internal callers
|
||||
# and tests; a model-emitted value here would only shrink the budget and
|
||||
# surprise the user mid-run. Log and drop it if one slips through from a
|
||||
# cached tool schema or a stale provider.
|
||||
if max_iterations is not None and max_iterations != default_max_iter:
|
||||
logger.debug(
|
||||
"delegate_task: ignoring caller-supplied max_iterations=%s; "
|
||||
"using delegation.max_iterations=%s from config",
|
||||
max_iterations, default_max_iter,
|
||||
)
|
||||
effective_max_iter = default_max_iter
|
||||
|
||||
# Resolve delegation credentials (provider:model pair).
|
||||
# When delegation.provider is configured, this resolves the full credential
|
||||
@@ -2098,13 +2109,6 @@ DELEGATE_TASK_SCHEMA = {
|
||||
"When provided, top-level goal/context/toolsets are ignored."
|
||||
),
|
||||
},
|
||||
"max_iterations": {
|
||||
"type": "integer",
|
||||
"description": (
|
||||
"Max tool-calling turns per subagent (default: 50). "
|
||||
"Only set lower for simple tasks."
|
||||
),
|
||||
},
|
||||
"role": {
|
||||
"type": "string",
|
||||
"enum": ["leaf", "orchestrator"],
|
||||
|
||||
+111
-63
@@ -473,6 +473,12 @@ _ACTIONS = {
|
||||
"remove_role": _remove_role,
|
||||
}
|
||||
|
||||
_CORE_ACTION_NAMES = frozenset({"fetch_messages", "search_members", "create_thread"})
|
||||
_ADMIN_ACTION_NAMES = frozenset(_ACTIONS.keys()) - _CORE_ACTION_NAMES
|
||||
|
||||
_CORE_ACTIONS = {k: v for k, v in _ACTIONS.items() if k in _CORE_ACTION_NAMES}
|
||||
_ADMIN_ACTIONS = {k: v for k, v in _ACTIONS.items() if k in _ADMIN_ACTION_NAMES}
|
||||
|
||||
# Single-source-of-truth manifest: action → (signature, one-line description).
|
||||
# Consumed by :func:`_build_schema` so the schema's top-level description
|
||||
# always matches the registered action set.
|
||||
@@ -531,7 +537,7 @@ def _load_allowed_actions_config() -> Optional[List[str]]:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
except Exception as exc:
|
||||
logger.debug("discord_server: could not load config (%s); allowing all actions.", exc)
|
||||
logger.debug("discord: could not load config (%s); allowing all actions.", exc)
|
||||
return None
|
||||
|
||||
raw = (cfg.get("discord") or {}).get("server_actions")
|
||||
@@ -586,12 +592,16 @@ def _available_actions(
|
||||
def _build_schema(
|
||||
actions: List[str],
|
||||
caps: Optional[Dict[str, Any]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build the tool schema for the given filtered action list."""
|
||||
tool_name: str = "discord",
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Build the tool schema for the given filtered action list.
|
||||
|
||||
Returns ``None`` when *actions* is empty — callers should drop the
|
||||
tool from registration in that case.
|
||||
"""
|
||||
caps = caps or {}
|
||||
if not actions:
|
||||
# Tool shouldn't be registered when empty, but guard anyway.
|
||||
actions = list(_ACTIONS.keys())
|
||||
return None
|
||||
|
||||
# Action manifest lines (action-first, parameter-scoped).
|
||||
manifest_lines = [
|
||||
@@ -602,24 +612,36 @@ def _build_schema(
|
||||
manifest_block = "\n".join(manifest_lines)
|
||||
|
||||
content_note = ""
|
||||
if caps.get("detected") and caps.get("has_message_content") is False:
|
||||
affected_actions = {"fetch_messages", "list_pins"} & set(actions)
|
||||
if affected_actions and caps.get("detected") and caps.get("has_message_content") is False:
|
||||
names = " and ".join(sorted(affected_actions))
|
||||
content_note = (
|
||||
"\n\nNOTE: Bot does NOT have the MESSAGE_CONTENT privileged intent. "
|
||||
"fetch_messages and list_pins will return message metadata (author, "
|
||||
f"\n\nNOTE: Bot does NOT have the MESSAGE_CONTENT privileged intent. "
|
||||
f"{names} will return message metadata (author, "
|
||||
"timestamps, attachments, reactions, pin state) but `content` will be "
|
||||
"empty for messages not sent as a direct mention to the bot or in DMs. "
|
||||
"Enable the intent in the Discord Developer Portal to see all content."
|
||||
)
|
||||
|
||||
description = (
|
||||
"Query and manage a Discord server via the REST API.\n\n"
|
||||
"Available actions:\n"
|
||||
f"{manifest_block}\n\n"
|
||||
"Call list_guilds first to discover guild_ids, then list_channels for "
|
||||
"channel_ids. Runtime errors will tell you if the bot lacks a specific "
|
||||
"per-guild permission (e.g. MANAGE_ROLES for add_role)."
|
||||
f"{content_note}"
|
||||
)
|
||||
if tool_name == "discord_admin":
|
||||
description = (
|
||||
"Manage a Discord server via the REST API.\n\n"
|
||||
"Available actions:\n"
|
||||
f"{manifest_block}\n\n"
|
||||
"Call list_guilds first to discover guild_ids, then list_channels for "
|
||||
"channel_ids. Runtime errors will tell you if the bot lacks a specific "
|
||||
"per-guild permission (e.g. MANAGE_ROLES for add_role)."
|
||||
f"{content_note}"
|
||||
)
|
||||
else:
|
||||
description = (
|
||||
"Read and participate in a Discord server.\n\n"
|
||||
"Available actions:\n"
|
||||
f"{manifest_block}\n\n"
|
||||
"Use the channel_id from the current conversation context. "
|
||||
"Use search_members to look up user IDs by name prefix."
|
||||
f"{content_note}"
|
||||
)
|
||||
|
||||
properties: Dict[str, Any] = {
|
||||
"action": {
|
||||
@@ -676,7 +698,7 @@ def _build_schema(
|
||||
}
|
||||
|
||||
return {
|
||||
"name": "discord_server",
|
||||
"name": tool_name,
|
||||
"description": description,
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
@@ -686,28 +708,33 @@ def _build_schema(
|
||||
}
|
||||
|
||||
|
||||
def get_dynamic_schema() -> Optional[Dict[str, Any]]:
|
||||
"""Return a schema filtered by current intents + config allowlist.
|
||||
|
||||
Called by ``model_tools.get_tool_definitions`` as a post-processing
|
||||
step so the schema the model sees always reflects reality. Returns
|
||||
``None`` when no actions are available (tool should be removed from
|
||||
the schema list entirely).
|
||||
"""
|
||||
def _get_dynamic_schema(
|
||||
action_subset: Dict[str, Any],
|
||||
tool_name: str,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Build a dynamic schema for *action_subset* filtered by intents + config."""
|
||||
token = _get_bot_token()
|
||||
if not token:
|
||||
return None
|
||||
|
||||
caps = _detect_capabilities(token)
|
||||
allowlist = _load_allowed_actions_config()
|
||||
actions = _available_actions(caps, allowlist)
|
||||
actions = [a for a in _available_actions(caps, allowlist) if a in action_subset]
|
||||
if not actions:
|
||||
logger.warning(
|
||||
"discord_server: config allowlist/intents left zero available actions; "
|
||||
"hiding tool from this session."
|
||||
)
|
||||
return None
|
||||
return _build_schema(actions, caps)
|
||||
return _build_schema(actions, caps, tool_name=tool_name)
|
||||
|
||||
|
||||
def get_dynamic_schema_core() -> Optional[Dict[str, Any]]:
|
||||
return _get_dynamic_schema(_CORE_ACTIONS, "discord")
|
||||
|
||||
|
||||
def get_dynamic_schema_admin() -> Optional[Dict[str, Any]]:
|
||||
return _get_dynamic_schema(_ADMIN_ACTIONS, "discord_admin")
|
||||
|
||||
|
||||
def get_dynamic_schema() -> Optional[Dict[str, Any]]:
|
||||
"""Backward-compat wrapper — returns core schema."""
|
||||
return get_dynamic_schema_core()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -774,11 +801,13 @@ def check_discord_tool_requirements() -> bool:
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main handler
|
||||
# Handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def discord_server(
|
||||
def _run_discord_action(
|
||||
action: str,
|
||||
valid_actions: Dict[str, Any],
|
||||
tool_label: str,
|
||||
guild_id: str = "",
|
||||
channel_id: str = "",
|
||||
user_id: str = "",
|
||||
@@ -790,18 +819,17 @@ def discord_server(
|
||||
before: str = "",
|
||||
after: str = "",
|
||||
auto_archive_duration: int = 1440,
|
||||
task_id: str = None,
|
||||
) -> str:
|
||||
"""Execute a Discord server action."""
|
||||
"""Shared handler logic for both discord tools."""
|
||||
token = _get_bot_token()
|
||||
if not token:
|
||||
return json.dumps({"error": "DISCORD_BOT_TOKEN not configured."})
|
||||
|
||||
action_fn = _ACTIONS.get(action)
|
||||
action_fn = valid_actions.get(action)
|
||||
if not action_fn:
|
||||
return json.dumps({
|
||||
"error": f"Unknown action: {action}",
|
||||
"available_actions": list(_ACTIONS.keys()),
|
||||
"available_actions": list(valid_actions.keys()),
|
||||
})
|
||||
|
||||
# Config-level allowlist gate (defense in depth — schema already filtered,
|
||||
@@ -848,44 +876,64 @@ def discord_server(
|
||||
auto_archive_duration=auto_archive_duration,
|
||||
)
|
||||
except DiscordAPIError as e:
|
||||
logger.warning("Discord API error in action '%s': %s", action, e)
|
||||
logger.warning("Discord API error in %s action '%s': %s", tool_label, action, e)
|
||||
if e.status == 403:
|
||||
return json.dumps({"error": _enrich_403(action, e.body)})
|
||||
return json.dumps({"error": str(e)})
|
||||
except Exception as e:
|
||||
logger.exception("Unexpected error in discord_server action '%s'", action)
|
||||
logger.exception("Unexpected error in %s action '%s'", tool_label, action)
|
||||
return json.dumps({"error": f"Unexpected error: {e}"})
|
||||
|
||||
|
||||
def discord_core(action: str, **kwargs) -> str:
|
||||
"""Execute a core Discord action (fetch_messages, search_members, create_thread)."""
|
||||
return _run_discord_action(action, _CORE_ACTIONS, "discord", **kwargs)
|
||||
|
||||
|
||||
def discord_admin_handler(action: str, **kwargs) -> str:
|
||||
"""Execute a Discord admin action (server management)."""
|
||||
return _run_discord_action(action, _ADMIN_ACTIONS, "discord_admin", **kwargs)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool registration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Register with the full unfiltered schema. ``model_tools.get_tool_definitions``
|
||||
# rebuilds this per-session via ``get_dynamic_schema`` so the model only ever
|
||||
# sees intent-available, config-allowed actions. The static registration is a
|
||||
# safe baseline for tools that inspect the registry directly.
|
||||
_STATIC_SCHEMA = _build_schema(list(_ACTIONS.keys()), caps={"detected": False})
|
||||
_HANDLER_DEFAULTS = {
|
||||
"action": "", "guild_id": "", "channel_id": "", "user_id": "",
|
||||
"role_id": "", "message_id": "", "query": "", "name": "",
|
||||
"limit": 50, "before": "", "after": "", "auto_archive_duration": 1440,
|
||||
}
|
||||
|
||||
|
||||
def _make_handler(handler_fn):
|
||||
"""Create a registry-compatible handler lambda for a discord handler."""
|
||||
return lambda args, **kw: handler_fn(
|
||||
**{k: args.get(k, v) for k, v in _HANDLER_DEFAULTS.items()},
|
||||
)
|
||||
|
||||
|
||||
_STATIC_CORE_SCHEMA = _build_schema(
|
||||
list(_CORE_ACTIONS.keys()), caps={"detected": False}, tool_name="discord",
|
||||
)
|
||||
_STATIC_ADMIN_SCHEMA = _build_schema(
|
||||
list(_ADMIN_ACTIONS.keys()), caps={"detected": False}, tool_name="discord_admin",
|
||||
)
|
||||
|
||||
registry.register(
|
||||
name="discord_server",
|
||||
name="discord",
|
||||
toolset="discord",
|
||||
schema=_STATIC_SCHEMA,
|
||||
handler=lambda args, **kw: discord_server(
|
||||
action=args.get("action", ""),
|
||||
guild_id=args.get("guild_id", ""),
|
||||
channel_id=args.get("channel_id", ""),
|
||||
user_id=args.get("user_id", ""),
|
||||
role_id=args.get("role_id", ""),
|
||||
message_id=args.get("message_id", ""),
|
||||
query=args.get("query", ""),
|
||||
name=args.get("name", ""),
|
||||
limit=args.get("limit", 50),
|
||||
before=args.get("before", ""),
|
||||
after=args.get("after", ""),
|
||||
auto_archive_duration=args.get("auto_archive_duration", 1440),
|
||||
task_id=kw.get("task_id"),
|
||||
),
|
||||
schema=_STATIC_CORE_SCHEMA,
|
||||
handler=_make_handler(discord_core),
|
||||
check_fn=check_discord_tool_requirements,
|
||||
requires_env=["DISCORD_BOT_TOKEN"],
|
||||
)
|
||||
|
||||
registry.register(
|
||||
name="discord_admin",
|
||||
toolset="discord_admin",
|
||||
schema=_STATIC_ADMIN_SCHEMA,
|
||||
handler=_make_handler(discord_admin_handler),
|
||||
check_fn=check_discord_tool_requirements,
|
||||
requires_env=["DISCORD_BOT_TOKEN"],
|
||||
)
|
||||
|
||||
@@ -292,10 +292,15 @@ def normalize_read_pagination(offset: Any = DEFAULT_READ_OFFSET,
|
||||
Tool schemas declare minimum/maximum values, but not every caller or
|
||||
provider enforces schemas before dispatch. Clamp here so invalid values
|
||||
cannot leak into sed ranges like ``0,-1p``.
|
||||
|
||||
The upper bound on ``limit`` comes from ``tool_output.max_lines`` in
|
||||
config.yaml (defaults to the module-level ``MAX_LINES`` constant).
|
||||
"""
|
||||
from tools.tool_output_limits import get_max_lines
|
||||
max_lines = get_max_lines()
|
||||
normalized_offset = max(1, _coerce_int(offset, DEFAULT_READ_OFFSET))
|
||||
normalized_limit = _coerce_int(limit, DEFAULT_READ_LIMIT)
|
||||
normalized_limit = max(1, min(normalized_limit, MAX_LINES))
|
||||
normalized_limit = max(1, min(normalized_limit, max_lines))
|
||||
return normalized_offset, normalized_limit
|
||||
|
||||
|
||||
@@ -414,12 +419,14 @@ class ShellFileOperations(FileOperations):
|
||||
|
||||
def _add_line_numbers(self, content: str, start_line: int = 1) -> str:
|
||||
"""Add line numbers to content in LINE_NUM|CONTENT format."""
|
||||
from tools.tool_output_limits import get_max_line_length
|
||||
max_line_length = get_max_line_length()
|
||||
lines = content.split('\n')
|
||||
numbered = []
|
||||
for i, line in enumerate(lines, start=start_line):
|
||||
# Truncate long lines
|
||||
if len(line) > MAX_LINE_LENGTH:
|
||||
line = line[:MAX_LINE_LENGTH] + "... [truncated]"
|
||||
if len(line) > max_line_length:
|
||||
line = line[:max_line_length] + "... [truncated]"
|
||||
numbered.append(f"{i:6d}|{line}")
|
||||
return '\n'.join(numbered)
|
||||
|
||||
|
||||
+49
-15
@@ -79,13 +79,45 @@ _BLOCKED_DEVICE_PATHS = frozenset({
|
||||
})
|
||||
|
||||
|
||||
def _resolve_path(filepath: str) -> Path:
|
||||
def _resolve_path(filepath: str, task_id: str = "default") -> Path:
|
||||
"""Resolve a path relative to TERMINAL_CWD (the worktree base directory)
|
||||
instead of the main repository root.
|
||||
"""
|
||||
return _resolve_path_for_task(filepath, task_id)
|
||||
|
||||
|
||||
def _get_live_tracking_cwd(task_id: str = "default") -> str | None:
|
||||
"""Return the task's live terminal cwd for bookkeeping when available."""
|
||||
with _file_ops_lock:
|
||||
cached = _file_ops_cache.get(task_id)
|
||||
if cached is not None:
|
||||
live_cwd = getattr(getattr(cached, "env", None), "cwd", None) or getattr(
|
||||
cached, "cwd", None
|
||||
)
|
||||
if live_cwd:
|
||||
return live_cwd
|
||||
|
||||
try:
|
||||
from tools.terminal_tool import _active_environments, _env_lock
|
||||
|
||||
with _env_lock:
|
||||
env = _active_environments.get(task_id)
|
||||
live_cwd = getattr(env, "cwd", None) if env is not None else None
|
||||
if live_cwd:
|
||||
return live_cwd
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_path_for_task(filepath: str, task_id: str = "default") -> Path:
|
||||
"""Resolve *filepath* against the task's live terminal cwd when possible."""
|
||||
p = Path(filepath).expanduser()
|
||||
if not p.is_absolute():
|
||||
base = os.environ.get("TERMINAL_CWD", os.getcwd())
|
||||
base = _get_live_tracking_cwd(task_id) or os.environ.get(
|
||||
"TERMINAL_CWD", os.getcwd()
|
||||
)
|
||||
p = Path(base) / p
|
||||
return p.resolve()
|
||||
|
||||
@@ -118,10 +150,10 @@ _SENSITIVE_PATH_PREFIXES = (
|
||||
_SENSITIVE_EXACT_PATHS = {"/var/run/docker.sock", "/run/docker.sock"}
|
||||
|
||||
|
||||
def _check_sensitive_path(filepath: str) -> str | None:
|
||||
def _check_sensitive_path(filepath: str, task_id: str = "default") -> str | None:
|
||||
"""Return an error message if the path targets a sensitive system location."""
|
||||
try:
|
||||
resolved = str(_resolve_path(filepath))
|
||||
resolved = str(_resolve_path_for_task(filepath, task_id))
|
||||
except (OSError, ValueError):
|
||||
resolved = filepath
|
||||
normalized = os.path.normpath(os.path.expanduser(filepath))
|
||||
@@ -368,7 +400,7 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
|
||||
),
|
||||
})
|
||||
|
||||
_resolved = _resolve_path(path)
|
||||
_resolved = _resolve_path_for_task(path, task_id)
|
||||
|
||||
# ── Binary file guard ─────────────────────────────────────────
|
||||
# Block binary files by extension (no I/O).
|
||||
@@ -574,7 +606,7 @@ def _update_read_timestamp(filepath: str, task_id: str) -> None:
|
||||
refreshes the stored timestamp to match the file's new state.
|
||||
"""
|
||||
try:
|
||||
resolved = str(_resolve_path(filepath))
|
||||
resolved = str(_resolve_path_for_task(filepath, task_id))
|
||||
current_mtime = os.path.getmtime(resolved)
|
||||
except (OSError, ValueError):
|
||||
return
|
||||
@@ -593,7 +625,7 @@ def _check_file_staleness(filepath: str, task_id: str) -> str | None:
|
||||
or was never read. Does not block — the write still proceeds.
|
||||
"""
|
||||
try:
|
||||
resolved = str(_resolve_path(filepath))
|
||||
resolved = str(_resolve_path_for_task(filepath, task_id))
|
||||
except (OSError, ValueError):
|
||||
return None
|
||||
with _read_tracker_lock:
|
||||
@@ -618,7 +650,7 @@ def _check_file_staleness(filepath: str, task_id: str) -> str | None:
|
||||
|
||||
def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
|
||||
"""Write content to a file."""
|
||||
sensitive_err = _check_sensitive_path(path)
|
||||
sensitive_err = _check_sensitive_path(path, task_id)
|
||||
if sensitive_err:
|
||||
return tool_error(sensitive_err)
|
||||
try:
|
||||
@@ -626,7 +658,7 @@ def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
|
||||
# fall back to the legacy path — write proceeds, per-task staleness
|
||||
# check below still runs.
|
||||
try:
|
||||
_resolved = str(_resolve_path(path))
|
||||
_resolved = str(_resolve_path_for_task(path, task_id))
|
||||
except Exception:
|
||||
_resolved = None
|
||||
|
||||
@@ -681,7 +713,7 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
|
||||
for _m in _re.finditer(r'^\*\*\*\s+(?:Update|Add|Delete)\s+File:\s*(.+)$', patch, _re.MULTILINE):
|
||||
_paths_to_check.append(_m.group(1).strip())
|
||||
for _p in _paths_to_check:
|
||||
sensitive_err = _check_sensitive_path(_p)
|
||||
sensitive_err = _check_sensitive_path(_p, task_id)
|
||||
if sensitive_err:
|
||||
return tool_error(sensitive_err)
|
||||
try:
|
||||
@@ -692,7 +724,7 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
|
||||
_seen: set[str] = set()
|
||||
for _p in _paths_to_check:
|
||||
try:
|
||||
_r = str(_resolve_path(_p))
|
||||
_r = str(_resolve_path_for_task(_p, task_id))
|
||||
except Exception:
|
||||
_r = None
|
||||
if _r and _r not in _seen:
|
||||
@@ -714,7 +746,7 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
|
||||
_path_to_resolved: dict[str, str] = {}
|
||||
for _p in _paths_to_check:
|
||||
try:
|
||||
_r = str(_resolve_path(_p))
|
||||
_r = str(_resolve_path_for_task(_p, task_id))
|
||||
except Exception:
|
||||
_r = None
|
||||
_path_to_resolved[_p] = _r
|
||||
@@ -749,15 +781,17 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
|
||||
_r = _path_to_resolved.get(_p)
|
||||
if _r:
|
||||
file_state.note_write(task_id, _r)
|
||||
result_json = json.dumps(result_dict, ensure_ascii=False)
|
||||
# Hint when old_string not found — saves iterations where the agent
|
||||
# retries with stale content instead of re-reading the file.
|
||||
# Suppressed when patch_replace already attached a rich "Did you mean?"
|
||||
# snippet (which is strictly more useful than the generic hint).
|
||||
if result_dict.get("error") and "Could not find" in str(result_dict["error"]):
|
||||
if "Did you mean one of these sections?" not in str(result_dict["error"]):
|
||||
result_json += "\n\n[Hint: old_string not found. Use read_file to verify the current content, or search_files to locate the text.]"
|
||||
return result_json
|
||||
result_dict["_hint"] = (
|
||||
"old_string not found. Use read_file to verify the current "
|
||||
"content, or search_files to locate the text."
|
||||
)
|
||||
return json.dumps(result_dict, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
return tool_error(str(e))
|
||||
|
||||
|
||||
+210
-19
@@ -78,12 +78,86 @@ import math
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stdio subprocess stderr redirection
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# The MCP SDK's ``stdio_client(server, errlog=sys.stderr)`` defaults the
|
||||
# subprocess stderr stream to the parent process's real stderr, i.e. the
|
||||
# user's TTY. That means any MCP server we spawn at startup (FastMCP
|
||||
# banners, slack-mcp-server JSON startup logs, etc.) writes directly onto
|
||||
# the terminal while prompt_toolkit / Rich is rendering the TUI — which
|
||||
# corrupts the display and can hang the session.
|
||||
#
|
||||
# Instead we redirect every stdio MCP subprocess's stderr into a shared
|
||||
# per-profile log file (~/.hermes/logs/mcp-stderr.log), tagged with the
|
||||
# server name so individual servers remain debuggable.
|
||||
#
|
||||
# Fallback is os.devnull if opening the log file fails for any reason.
|
||||
|
||||
_mcp_stderr_log_fh: Optional[Any] = None
|
||||
_mcp_stderr_log_lock = threading.Lock()
|
||||
|
||||
|
||||
def _get_mcp_stderr_log() -> Any:
|
||||
"""Return a shared append-mode file handle for MCP subprocess stderr.
|
||||
|
||||
Opened once per process and reused for every stdio server. Must have a
|
||||
real OS-level file descriptor (``fileno()``) because asyncio's subprocess
|
||||
machinery wires the child's stderr directly to that fd. Falls back to
|
||||
``/dev/null`` if opening the log file fails.
|
||||
"""
|
||||
global _mcp_stderr_log_fh
|
||||
with _mcp_stderr_log_lock:
|
||||
if _mcp_stderr_log_fh is not None:
|
||||
return _mcp_stderr_log_fh
|
||||
try:
|
||||
from hermes_constants import get_hermes_home
|
||||
log_dir = get_hermes_home() / "logs"
|
||||
log_dir.mkdir(parents=True, exist_ok=True)
|
||||
log_path = log_dir / "mcp-stderr.log"
|
||||
# Line-buffered so server output lands on disk promptly; errors=
|
||||
# "replace" tolerates garbled binary output from misbehaving
|
||||
# servers.
|
||||
fh = open(log_path, "a", encoding="utf-8", errors="replace", buffering=1)
|
||||
# Sanity-check: confirm a real fd is available before we commit.
|
||||
fh.fileno()
|
||||
_mcp_stderr_log_fh = fh
|
||||
except Exception as exc: # pragma: no cover — best-effort fallback
|
||||
logger.debug("Failed to open MCP stderr log, using devnull: %s", exc)
|
||||
try:
|
||||
_mcp_stderr_log_fh = open(os.devnull, "w", encoding="utf-8")
|
||||
except Exception:
|
||||
# Last resort: the real stderr. Not ideal for TUI users but
|
||||
# it matches pre-fix behavior.
|
||||
_mcp_stderr_log_fh = sys.stderr
|
||||
return _mcp_stderr_log_fh
|
||||
|
||||
|
||||
def _write_stderr_log_header(server_name: str) -> None:
|
||||
"""Write a human-readable session marker before launching a server.
|
||||
|
||||
Gives operators a way to find each server's output in the shared
|
||||
``mcp-stderr.log`` file without needing per-line prefixes (which would
|
||||
require a pipe + reader thread and complicate shutdown).
|
||||
"""
|
||||
fh = _get_mcp_stderr_log()
|
||||
try:
|
||||
ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
fh.write(f"\n===== [{ts}] starting MCP server '{server_name}' =====\n")
|
||||
fh.flush()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Graceful import -- MCP SDK is an optional dependency
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -93,6 +167,10 @@ _MCP_HTTP_AVAILABLE = False
|
||||
_MCP_SAMPLING_TYPES = False
|
||||
_MCP_NOTIFICATION_TYPES = False
|
||||
_MCP_MESSAGE_HANDLER_SUPPORTED = False
|
||||
# Conservative fallback for SDK builds that don't export LATEST_PROTOCOL_VERSION.
|
||||
# Streamable HTTP was introduced by 2025-03-26, so this remains valid for the
|
||||
# HTTP transport path even on older-but-supported SDK versions.
|
||||
LATEST_PROTOCOL_VERSION = "2025-03-26"
|
||||
try:
|
||||
from mcp import ClientSession, StdioServerParameters
|
||||
from mcp.client.stdio import stdio_client
|
||||
@@ -109,6 +187,10 @@ try:
|
||||
_MCP_NEW_HTTP = True
|
||||
except ImportError:
|
||||
_MCP_NEW_HTTP = False
|
||||
try:
|
||||
from mcp.types import LATEST_PROTOCOL_VERSION
|
||||
except ImportError:
|
||||
logger.debug("mcp.types.LATEST_PROTOCOL_VERSION not available -- using fallback protocol version")
|
||||
# Sampling types -- separated so older SDK versions don't break MCP support
|
||||
try:
|
||||
from mcp.types import (
|
||||
@@ -962,12 +1044,19 @@ class MCPServerTask:
|
||||
|
||||
# Snapshot child PIDs before spawning so we can track the new one.
|
||||
pids_before = _snapshot_child_pids()
|
||||
async with stdio_client(server_params) as (read_stream, write_stream):
|
||||
# Redirect subprocess stderr into a shared log file so MCP servers
|
||||
# (FastMCP banners, slack-mcp startup JSON, etc.) don't dump onto
|
||||
# the user's TTY and corrupt the TUI. Preserves debuggability via
|
||||
# ~/.hermes/logs/mcp-stderr.log.
|
||||
_write_stderr_log_header(self.name)
|
||||
_errlog = _get_mcp_stderr_log()
|
||||
async with stdio_client(server_params, errlog=_errlog) as (read_stream, write_stream):
|
||||
# Capture the newly spawned subprocess PID for force-kill cleanup.
|
||||
new_pids = _snapshot_child_pids() - pids_before
|
||||
if new_pids:
|
||||
with _lock:
|
||||
_stdio_pids.update(new_pids)
|
||||
for _pid in new_pids:
|
||||
_stdio_pids[_pid] = self.name
|
||||
async with ClientSession(read_stream, write_stream, **sampling_kwargs) as session:
|
||||
await session.initialize()
|
||||
self.session = session
|
||||
@@ -980,7 +1069,8 @@ class MCPServerTask:
|
||||
# Context exited cleanly — subprocess was terminated by the SDK.
|
||||
if new_pids:
|
||||
with _lock:
|
||||
_stdio_pids.difference_update(new_pids)
|
||||
for _pid in new_pids:
|
||||
_stdio_pids.pop(_pid, None)
|
||||
|
||||
async def _run_http(self, config: dict):
|
||||
"""Run the server using HTTP/StreamableHTTP transport."""
|
||||
@@ -993,6 +1083,12 @@ class MCPServerTask:
|
||||
|
||||
url = config["url"]
|
||||
headers = dict(config.get("headers") or {})
|
||||
# Some MCP servers require MCP-Protocol-Version on the initial
|
||||
# initialize request and reject session-less POSTs otherwise.
|
||||
# Seed it as a client-level default, but treat user overrides as
|
||||
# case-insensitive so conventional casing is preserved.
|
||||
if not any(key.lower() == "mcp-protocol-version" for key in headers):
|
||||
headers["mcp-protocol-version"] = LATEST_PROTOCOL_VERSION
|
||||
connect_timeout = config.get("connect_timeout", _DEFAULT_CONNECT_TIMEOUT)
|
||||
ssl_verify = config.get("ssl_verify", True)
|
||||
|
||||
@@ -1484,7 +1580,7 @@ _lock = threading.Lock()
|
||||
# them on shutdown if the graceful cleanup (SDK context-manager teardown)
|
||||
# fails or times out. PIDs are added after connection and removed on
|
||||
# normal server shutdown.
|
||||
_stdio_pids: set = set()
|
||||
_stdio_pids: Dict[int, str] = {} # pid -> server_name
|
||||
|
||||
|
||||
def _snapshot_child_pids() -> set:
|
||||
@@ -2017,14 +2113,92 @@ def _make_check_fn(server_name: str):
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _normalize_mcp_input_schema(schema: dict | None) -> dict:
|
||||
"""Normalize MCP input schemas for LLM tool-calling compatibility."""
|
||||
"""Normalize MCP input schemas for LLM tool-calling compatibility.
|
||||
|
||||
MCP servers can emit plain JSON Schema with ``definitions`` /
|
||||
``#/definitions/...`` references. Kimi / Moonshot rejects that form and
|
||||
requires local refs to point into ``#/$defs/...`` instead. Normalize the
|
||||
common draft-07 shape here so MCP tool schemas remain portable across
|
||||
OpenAI-compatible providers.
|
||||
|
||||
Additional MCP-server robustness repairs applied recursively:
|
||||
|
||||
* Missing or ``null`` ``type`` on an object-shaped node is coerced to
|
||||
``"object"`` (some servers omit it). See PR #4897.
|
||||
* When an ``object`` node lacks ``properties``, an empty ``properties``
|
||||
dict is added so ``required`` entries don't dangle.
|
||||
* ``required`` arrays are pruned to only names that exist in
|
||||
``properties``; otherwise Google AI Studio / Gemini 400s with
|
||||
``property is not defined``. See PR #4651.
|
||||
|
||||
All repairs are provider-agnostic and ideally produce a schema valid on
|
||||
OpenAI, Anthropic, Gemini, and Moonshot in one pass.
|
||||
"""
|
||||
if not schema:
|
||||
return {"type": "object", "properties": {}}
|
||||
|
||||
if schema.get("type") == "object" and "properties" not in schema:
|
||||
return {**schema, "properties": {}}
|
||||
def _rewrite_local_refs(node):
|
||||
if isinstance(node, dict):
|
||||
normalized = {}
|
||||
for key, value in node.items():
|
||||
out_key = "$defs" if key == "definitions" else key
|
||||
normalized[out_key] = _rewrite_local_refs(value)
|
||||
ref = normalized.get("$ref")
|
||||
if isinstance(ref, str) and ref.startswith("#/definitions/"):
|
||||
normalized["$ref"] = "#/$defs/" + ref[len("#/definitions/"):]
|
||||
return normalized
|
||||
if isinstance(node, list):
|
||||
return [_rewrite_local_refs(item) for item in node]
|
||||
return node
|
||||
|
||||
return schema
|
||||
def _repair_object_shape(node):
|
||||
"""Recursively repair object-shaped nodes: fill type, prune required."""
|
||||
if isinstance(node, list):
|
||||
return [_repair_object_shape(item) for item in node]
|
||||
if not isinstance(node, dict):
|
||||
return node
|
||||
|
||||
repaired = {k: _repair_object_shape(v) for k, v in node.items()}
|
||||
|
||||
# Coerce missing / null type when the shape is clearly an object
|
||||
# (has properties or required but no type).
|
||||
if not repaired.get("type") and (
|
||||
"properties" in repaired or "required" in repaired
|
||||
):
|
||||
repaired["type"] = "object"
|
||||
|
||||
if repaired.get("type") == "object":
|
||||
# Ensure properties exists so required can reference it safely
|
||||
if "properties" not in repaired or not isinstance(
|
||||
repaired.get("properties"), dict
|
||||
):
|
||||
repaired["properties"] = {} if "properties" not in repaired else repaired["properties"]
|
||||
if not isinstance(repaired.get("properties"), dict):
|
||||
repaired["properties"] = {}
|
||||
|
||||
# Prune required to only include names that exist in properties
|
||||
required = repaired.get("required")
|
||||
if isinstance(required, list):
|
||||
props = repaired.get("properties") or {}
|
||||
valid = [r for r in required if isinstance(r, str) and r in props]
|
||||
if len(valid) != len(required):
|
||||
if valid:
|
||||
repaired["required"] = valid
|
||||
else:
|
||||
repaired.pop("required", None)
|
||||
|
||||
return repaired
|
||||
|
||||
normalized = _rewrite_local_refs(schema)
|
||||
normalized = _repair_object_shape(normalized)
|
||||
|
||||
# Ensure top-level is a well-formed object schema
|
||||
if not isinstance(normalized, dict):
|
||||
return {"type": "object", "properties": {}}
|
||||
if normalized.get("type") == "object" and "properties" not in normalized:
|
||||
normalized = {**normalized, "properties": {}}
|
||||
|
||||
return normalized
|
||||
|
||||
|
||||
def sanitize_mcp_name_component(value: str) -> str:
|
||||
@@ -2055,7 +2229,7 @@ def _convert_mcp_schema(server_name: str, mcp_tool) -> dict:
|
||||
return {
|
||||
"name": prefixed_name,
|
||||
"description": mcp_tool.description or f"MCP tool {mcp_tool.name} from {server_name}",
|
||||
"parameters": _normalize_mcp_input_schema(mcp_tool.inputSchema),
|
||||
"parameters": _normalize_mcp_input_schema(getattr(mcp_tool, "inputSchema", None)),
|
||||
}
|
||||
|
||||
|
||||
@@ -2618,27 +2792,44 @@ def shutdown_mcp_servers():
|
||||
|
||||
|
||||
def _kill_orphaned_mcp_children() -> None:
|
||||
"""Best-effort kill of MCP stdio subprocesses that survived loop shutdown.
|
||||
"""Graceful shutdown of MCP stdio subprocesses that survived loop cleanup.
|
||||
|
||||
After the MCP event loop is stopped, stdio server subprocesses *should*
|
||||
have been terminated by the SDK's context-manager cleanup. If the loop
|
||||
was stuck or the shutdown timed out, orphaned children may remain.
|
||||
Sends SIGTERM first, waits 2 seconds, then escalates to SIGKILL.
|
||||
This prevents shared-resource collisions when multiple hermes processes
|
||||
run on the same host (each has its own _stdio_pids dict).
|
||||
|
||||
Only kills PIDs tracked in ``_stdio_pids`` — never arbitrary children.
|
||||
"""
|
||||
import signal as _signal
|
||||
kill_signal = getattr(_signal, "SIGKILL", _signal.SIGTERM)
|
||||
import time as _time
|
||||
|
||||
with _lock:
|
||||
pids = list(_stdio_pids)
|
||||
pids = dict(_stdio_pids)
|
||||
_stdio_pids.clear()
|
||||
|
||||
for pid in pids:
|
||||
# Phase 1: SIGTERM (graceful)
|
||||
for pid, server_name in pids.items():
|
||||
try:
|
||||
os.kill(pid, kill_signal)
|
||||
logger.debug("Force-killed orphaned MCP stdio process %d", pid)
|
||||
os.kill(pid, _signal.SIGTERM)
|
||||
logger.debug("Sent SIGTERM to orphaned MCP process %d (%s)", pid, server_name)
|
||||
except (ProcessLookupError, PermissionError, OSError):
|
||||
pass # Already exited or inaccessible
|
||||
pass
|
||||
|
||||
# Phase 2: Wait for graceful exit
|
||||
_time.sleep(2)
|
||||
|
||||
# Phase 3: SIGKILL any survivors
|
||||
_sigkill = getattr(_signal, "SIGKILL", _signal.SIGTERM)
|
||||
for pid, server_name in pids.items():
|
||||
try:
|
||||
os.kill(pid, 0) # Check if still alive
|
||||
os.kill(pid, _sigkill)
|
||||
logger.warning(
|
||||
"Force-killed MCP process %d (%s) after SIGTERM timeout",
|
||||
pid, server_name,
|
||||
)
|
||||
except (ProcessLookupError, PermissionError, OSError):
|
||||
pass # Good — exited after SIGTERM
|
||||
|
||||
|
||||
def _stop_mcp_loop():
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user