Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 603a68b918 |
@@ -14,6 +14,3 @@ node_modules
|
||||
.env
|
||||
|
||||
*.md
|
||||
|
||||
# Runtime data (bind-mounted at /opt/data; must not leak into build context)
|
||||
data/
|
||||
|
||||
@@ -53,9 +53,6 @@ jobs:
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Build skills index (if not already present)
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
@@ -36,9 +36,6 @@ jobs:
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Lint docs diagrams
|
||||
run: npm run lint:diagrams
|
||||
working-directory: website
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
.DS_Store
|
||||
/venv/
|
||||
/_pycache/
|
||||
*.pyc*
|
||||
|
||||
@@ -5,61 +5,78 @@ Instructions for AI coding assistants and developers working on the hermes-agent
|
||||
## Development Environment
|
||||
|
||||
```bash
|
||||
# Prefer .venv; fall back to venv if that's what your checkout has.
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
source venv/bin/activate # ALWAYS activate before running Python
|
||||
```
|
||||
|
||||
`scripts/run_tests.sh` probes `.venv` first, then `venv`, then
|
||||
`$HOME/.hermes/hermes-agent/venv` (for worktrees that share a venv with the
|
||||
main checkout).
|
||||
|
||||
## Project Structure
|
||||
|
||||
File counts shift constantly — don't treat the tree below as exhaustive.
|
||||
The canonical source is the filesystem. The notes call out the load-bearing
|
||||
entry points you'll actually edit.
|
||||
|
||||
```
|
||||
hermes-agent/
|
||||
├── run_agent.py # AIAgent class — core conversation loop (~12k LOC)
|
||||
├── run_agent.py # AIAgent class — core conversation loop
|
||||
├── model_tools.py # Tool orchestration, discover_builtin_tools(), handle_function_call()
|
||||
├── toolsets.py # Toolset definitions, _HERMES_CORE_TOOLS list
|
||||
├── cli.py # HermesCLI class — interactive CLI orchestrator (~11k LOC)
|
||||
├── cli.py # HermesCLI class — interactive CLI orchestrator
|
||||
├── hermes_state.py # SessionDB — SQLite session store (FTS5 search)
|
||||
├── hermes_constants.py # get_hermes_home(), display_hermes_home() — profile-aware paths
|
||||
├── hermes_logging.py # setup_logging() — agent.log / errors.log / gateway.log (profile-aware)
|
||||
├── batch_runner.py # Parallel batch processing
|
||||
├── agent/ # Agent internals (provider adapters, memory, caching, compression, etc.)
|
||||
├── hermes_cli/ # CLI subcommands, setup wizard, plugins loader, skin engine
|
||||
├── tools/ # Tool implementations — auto-discovered via tools/registry.py
|
||||
├── agent/ # Agent internals
|
||||
│ ├── prompt_builder.py # System prompt assembly
|
||||
│ ├── context_compressor.py # Auto context compression
|
||||
│ ├── prompt_caching.py # Anthropic prompt caching
|
||||
│ ├── auxiliary_client.py # Auxiliary LLM client (vision, summarization)
|
||||
│ ├── model_metadata.py # Model context lengths, token estimation
|
||||
│ ├── models_dev.py # models.dev registry integration (provider-aware context)
|
||||
│ ├── display.py # KawaiiSpinner, tool preview formatting
|
||||
│ ├── skill_commands.py # Skill slash commands (shared CLI/gateway)
|
||||
│ └── trajectory.py # Trajectory saving helpers
|
||||
├── hermes_cli/ # CLI subcommands and setup
|
||||
│ ├── main.py # Entry point — all `hermes` subcommands
|
||||
│ ├── config.py # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
|
||||
│ ├── commands.py # Slash command definitions + SlashCommandCompleter
|
||||
│ ├── callbacks.py # Terminal callbacks (clarify, sudo, approval)
|
||||
│ ├── setup.py # Interactive setup wizard
|
||||
│ ├── skin_engine.py # Skin/theme engine — CLI visual customization
|
||||
│ ├── skills_config.py # `hermes skills` — enable/disable skills per platform
|
||||
│ ├── tools_config.py # `hermes tools` — enable/disable tools per platform
|
||||
│ ├── skills_hub.py # `/skills` slash command (search, browse, install)
|
||||
│ ├── models.py # Model catalog, provider model lists
|
||||
│ ├── model_switch.py # Shared /model switch pipeline (CLI + gateway)
|
||||
│ └── auth.py # Provider credential resolution
|
||||
├── tools/ # Tool implementations (one file per tool)
|
||||
│ ├── registry.py # Central tool registry (schemas, handlers, dispatch)
|
||||
│ ├── approval.py # Dangerous command detection
|
||||
│ ├── terminal_tool.py # Terminal orchestration
|
||||
│ ├── process_registry.py # Background process management
|
||||
│ ├── file_tools.py # File read/write/search/patch
|
||||
│ ├── web_tools.py # Web search/extract (Parallel + Firecrawl)
|
||||
│ ├── browser_tool.py # Browserbase browser automation
|
||||
│ ├── code_execution_tool.py # execute_code sandbox
|
||||
│ ├── delegate_tool.py # Subagent delegation
|
||||
│ ├── mcp_tool.py # MCP client (~1050 lines)
|
||||
│ └── environments/ # Terminal backends (local, docker, ssh, modal, daytona, singularity)
|
||||
├── gateway/ # Messaging gateway — run.py + session.py + platforms/
|
||||
│ ├── platforms/ # Adapter per platform (telegram, discord, slack, whatsapp,
|
||||
│ │ # homeassistant, signal, matrix, mattermost, email, sms,
|
||||
│ │ # dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
|
||||
│ │ # webhook, api_server, ...). See ADDING_A_PLATFORM.md.
|
||||
│ └── builtin_hooks/ # Always-registered gateway hooks (boot-md, ...)
|
||||
├── plugins/ # Plugin system (see "Plugins" section below)
|
||||
│ ├── memory/ # Memory-provider plugins (honcho, mem0, supermemory, ...)
|
||||
│ ├── context_engine/ # Context-engine plugins
|
||||
│ └── <others>/ # Dashboard, image-gen, disk-cleanup, examples, ...
|
||||
├── optional-skills/ # Heavier/niche skills shipped but NOT active by default
|
||||
├── skills/ # Built-in skills bundled with the repo
|
||||
├── gateway/ # Messaging platform gateway
|
||||
│ ├── run.py # Main loop, slash commands, message dispatch
|
||||
│ ├── session.py # SessionStore — conversation persistence
|
||||
│ └── platforms/ # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
|
||||
├── ui-tui/ # Ink (React) terminal UI — `hermes --tui`
|
||||
│ └── src/ # entry.tsx, app.tsx, gatewayClient.ts + app/components/hooks/lib
|
||||
│ ├── src/entry.tsx # TTY gate + render()
|
||||
│ ├── src/app.tsx # Main state machine and UI
|
||||
│ ├── src/gatewayClient.ts # Child process + JSON-RPC bridge
|
||||
│ ├── src/app/ # Decomposed app logic (event handler, slash handler, stores, hooks)
|
||||
│ ├── src/components/ # Ink components (branding, markdown, prompts, pickers, etc.)
|
||||
│ ├── src/hooks/ # useCompletion, useInputHistory, useQueue, useVirtualHistory
|
||||
│ └── src/lib/ # Pure helpers (history, osc52, text, rpc, messages)
|
||||
├── tui_gateway/ # Python JSON-RPC backend for the TUI
|
||||
│ ├── entry.py # stdio entrypoint
|
||||
│ ├── server.py # RPC handlers and session logic
|
||||
│ ├── render.py # Optional rich/ANSI bridge
|
||||
│ └── slash_worker.py # Persistent HermesCLI subprocess for slash commands
|
||||
├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration)
|
||||
├── cron/ # Scheduler — jobs.py, scheduler.py
|
||||
├── cron/ # Scheduler (jobs.py, scheduler.py)
|
||||
├── environments/ # RL training environments (Atropos)
|
||||
├── scripts/ # run_tests.sh, release.py, auxiliary scripts
|
||||
├── website/ # Docusaurus docs site
|
||||
└── tests/ # Pytest suite (~15k tests across ~700 files as of Apr 2026)
|
||||
├── tests/ # Pytest suite (~3000 tests)
|
||||
└── batch_runner.py # Parallel batch processing
|
||||
```
|
||||
|
||||
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
|
||||
**Logs:** `~/.hermes/logs/` — `agent.log` (INFO+), `errors.log` (WARNING+),
|
||||
`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
|
||||
Browse with `hermes logs [--follow] [--level ...] [--session ...]`.
|
||||
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)
|
||||
|
||||
## File Dependency Chain
|
||||
|
||||
@@ -77,30 +94,20 @@ run_agent.py, cli.py, batch_runner.py, environments/
|
||||
|
||||
## AIAgent Class (run_agent.py)
|
||||
|
||||
The real `AIAgent.__init__` takes ~60 parameters (credentials, routing, callbacks,
|
||||
session context, budget, credential pool, etc.). The signature below is the
|
||||
minimum subset you'll usually touch — read `run_agent.py` for the full list.
|
||||
|
||||
```python
|
||||
class AIAgent:
|
||||
def __init__(self,
|
||||
base_url: str = None,
|
||||
api_key: str = None,
|
||||
provider: str = None,
|
||||
api_mode: str = None, # "chat_completions" | "codex_responses" | ...
|
||||
model: str = "", # empty → resolved from config/provider later
|
||||
max_iterations: int = 90, # tool-calling iterations (shared with subagents)
|
||||
model: str = "anthropic/claude-opus-4.6",
|
||||
max_iterations: int = 90,
|
||||
enabled_toolsets: list = None,
|
||||
disabled_toolsets: list = None,
|
||||
quiet_mode: bool = False,
|
||||
save_trajectories: bool = False,
|
||||
platform: str = None, # "cli", "telegram", etc.
|
||||
platform: str = None, # "cli", "telegram", etc.
|
||||
session_id: str = None,
|
||||
skip_context_files: bool = False,
|
||||
skip_memory: bool = False,
|
||||
credential_pool=None,
|
||||
# ... plus callbacks, thread/user/chat IDs, iteration_budget, fallback_model,
|
||||
# checkpoints config, prefill_messages, service_tier, reasoning_config, etc.
|
||||
# ... plus provider, api_mode, callbacks, routing params
|
||||
): ...
|
||||
|
||||
def chat(self, message: str) -> str:
|
||||
@@ -113,13 +120,10 @@ class AIAgent:
|
||||
|
||||
### Agent Loop
|
||||
|
||||
The core loop is inside `run_conversation()` — entirely synchronous, with
|
||||
interrupt checks, budget tracking, and a one-turn grace call:
|
||||
The core loop is inside `run_conversation()` — entirely synchronous:
|
||||
|
||||
```python
|
||||
while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) \
|
||||
or self._budget_grace_call:
|
||||
if self._interrupt_requested: break
|
||||
while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
|
||||
response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas)
|
||||
if response.tool_calls:
|
||||
for tool_call in response.tool_calls:
|
||||
@@ -130,8 +134,7 @@ while (api_call_count < self.max_iterations and self.iteration_budget.remaining
|
||||
return response.content
|
||||
```
|
||||
|
||||
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`.
|
||||
Reasoning content is stored in `assistant_msg["reasoning"]`.
|
||||
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`.
|
||||
|
||||
---
|
||||
|
||||
@@ -240,19 +243,6 @@ npm run fmt # prettier
|
||||
npm test # vitest
|
||||
```
|
||||
|
||||
### TUI in the Dashboard (`hermes dashboard` → `/chat`)
|
||||
|
||||
The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
|
||||
|
||||
- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
|
||||
- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
|
||||
- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
|
||||
- Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
|
||||
|
||||
**Do not re-implement the primary chat experience in React.** The main transcript, composer/input flow (including slash-command behavior), and PTY-backed terminal belong to the embedded `hermes --tui` — anything new you add to Ink shows up in the dashboard automatically. If you find yourself rebuilding the transcript or composer for the dashboard, stop and extend Ink instead.
|
||||
|
||||
**Structured React UI around the TUI is allowed when it is not a second chat surface.** Sidebar widgets, inspectors, summaries, status panels, and similar supporting views (e.g. `ChatSidebar`, `ModelPickerDialog`, `ToolCall`) are fine when they complement the embedded TUI rather than replacing the transcript / composer / terminal. Keep their state independent of the PTY child's session and surface their failures non-destructively so the terminal pane keeps working unimpaired.
|
||||
|
||||
---
|
||||
|
||||
## Adding New Tools
|
||||
@@ -290,7 +280,7 @@ The registry handles schema collection, dispatch, availability checking, and err
|
||||
|
||||
**State files**: If a tool stores persistent state (caches, logs, checkpoints), use `get_hermes_home()` for the base directory — never `Path.home() / ".hermes"`. This ensures each profile gets its own state.
|
||||
|
||||
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `tools/todo_tool.py` for the pattern.
|
||||
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern.
|
||||
|
||||
---
|
||||
|
||||
@@ -298,13 +288,9 @@ The registry handles schema collection, dispatch, availability checking, and err
|
||||
|
||||
### config.yaml options:
|
||||
1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
|
||||
2. Bump `_config_version` (check the current value at the top of `DEFAULT_CONFIG`)
|
||||
ONLY if you need to actively migrate/transform existing user config
|
||||
(renaming keys, changing structure). Adding a new key to an existing
|
||||
section is handled automatically by the deep-merge and does NOT require
|
||||
a version bump.
|
||||
2. Bump `_config_version` (currently 5) to trigger migration for existing users
|
||||
|
||||
### .env variables (SECRETS ONLY — API keys, tokens, passwords):
|
||||
### .env variables:
|
||||
1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
|
||||
```python
|
||||
"NEW_API_KEY": {
|
||||
@@ -316,29 +302,13 @@ The registry handles schema collection, dispatch, availability checking, and err
|
||||
},
|
||||
```
|
||||
|
||||
Non-secret settings (timeouts, thresholds, feature flags, paths, display
|
||||
preferences) belong in `config.yaml`, not `.env`. If internal code needs an
|
||||
env var mirror for backward compatibility, bridge it from `config.yaml` to
|
||||
the env var in code (see `gateway_timeout`, `terminal.cwd` → `TERMINAL_CWD`).
|
||||
|
||||
### Config loaders (three paths — know which one you're in):
|
||||
### Config loaders (two separate systems):
|
||||
|
||||
| Loader | Used by | Location |
|
||||
|--------|---------|----------|
|
||||
| `load_cli_config()` | CLI mode | `cli.py` — merges CLI-specific defaults + user YAML |
|
||||
| `load_config()` | `hermes tools`, `hermes setup`, most CLI subcommands | `hermes_cli/config.py` — merges `DEFAULT_CONFIG` + user YAML |
|
||||
| Direct YAML load | Gateway runtime | `gateway/run.py` + `gateway/config.py` — reads user YAML raw |
|
||||
|
||||
If you add a new key and the CLI sees it but the gateway doesn't (or vice
|
||||
versa), you're on the wrong loader. Check `DEFAULT_CONFIG` coverage.
|
||||
|
||||
### Working directory:
|
||||
- **CLI** — uses the process's current directory (`os.getcwd()`).
|
||||
- **Messaging** — uses `terminal.cwd` from `config.yaml`. The gateway bridges this
|
||||
to the `TERMINAL_CWD` env var for child tools. **`MESSAGING_CWD` has been
|
||||
removed** — the config loader prints a deprecation warning if it's set in
|
||||
`.env`. Same for `TERMINAL_CWD` in `.env`; the canonical setting is
|
||||
`terminal.cwd` in `config.yaml`.
|
||||
| `load_cli_config()` | CLI mode | `cli.py` |
|
||||
| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` |
|
||||
| Direct YAML load | Gateway | `gateway/run.py` |
|
||||
|
||||
---
|
||||
|
||||
@@ -431,95 +401,7 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml.
|
||||
|
||||
---
|
||||
|
||||
## Plugins
|
||||
|
||||
Hermes has two plugin surfaces. Both live under `plugins/` in the repo so
|
||||
repo-shipped plugins can be discovered alongside user-installed ones in
|
||||
`~/.hermes/plugins/` and pip-installed entry points.
|
||||
|
||||
### General plugins (`hermes_cli/plugins.py` + `plugins/<name>/`)
|
||||
|
||||
`PluginManager` discovers plugins from `~/.hermes/plugins/`, `./.hermes/plugins/`,
|
||||
and pip entry points. Each plugin exposes a `register(ctx)` function that
|
||||
can:
|
||||
|
||||
- Register Python-callback lifecycle hooks:
|
||||
`pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`,
|
||||
`on_session_start`, `on_session_end`
|
||||
- Register new tools via `ctx.register_tool(...)`
|
||||
- Register CLI subcommands via `ctx.register_cli_command(...)` — the
|
||||
plugin's argparse tree is wired into `hermes` at startup so
|
||||
`hermes <pluginname> <subcmd>` works with no change to `main.py`
|
||||
|
||||
Hooks are invoked from `model_tools.py` (pre/post tool) and `run_agent.py`
|
||||
(lifecycle). **Discovery timing pitfall:** `discover_plugins()` only runs
|
||||
as a side effect of importing `model_tools.py`. Code paths that read plugin
|
||||
state without importing `model_tools.py` first must call `discover_plugins()`
|
||||
explicitly (it's idempotent).
|
||||
|
||||
### Memory-provider plugins (`plugins/memory/<name>/`)
|
||||
|
||||
Separate discovery system for pluggable memory backends. Current built-in
|
||||
providers include **honcho, mem0, supermemory, byterover, hindsight,
|
||||
holographic, openviking, retaindb**.
|
||||
|
||||
Each provider implements the `MemoryProvider` ABC (see `agent/memory_provider.py`)
|
||||
and is orchestrated by `agent/memory_manager.py`. Lifecycle hooks include
|
||||
`sync_turn(turn_messages)`, `prefetch(query)`, `shutdown()`, and optional
|
||||
`post_setup(hermes_home, config)` for setup-wizard integration.
|
||||
|
||||
**CLI commands via `plugins/memory/<name>/cli.py`:** if a memory plugin
|
||||
defines `register_cli(subparser)`, `discover_plugin_cli_commands()` finds
|
||||
it at argparse setup time and wires it into `hermes <plugin>`. The
|
||||
framework only exposes CLI commands for the **currently active** memory
|
||||
provider (read from `memory.provider` in config.yaml), so disabled
|
||||
providers don't clutter `hermes --help`.
|
||||
|
||||
**Rule (Teknium, May 2026):** plugins MUST NOT modify core files
|
||||
(`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.).
|
||||
If a plugin needs a capability the framework doesn't expose, expand the
|
||||
generic plugin surface (new hook, new ctx method) — never hardcode
|
||||
plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
|
||||
honcho argparse from `main.py` for exactly this reason.
|
||||
|
||||
### Dashboard / context-engine / image-gen plugin directories
|
||||
|
||||
`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
|
||||
etc. follow the same pattern (ABC + orchestrator + per-plugin directory).
|
||||
Context engines plug into `agent/context_engine.py`; image-gen providers
|
||||
into `agent/image_gen_provider.py`.
|
||||
|
||||
---
|
||||
|
||||
## Skills
|
||||
|
||||
Two parallel surfaces:
|
||||
|
||||
- **`skills/`** — built-in skills shipped and loadable by default.
|
||||
Organized by category directories (e.g. `skills/github/`, `skills/mlops/`).
|
||||
- **`optional-skills/`** — heavier or niche skills shipped with the repo but
|
||||
NOT active by default. Installed explicitly via
|
||||
`hermes skills install official/<category>/<skill>`. Adapter lives in
|
||||
`tools/skills_hub.py` (`OptionalSkillSource`). Categories include
|
||||
`autonomous-ai-agents`, `blockchain`, `communication`, `creative`,
|
||||
`devops`, `email`, `health`, `mcp`, `migration`, `mlops`, `productivity`,
|
||||
`research`, `security`, `web-development`.
|
||||
|
||||
When reviewing skill PRs, check which directory they target — heavy-dep or
|
||||
niche skills belong in `optional-skills/`.
|
||||
|
||||
### SKILL.md frontmatter
|
||||
|
||||
Standard fields: `name`, `description`, `version`, `platforms`
|
||||
(OS-gating list: `[macos]`, `[linux, macos]`, ...),
|
||||
`metadata.hermes.tags`, `metadata.hermes.category`,
|
||||
`metadata.hermes.config` (config.yaml settings the skill needs — stored
|
||||
under `skills.config.<key>`, prompted during setup, injected at load time).
|
||||
|
||||
---
|
||||
|
||||
## Important Policies
|
||||
|
||||
### Prompt Caching Must Not Break
|
||||
|
||||
Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:**
|
||||
@@ -529,10 +411,9 @@ Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT i
|
||||
|
||||
Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression.
|
||||
|
||||
Slash commands that mutate system-prompt state (skills, tools, memory, etc.)
|
||||
must be **cache-aware**: default to deferred invalidation (change takes
|
||||
effect next session), with an opt-in `--now` flag for immediate
|
||||
invalidation. See `/skills install --now` for the canonical pattern.
|
||||
### Working Directory Behavior
|
||||
- **CLI**: Uses current directory (`.` → `os.getcwd()`)
|
||||
- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory)
|
||||
|
||||
### Background Process Notifications (Gateway)
|
||||
|
||||
@@ -554,7 +435,7 @@ Hermes supports **profiles** — multiple fully isolated instances, each with it
|
||||
`HERMES_HOME` directory (config, API keys, memory, sessions, skills, gateway, etc.).
|
||||
|
||||
The core mechanism: `_apply_profile_override()` in `hermes_cli/main.py` sets
|
||||
`HERMES_HOME` before any module imports. All `get_hermes_home()` references
|
||||
`HERMES_HOME` before any module imports. All 119+ references to `get_hermes_home()`
|
||||
automatically scope to the active profile.
|
||||
|
||||
### Rules for profile-safe code
|
||||
@@ -611,12 +492,8 @@ Use `get_hermes_home()` from `hermes_constants` for code paths. Use `display_her
|
||||
for user-facing print/log messages. Hardcoding `~/.hermes` breaks profiles — each profile
|
||||
has its own `HERMES_HOME` directory. This was the source of 5 bugs fixed in PR #3575.
|
||||
|
||||
### DO NOT introduce new `simple_term_menu` usage
|
||||
Existing call sites in `hermes_cli/main.py` remain for legacy fallback only;
|
||||
the preferred UI is curses (stdlib) because `simple_term_menu` has
|
||||
ghost-duplication rendering bugs in tmux/iTerm2 with arrow keys. New
|
||||
interactive menus must use `hermes_cli/curses_ui.py` — see
|
||||
`hermes_cli/tools_config.py` for the canonical pattern.
|
||||
### DO NOT use `simple_term_menu` for interactive menus
|
||||
Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern.
|
||||
|
||||
### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
|
||||
Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`.
|
||||
@@ -627,30 +504,6 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p
|
||||
### DO NOT hardcode cross-tool references in schema descriptions
|
||||
Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.
|
||||
|
||||
### The gateway has TWO message guards — both must bypass approval/control commands
|
||||
When an agent is running, messages pass through two sequential guards:
|
||||
(1) **base adapter** (`gateway/platforms/base.py`) queues messages in
|
||||
`_pending_messages` when `session_key in self._active_sessions`, and
|
||||
(2) **gateway runner** (`gateway/run.py`) intercepts `/stop`, `/new`,
|
||||
`/queue`, `/status`, `/approve`, `/deny` before they reach
|
||||
`running_agent.interrupt()`. Any new command that must reach the runner
|
||||
while the agent is blocked (e.g. approval prompts) MUST bypass BOTH
|
||||
guards and be dispatched inline, not via `_process_message_background()`
|
||||
(which races session lifecycle).
|
||||
|
||||
### Squash merges from stale branches silently revert recent fixes
|
||||
Before squash-merging a PR, ensure the branch is up to date with `main`
|
||||
(`git fetch origin main && git reset --hard origin/main` in the worktree,
|
||||
then re-apply the PR's commits). A stale branch's version of an unrelated
|
||||
file will silently overwrite recent fixes on main when squashed. Verify
|
||||
with `git diff HEAD~1..HEAD` after merging — unexpected deletions are a
|
||||
red flag.
|
||||
|
||||
### Don't wire in dead code without E2E validation
|
||||
Unused code that was never shipped was dead for a reason. Before wiring an
|
||||
unused module into a live code path, E2E test the real resolution chain
|
||||
with actual imports (not mocks) against a temp `HERMES_HOME`.
|
||||
|
||||
### Tests must not write to `~/.hermes/`
|
||||
The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.
|
||||
|
||||
@@ -706,7 +559,7 @@ If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
|
||||
pytest directly), at minimum activate the venv and pass `-n 4`:
|
||||
|
||||
```bash
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
source venv/bin/activate
|
||||
python -m pytest tests/ -q -n 4
|
||||
```
|
||||
|
||||
|
||||
+6
-6
@@ -9,7 +9,7 @@ Thank you for contributing to Hermes Agent! This guide covers everything you nee
|
||||
We value contributions in this order:
|
||||
|
||||
1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
|
||||
2. **Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. We want Hermes to work everywhere.
|
||||
2. **Cross-platform compatibility** — Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere.
|
||||
3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
|
||||
4. **Performance and robustness** — retry logic, error handling, graceful degradation.
|
||||
5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
|
||||
@@ -55,10 +55,10 @@ If your skill is specialized, community-contributed, or niche, it's better suite
|
||||
|
||||
| Requirement | Notes |
|
||||
|-------------|-------|
|
||||
| **Git** | With `--recurse-submodules` support, and the `git-lfs` extension installed |
|
||||
| **Git** | With `--recurse-submodules` support |
|
||||
| **Python 3.11+** | uv will install it if missing |
|
||||
| **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
|
||||
| **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |
|
||||
| **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge |
|
||||
|
||||
### Clone and install
|
||||
|
||||
@@ -88,7 +88,7 @@ cp cli-config.yaml.example ~/.hermes/config.yaml
|
||||
touch ~/.hermes/.env
|
||||
|
||||
# Add at minimum an LLM provider key:
|
||||
echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
|
||||
echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
|
||||
```
|
||||
|
||||
### Run
|
||||
@@ -515,7 +515,7 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl
|
||||
|
||||
## Cross-Platform Compatibility
|
||||
|
||||
Hermes runs on Linux, macOS, and WSL2 on Windows. When writing code that touches the OS:
|
||||
Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS:
|
||||
|
||||
### Critical rules
|
||||
|
||||
@@ -597,7 +597,7 @@ refactor/description # Code restructuring
|
||||
|
||||
1. **Run tests**: `pytest tests/ -v`
|
||||
2. **Test manually**: Run `hermes` and exercise the code path you changed
|
||||
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2
|
||||
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS
|
||||
4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.
|
||||
|
||||
### PR description
|
||||
|
||||
+4
-13
@@ -10,11 +10,9 @@ ENV PYTHONUNBUFFERED=1
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
|
||||
|
||||
# Install system dependencies in one layer, clear APT cache
|
||||
# tini reaps orphaned zombie processes (MCP stdio subprocesses, git, bun, etc.)
|
||||
# that would otherwise accumulate when hermes runs as PID 1. See #15012.
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
|
||||
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
|
||||
@@ -43,21 +41,14 @@ COPY --chown=hermes:hermes . .
|
||||
# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
|
||||
RUN cd web && npm run build
|
||||
|
||||
# ---------- Permissions ----------
|
||||
# Make install dir world-readable so any HERMES_UID can read it at runtime.
|
||||
# The venv needs to be traversable too.
|
||||
USER root
|
||||
RUN chmod -R a+rX /opt/hermes
|
||||
# Start as root so the entrypoint can usermod/groupmod + gosu.
|
||||
# If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000).
|
||||
|
||||
# ---------- Python virtualenv ----------
|
||||
RUN chown hermes:hermes /opt/hermes
|
||||
USER hermes
|
||||
RUN uv venv && \
|
||||
uv pip install --no-cache-dir -e ".[all]"
|
||||
|
||||
# ---------- Runtime ----------
|
||||
ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
|
||||
ENV HERMES_HOME=/opt/data
|
||||
ENV PATH="/opt/data/.local/bin:${PATH}"
|
||||
VOLUME [ "/opt/data" ]
|
||||
ENTRYPOINT [ "/usr/bin/tini", "-g", "--", "/opt/hermes/docker/entrypoint.sh" ]
|
||||
ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
|
||||
|
||||
@@ -76,7 +76,7 @@ Hermes has two entry points: start the terminal UI with `hermes`, or run the gat
|
||||
| Set a personality | `/personality [name]` | `/personality [name]` |
|
||||
| Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` |
|
||||
| Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` |
|
||||
| Browse skills | `/skills` or `/<skill-name>` | `/<skill-name>` |
|
||||
| Browse skills | `/skills` or `/<skill-name>` | `/skills` or `/<skill-name>` |
|
||||
| Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message |
|
||||
| Platform-specific status | `/platforms` | `/status`, `/sethome` |
|
||||
|
||||
@@ -157,10 +157,14 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv venv venv --python 3.11
|
||||
source venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
scripts/run_tests.sh
|
||||
python -m pytest tests/ -q
|
||||
```
|
||||
|
||||
> **RL Training (optional):** The RL/Atropos integration (`environments/`) ships via the `atroposlib` and `tinker` dependencies pulled in by `.[all,dev]` — no submodule setup required.
|
||||
> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
|
||||
> ```bash
|
||||
> git submodule update --init tinker-atropos
|
||||
> uv pip install -e "./tinker-atropos"
|
||||
> ```
|
||||
|
||||
---
|
||||
|
||||
@@ -169,6 +173,7 @@ scripts/run_tests.sh
|
||||
- 💬 [Discord](https://discord.gg/NousResearch)
|
||||
- 📚 [Skills Hub](https://agentskills.io)
|
||||
- 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
|
||||
- 💡 [Discussions](https://github.com/NousResearch/hermes-agent/discussions)
|
||||
- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.
|
||||
|
||||
---
|
||||
|
||||
@@ -1,453 +0,0 @@
|
||||
# Hermes Agent v0.11.0 (v2026.4.23)
|
||||
|
||||
**Release Date:** April 23, 2026
|
||||
**Since v0.9.0:** 1,556 commits · 761 merged PRs · 1,314 files changed · 224,174 insertions · 29 community contributors (290 including co-authors)
|
||||
|
||||
> The Interface release — a full React/Ink rewrite of the interactive CLI, a pluggable transport architecture underneath every provider, native AWS Bedrock support, five new inference paths, a 17th messaging platform (QQBot), a dramatically expanded plugin surface, and GPT-5.5 via Codex OAuth.
|
||||
|
||||
This release also folds in all the highlights deferred from v0.10.0 (which shipped only the Nous Tool Gateway) — so it covers roughly two weeks of work across the whole stack.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **New Ink-based TUI** — `hermes --tui` is now a full React/Ink rewrite of the interactive CLI, with a Python JSON-RPC backend (`tui_gateway`). Sticky composer, live streaming with OSC-52 clipboard support, stable picker keys, status bar with per-turn stopwatch and git branch, `/clear` confirm, light-theme preset, and a subagent spawn observability overlay. ~310 commits to `ui-tui/` + `tui_gateway/`. (@OutThisLife + Teknium)
|
||||
|
||||
- **Transport ABC + Native AWS Bedrock** — Format conversion and HTTP transport were extracted from `run_agent.py` into a pluggable `agent/transports/` layer. `AnthropicTransport`, `ChatCompletionsTransport`, `ResponsesApiTransport`, and `BedrockTransport` each own their own format conversion and API shape. Native AWS Bedrock support via the Converse API ships on top of the new abstraction. ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549), [#13347](https://github.com/NousResearch/hermes-agent/pull/13347), [#13366](https://github.com/NousResearch/hermes-agent/pull/13366), [#13430](https://github.com/NousResearch/hermes-agent/pull/13430), [#13805](https://github.com/NousResearch/hermes-agent/pull/13805), [#13814](https://github.com/NousResearch/hermes-agent/pull/13814) — @kshitijk4poor + Teknium)
|
||||
|
||||
- **Five new inference paths** — Native NVIDIA NIM ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774)), Arcee AI ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276)), Step Plan ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893)), Google Gemini CLI OAuth ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270)), and Vercel ai-gateway with pricing + dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223) — @jerilynzheng). Plus Gemini routed through the native AI Studio API for better performance ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674)).
|
||||
|
||||
- **GPT-5.5 over Codex OAuth** — OpenAI's new GPT-5.5 reasoning model is now available through your ChatGPT Codex OAuth, with live model discovery wired into the model picker so new OpenAI releases show up without catalog updates. ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
|
||||
|
||||
- **QQBot — 17th supported platform** — Native QQBot adapter via QQ Official API v2, with QR scan-to-configure setup wizard, streaming cursor, emoji reactions, and DM/group policy gating that matches WeCom/Weixin parity. ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
|
||||
|
||||
- **Plugin surface expanded** — Plugins can now register slash commands (`register_command`), dispatch tools directly (`dispatch_tool`), block tool execution from hooks (`pre_tool_call` can veto), rewrite tool results (`transform_tool_result`), transform terminal output (`transform_terminal_output`), ship image_gen backends, and add custom dashboard tabs. The bundled disk-cleanup plugin is opt-in by default as a reference implementation. ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377), [#10626](https://github.com/NousResearch/hermes-agent/pull/10626), [#10763](https://github.com/NousResearch/hermes-agent/pull/10763), [#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#12929](https://github.com/NousResearch/hermes-agent/pull/12929), [#12944](https://github.com/NousResearch/hermes-agent/pull/12944), [#12972](https://github.com/NousResearch/hermes-agent/pull/12972), [#13799](https://github.com/NousResearch/hermes-agent/pull/13799), [#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
|
||||
|
||||
- **`/steer` — mid-run agent nudges** — `/steer <prompt>` injects a note that the running agent sees after its next tool call, without interrupting the turn or breaking prompt cache. For when you want to course-correct an agent in-flight. ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
|
||||
|
||||
- **Shell hooks** — Wire any shell script as a Hermes lifecycle hook (pre_tool_call, post_tool_call, on_session_start, etc.) without writing a Python plugin. ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
|
||||
|
||||
- **Webhook direct-delivery mode** — Webhook subscriptions can now forward payloads straight to a platform chat without going through the agent — zero-LLM push notifications for alerting, uptime checks, and event streams. ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
|
||||
|
||||
- **Smarter delegation** — Subagents now have an explicit `orchestrator` role that can spawn their own workers, with configurable `max_spawn_depth` (default flat). Concurrent sibling subagents share filesystem state through a file-coordination layer so they don't clobber each other's edits. ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691), [#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
|
||||
- **Auxiliary models — configurable UI + main-model-first** — `hermes model` has a dedicated "Configure auxiliary models" screen for per-task overrides (compression, vision, session_search, title_generation). `auto` routing now defaults to the main model for side tasks across all users (previously aggregator users were silently routed to a cheap provider-side default). ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891), [#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
|
||||
|
||||
- **Dashboard plugin system + live theme switching** — The web dashboard is now extensible. Third-party plugins can add custom tabs, widgets, and views without forking. Paired with a live-switching theme system — themes now control colors, fonts, layout, and density — so users can hot-swap the dashboard look without a reload. Same theming discipline the CLI has, now on the web. ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#10687](https://github.com/NousResearch/hermes-agent/pull/10687), [#14725](https://github.com/NousResearch/hermes-agent/pull/14725))
|
||||
|
||||
- **Dashboard polish** — i18n (English + Chinese), react-router sidebar layout, mobile-responsive, Vercel deployment, real per-session API call tracking, and one-click update + gateway restart buttons. ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), [#9370](https://github.com/NousResearch/hermes-agent/pull/9370), [#9453](https://github.com/NousResearch/hermes-agent/pull/9453), [#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#13526](https://github.com/NousResearch/hermes-agent/pull/13526), [#14004](https://github.com/NousResearch/hermes-agent/pull/14004) — @austinpickett + @DeployFaith + Teknium)
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Core Agent & Architecture
|
||||
|
||||
### Transport Layer (NEW)
|
||||
- **Transport ABC** abstracts format conversion and HTTP transport from `run_agent.py` into `agent/transports/` ([#13347](https://github.com/NousResearch/hermes-agent/pull/13347))
|
||||
- **AnthropicTransport** — Anthropic Messages API path ([#13366](https://github.com/NousResearch/hermes-agent/pull/13366), @kshitijk4poor)
|
||||
- **ChatCompletionsTransport** — default path for OpenAI-compatible providers ([#13805](https://github.com/NousResearch/hermes-agent/pull/13805))
|
||||
- **ResponsesApiTransport** — OpenAI Responses API + Codex build_kwargs wiring ([#13430](https://github.com/NousResearch/hermes-agent/pull/13430), @kshitijk4poor)
|
||||
- **BedrockTransport** — AWS Bedrock Converse API transport ([#13814](https://github.com/NousResearch/hermes-agent/pull/13814))
|
||||
|
||||
### Provider & Model Support
|
||||
- **Native AWS Bedrock provider** via Converse API ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549))
|
||||
- **NVIDIA NIM native provider** (salvage of #11703) ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774))
|
||||
- **Arcee AI direct provider** ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276))
|
||||
- **Step Plan provider** (salvage #6005) ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893), @kshitijk4poor)
|
||||
- **Google Gemini CLI OAuth** inference provider ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270))
|
||||
- **Vercel ai-gateway** with pricing, attribution, and dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223), @jerilynzheng)
|
||||
- **GPT-5.5 over Codex OAuth** with live model discovery in the picker ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
|
||||
- **Gemini routed through native AI Studio API** ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674))
|
||||
- **xAI Grok upgraded to Responses API** ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
|
||||
- **Ollama improvements** — Cloud provider support, GLM continuation, `think=false` control, surrogate sanitization, `/v1` hint ([#10782](https://github.com/NousResearch/hermes-agent/pull/10782))
|
||||
- **Kimi K2.6** across OpenRouter, Nous Portal, native Kimi, and HuggingFace ([#13148](https://github.com/NousResearch/hermes-agent/pull/13148), [#13152](https://github.com/NousResearch/hermes-agent/pull/13152), [#13169](https://github.com/NousResearch/hermes-agent/pull/13169))
|
||||
- **Kimi K2.5** promoted to first position in all model suggestion lists ([#11745](https://github.com/NousResearch/hermes-agent/pull/11745), @kshitijk4poor)
|
||||
- **Xiaomi MiMo v2.5-pro + v2.5** on OpenRouter, Nous Portal, and native ([#14184](https://github.com/NousResearch/hermes-agent/pull/14184), [#14635](https://github.com/NousResearch/hermes-agent/pull/14635), @kshitijk4poor)
|
||||
- **GLM-5V-Turbo** for coding plan ([#9907](https://github.com/NousResearch/hermes-agent/pull/9907))
|
||||
- **Claude Opus 4.7** in Nous Portal catalog ([#11398](https://github.com/NousResearch/hermes-agent/pull/11398))
|
||||
- **OpenRouter elephant-alpha** in curated lists ([#9378](https://github.com/NousResearch/hermes-agent/pull/9378))
|
||||
- **OpenCode-Go** — Kimi K2.6 and Qwen3.5/3.6 Plus in curated catalog ([#13429](https://github.com/NousResearch/hermes-agent/pull/13429))
|
||||
- **minimax/minimax-m2.5:free** in OpenRouter catalog ([#13836](https://github.com/NousResearch/hermes-agent/pull/13836))
|
||||
- **`/model` merges models.dev entries** for lesser-loved providers ([#14221](https://github.com/NousResearch/hermes-agent/pull/14221))
|
||||
- **Per-provider + per-model `request_timeout_seconds`** config ([#12652](https://github.com/NousResearch/hermes-agent/pull/12652))
|
||||
- **Configurable API retry count** via `agent.api_max_retries` ([#14730](https://github.com/NousResearch/hermes-agent/pull/14730))
|
||||
- **ctx_size context length key** for Lemonade server (salvage #8536) ([#14215](https://github.com/NousResearch/hermes-agent/pull/14215))
|
||||
- **Custom provider display name prompt** ([#9420](https://github.com/NousResearch/hermes-agent/pull/9420))
|
||||
- **Recommendation badges** on tool provider selection ([#9929](https://github.com/NousResearch/hermes-agent/pull/9929))
|
||||
- Fix: correct GPT-5 family context lengths in fallback defaults ([#9309](https://github.com/NousResearch/hermes-agent/pull/9309))
|
||||
- Fix: clamp `minimal` reasoning effort to `low` on Responses API ([#9429](https://github.com/NousResearch/hermes-agent/pull/9429))
|
||||
- Fix: strip reasoning item IDs from Responses API input when `store=False` ([#10217](https://github.com/NousResearch/hermes-agent/pull/10217))
|
||||
- Fix: OpenViking correct account default + commit session on `/new` and compress ([#10463](https://github.com/NousResearch/hermes-agent/pull/10463))
|
||||
- Fix: Kimi `/coding` thinking block survival + empty reasoning_content + block ordering (multiple PRs)
|
||||
- Fix: don't send Anthropic thinking to api.kimi.com/coding ([#13826](https://github.com/NousResearch/hermes-agent/pull/13826))
|
||||
- Fix: send `max_tokens`, `reasoning_effort`, and `thinking` for Kimi/Moonshot
|
||||
- Fix: stream reasoning content through OpenAI-compatible providers that emit it
|
||||
|
||||
### Agent Loop & Conversation
|
||||
- **`/steer <prompt>`** — mid-run agent nudges after next tool call ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
|
||||
- **Orchestrator role + configurable spawn depth** for `delegate_task` (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
|
||||
- **Cross-agent file state coordination** for concurrent subagents ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
- **Compressor smart collapse, dedup, anti-thrashing**, template upgrade, hardening ([#10088](https://github.com/NousResearch/hermes-agent/pull/10088))
|
||||
- **Compression summaries respect the conversation's language** ([#12556](https://github.com/NousResearch/hermes-agent/pull/12556))
|
||||
- **Compression model falls back to main model** on permanent 503/404 ([#10093](https://github.com/NousResearch/hermes-agent/pull/10093))
|
||||
- **Auto-continue interrupted agent work** after gateway restart ([#9934](https://github.com/NousResearch/hermes-agent/pull/9934))
|
||||
- **Activity heartbeats** prevent false gateway inactivity timeouts ([#10501](https://github.com/NousResearch/hermes-agent/pull/10501))
|
||||
- **Auxiliary models UI** — dedicated screen for per-task overrides ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891))
|
||||
- **Auxiliary auto routing defaults to main model** for all users ([#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
|
||||
- **PLATFORM_HINTS for Matrix, Mattermost, Feishu** ([#14428](https://github.com/NousResearch/hermes-agent/pull/14428), @alt-glitch)
|
||||
- Fix: reset retry counters after compression; stop poisoning conversation history ([#10055](https://github.com/NousResearch/hermes-agent/pull/10055))
|
||||
- Fix: break compression-exhaustion infinite loop and auto-reset session ([#10063](https://github.com/NousResearch/hermes-agent/pull/10063))
|
||||
- Fix: stale agent timeout, uv venv detection, empty response after tools ([#10065](https://github.com/NousResearch/hermes-agent/pull/10065))
|
||||
- Fix: prevent premature loop exit when weak models return empty after substantive tool calls ([#10472](https://github.com/NousResearch/hermes-agent/pull/10472))
|
||||
- Fix: preserve pre-start terminal interrupts ([#10504](https://github.com/NousResearch/hermes-agent/pull/10504))
|
||||
- Fix: improve interrupt responsiveness during concurrent tool execution ([#10935](https://github.com/NousResearch/hermes-agent/pull/10935))
|
||||
- Fix: word-wrap spinner, interruptable agent join, and delegate_task interrupt ([#10940](https://github.com/NousResearch/hermes-agent/pull/10940))
|
||||
- Fix: `/stop` no longer resets the session ([#9224](https://github.com/NousResearch/hermes-agent/pull/9224))
|
||||
- Fix: honor interrupts during MCP tool waits ([#9382](https://github.com/NousResearch/hermes-agent/pull/9382), @helix4u)
|
||||
- Fix: break stuck session resume loops after repeated restarts ([#9941](https://github.com/NousResearch/hermes-agent/pull/9941))
|
||||
- Fix: empty response nudge crash + placeholder leak to cron targets ([#11021](https://github.com/NousResearch/hermes-agent/pull/11021))
|
||||
- Fix: streaming cursor sanitization to prevent message truncation (multiple PRs)
|
||||
- Fix: resolve `context_length` for plugin context engines ([#9238](https://github.com/NousResearch/hermes-agent/pull/9238))
|
||||
|
||||
### Session & Memory
|
||||
- **Auto-prune old sessions + VACUUM state.db** at startup ([#13861](https://github.com/NousResearch/hermes-agent/pull/13861))
|
||||
- **Honcho overhaul** — context injection, 5-tool surface, cost safety, session isolation ([#10619](https://github.com/NousResearch/hermes-agent/pull/10619))
|
||||
- **Hindsight richer session-scoped retain metadata** (salvage of #6290) ([#13987](https://github.com/NousResearch/hermes-agent/pull/13987))
|
||||
- Fix: deduplicate memory provider tools to prevent 400 on strict providers ([#10511](https://github.com/NousResearch/hermes-agent/pull/10511))
|
||||
- Fix: discover user-installed memory providers from `$HERMES_HOME/plugins/` ([#10529](https://github.com/NousResearch/hermes-agent/pull/10529))
|
||||
- Fix: add `on_memory_write` bridge to sequential tool execution path ([#10507](https://github.com/NousResearch/hermes-agent/pull/10507))
|
||||
- Fix: preserve `session_id` across `previous_response_id` chains in `/v1/responses` ([#10059](https://github.com/NousResearch/hermes-agent/pull/10059))
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ New Ink-based TUI
|
||||
|
||||
A full React/Ink rewrite of the interactive CLI — invoked via `hermes --tui` or `HERMES_TUI=1`. Shipped across ~310 commits to `ui-tui/` and `tui_gateway/`.
|
||||
|
||||
### TUI Foundations
|
||||
- New TUI based on Ink + Python JSON-RPC backend
|
||||
- Prettier + ESLint + vitest tooling for `ui-tui/`
|
||||
- Entry split between `src/entry.tsx` (TTY gate) and `src/app.tsx` (state machine)
|
||||
- Persistent `_SlashWorker` subprocess for slash command dispatch
|
||||
|
||||
### UX & Features
|
||||
- **Stable picker keys, /clear confirm, light-theme preset** ([#12312](https://github.com/NousResearch/hermes-agent/pull/12312), @OutThisLife)
|
||||
- **Git branch in status bar** cwd label ([#12305](https://github.com/NousResearch/hermes-agent/pull/12305), @OutThisLife)
|
||||
- **Per-turn elapsed stopwatch in FaceTicker + done-in sys line** ([#13105](https://github.com/NousResearch/hermes-agent/pull/13105), @OutThisLife)
|
||||
- **Subagent spawn observability overlay** ([#14045](https://github.com/NousResearch/hermes-agent/pull/14045), @OutThisLife)
|
||||
- **Per-prompt elapsed stopwatch in status bar** ([#12948](https://github.com/NousResearch/hermes-agent/pull/12948))
|
||||
- Sticky composer that freezes during scroll
|
||||
- OSC-52 clipboard support for copy across SSH sessions
|
||||
- Virtualized history rendering for performance
|
||||
- Slash command autocomplete via `complete.slash` RPC
|
||||
- Path autocomplete via `complete.path` RPC
|
||||
- Dozens of resize/ghosting/sticky-prompt fixes landed through the week
|
||||
|
||||
### Structural Refactors
|
||||
- Decomposed `app.tsx` into `app/event-handler`, `app/slash-handler`, `app/stores`, `app/hooks` ([#14640](https://github.com/NousResearch/hermes-agent/pull/14640) and surrounding)
|
||||
- Component split: `branding.tsx`, `markdown.tsx`, `prompts.tsx`, `sessionPicker.tsx`, `messageLine.tsx`, `thinking.tsx`, `maskedPrompt.tsx`
|
||||
- Hook split: `useCompletion`, `useInputHistory`, `useQueue`, `useVirtualHistory`
|
||||
|
||||
---
|
||||
|
||||
## 📱 Messaging Platforms (Gateway)
|
||||
|
||||
### New Platforms
|
||||
- **QQBot (17th platform)** — QQ Official API v2 adapter with QR setup, streaming, package split ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
|
||||
|
||||
### Telegram
|
||||
- **Dedicated `TELEGRAM_PROXY` env var + config.yaml proxy support** (closes #9414, #6530, #9074, #7786) ([#10681](https://github.com/NousResearch/hermes-agent/pull/10681))
|
||||
- **`ignored_threads` config** for Telegram groups ([#9530](https://github.com/NousResearch/hermes-agent/pull/9530))
|
||||
- **Config option to disable link previews** (closes #8728) ([#10610](https://github.com/NousResearch/hermes-agent/pull/10610))
|
||||
- **Auto-wrap markdown tables** in code blocks ([#11794](https://github.com/NousResearch/hermes-agent/pull/11794))
|
||||
- Fix: prevent duplicate replies when stream task is cancelled ([#9319](https://github.com/NousResearch/hermes-agent/pull/9319))
|
||||
- Fix: prevent streaming cursor (▉) from appearing as standalone messages ([#9538](https://github.com/NousResearch/hermes-agent/pull/9538))
|
||||
- Fix: retry transient tool sends + cold-boot budget ([#10947](https://github.com/NousResearch/hermes-agent/pull/10947))
|
||||
- Fix: Markdown special char escaping in `send_exec_approval`
|
||||
- Fix: parentheses in URLs during MarkdownV2 link conversion
|
||||
- Fix: Unicode dash normalization in model switch (closes iOS smart-punctuation issue)
|
||||
- Many platform hint / streaming / session-key fixes
|
||||
|
||||
### Discord
|
||||
- **Forum channel support** (salvage of #10145 + media + polish) ([#11920](https://github.com/NousResearch/hermes-agent/pull/11920))
|
||||
- **`DISCORD_ALLOWED_ROLES`** for role-based access control ([#11608](https://github.com/NousResearch/hermes-agent/pull/11608))
|
||||
- **Config option to disable slash commands** (salvage #13130) ([#14315](https://github.com/NousResearch/hermes-agent/pull/14315))
|
||||
- **Native `send_animation`** for inline GIF playback ([#10283](https://github.com/NousResearch/hermes-agent/pull/10283))
|
||||
- **`send_message` Discord media attachments** ([#10246](https://github.com/NousResearch/hermes-agent/pull/10246))
|
||||
- **`/skill` command group** with category subcommands ([#9909](https://github.com/NousResearch/hermes-agent/pull/9909))
|
||||
- **Extract reply text from message references** ([#9781](https://github.com/NousResearch/hermes-agent/pull/9781))
|
||||
|
||||
### Feishu
|
||||
- **Intelligent reply on document comments** with 3-tier access control ([#11898](https://github.com/NousResearch/hermes-agent/pull/11898))
|
||||
- **Show processing state via reactions** on user messages ([#12927](https://github.com/NousResearch/hermes-agent/pull/12927))
|
||||
- **Preserve @mention context for agent consumption** (salvage #13874) ([#14167](https://github.com/NousResearch/hermes-agent/pull/14167))
|
||||
|
||||
### DingTalk
|
||||
- **`require_mention` + `allowed_users` gating** (parity with Slack/Telegram/Discord) ([#11564](https://github.com/NousResearch/hermes-agent/pull/11564))
|
||||
- **QR-code device-flow authorization** for setup wizard ([#11574](https://github.com/NousResearch/hermes-agent/pull/11574))
|
||||
- **AI Cards streaming, emoji reactions, and media handling** (salvage of #10985) ([#11910](https://github.com/NousResearch/hermes-agent/pull/11910))
|
||||
|
||||
### WhatsApp
|
||||
- **`send_voice`** — native audio message delivery ([#13002](https://github.com/NousResearch/hermes-agent/pull/13002))
|
||||
- **`dm_policy` and `group_policy`** parity with WeCom/Weixin/QQ adapters ([#13151](https://github.com/NousResearch/hermes-agent/pull/13151))
|
||||
|
||||
### WeCom / Weixin
|
||||
- **WeCom QR-scan bot creation + interactive setup wizard** (salvage #13923) ([#13961](https://github.com/NousResearch/hermes-agent/pull/13961))
|
||||
|
||||
### Signal
|
||||
- **Media delivery support** via `send_message` ([#13178](https://github.com/NousResearch/hermes-agent/pull/13178))
|
||||
|
||||
### Slack
|
||||
- **Per-thread sessions for DMs by default** ([#10987](https://github.com/NousResearch/hermes-agent/pull/10987))
|
||||
|
||||
### BlueBubbles (iMessage)
|
||||
- Group chat session separation, webhook registration & auth fixes ([#9806](https://github.com/NousResearch/hermes-agent/pull/9806))
|
||||
|
||||
### Gateway Core
|
||||
- **Gateway proxy mode** — forward messages to a remote API server ([#9787](https://github.com/NousResearch/hermes-agent/pull/9787))
|
||||
- **Per-channel ephemeral prompts** (Discord, Telegram, Slack, Mattermost) ([#10564](https://github.com/NousResearch/hermes-agent/pull/10564))
|
||||
- **Surface plugin slash commands** natively on all platforms + decision-capable command hook ([#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
|
||||
- **Support document/archive extensions in MEDIA: tag extraction** (salvage #8255) ([#14307](https://github.com/NousResearch/hermes-agent/pull/14307))
|
||||
- **Recognize `.pdf` in MEDIA: tag extraction** ([#13683](https://github.com/NousResearch/hermes-agent/pull/13683))
|
||||
- **`--all` flag for `gateway start` and `restart`** ([#10043](https://github.com/NousResearch/hermes-agent/pull/10043))
|
||||
- **Notify active sessions on gateway shutdown** + update health check ([#9850](https://github.com/NousResearch/hermes-agent/pull/9850))
|
||||
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
|
||||
- Fix: suppress duplicate replies on interrupt and streaming flood control ([#10235](https://github.com/NousResearch/hermes-agent/pull/10235))
|
||||
- Fix: close temporary agents after one-off tasks ([#11028](https://github.com/NousResearch/hermes-agent/pull/11028), @kshitijk4poor)
|
||||
- Fix: busy-session ack when user messages during active agent run ([#10068](https://github.com/NousResearch/hermes-agent/pull/10068))
|
||||
- Fix: route watch-pattern notifications to the originating session ([#10460](https://github.com/NousResearch/hermes-agent/pull/10460))
|
||||
- Fix: preserve notify context in executor threads ([#10921](https://github.com/NousResearch/hermes-agent/pull/10921), @kshitijk4poor)
|
||||
- Fix: avoid duplicate replies after interrupted long tasks ([#11018](https://github.com/NousResearch/hermes-agent/pull/11018))
|
||||
- Fix: unlink stale PID + lock files on cleanup
|
||||
- Fix: force-unlink stale PID file after `--replace` takeover
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Tool System
|
||||
|
||||
### Plugin Surface (major expansion)
|
||||
- **`register_command()`** — plugins can now add slash commands ([#10626](https://github.com/NousResearch/hermes-agent/pull/10626))
|
||||
- **`dispatch_tool()`** — plugins can invoke tools from their code ([#10763](https://github.com/NousResearch/hermes-agent/pull/10763))
|
||||
- **`pre_tool_call` blocking** — plugins can veto tool execution ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377))
|
||||
- **`transform_tool_result`** — plugins rewrite tool results generically ([#12972](https://github.com/NousResearch/hermes-agent/pull/12972))
|
||||
- **`transform_terminal_output`** — plugins rewrite terminal tool output ([#12929](https://github.com/NousResearch/hermes-agent/pull/12929))
|
||||
- **Namespaced skill registration** for plugin skill bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
|
||||
- **Opt-in-by-default + bundled disk-cleanup plugin** (salvage #12212) ([#12944](https://github.com/NousResearch/hermes-agent/pull/12944))
|
||||
- **Pluggable `image_gen` backends + OpenAI provider** ([#13799](https://github.com/NousResearch/hermes-agent/pull/13799))
|
||||
- **`openai-codex` image_gen plugin** (gpt-image-2 via Codex OAuth) ([#14317](https://github.com/NousResearch/hermes-agent/pull/14317))
|
||||
- **Shell hooks** — wire shell scripts as hook callbacks ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
|
||||
|
||||
### Browser
|
||||
- **`browser_cdp` raw DevTools Protocol passthrough** ([#12369](https://github.com/NousResearch/hermes-agent/pull/12369))
|
||||
- Camofox hardening + connection stability across the window
|
||||
|
||||
### Execute Code
|
||||
- **Project/strict execution modes** (default: project) ([#11971](https://github.com/NousResearch/hermes-agent/pull/11971))
|
||||
|
||||
### Image Generation
|
||||
- **Multi-model FAL support** with picker in `hermes tools` ([#11265](https://github.com/NousResearch/hermes-agent/pull/11265))
|
||||
- **Recraft V3 → V4 Pro, Nano Banana → Pro upgrades** ([#11406](https://github.com/NousResearch/hermes-agent/pull/11406))
|
||||
- **GPT Image 2** in FAL catalog ([#13677](https://github.com/NousResearch/hermes-agent/pull/13677))
|
||||
- **xAI image generation provider** (grok-imagine-image) ([#14765](https://github.com/NousResearch/hermes-agent/pull/14765))
|
||||
|
||||
### TTS / STT / Voice
|
||||
- **Google Gemini TTS provider** ([#11229](https://github.com/NousResearch/hermes-agent/pull/11229))
|
||||
- **xAI Grok STT provider** ([#14473](https://github.com/NousResearch/hermes-agent/pull/14473))
|
||||
- **xAI TTS** (shipped with Responses API upgrade) ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
|
||||
- **KittenTTS local provider** (salvage of #2109) ([#13395](https://github.com/NousResearch/hermes-agent/pull/13395))
|
||||
- **CLI record beep toggle** ([#13247](https://github.com/NousResearch/hermes-agent/pull/13247), @helix4u)
|
||||
|
||||
### Webhook / Cron
|
||||
- **Webhook direct-delivery mode** — zero-LLM push notifications ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
|
||||
- **Cron `wakeAgent` gate** — scripts can skip the agent entirely ([#12373](https://github.com/NousResearch/hermes-agent/pull/12373))
|
||||
- **Cron per-job `enabled_toolsets`** — cap token overhead + cost per job ([#14767](https://github.com/NousResearch/hermes-agent/pull/14767))
|
||||
|
||||
### Delegate
|
||||
- **Orchestrator role** + configurable spawn depth (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
|
||||
- **Cross-agent file state coordination** ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
|
||||
### File / Patch
|
||||
- **`patch` — "did you mean?" feedback** when patch fails to match ([#13435](https://github.com/NousResearch/hermes-agent/pull/13435))
|
||||
|
||||
### API Server
|
||||
- **Stream `/v1/responses` SSE tool events** (salvage #9779) ([#10049](https://github.com/NousResearch/hermes-agent/pull/10049))
|
||||
- **Inline image inputs** on `/v1/chat/completions` and `/v1/responses` ([#12969](https://github.com/NousResearch/hermes-agent/pull/12969))
|
||||
|
||||
### Docker / Podman
|
||||
- **Entry-level Podman support** — `find_docker()` + rootless entrypoint ([#10066](https://github.com/NousResearch/hermes-agent/pull/10066))
|
||||
- **Add docker-cli to Docker image** (salvage #10096) ([#14232](https://github.com/NousResearch/hermes-agent/pull/14232))
|
||||
- **File-sync back to host on teardown** (salvage of #8189 + hardening) ([#11291](https://github.com/NousResearch/hermes-agent/pull/11291))
|
||||
|
||||
### MCP
|
||||
- 12 MCP improvements across the window (status, timeout handling, tool-call forwarding, etc.)
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Skills Ecosystem
|
||||
|
||||
### Skill System
|
||||
- **Namespaced skill registration** for plugin bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
|
||||
- **`hermes skills reset`** to un-stick bundled skills ([#11468](https://github.com/NousResearch/hermes-agent/pull/11468))
|
||||
- **Skills guard opt-in** — `config.skills.guard_agent_created` (default off) ([#14557](https://github.com/NousResearch/hermes-agent/pull/14557))
|
||||
- **Bundled skill scripts runnable out of the box** ([#13384](https://github.com/NousResearch/hermes-agent/pull/13384))
|
||||
- **`xitter` replaced with `xurl`** — the official X API CLI ([#12303](https://github.com/NousResearch/hermes-agent/pull/12303))
|
||||
- **MiniMax-AI/cli as default skill tap** (salvage #7501) ([#14493](https://github.com/NousResearch/hermes-agent/pull/14493))
|
||||
- **Fuzzy `@` file completions + mtime sorting** ([#9467](https://github.com/NousResearch/hermes-agent/pull/9467))
|
||||
|
||||
### New Skills
|
||||
- **concept-diagrams** (salvage of #11045, @v1k22) ([#11363](https://github.com/NousResearch/hermes-agent/pull/11363))
|
||||
- **architecture-diagram** (Cocoon AI port) ([#9906](https://github.com/NousResearch/hermes-agent/pull/9906))
|
||||
- **pixel-art** with hardware palettes and video animation ([#12663](https://github.com/NousResearch/hermes-agent/pull/12663), [#12725](https://github.com/NousResearch/hermes-agent/pull/12725))
|
||||
- **baoyu-comic** ([#13257](https://github.com/NousResearch/hermes-agent/pull/13257), @JimLiu)
|
||||
- **baoyu-infographic** — 21 layouts × 21 styles (salvage #9901) ([#12254](https://github.com/NousResearch/hermes-agent/pull/12254))
|
||||
- **page-agent** — embed Alibaba's in-page GUI agent in your webapp ([#13976](https://github.com/NousResearch/hermes-agent/pull/13976))
|
||||
- **fitness-nutrition** optional skill + optional env var support ([#9355](https://github.com/NousResearch/hermes-agent/pull/9355))
|
||||
- **drug-discovery** — ChEMBL, PubChem, OpenFDA, ADMET ([#9443](https://github.com/NousResearch/hermes-agent/pull/9443))
|
||||
- **touchdesigner-mcp** (salvage of #10081) ([#12298](https://github.com/NousResearch/hermes-agent/pull/12298))
|
||||
- **adversarial-ux-test** optional skill (salvage of #2494, @omnissiah-comelse) ([#13425](https://github.com/NousResearch/hermes-agent/pull/13425))
|
||||
- **maps** — added `guest_house`, `camp_site`, and dual-key bakery lookup ([#13398](https://github.com/NousResearch/hermes-agent/pull/13398))
|
||||
- **llm-wiki** — port provenance markers, source hashing, and quality signals ([#13700](https://github.com/NousResearch/hermes-agent/pull/13700))
|
||||
|
||||
---
|
||||
|
||||
## 📊 Web Dashboard
|
||||
|
||||
- **i18n (English + Chinese) language switcher** ([#9453](https://github.com/NousResearch/hermes-agent/pull/9453))
|
||||
- **Live-switching theme system** ([#10687](https://github.com/NousResearch/hermes-agent/pull/10687))
|
||||
- **Dashboard plugin system** — extend the web UI with custom tabs ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951))
|
||||
- **react-router, sidebar layout, sticky header, dropdown component** ([#9370](https://github.com/NousResearch/hermes-agent/pull/9370), @austinpickett)
|
||||
- **Responsive for mobile** ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), @DeployFaith)
|
||||
- **Vercel deployment** ([#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#11061](https://github.com/NousResearch/hermes-agent/pull/11061), @austinpickett)
|
||||
- **Context window config support** ([#9357](https://github.com/NousResearch/hermes-agent/pull/9357))
|
||||
- **HTTP health probe for cross-container gateway detection** ([#9894](https://github.com/NousResearch/hermes-agent/pull/9894))
|
||||
- **Update + restart gateway buttons** ([#13526](https://github.com/NousResearch/hermes-agent/pull/13526), @austinpickett)
|
||||
- **Real API call count per session** (salvages #10140) ([#14004](https://github.com/NousResearch/hermes-agent/pull/14004))
|
||||
|
||||
---
|
||||
|
||||
## 🖱️ CLI & User Experience
|
||||
|
||||
- **Dynamic shell completion for bash, zsh, and fish** ([#9785](https://github.com/NousResearch/hermes-agent/pull/9785))
|
||||
- **Light-mode skins + skin-aware completion menus** ([#9461](https://github.com/NousResearch/hermes-agent/pull/9461))
|
||||
- **Numbered keyboard shortcuts** on approval and clarify prompts ([#13416](https://github.com/NousResearch/hermes-agent/pull/13416))
|
||||
- **Markdown stripping, compact multiline previews, external editor** ([#12934](https://github.com/NousResearch/hermes-agent/pull/12934))
|
||||
- **`--ignore-user-config` and `--ignore-rules` flags** (port codex#18646) ([#14277](https://github.com/NousResearch/hermes-agent/pull/14277))
|
||||
- **Account limits section in `/usage`** ([#13428](https://github.com/NousResearch/hermes-agent/pull/13428))
|
||||
- **Doctor: Command Installation check** for `hermes` bin symlink ([#10112](https://github.com/NousResearch/hermes-agent/pull/10112))
|
||||
- **ESC cancels secret/sudo prompts**, clearer skip messaging ([#9902](https://github.com/NousResearch/hermes-agent/pull/9902))
|
||||
- Fix: agent-facing text uses `display_hermes_home()` instead of hardcoded `~/.hermes` ([#10285](https://github.com/NousResearch/hermes-agent/pull/10285))
|
||||
- Fix: enforce `config.yaml` as sole CWD source + deprecate `.env` CWD vars + add `hermes memory reset` ([#11029](https://github.com/NousResearch/hermes-agent/pull/11029))
|
||||
|
||||
---
|
||||
|
||||
## 🔒 Security & Reliability
|
||||
|
||||
- **Global toggle to allow private/internal URL resolution** ([#14166](https://github.com/NousResearch/hermes-agent/pull/14166))
|
||||
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
|
||||
- **Telegram callback authorization** on update prompts ([#10536](https://github.com/NousResearch/hermes-agent/pull/10536))
|
||||
- **SECURITY.md** added ([#10532](https://github.com/NousResearch/hermes-agent/pull/10532), @I3eg1nner)
|
||||
- **Warn about legacy hermes.service units** during `hermes update` ([#11918](https://github.com/NousResearch/hermes-agent/pull/11918))
|
||||
- **Complete ASCII-locale UnicodeEncodeError recovery** for `api_messages`/`reasoning_content` (closes #6843) ([#10537](https://github.com/NousResearch/hermes-agent/pull/10537))
|
||||
- **Prevent stale `os.environ` leak** after `clear_session_vars` ([#10527](https://github.com/NousResearch/hermes-agent/pull/10527))
|
||||
- **Prevent agent hang when backgrounding processes** via terminal tool ([#10584](https://github.com/NousResearch/hermes-agent/pull/10584))
|
||||
- Many smaller session-resume, interrupt, streaming, and memory-race fixes throughout the window
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Notable Bug Fixes
|
||||
|
||||
The `fix:` category in this window covers 482 PRs. Highlights:
|
||||
|
||||
- Streaming cursor artifacts filtered from Matrix, Telegram, WhatsApp, Discord (multiple PRs)
|
||||
- `<think>` and `<thought>` blocks filtered from gateway stream consumers ([#9408](https://github.com/NousResearch/hermes-agent/pull/9408))
|
||||
- Gateway display.streaming root-config override regression ([#9799](https://github.com/NousResearch/hermes-agent/pull/9799))
|
||||
- Context `session_search` coerces limit to int (prevents TypeError) ([#10522](https://github.com/NousResearch/hermes-agent/pull/10522))
|
||||
- Memory tool stays available when `fcntl` is unavailable (Windows) ([#9783](https://github.com/NousResearch/hermes-agent/pull/9783))
|
||||
- Trajectory compressor credentials load from `HERMES_HOME/.env` ([#9632](https://github.com/NousResearch/hermes-agent/pull/9632), @Dusk1e)
|
||||
- `@_context_completions` no longer crashes on `@` mention ([#9683](https://github.com/NousResearch/hermes-agent/pull/9683), @kshitijk4poor)
|
||||
- Group session `user_id` no longer treated as `thread_id` in shutdown notifications ([#10546](https://github.com/NousResearch/hermes-agent/pull/10546))
|
||||
- Telegram `platform_hint` — markdown is supported (closes #8261) ([#10612](https://github.com/NousResearch/hermes-agent/pull/10612))
|
||||
- Doctor checks for Kimi China credentials fixed
|
||||
- Streaming: don't suppress final response when commentary message is sent ([#10540](https://github.com/NousResearch/hermes-agent/pull/10540))
|
||||
- Rapid Telegram follow-ups no longer get cut off
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing & CI
|
||||
|
||||
- **Contributor attribution CI check** on PRs ([#9376](https://github.com/NousResearch/hermes-agent/pull/9376))
|
||||
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
|
||||
- Test count stabilized post-Transport refactor; CI matrix held green through the transport rollout
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
- Atropos + wandb links in user guide
|
||||
- ACP / VS Code / Zed / JetBrains integration docs refresh
|
||||
- Webhook subscription docs updated for direct-delivery mode
|
||||
- Plugin author guide expanded for new hooks (`register_command`, `dispatch_tool`, `transform_tool_result`)
|
||||
- Transport layer developer guide added
|
||||
- Website removed Discussions link from README
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
### Core
|
||||
- **@teknium1** (Teknium)
|
||||
|
||||
### Top Community Contributors (by merged PR count)
|
||||
- **@kshitijk4poor** — 49 PRs · Transport refactor (AnthropicTransport, ResponsesApiTransport), Step Plan provider, Xiaomi MiMo v2.5 support, numerous gateway fixes, promoted Kimi K2.5, @ mention crash fix
|
||||
- **@OutThisLife** (Brooklyn) — 31 PRs · TUI polish, git branch in status bar, per-turn stopwatch, stable picker keys, `/clear` confirm, light-theme preset, subagent spawn observability overlay
|
||||
- **@helix4u** — 11 PRs · Voice CLI record beep, MCP tool interrupt handling, assorted stability fixes
|
||||
- **@austinpickett** — 8 PRs · Dashboard react-router + sidebar + sticky header + dropdown, Vercel deployment, update + restart buttons
|
||||
- **@alt-glitch** — 8 PRs · PLATFORM_HINTS for Matrix/Mattermost/Feishu, Matrix fixes
|
||||
- **@ethernet8023** — 3 PRs
|
||||
- **@benbarclay** — 3 PRs
|
||||
- **@Aslaaen** — 2 PRs
|
||||
|
||||
### Also contributing
|
||||
@jerilynzheng (ai-gateway pricing), @JimLiu (baoyu-comic skill), @Dusk1e (trajectory compressor credentials), @DeployFaith (mobile-responsive dashboard), @LeonSGP43, @v1k22 (concept-diagrams), @omnissiah-comelse (adversarial-ux-test), @coekfung (Telegram MarkdownV2 expandable blockquotes), @liftaris (TUI provider resolution), @arihantsethia (skill analytics dashboard), @topcheer + @xing8star (QQBot foundation), @kovyrin, @I3eg1nner (SECURITY.md), @PeterBerthelsen, @lengxii, @priveperfumes, @sjz-ks, @cuyua9, @Disaster-Terminator, @leozeli, @LehaoLin, @trevthefoolish, @loongfay, @MrNiceRicee, @WideLee, @bluefishs, @malaiwah, @bobashopcashier, @dsocolobsky, @iamagenius00, @IAvecilla, @aniruddhaadak80, @Es1la, @asheriif, @walli, @jquesnelle (original Tool Gateway work).
|
||||
|
||||
### All Contributors (alphabetical)
|
||||
|
||||
@0xyg3n, @10ishq, @A-afflatus, @Abnertheforeman, @admin28980, @adybag14-cyber, @akhater, @alexzhu0,
|
||||
@AllardQuek, @alt-glitch, @aniruddhaadak80, @anna-oake, @anniesurla, @anthhub, @areu01or00, @arihantsethia,
|
||||
@arthurbr11, @asheriif, @Aslaaen, @Asunfly, @austinpickett, @AviArora02-commits, @AxDSan, @azhengbot, @Bartok9,
|
||||
@benbarclay, @bennytimz, @bernylinville, @bingo906, @binhnt92, @bkadish, @bluefishs, @bobashopcashier,
|
||||
@brantzh6, @BrennerSpear, @brianclemens, @briandevans, @brooklynnicholson, @bugkill3r, @buray, @burtenshaw,
|
||||
@cdanis, @cgarwood82, @ChimingLiu, @chongweiliu, @christopherwoodall, @coekfung, @cola-runner, @corazzione,
|
||||
@counterposition, @cresslank, @cuyua9, @cypres0099, @danieldoderlein, @davetist, @davidvv, @DeployFaith,
|
||||
@Dev-Mriganka, @devorun, @dieutx, @Disaster-Terminator, @dodo-reach, @draix, @DrStrangerUJN, @dsocolobsky,
|
||||
@Dusk1e, @dyxushuai, @elkimek, @elmatadorgh, @emozilla, @entropidelic, @Erosika, @erosika, @Es1la, @etcircle,
|
||||
@etherman-os, @ethernet8023, @fancydirty, @farion1231, @fatinghenji, @Fatty911, @fengtianyu88, @Feranmi10,
|
||||
@flobo3, @francip, @fuleinist, @g-guthrie, @GenKoKo, @gianfrancopiana, @gnanam1990, @GuyCui, @haileymarshall,
|
||||
@haimu0x, @handsdiff, @hansnow, @hedgeho9X, @helix4u, @hengm3467, @HenkDz, @heykb, @hharry11, @HiddenPuppy,
|
||||
@honghua, @houko, @houziershi, @hsy5571616, @huangke19, @hxp-plus, @Hypn0sis, @I3eg1nner, @iacker,
|
||||
@iamagenius00, @IAvecilla, @iborazzi, @Ifkellx, @ifrederico, @imink, @isaachuangGMICLOUD, @ismell0992-afk,
|
||||
@j0sephz, @Jaaneek, @jackjin1997, @JackTheGit, @jaffarkeikei, @jerilynzheng, @JiaDe-Wu, @Jiawen-lee, @JimLiu,
|
||||
@jinzheng8115, @jneeee, @jplew, @jquesnelle, @Julientalbot, @Junass1, @jvcl, @kagura-agent, @keifergu,
|
||||
@kevinskysunny, @keyuyuan, @konsisumer, @kovyrin, @kshitijk4poor, @leeyang1990, @LehaoLin, @lengxii,
|
||||
@LeonSGP43, @leozeli, @li0near, @liftaris, @Lind3ey, @Linux2010, @liujinkun2025, @LLQWQ, @Llugaes, @lmoncany,
|
||||
@longsizhuo, @lrawnsley, @Lubrsy706, @lumenradley, @luyao618, @lvnilesh, @LVT382009, @m0n5t3r, @Magaav,
|
||||
@MagicRay1217, @malaiwah, @manuelschipper, @Marvae, @MassiveMassimo, @mavrickdeveloper, @maxchernin, @memosr,
|
||||
@meng93, @mengjian-github, @MestreY0d4-Uninter, @Mibayy, @MikeFac, @mikewaters, @milkoor, @minorgod,
|
||||
@MrNiceRicee, @ms-alan, @mvanhorn, @n-WN, @N0nb0at, @Nan93, @NIDNASSER-Abdelmajid, @nish3451, @niyoh120,
|
||||
@nocoo, @nosleepcassette, @NousResearch, @ogzerber, @omnissiah-comelse, @Only-Code-A, @opriz, @OwenYWT, @pedh,
|
||||
@pefontana, @PeterBerthelsen, @phpoh, @pinion05, @plgonzalezrx8, @pradeep7127, @priveperfumes,
|
||||
@projectadmin-dev, @PStarH, @rnijhara, @Roy-oss1, @roytian1217, @RucchiZ, @Ruzzgar, @RyanLee-Dev, @Salt-555,
|
||||
@Sanjays2402, @sgaofen, @sharziki, @shenuu, @shin4, @SHL0MS, @shushuzn, @sicnuyudidi, @simon-gtcl,
|
||||
@simon-marcus, @sirEven, @Sisyphus, @sjz-ks, @snreynolds, @Societus, @Somme4096, @sontianye, @sprmn24,
|
||||
@StefanIsMe, @stephenschoettler, @Swift42, @taeng0204, @taeuk178, @tannerfokkens-maker, @TaroballzChen,
|
||||
@ten-ltw, @teyrebaz33, @Tianworld, @topcheer, @Tranquil-Flow, @trevthefoolish, @TroyMitchell911, @UNLINEARITY,
|
||||
@v1k22, @vivganes, @vominh1919, @vrinek, @VTRiot, @WadydX, @walli, @wenhao7, @WhiteWorld, @WideLee, @wujhsu,
|
||||
@WuTianyi123, @Wysie, @xandersbell, @xiaoqiang243, @xiayh0107, @xinpengdr, @Xowiek, @ycbai, @yeyitech, @ygd58,
|
||||
@youngDoo, @yudaiyan, @Yukipukii1, @yule975, @yyq4193, @yzx9, @ZaynJarvis, @zhang9w0v5, @zhanggttry,
|
||||
@zhangxicen, @zhongyueming1121, @zhouxiaoya12, @zons-zhaozhy
|
||||
|
||||
Also: @maelrx, @Marco Rutsch, @MaxsolcuCrypto, @Mind-Dragon, @Paul Bergeron, @say8hi, @whitehatjr1001.
|
||||
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.4.13...v2026.4.23](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.23)
|
||||
@@ -63,9 +63,6 @@ def make_approval_callback(
|
||||
logger.warning("Permission request timed out or failed: %s", exc)
|
||||
return "deny"
|
||||
|
||||
if response is None:
|
||||
return "deny"
|
||||
|
||||
outcome = response.outcome
|
||||
if isinstance(outcome, AllowedOutcome):
|
||||
option_id = outcome.option_id
|
||||
|
||||
+29
-90
@@ -4,7 +4,6 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from collections import defaultdict, deque
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Any, Deque, Optional
|
||||
@@ -52,7 +51,7 @@ try:
|
||||
except ImportError:
|
||||
from acp.schema import AuthMethod as AuthMethodAgent # type: ignore[attr-defined]
|
||||
|
||||
from acp_adapter.auth import detect_provider
|
||||
from acp_adapter.auth import detect_provider, has_provider
|
||||
from acp_adapter.events import (
|
||||
make_message_cb,
|
||||
make_step_cb,
|
||||
@@ -60,7 +59,7 @@ from acp_adapter.events import (
|
||||
make_tool_progress_cb,
|
||||
)
|
||||
from acp_adapter.permissions import make_approval_callback
|
||||
from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets
|
||||
from acp_adapter.session import SessionManager, SessionState
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -72,11 +71,6 @@ except Exception:
|
||||
# Thread pool for running AIAgent (synchronous) in parallel.
|
||||
_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent")
|
||||
|
||||
# Server-side page size for list_sessions. The ACP ListSessionsRequest schema
|
||||
# does not expose a client-side limit, so this is a fixed cap that clients
|
||||
# paginate against using `cursor` / `next_cursor`.
|
||||
_LIST_SESSIONS_PAGE_SIZE = 50
|
||||
|
||||
|
||||
def _extract_text(
|
||||
prompt: list[
|
||||
@@ -287,11 +281,7 @@ class HermesACPAgent(acp.Agent):
|
||||
try:
|
||||
from model_tools import get_tool_definitions
|
||||
|
||||
enabled_toolsets = _expand_acp_enabled_toolsets(
|
||||
getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"],
|
||||
mcp_server_names=[server.name for server in mcp_servers],
|
||||
)
|
||||
state.agent.enabled_toolsets = enabled_toolsets
|
||||
enabled_toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
|
||||
disabled_toolsets = getattr(state.agent, "disabled_toolsets", None)
|
||||
state.agent.tools = get_tool_definitions(
|
||||
enabled_toolsets=enabled_toolsets,
|
||||
@@ -361,18 +351,9 @@ class HermesACPAgent(acp.Agent):
|
||||
)
|
||||
|
||||
async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None:
|
||||
# Only accept authenticate() calls whose method_id matches the
|
||||
# provider we advertised in initialize(). Without this check,
|
||||
# authenticate() would acknowledge any method_id as long as the
|
||||
# server has provider credentials configured — harmless under
|
||||
# Hermes' threat model (ACP is stdio-only, local-trust), but poor
|
||||
# API hygiene and confusing if ACP ever grows multi-method auth.
|
||||
provider = detect_provider()
|
||||
if not provider:
|
||||
return None
|
||||
if not isinstance(method_id, str) or method_id.strip().lower() != provider:
|
||||
return None
|
||||
return AuthenticateResponse()
|
||||
if has_provider():
|
||||
return AuthenticateResponse()
|
||||
return None
|
||||
|
||||
# ---- Session management -------------------------------------------------
|
||||
|
||||
@@ -456,28 +437,7 @@ class HermesACPAgent(acp.Agent):
|
||||
cwd: str | None = None,
|
||||
**kwargs: Any,
|
||||
) -> ListSessionsResponse:
|
||||
"""List ACP sessions with optional ``cwd`` filtering and cursor pagination.
|
||||
|
||||
``cwd`` is passed through to ``SessionManager.list_sessions`` which already
|
||||
normalizes and filters by working directory. ``cursor`` is a ``session_id``
|
||||
previously returned as ``next_cursor``; results resume after that entry.
|
||||
Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more
|
||||
results remain, ``next_cursor`` is set to the last returned ``session_id``.
|
||||
"""
|
||||
infos = self.session_manager.list_sessions(cwd=cwd)
|
||||
|
||||
if cursor:
|
||||
for idx, s in enumerate(infos):
|
||||
if s["session_id"] == cursor:
|
||||
infos = infos[idx + 1:]
|
||||
break
|
||||
else:
|
||||
# Unknown cursor -> empty page (do not fall back to full list).
|
||||
infos = []
|
||||
|
||||
has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE
|
||||
infos = infos[:_LIST_SESSIONS_PAGE_SIZE]
|
||||
|
||||
sessions = []
|
||||
for s in infos:
|
||||
updated_at = s.get("updated_at")
|
||||
@@ -491,9 +451,7 @@ class HermesACPAgent(acp.Agent):
|
||||
updated_at=updated_at,
|
||||
)
|
||||
)
|
||||
|
||||
next_cursor = sessions[-1].session_id if has_more and sessions else None
|
||||
return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor)
|
||||
return ListSessionsResponse(sessions=sessions)
|
||||
|
||||
# ---- Prompt (core) ------------------------------------------------------
|
||||
|
||||
@@ -538,7 +496,6 @@ class HermesACPAgent(acp.Agent):
|
||||
|
||||
tool_call_ids: dict[str, Deque[str]] = defaultdict(deque)
|
||||
tool_call_meta: dict[str, dict[str, Any]] = {}
|
||||
previous_approval_cb = None
|
||||
|
||||
if conn:
|
||||
tool_progress_cb = make_tool_progress_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
|
||||
@@ -559,32 +516,21 @@ class HermesACPAgent(acp.Agent):
|
||||
agent.step_callback = step_cb
|
||||
agent.message_callback = message_cb
|
||||
|
||||
# Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
|
||||
# Set it INSIDE _run_agent so the TLS write happens in the executor
|
||||
# thread — setting it here would write to the event-loop thread's TLS,
|
||||
# not the executor's. Also set HERMES_INTERACTIVE so approval.py
|
||||
# takes the CLI-interactive path (which calls the registered
|
||||
# callback via prompt_dangerous_approval) instead of the
|
||||
# non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff).
|
||||
# ACP's conn.request_permission maps cleanly to the interactive
|
||||
# callback shape — not the gateway-queue HERMES_EXEC_ASK path,
|
||||
# which requires a notify_cb registered in _gateway_notify_cbs.
|
||||
previous_approval_cb = None
|
||||
previous_interactive = None
|
||||
# Install the per-session approval callback into the current asyncio
|
||||
# task's context. Because ``terminal_tool._approval_callback_var`` is
|
||||
# a ``ContextVar`` and ``loop.run_in_executor`` propagates the caller's
|
||||
# context to the worker thread, concurrent ACP sessions in the same
|
||||
# process each see their own callback without stomping on each other.
|
||||
# No save/restore is needed: when this coroutine returns, the context
|
||||
# snapshot holding the set is discarded.
|
||||
if approval_cb:
|
||||
try:
|
||||
from tools import terminal_tool as _terminal_tool
|
||||
_terminal_tool.set_approval_callback(approval_cb)
|
||||
except Exception:
|
||||
logger.debug("Could not set ACP approval callback", exc_info=True)
|
||||
|
||||
def _run_agent() -> dict:
|
||||
nonlocal previous_approval_cb, previous_interactive
|
||||
if approval_cb:
|
||||
try:
|
||||
from tools import terminal_tool as _terminal_tool
|
||||
previous_approval_cb = _terminal_tool._get_approval_callback()
|
||||
_terminal_tool.set_approval_callback(approval_cb)
|
||||
except Exception:
|
||||
logger.debug("Could not set ACP approval callback", exc_info=True)
|
||||
# Signal to tools.approval that we have an interactive callback
|
||||
# and the non-interactive auto-approve path must not fire.
|
||||
previous_interactive = os.environ.get("HERMES_INTERACTIVE")
|
||||
os.environ["HERMES_INTERACTIVE"] = "1"
|
||||
try:
|
||||
result = agent.run_conversation(
|
||||
user_message=user_text,
|
||||
@@ -595,21 +541,16 @@ class HermesACPAgent(acp.Agent):
|
||||
except Exception as e:
|
||||
logger.exception("Agent error in session %s", session_id)
|
||||
return {"final_response": f"Error: {e}", "messages": state.history}
|
||||
finally:
|
||||
# Restore HERMES_INTERACTIVE.
|
||||
if previous_interactive is None:
|
||||
os.environ.pop("HERMES_INTERACTIVE", None)
|
||||
else:
|
||||
os.environ["HERMES_INTERACTIVE"] = previous_interactive
|
||||
if approval_cb:
|
||||
try:
|
||||
from tools import terminal_tool as _terminal_tool
|
||||
_terminal_tool.set_approval_callback(previous_approval_cb)
|
||||
except Exception:
|
||||
logger.debug("Could not restore approval callback", exc_info=True)
|
||||
|
||||
try:
|
||||
result = await loop.run_in_executor(_executor, _run_agent)
|
||||
# Copy the current asyncio task's context and run the agent inside
|
||||
# it so per-session ContextVar state (e.g. the approval callback
|
||||
# installed above via set_approval_callback) is visible to tool code
|
||||
# executing on the worker thread. ``loop.run_in_executor`` does NOT
|
||||
# propagate contextvars on its own.
|
||||
import contextvars as _ctxvars
|
||||
_ctx = _ctxvars.copy_context()
|
||||
result = await loop.run_in_executor(_executor, lambda: _ctx.run(_run_agent))
|
||||
except Exception:
|
||||
logger.exception("Executor error for session %s", session_id)
|
||||
return PromptResponse(stop_reason="end_turn")
|
||||
@@ -758,9 +699,7 @@ class HermesACPAgent(acp.Agent):
|
||||
def _cmd_tools(self, args: str, state: SessionState) -> str:
|
||||
try:
|
||||
from model_tools import get_tool_definitions
|
||||
toolsets = _expand_acp_enabled_toolsets(
|
||||
getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
|
||||
)
|
||||
toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
|
||||
tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True)
|
||||
if not tools:
|
||||
return "No tools available."
|
||||
|
||||
+1
-28
@@ -106,24 +106,6 @@ def _register_task_cwd(task_id: str, cwd: str) -> None:
|
||||
logger.debug("Failed to register ACP task cwd override", exc_info=True)
|
||||
|
||||
|
||||
def _expand_acp_enabled_toolsets(
|
||||
toolsets: List[str] | None = None,
|
||||
mcp_server_names: List[str] | None = None,
|
||||
) -> List[str]:
|
||||
"""Return ACP toolsets plus explicit MCP server toolsets for this session."""
|
||||
expanded: List[str] = []
|
||||
for name in list(toolsets or ["hermes-acp"]):
|
||||
if name and name not in expanded:
|
||||
expanded.append(name)
|
||||
|
||||
for server_name in list(mcp_server_names or []):
|
||||
toolset_name = f"mcp-{server_name}"
|
||||
if server_name and toolset_name not in expanded:
|
||||
expanded.append(toolset_name)
|
||||
|
||||
return expanded
|
||||
|
||||
|
||||
def _clear_task_cwd(task_id: str) -> None:
|
||||
"""Remove task-specific cwd overrides for an ACP session."""
|
||||
if not task_id:
|
||||
@@ -555,18 +537,9 @@ class SessionManager:
|
||||
elif isinstance(model_cfg, str) and model_cfg.strip():
|
||||
default_model = model_cfg.strip()
|
||||
|
||||
configured_mcp_servers = [
|
||||
name
|
||||
for name, cfg in (config.get("mcp_servers") or {}).items()
|
||||
if not isinstance(cfg, dict) or cfg.get("enabled", True) is not False
|
||||
]
|
||||
|
||||
kwargs = {
|
||||
"platform": "acp",
|
||||
"enabled_toolsets": _expand_acp_enabled_toolsets(
|
||||
["hermes-acp"],
|
||||
mcp_server_names=configured_mcp_servers,
|
||||
),
|
||||
"enabled_toolsets": ["hermes-acp"],
|
||||
"quiet_mode": True,
|
||||
"session_id": session_id,
|
||||
"model": model or default_model,
|
||||
|
||||
+119
-268
@@ -14,13 +14,11 @@ import copy
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from utils import normalize_proxy_env_vars
|
||||
|
||||
try:
|
||||
import anthropic as _anthropic_sdk
|
||||
@@ -118,63 +116,6 @@ def _get_anthropic_max_output(model: str) -> int:
|
||||
return best_val
|
||||
|
||||
|
||||
def _resolve_positive_anthropic_max_tokens(value) -> Optional[int]:
|
||||
"""Return ``value`` floored to a positive int, or ``None`` if it is not a
|
||||
finite positive number. Ported from openclaw/openclaw#66664.
|
||||
|
||||
Anthropic's Messages API rejects ``max_tokens`` values that are 0,
|
||||
negative, non-integer, or non-finite with HTTP 400. Python's ``or``
|
||||
idiom (``max_tokens or fallback``) correctly catches ``0`` but lets
|
||||
negative ints and fractional floats (``-1``, ``0.5``) through to the
|
||||
API, producing a user-visible failure instead of a local error.
|
||||
"""
|
||||
# Booleans are a subclass of int — exclude explicitly so ``True`` doesn't
|
||||
# silently become 1 and ``False`` doesn't become 0.
|
||||
if isinstance(value, bool):
|
||||
return None
|
||||
if not isinstance(value, (int, float)):
|
||||
return None
|
||||
try:
|
||||
import math
|
||||
if not math.isfinite(value):
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
floored = int(value) # truncates toward zero for floats
|
||||
return floored if floored > 0 else None
|
||||
|
||||
|
||||
def _resolve_anthropic_messages_max_tokens(
|
||||
requested,
|
||||
model: str,
|
||||
context_length: Optional[int] = None,
|
||||
) -> int:
|
||||
"""Resolve the ``max_tokens`` budget for an Anthropic Messages call.
|
||||
|
||||
Prefers ``requested`` when it is a positive finite number; otherwise
|
||||
falls back to the model's output ceiling. Raises ``ValueError`` if no
|
||||
positive budget can be resolved (should not happen with current model
|
||||
table defaults, but guards against a future regression where
|
||||
``_get_anthropic_max_output`` could return ``0``).
|
||||
|
||||
Separately, callers apply a context-window clamp — this resolver does
|
||||
not, to keep the positive-value contract independent of endpoint
|
||||
specifics.
|
||||
|
||||
Ported from openclaw/openclaw#66664 (resolveAnthropicMessagesMaxTokens).
|
||||
"""
|
||||
resolved = _resolve_positive_anthropic_max_tokens(requested)
|
||||
if resolved is not None:
|
||||
return resolved
|
||||
fallback = _get_anthropic_max_output(model)
|
||||
if fallback > 0:
|
||||
return fallback
|
||||
raise ValueError(
|
||||
f"Anthropic Messages adapter requires a positive max_tokens value for "
|
||||
f"model {model!r}; got {requested!r} and no model default resolved."
|
||||
)
|
||||
|
||||
|
||||
def _supports_adaptive_thinking(model: str) -> bool:
|
||||
"""Return True for Claude 4.6+ models that support adaptive thinking."""
|
||||
return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
|
||||
@@ -279,9 +220,8 @@ def _is_oauth_token(key: str) -> bool:
|
||||
Positively identifies Anthropic OAuth tokens by their key format:
|
||||
- ``sk-ant-`` prefix (but NOT ``sk-ant-api``) → setup tokens, managed keys
|
||||
- ``eyJ`` prefix → JWTs from the Anthropic OAuth flow
|
||||
- ``cc-`` prefix → Claude Code OAuth access tokens (from CLAUDE_CODE_OAUTH_TOKEN)
|
||||
|
||||
Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match any pattern
|
||||
Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match either pattern
|
||||
and correctly return False.
|
||||
"""
|
||||
if not key:
|
||||
@@ -295,9 +235,6 @@ def _is_oauth_token(key: str) -> bool:
|
||||
# JWTs from Anthropic OAuth flow
|
||||
if key.startswith("eyJ"):
|
||||
return True
|
||||
# Claude Code OAuth access tokens (opaque, from CLAUDE_CODE_OAUTH_TOKEN)
|
||||
if key.startswith("cc-"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
@@ -328,14 +265,6 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
|
||||
return True # Any other endpoint is a third-party proxy
|
||||
|
||||
|
||||
def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
|
||||
"""Return True for Kimi's /coding endpoint that requires claude-code UA."""
|
||||
normalized = _normalize_base_url_text(base_url)
|
||||
if not normalized:
|
||||
return False
|
||||
return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
|
||||
|
||||
|
||||
def _requires_bearer_auth(base_url: str | None) -> bool:
|
||||
"""Return True for Anthropic-compatible providers that require Bearer auth.
|
||||
|
||||
@@ -379,9 +308,6 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
|
||||
"The 'anthropic' package is required for the Anthropic provider. "
|
||||
"Install it with: pip install 'anthropic>=0.39.0'"
|
||||
)
|
||||
|
||||
normalize_proxy_env_vars()
|
||||
|
||||
from httpx import Timeout
|
||||
|
||||
normalized_base_url = _normalize_base_url_text(base_url)
|
||||
@@ -390,30 +316,12 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
|
||||
"timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
|
||||
}
|
||||
if normalized_base_url:
|
||||
# Azure Anthropic endpoints require an ``api-version`` query parameter.
|
||||
# Pass it via default_query so the SDK appends it to every request URL
|
||||
# without corrupting the base_url (appending it directly produces
|
||||
# malformed paths like /anthropic?api-version=.../v1/messages).
|
||||
_is_azure_endpoint = "azure.com" in normalized_base_url.lower()
|
||||
if _is_azure_endpoint and "api-version" not in normalized_base_url:
|
||||
kwargs["base_url"] = normalized_base_url.rstrip("/")
|
||||
kwargs["default_query"] = {"api-version": "2025-04-15"}
|
||||
else:
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
common_betas = _common_betas_for_base_url(normalized_base_url)
|
||||
|
||||
if _is_kimi_coding_endpoint(base_url):
|
||||
# Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
|
||||
# to be recognized as a valid Coding Agent. Without it, returns 403.
|
||||
# Check this BEFORE _requires_bearer_auth since both match api.kimi.com/coding.
|
||||
kwargs["api_key"] = api_key
|
||||
kwargs["default_headers"] = {
|
||||
"User-Agent": "claude-code/0.1.0",
|
||||
**( {"anthropic-beta": ",".join(common_betas)} if common_betas else {} )
|
||||
}
|
||||
elif _requires_bearer_auth(normalized_base_url):
|
||||
if _requires_bearer_auth(normalized_base_url):
|
||||
# Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
|
||||
# Authorization: Bearer *** for regular API keys. Route those endpoints
|
||||
# Authorization: Bearer even for regular API keys. Route those endpoints
|
||||
# through auth_token so the SDK sends Bearer auth instead of x-api-key.
|
||||
# Check this before OAuth token shape detection because MiniMax secrets do
|
||||
# not use Anthropic's sk-ant-api prefix and would otherwise be misread as
|
||||
@@ -476,72 +384,8 @@ def build_anthropic_bedrock_client(region: str):
|
||||
)
|
||||
|
||||
|
||||
def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
|
||||
"""Read Claude Code OAuth credentials from the macOS Keychain.
|
||||
|
||||
Claude Code >=2.1.114 stores credentials in the macOS Keychain under the
|
||||
service name "Claude Code-credentials" rather than (or in addition to)
|
||||
the JSON file at ~/.claude/.credentials.json.
|
||||
|
||||
The password field contains a JSON string with the same claudeAiOauth
|
||||
structure as the JSON file.
|
||||
|
||||
Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
|
||||
"""
|
||||
import platform
|
||||
import subprocess
|
||||
|
||||
if platform.system() != "Darwin":
|
||||
return None
|
||||
|
||||
try:
|
||||
# Read the "Claude Code-credentials" generic password entry
|
||||
result = subprocess.run(
|
||||
["security", "find-generic-password",
|
||||
"-s", "Claude Code-credentials",
|
||||
"-w"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
except (OSError, subprocess.TimeoutExpired):
|
||||
logger.debug("Keychain: security command not available or timed out")
|
||||
return None
|
||||
|
||||
if result.returncode != 0:
|
||||
logger.debug("Keychain: no entry found for 'Claude Code-credentials'")
|
||||
return None
|
||||
|
||||
raw = result.stdout.strip()
|
||||
if not raw:
|
||||
return None
|
||||
|
||||
try:
|
||||
data = json.loads(raw)
|
||||
except json.JSONDecodeError:
|
||||
logger.debug("Keychain: credentials payload is not valid JSON")
|
||||
return None
|
||||
|
||||
oauth_data = data.get("claudeAiOauth")
|
||||
if oauth_data and isinstance(oauth_data, dict):
|
||||
access_token = oauth_data.get("accessToken", "")
|
||||
if access_token:
|
||||
return {
|
||||
"accessToken": access_token,
|
||||
"refreshToken": oauth_data.get("refreshToken", ""),
|
||||
"expiresAt": oauth_data.get("expiresAt", 0),
|
||||
"source": "macos_keychain",
|
||||
}
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
|
||||
"""Read refreshable Claude Code OAuth credentials.
|
||||
|
||||
Checks two sources in order:
|
||||
1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry
|
||||
2. ~/.claude/.credentials.json file
|
||||
"""Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.
|
||||
|
||||
This intentionally excludes ~/.claude.json primaryApiKey. Opencode's
|
||||
subscription flow is OAuth/setup-token based with refreshable credentials,
|
||||
@@ -550,12 +394,6 @@ def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
|
||||
|
||||
Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
|
||||
"""
|
||||
# Try macOS Keychain first (covers Claude Code >=2.1.114)
|
||||
kc_creds = _read_claude_code_credentials_from_keychain()
|
||||
if kc_creds:
|
||||
return kc_creds
|
||||
|
||||
# Fall back to JSON file
|
||||
cred_path = Path.home() / ".claude" / ".credentials.json"
|
||||
if cred_path.exists():
|
||||
try:
|
||||
@@ -726,9 +564,7 @@ def _write_claude_code_credentials(
|
||||
existing["claudeAiOauth"] = oauth_data
|
||||
|
||||
cred_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
_tmp_cred = cred_path.with_suffix(".tmp")
|
||||
_tmp_cred.write_text(json.dumps(existing, indent=2), encoding="utf-8")
|
||||
_tmp_cred.replace(cred_path)
|
||||
cred_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
|
||||
# Restrict permissions (credentials file)
|
||||
cred_path.chmod(0o600)
|
||||
except (OSError, IOError) as e:
|
||||
@@ -995,26 +831,6 @@ def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _is_bedrock_model_id(model: str) -> bool:
|
||||
"""Detect AWS Bedrock model IDs that use dots as namespace separators.
|
||||
|
||||
Bedrock model IDs come in two forms:
|
||||
- Bare: ``anthropic.claude-opus-4-7``
|
||||
- Regional (inference profiles): ``us.anthropic.claude-sonnet-4-5-v1:0``
|
||||
|
||||
In both cases the dots separate namespace components, not version
|
||||
numbers, and must be preserved verbatim for the Bedrock API.
|
||||
"""
|
||||
lower = model.lower()
|
||||
# Regional inference-profile prefixes
|
||||
if any(lower.startswith(p) for p in ("global.", "us.", "eu.", "ap.", "jp.")):
|
||||
return True
|
||||
# Bare Bedrock model IDs: provider.model-family
|
||||
if lower.startswith("anthropic."):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
|
||||
"""Normalize a model name for the Anthropic API.
|
||||
|
||||
@@ -1022,19 +838,11 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
|
||||
- Converts dots to hyphens in version numbers (OpenRouter uses dots,
|
||||
Anthropic uses hyphens: claude-opus-4.6 → claude-opus-4-6), unless
|
||||
preserve_dots is True (e.g. for Alibaba/DashScope: qwen3.5-plus).
|
||||
- Preserves Bedrock model IDs (``anthropic.claude-opus-4-7``) and
|
||||
regional inference profiles (``us.anthropic.claude-*``) whose dots
|
||||
are namespace separators, not version separators.
|
||||
"""
|
||||
lower = model.lower()
|
||||
if lower.startswith("anthropic/"):
|
||||
model = model[len("anthropic/"):]
|
||||
if not preserve_dots:
|
||||
# Bedrock model IDs use dots as namespace separators
|
||||
# (e.g. "anthropic.claude-opus-4-7", "us.anthropic.claude-*").
|
||||
# These must not be converted to hyphens. See issue #12295.
|
||||
if _is_bedrock_model_id(model):
|
||||
return model
|
||||
# OpenRouter uses dots for version separators (claude-opus-4.6),
|
||||
# Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
|
||||
model = model.replace(".", "-")
|
||||
@@ -1254,31 +1062,6 @@ def convert_messages_to_anthropic(
|
||||
"name": fn.get("name", ""),
|
||||
"input": parsed_args,
|
||||
})
|
||||
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
|
||||
# tool-call messages to carry reasoning_content when thinking is
|
||||
# enabled server-side. Preserve it as a thinking block so Kimi
|
||||
# can validate the message history. See hermes-agent#13848.
|
||||
#
|
||||
# Accept empty string "" — _copy_reasoning_content_for_api()
|
||||
# injects "" as a tier-3 fallback for Kimi tool-call messages
|
||||
# that had no reasoning. Kimi requires the field to exist, even
|
||||
# if empty.
|
||||
#
|
||||
# Prepend (not append): Anthropic protocol requires thinking
|
||||
# blocks before text and tool_use blocks.
|
||||
#
|
||||
# Guard: only add when reasoning_details didn't already contribute
|
||||
# thinking blocks. On native Anthropic, reasoning_details produces
|
||||
# signed thinking blocks — adding another unsigned one from
|
||||
# reasoning_content would create a duplicate (same text) that gets
|
||||
# downgraded to a spurious text block on the last assistant message.
|
||||
reasoning_content = m.get("reasoning_content")
|
||||
_already_has_thinking = any(
|
||||
isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")
|
||||
for b in blocks
|
||||
)
|
||||
if isinstance(reasoning_content, str) and not _already_has_thinking:
|
||||
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
|
||||
# Anthropic rejects empty assistant content
|
||||
effective = blocks or content
|
||||
if not effective or effective == "":
|
||||
@@ -1434,7 +1217,6 @@ def convert_messages_to_anthropic(
|
||||
# cache markers can interfere with signature validation.
|
||||
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
|
||||
_is_third_party = _is_third_party_anthropic_endpoint(base_url)
|
||||
_is_kimi = _is_kimi_coding_endpoint(base_url)
|
||||
|
||||
last_assistant_idx = None
|
||||
for i in range(len(result) - 1, -1, -1):
|
||||
@@ -1446,25 +1228,7 @@ def convert_messages_to_anthropic(
|
||||
if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
|
||||
continue
|
||||
|
||||
if _is_kimi:
|
||||
# Kimi's /coding endpoint enables thinking server-side and
|
||||
# requires unsigned thinking blocks on replayed assistant
|
||||
# tool-call messages. Strip signed Anthropic blocks (Kimi
|
||||
# can't validate signatures) but preserve the unsigned ones
|
||||
# we synthesised from reasoning_content above.
|
||||
new_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
new_content.append(b)
|
||||
continue
|
||||
if b.get("signature") or b.get("data"):
|
||||
# Anthropic-signed block — Kimi can't validate, strip
|
||||
continue
|
||||
# Unsigned thinking (synthesised from reasoning_content) —
|
||||
# keep it: Kimi needs it for message-history validation.
|
||||
new_content.append(b)
|
||||
m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
|
||||
elif _is_third_party or idx != last_assistant_idx:
|
||||
if _is_third_party or idx != last_assistant_idx:
|
||||
# Third-party endpoint: strip ALL thinking blocks from every
|
||||
# assistant message — signatures are Anthropic-proprietary.
|
||||
# Direct Anthropic: strip from non-latest assistant messages only.
|
||||
@@ -1562,12 +1326,7 @@ def build_anthropic_kwargs(
|
||||
|
||||
model = normalize_model_name(model, preserve_dots=preserve_dots)
|
||||
# effective_max_tokens = output cap for this call (≠ total context window)
|
||||
# Use the resolver helper so non-positive values (negative ints,
|
||||
# fractional floats, NaN, non-numeric) fail locally with a clear error
|
||||
# rather than 400-ing at the Anthropic API. See openclaw/openclaw#66664.
|
||||
effective_max_tokens = _resolve_anthropic_messages_max_tokens(
|
||||
max_tokens, model, context_length=context_length
|
||||
)
|
||||
effective_max_tokens = max_tokens or _get_anthropic_max_output(model)
|
||||
|
||||
# Clamp output cap to fit inside the total context window.
|
||||
# Only matters for small custom endpoints where context_length < native
|
||||
@@ -1646,25 +1405,11 @@ def build_anthropic_kwargs(
|
||||
# MiniMax Anthropic-compat endpoints support thinking (manual mode only,
|
||||
# not adaptive). Haiku does NOT support extended thinking — skip entirely.
|
||||
#
|
||||
# Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
|
||||
# its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
|
||||
# validates the message history and requires every prior assistant
|
||||
# tool-call message to carry OpenAI-style ``reasoning_content``. The
|
||||
# Anthropic path never populates that field, and
|
||||
# ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
|
||||
# on third-party endpoints — so the request fails with HTTP 400
|
||||
# "thinking is enabled but reasoning_content is missing in assistant
|
||||
# tool call message at index N". Kimi's reasoning is driven server-side
|
||||
# on the /coding route, so skip Anthropic's thinking parameter entirely
|
||||
# for that host. (Kimi on chat_completions enables thinking via
|
||||
# extra_body in the ChatCompletionsTransport — see #13503.)
|
||||
#
|
||||
# On 4.7+ the `thinking.display` field defaults to "omitted", which
|
||||
# silently hides reasoning text that Hermes surfaces in its CLI. We
|
||||
# request "summarized" so the reasoning blocks stay populated — matching
|
||||
# 4.6 behavior and preserving the activity-feed UX during long tool runs.
|
||||
_is_kimi_coding = _is_kimi_coding_endpoint(base_url)
|
||||
if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
|
||||
effort = str(reasoning_config.get("effort", "medium")).lower()
|
||||
budget = THINKING_BUDGET.get(effort, 8000)
|
||||
@@ -1689,9 +1434,9 @@ def build_anthropic_kwargs(
|
||||
|
||||
# ── Strip sampling params on 4.7+ ─────────────────────────────────
|
||||
# Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
|
||||
# Callers (auxiliary_client, etc.) may set these for older models;
|
||||
# drop them here as a safety net so upstream 4.6 → 4.7 migrations
|
||||
# don't require coordinated edits everywhere.
|
||||
# Callers (auxiliary_client, flush_memories, etc.) may set these for
|
||||
# older models; drop them here as a safety net so upstream 4.6 → 4.7
|
||||
# migrations don't require coordinated edits everywhere.
|
||||
if _forbids_sampling_params(model):
|
||||
for _sampling_key in ("temperature", "top_p", "top_k"):
|
||||
kwargs.pop(_sampling_key, None)
|
||||
@@ -1713,3 +1458,109 @@ def build_anthropic_kwargs(
|
||||
return kwargs
|
||||
|
||||
|
||||
def normalize_anthropic_response(
|
||||
response,
|
||||
strip_tool_prefix: bool = False,
|
||||
) -> Tuple[SimpleNamespace, str]:
|
||||
"""Normalize Anthropic response to match the shape expected by AIAgent.
|
||||
|
||||
Returns (assistant_message, finish_reason) where assistant_message has
|
||||
.content, .tool_calls, and .reasoning attributes.
|
||||
|
||||
When *strip_tool_prefix* is True, removes the ``mcp_`` prefix that was
|
||||
added to tool names for OAuth Claude Code compatibility.
|
||||
"""
|
||||
text_parts = []
|
||||
reasoning_parts = []
|
||||
reasoning_details = []
|
||||
tool_calls = []
|
||||
|
||||
for block in response.content:
|
||||
if block.type == "text":
|
||||
text_parts.append(block.text)
|
||||
elif block.type == "thinking":
|
||||
reasoning_parts.append(block.thinking)
|
||||
block_dict = _to_plain_data(block)
|
||||
if isinstance(block_dict, dict):
|
||||
reasoning_details.append(block_dict)
|
||||
elif block.type == "tool_use":
|
||||
name = block.name
|
||||
if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
|
||||
name = name[len(_MCP_TOOL_PREFIX):]
|
||||
tool_calls.append(
|
||||
SimpleNamespace(
|
||||
id=block.id,
|
||||
type="function",
|
||||
function=SimpleNamespace(
|
||||
name=name,
|
||||
arguments=json.dumps(block.input),
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
# Map Anthropic stop_reason to OpenAI finish_reason.
|
||||
# Newer stop reasons added in Claude 4.5+ / 4.7:
|
||||
# - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
|
||||
# - model_context_window_exceeded: hit context limit (not max_tokens)
|
||||
# Both need distinct handling upstream — a refusal should surface to the
|
||||
# user with a clear message, and a context-window overflow should trigger
|
||||
# compression/truncation rather than be treated as normal end-of-turn.
|
||||
stop_reason_map = {
|
||||
"end_turn": "stop",
|
||||
"tool_use": "tool_calls",
|
||||
"max_tokens": "length",
|
||||
"stop_sequence": "stop",
|
||||
"refusal": "content_filter",
|
||||
"model_context_window_exceeded": "length",
|
||||
}
|
||||
finish_reason = stop_reason_map.get(response.stop_reason, "stop")
|
||||
|
||||
return (
|
||||
SimpleNamespace(
|
||||
content="\n".join(text_parts) if text_parts else None,
|
||||
tool_calls=tool_calls or None,
|
||||
reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
|
||||
reasoning_content=None,
|
||||
reasoning_details=reasoning_details or None,
|
||||
),
|
||||
finish_reason,
|
||||
)
|
||||
|
||||
|
||||
def normalize_anthropic_response_v2(
|
||||
response,
|
||||
strip_tool_prefix: bool = False,
|
||||
) -> "NormalizedResponse":
|
||||
"""Normalize Anthropic response to NormalizedResponse.
|
||||
|
||||
Wraps the existing normalize_anthropic_response() and maps its output
|
||||
to the shared transport types. This allows incremental migration —
|
||||
one call site at a time — without changing the original function.
|
||||
"""
|
||||
from agent.transports.types import NormalizedResponse, build_tool_call
|
||||
|
||||
assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)
|
||||
|
||||
tool_calls = None
|
||||
if assistant_msg.tool_calls:
|
||||
tool_calls = [
|
||||
build_tool_call(
|
||||
id=tc.id,
|
||||
name=tc.function.name,
|
||||
arguments=tc.function.arguments,
|
||||
)
|
||||
for tc in assistant_msg.tool_calls
|
||||
]
|
||||
|
||||
provider_data = {}
|
||||
if getattr(assistant_msg, "reasoning_details", None):
|
||||
provider_data["reasoning_details"] = assistant_msg.reasoning_details
|
||||
|
||||
return NormalizedResponse(
|
||||
content=assistant_msg.content,
|
||||
tool_calls=tool_calls,
|
||||
finish_reason=finish_reason,
|
||||
reasoning=getattr(assistant_msg, "reasoning", None),
|
||||
usage=None, # Anthropic usage is on the raw response, not the normaliser
|
||||
provider_data=provider_data or None,
|
||||
)
|
||||
|
||||
+63
-630
@@ -42,28 +42,16 @@ import time
|
||||
from pathlib import Path # noqa: F401 — used by test mocks
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from urllib.parse import urlparse, parse_qs, urlunparse
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
from agent.credential_pool import load_pool
|
||||
from hermes_cli.config import get_hermes_home
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars
|
||||
from utils import base_url_host_matches, base_url_hostname
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _extract_url_query_params(url: str):
|
||||
"""Extract query params from URL, return (clean_url, default_query dict or None)."""
|
||||
parsed = urlparse(url)
|
||||
if parsed.query:
|
||||
clean = urlunparse(parsed._replace(query=""))
|
||||
params = {k: v[0] for k, v in parse_qs(parsed.query).items()}
|
||||
return clean, params
|
||||
return url, None
|
||||
|
||||
|
||||
# Module-level flag: only warn once per process about stale OPENAI_BASE_URL.
|
||||
_stale_base_url_warned = False
|
||||
|
||||
@@ -86,12 +74,6 @@ _PROVIDER_ALIASES = {
|
||||
"minimax_cn": "minimax-cn",
|
||||
"claude": "anthropic",
|
||||
"claude-code": "anthropic",
|
||||
"github": "copilot",
|
||||
"github-copilot": "copilot",
|
||||
"github-model": "copilot",
|
||||
"github-models": "copilot",
|
||||
"github-copilot-acp": "copilot-acp",
|
||||
"copilot-acp-agent": "copilot-acp",
|
||||
}
|
||||
|
||||
|
||||
@@ -107,11 +89,10 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
|
||||
if normalized == "main":
|
||||
# Resolve to the user's actual main provider so named custom providers
|
||||
# and non-aggregator providers (DeepSeek, Alibaba, etc.) work correctly.
|
||||
main_prov = (_read_main_provider() or "").strip().lower()
|
||||
main_prov = _read_main_provider()
|
||||
if main_prov and main_prov not in ("auto", "main", ""):
|
||||
normalized = main_prov
|
||||
else:
|
||||
return "custom"
|
||||
return main_prov
|
||||
return "custom"
|
||||
return _PROVIDER_ALIASES.get(normalized, normalized)
|
||||
|
||||
|
||||
@@ -153,7 +134,6 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||
"gemini": "gemini-3-flash-preview",
|
||||
"zai": "glm-4.5-flash",
|
||||
"kimi-coding": "kimi-k2-turbo-preview",
|
||||
"stepfun": "step-3.5-flash",
|
||||
"kimi-coding-cn": "kimi-k2-turbo-preview",
|
||||
"minimax": "MiniMax-M2.7",
|
||||
"minimax-cn": "MiniMax-M2.7",
|
||||
@@ -170,7 +150,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||
# differs from their main chat model, map it here. The vision auto-detect
|
||||
# "exotic provider" branch checks this before falling back to the main model.
|
||||
_PROVIDER_VISION_MODELS: Dict[str, str] = {
|
||||
"xiaomi": "mimo-v2.5",
|
||||
"xiaomi": "mimo-v2-omni",
|
||||
"zai": "glm-5v-turbo",
|
||||
}
|
||||
|
||||
@@ -202,6 +182,8 @@ auxiliary_is_nous: bool = False
|
||||
# Default auxiliary models per provider
|
||||
_OPENROUTER_MODEL = "google/gemini-3-flash-preview"
|
||||
_NOUS_MODEL = "google/gemini-3-flash-preview"
|
||||
_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni"
|
||||
_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro"
|
||||
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
|
||||
_ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
|
||||
_AUTH_JSON_PATH = get_hermes_home() / "auth.json"
|
||||
@@ -402,7 +384,7 @@ class _CodexCompletionsAdapter:
|
||||
# Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
|
||||
# support max_output_tokens or temperature — omit to avoid 400 errors.
|
||||
|
||||
# Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
|
||||
# Tools support for flush_memories and similar callers
|
||||
tools = kwargs.get("tools")
|
||||
if tools:
|
||||
converted = []
|
||||
@@ -592,8 +574,7 @@ class _AnthropicCompletionsAdapter:
|
||||
self._is_oauth = is_oauth
|
||||
|
||||
def create(self, **kwargs) -> Any:
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
from agent.transports import get_transport
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response
|
||||
|
||||
messages = kwargs.get("messages", [])
|
||||
model = kwargs.get("model", self._model)
|
||||
@@ -630,19 +611,7 @@ class _AnthropicCompletionsAdapter:
|
||||
anthropic_kwargs["temperature"] = temperature
|
||||
|
||||
response = self._client.messages.create(**anthropic_kwargs)
|
||||
_transport = get_transport("anthropic_messages")
|
||||
_nr = _transport.normalize_response(
|
||||
response, strip_tool_prefix=self._is_oauth
|
||||
)
|
||||
|
||||
# ToolCall already duck-types as OpenAI shape (.type, .function.name,
|
||||
# .function.arguments) via properties, so no wrapping needed.
|
||||
assistant_message = SimpleNamespace(
|
||||
content=_nr.content,
|
||||
tool_calls=_nr.tool_calls,
|
||||
reasoning=_nr.reasoning,
|
||||
)
|
||||
finish_reason = _nr.finish_reason
|
||||
assistant_message, finish_reason = normalize_anthropic_response(response)
|
||||
|
||||
usage = None
|
||||
if hasattr(response, "usage") and response.usage:
|
||||
@@ -759,33 +728,6 @@ def _nous_base_url() -> str:
|
||||
return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)
|
||||
|
||||
|
||||
def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[str, str]]:
|
||||
"""Return fresh Nous runtime credentials when available.
|
||||
|
||||
This mirrors the main agent's 401 recovery path and keeps auxiliary
|
||||
clients aligned with the singleton auth store + mint flow instead of
|
||||
relying only on whatever raw tokens happen to be sitting in auth.json
|
||||
or the credential pool.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.auth import resolve_nous_runtime_credentials
|
||||
|
||||
creds = resolve_nous_runtime_credentials(
|
||||
min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
|
||||
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
|
||||
force_mint=force_refresh,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc)
|
||||
return None
|
||||
|
||||
api_key = str(creds.get("api_key") or "").strip()
|
||||
base_url = str(creds.get("base_url") or "").strip().rstrip("/")
|
||||
if not api_key or not base_url:
|
||||
return None
|
||||
return api_key, base_url
|
||||
|
||||
|
||||
def _read_codex_access_token() -> Optional[str]:
|
||||
"""Read a valid, non-expired Codex OAuth access token from Hermes auth store.
|
||||
|
||||
@@ -876,7 +818,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
return GeminiNativeClient(api_key=api_key, base_url=base_url), model
|
||||
extra = {}
|
||||
if base_url_host_matches(base_url, "api.kimi.com"):
|
||||
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
|
||||
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
@@ -902,7 +844,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
return GeminiNativeClient(api_key=api_key, base_url=base_url), model
|
||||
extra = {}
|
||||
if base_url_host_matches(base_url, "api.kimi.com"):
|
||||
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
|
||||
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
@@ -935,19 +877,6 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
|
||||
|
||||
|
||||
def _describe_openrouter_unavailable() -> str:
|
||||
"""Return a more precise OpenRouter auth failure reason for logs."""
|
||||
pool_present, entry = _select_pool_entry("openrouter")
|
||||
if pool_present:
|
||||
if entry is None:
|
||||
return "OpenRouter credential pool has no usable entries (credentials may be exhausted)"
|
||||
if not _pool_runtime_api_key(entry):
|
||||
return "OpenRouter credential pool entry is missing a runtime API key"
|
||||
if not str(os.getenv("OPENROUTER_API_KEY") or "").strip():
|
||||
return "OPENROUTER_API_KEY not set"
|
||||
return "no usable OpenRouter credentials found"
|
||||
|
||||
|
||||
def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
# Check cross-session rate limit guard before attempting Nous —
|
||||
# if another session already recorded a 429, skip Nous entirely
|
||||
@@ -965,50 +894,29 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
pass
|
||||
|
||||
nous = _read_nous_auth()
|
||||
runtime = _resolve_nous_runtime_api(force_refresh=False)
|
||||
if runtime is None and not nous:
|
||||
if not nous:
|
||||
return None, None
|
||||
global auxiliary_is_nous
|
||||
auxiliary_is_nous = True
|
||||
logger.debug("Auxiliary client: Nous Portal")
|
||||
|
||||
# Ask the Portal which model it currently recommends for this task type.
|
||||
# The /api/nous/recommended-models endpoint is the authoritative source:
|
||||
# it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model
|
||||
# auto-detects the caller's tier via check_nous_free_tier(). Fall back to
|
||||
# _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable
|
||||
# or returns a null recommendation for this task type.
|
||||
model = _NOUS_MODEL
|
||||
try:
|
||||
from hermes_cli.models import get_nous_recommended_aux_model
|
||||
recommended = get_nous_recommended_aux_model(vision=vision)
|
||||
if recommended:
|
||||
model = recommended
|
||||
logger.debug(
|
||||
"Auxiliary/%s: using Portal-recommended model %s",
|
||||
"vision" if vision else "text", model,
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
"Auxiliary/%s: no Portal recommendation, falling back to %s",
|
||||
"vision" if vision else "text", model,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"Auxiliary/%s: recommended-models lookup failed (%s); "
|
||||
"falling back to %s",
|
||||
"vision" if vision else "text", exc, model,
|
||||
)
|
||||
|
||||
if runtime is not None:
|
||||
api_key, base_url = runtime
|
||||
if nous.get("source") == "pool":
|
||||
model = "gemini-3-flash"
|
||||
else:
|
||||
api_key = _nous_api_key(nous or {})
|
||||
base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
|
||||
model = _NOUS_MODEL
|
||||
# Free-tier users can't use paid auxiliary models — use the free
|
||||
# models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks.
|
||||
try:
|
||||
from hermes_cli.models import check_nous_free_tier
|
||||
if check_nous_free_tier():
|
||||
model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL
|
||||
logger.debug("Free-tier Nous account — using %s for auxiliary/%s",
|
||||
model, "vision" if vision else "text")
|
||||
except Exception:
|
||||
pass
|
||||
return (
|
||||
OpenAI(
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
api_key=_nous_api_key(nous),
|
||||
base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"),
|
||||
),
|
||||
model,
|
||||
)
|
||||
@@ -1120,8 +1028,6 @@ def _validate_proxy_env_urls() -> None:
|
||||
"""
|
||||
from urllib.parse import urlparse
|
||||
|
||||
normalize_proxy_env_vars()
|
||||
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy"):
|
||||
value = str(os.environ.get(key) or "").strip()
|
||||
@@ -1169,10 +1075,8 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
||||
return None, None
|
||||
model = _read_main_model() or "gpt-4o-mini"
|
||||
logger.debug("Auxiliary client: custom endpoint (%s, api_mode=%s)", model, custom_mode or "chat_completions")
|
||||
_clean_base, _dq = _extract_url_query_params(custom_base)
|
||||
_extra = {"default_query": _dq} if _dq else {}
|
||||
if custom_mode == "codex_responses":
|
||||
real_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
|
||||
real_client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
return CodexAuxiliaryClient(real_client, model), model
|
||||
if custom_mode == "anthropic_messages":
|
||||
# Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
|
||||
@@ -1186,12 +1090,12 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
||||
"Custom endpoint declares api_mode=anthropic_messages but the "
|
||||
"anthropic SDK is not installed — falling back to OpenAI-wire."
|
||||
)
|
||||
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
return (
|
||||
AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
|
||||
model,
|
||||
)
|
||||
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
|
||||
|
||||
def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
|
||||
@@ -1354,120 +1258,6 @@ def _is_connection_error(exc: Exception) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _is_auth_error(exc: Exception) -> bool:
|
||||
"""Detect auth failures that should trigger provider-specific refresh."""
|
||||
status = getattr(exc, "status_code", None)
|
||||
if status == 401:
|
||||
return True
|
||||
err_lower = str(exc).lower()
|
||||
return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
|
||||
|
||||
|
||||
def _is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
|
||||
"""Detect provider 400s for an unsupported request parameter.
|
||||
|
||||
Different OpenAI-compatible endpoints phrase the same class of error a few
|
||||
ways: ``Unsupported parameter: X``, ``unsupported_parameter`` with a
|
||||
``param`` field, ``X is not supported``, ``unknown parameter: X``,
|
||||
``unrecognized request argument: X``. We match on both the parameter
|
||||
name and a generic "unsupported/unknown/unrecognized parameter" marker so
|
||||
call sites can reactively retry without the offending key instead of
|
||||
surfacing a noisy auxiliary failure.
|
||||
|
||||
Generalizes the temperature-specific detector that originally shipped
|
||||
with PR #15621 so the same retry strategy can cover ``max_tokens``,
|
||||
``seed``, ``top_p``, and any future quirk. Credit @nicholasrae (PR #15416)
|
||||
for the generalization pattern.
|
||||
"""
|
||||
param_lower = (param or "").lower()
|
||||
if not param_lower:
|
||||
return False
|
||||
err_lower = str(exc).lower()
|
||||
if param_lower not in err_lower:
|
||||
return False
|
||||
return any(marker in err_lower for marker in (
|
||||
"unsupported parameter",
|
||||
"unsupported_parameter",
|
||||
"not supported",
|
||||
"does not support",
|
||||
"unknown parameter",
|
||||
"unrecognized request argument",
|
||||
"unrecognized parameter",
|
||||
"invalid parameter",
|
||||
))
|
||||
|
||||
|
||||
def _is_unsupported_temperature_error(exc: Exception) -> bool:
|
||||
"""Back-compat wrapper: detect API errors where the model rejects ``temperature``.
|
||||
|
||||
Delegates to :func:`_is_unsupported_parameter_error`; kept as a separate
|
||||
public symbol because existing tests and call sites import it by name.
|
||||
"""
|
||||
return _is_unsupported_parameter_error(exc, "temperature")
|
||||
|
||||
|
||||
def _evict_cached_clients(provider: str) -> None:
|
||||
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
|
||||
normalized = _normalize_aux_provider(provider)
|
||||
with _client_cache_lock:
|
||||
stale_keys = [
|
||||
key for key in _client_cache
|
||||
if _normalize_aux_provider(str(key[0])) == normalized
|
||||
]
|
||||
for key in stale_keys:
|
||||
client = _client_cache.get(key, (None, None, None))[0]
|
||||
if client is not None:
|
||||
_force_close_async_httpx(client)
|
||||
try:
|
||||
close_fn = getattr(client, "close", None)
|
||||
if callable(close_fn):
|
||||
close_fn()
|
||||
except Exception:
|
||||
pass
|
||||
_client_cache.pop(key, None)
|
||||
|
||||
|
||||
def _refresh_provider_credentials(provider: str) -> bool:
|
||||
"""Refresh short-lived credentials for OAuth-backed auxiliary providers."""
|
||||
normalized = _normalize_aux_provider(provider)
|
||||
try:
|
||||
if normalized == "openai-codex":
|
||||
from hermes_cli.auth import resolve_codex_runtime_credentials
|
||||
|
||||
creds = resolve_codex_runtime_credentials(force_refresh=True)
|
||||
if not str(creds.get("api_key", "") or "").strip():
|
||||
return False
|
||||
_evict_cached_clients(normalized)
|
||||
return True
|
||||
if normalized == "nous":
|
||||
from hermes_cli.auth import resolve_nous_runtime_credentials
|
||||
|
||||
creds = resolve_nous_runtime_credentials(
|
||||
min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
|
||||
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
|
||||
force_mint=True,
|
||||
)
|
||||
if not str(creds.get("api_key", "") or "").strip():
|
||||
return False
|
||||
_evict_cached_clients(normalized)
|
||||
return True
|
||||
if normalized == "anthropic":
|
||||
from agent.anthropic_adapter import read_claude_code_credentials, _refresh_oauth_token, resolve_anthropic_token
|
||||
|
||||
creds = read_claude_code_credentials()
|
||||
token = _refresh_oauth_token(creds) if isinstance(creds, dict) and creds.get("refreshToken") else None
|
||||
if not str(token or "").strip():
|
||||
token = resolve_anthropic_token()
|
||||
if not str(token or "").strip():
|
||||
return False
|
||||
_evict_cached_clients(normalized)
|
||||
return True
|
||||
except Exception as exc:
|
||||
logger.debug("Auxiliary provider credential refresh failed for %s: %s", normalized, exc)
|
||||
return False
|
||||
return False
|
||||
|
||||
|
||||
def _try_payment_fallback(
|
||||
failed_provider: str,
|
||||
task: str = None,
|
||||
@@ -1651,7 +1441,7 @@ def _to_async_client(sync_client, model: str):
|
||||
|
||||
async_kwargs["default_headers"] = copilot_default_headers()
|
||||
elif base_url_host_matches(sync_base_url, "api.kimi.com"):
|
||||
async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
|
||||
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
|
||||
return AsyncOpenAI(**async_kwargs), model
|
||||
|
||||
|
||||
@@ -1766,10 +1556,8 @@ def resolve_provider_client(
|
||||
if provider == "openrouter":
|
||||
client, default = _try_openrouter()
|
||||
if client is None:
|
||||
logger.warning(
|
||||
"resolve_provider_client: openrouter requested but %s",
|
||||
_describe_openrouter_unavailable(),
|
||||
)
|
||||
logger.warning("resolve_provider_client: openrouter requested "
|
||||
"but OPENROUTER_API_KEY not set")
|
||||
return None, None
|
||||
final_model = _normalize_resolved_model(model or default, provider)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
@@ -1777,13 +1565,7 @@ def resolve_provider_client(
|
||||
|
||||
# ── Nous Portal (OAuth) ──────────────────────────────────────────
|
||||
if provider == "nous":
|
||||
# Detect vision tasks: either explicit model override from
|
||||
# _PROVIDER_VISION_MODELS, or caller passed a known vision model.
|
||||
_is_vision = (
|
||||
model in _PROVIDER_VISION_MODELS.values()
|
||||
or (model or "").strip().lower() == "mimo-v2-omni"
|
||||
)
|
||||
client, default = _try_nous(vision=_is_vision)
|
||||
client, default = _try_nous()
|
||||
if client is None:
|
||||
logger.warning("resolve_provider_client: nous requested "
|
||||
"but Nous Portal not configured (run: hermes auth)")
|
||||
@@ -1839,15 +1621,12 @@ def resolve_provider_client(
|
||||
provider,
|
||||
)
|
||||
extra = {}
|
||||
_clean_base, _dq = _extract_url_query_params(custom_base)
|
||||
if _dq:
|
||||
extra["default_query"] = _dq
|
||||
if base_url_host_matches(custom_base, "api.kimi.com"):
|
||||
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
|
||||
elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
@@ -1865,7 +1644,7 @@ def resolve_provider_client(
|
||||
"but no endpoint credentials found")
|
||||
return None, None
|
||||
|
||||
# ── Named custom providers (config.yaml providers dict / custom_providers list) ───
|
||||
# ── Named custom providers (config.yaml custom_providers list) ───
|
||||
try:
|
||||
from hermes_cli.runtime_provider import _get_named_custom_provider
|
||||
custom_entry = _get_named_custom_provider(provider)
|
||||
@@ -1876,53 +1655,16 @@ def resolve_provider_client(
|
||||
if not custom_key and custom_key_env:
|
||||
custom_key = os.getenv(custom_key_env, "").strip()
|
||||
custom_key = custom_key or "no-key-required"
|
||||
# An explicit per-task api_mode override (from _resolve_task_provider_model)
|
||||
# wins; otherwise fall back to what the provider entry declared.
|
||||
entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()
|
||||
if custom_base:
|
||||
final_model = _normalize_resolved_model(
|
||||
model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
|
||||
provider,
|
||||
)
|
||||
_clean_base2, _dq2 = _extract_url_query_params(custom_base)
|
||||
_extra2 = {"default_query": _dq2} if _dq2 else {}
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
logger.debug(
|
||||
"resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
|
||||
provider, final_model, entry_api_mode or "chat_completions")
|
||||
# anthropic_messages: route through the Anthropic Messages API
|
||||
# via AnthropicAuxiliaryClient. Mirrors the anonymous-custom
|
||||
# branch in _try_custom_endpoint(). See #15033.
|
||||
if entry_api_mode == "anthropic_messages":
|
||||
try:
|
||||
from agent.anthropic_adapter import build_anthropic_client
|
||||
real_client = build_anthropic_client(custom_key, custom_base)
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"Named custom provider %r declares api_mode="
|
||||
"anthropic_messages but the anthropic SDK is not "
|
||||
"installed — falling back to OpenAI-wire.",
|
||||
provider,
|
||||
)
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
sync_anthropic = AnthropicAuxiliaryClient(
|
||||
real_client, final_model, custom_key, custom_base, is_oauth=False,
|
||||
)
|
||||
if async_mode:
|
||||
return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model
|
||||
return sync_anthropic, final_model
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
|
||||
# codex_responses or inherited auto-detect (via _wrap_if_needed).
|
||||
# _wrap_if_needed reads the closed-over `api_mode` (the task-level
|
||||
# override). Named-provider entry api_mode=codex_responses also
|
||||
# flows through here.
|
||||
if entry_api_mode == "codex_responses" and not isinstance(
|
||||
client, CodexAuxiliaryClient
|
||||
):
|
||||
client = CodexAuxiliaryClient(client, final_model)
|
||||
else:
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
"resolve_provider_client: named custom provider %r (%s)",
|
||||
provider, final_model)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
logger.warning(
|
||||
@@ -1987,7 +1729,7 @@ def resolve_provider_client(
|
||||
# Provider-specific headers
|
||||
headers = {}
|
||||
if base_url_host_matches(base_url, "api.kimi.com"):
|
||||
headers["User-Agent"] = "claude-code/0.1.0"
|
||||
headers["User-Agent"] = "KimiCLI/1.30.0"
|
||||
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
@@ -2055,39 +1797,6 @@ def resolve_provider_client(
|
||||
"directly supported", provider)
|
||||
return None, None
|
||||
|
||||
elif pconfig.auth_type == "aws_sdk":
|
||||
# AWS SDK providers (Bedrock) — use the Anthropic Bedrock client via
|
||||
# boto3's credential chain (IAM roles, SSO, env vars, instance metadata).
|
||||
try:
|
||||
from agent.bedrock_adapter import has_aws_credentials, resolve_bedrock_region
|
||||
from agent.anthropic_adapter import build_anthropic_bedrock_client
|
||||
except ImportError:
|
||||
logger.warning("resolve_provider_client: bedrock requested but "
|
||||
"boto3 or anthropic SDK not installed")
|
||||
return None, None
|
||||
|
||||
if not has_aws_credentials():
|
||||
logger.debug("resolve_provider_client: bedrock requested but "
|
||||
"no AWS credentials found")
|
||||
return None, None
|
||||
|
||||
region = resolve_bedrock_region()
|
||||
default_model = "anthropic.claude-haiku-4-5-20251001-v1:0"
|
||||
final_model = _normalize_resolved_model(model or default_model, provider)
|
||||
try:
|
||||
real_client = build_anthropic_bedrock_client(region)
|
||||
except ImportError as exc:
|
||||
logger.warning("resolve_provider_client: cannot create Bedrock "
|
||||
"client: %s", exc)
|
||||
return None, None
|
||||
client = AnthropicAuxiliaryClient(
|
||||
real_client, final_model, api_key="aws-sdk",
|
||||
base_url=f"https://bedrock-runtime.{region}.amazonaws.com",
|
||||
)
|
||||
logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
|
||||
# OAuth providers — route through their specific try functions
|
||||
if provider == "nous":
|
||||
@@ -2252,35 +1961,24 @@ def resolve_vision_provider_client(
|
||||
# _PROVIDER_VISION_MODELS provides per-provider vision model
|
||||
# overrides when the provider has a dedicated multimodal model
|
||||
# that differs from the chat model (e.g. xiaomi → mimo-v2-omni,
|
||||
# zai → glm-5v-turbo). Nous is the exception: it has a dedicated
|
||||
# strict vision backend with tier-aware defaults, so it must not
|
||||
# fall through to the user's text chat model here.
|
||||
# zai → glm-5v-turbo).
|
||||
# 2. OpenRouter (vision-capable aggregator fallback)
|
||||
# 3. Nous Portal (vision-capable aggregator fallback)
|
||||
# 4. Stop
|
||||
main_provider = _read_main_provider()
|
||||
main_model = _read_main_model()
|
||||
if main_provider and main_provider not in ("auto", ""):
|
||||
if main_provider == "nous":
|
||||
sync_client, default_model = _resolve_strict_vision_backend(main_provider)
|
||||
if sync_client is not None:
|
||||
logger.info(
|
||||
"Vision auto-detect: using main provider %s (%s)",
|
||||
main_provider, default_model or resolved_model or main_model,
|
||||
)
|
||||
return _finalize(main_provider, sync_client, default_model)
|
||||
else:
|
||||
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
|
||||
rpc_client, rpc_model = resolve_provider_client(
|
||||
main_provider, vision_model,
|
||||
api_mode=resolved_api_mode)
|
||||
if rpc_client is not None:
|
||||
logger.info(
|
||||
"Vision auto-detect: using main provider %s (%s)",
|
||||
main_provider, rpc_model or vision_model,
|
||||
)
|
||||
return _finalize(
|
||||
main_provider, rpc_client, rpc_model or vision_model)
|
||||
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
|
||||
rpc_client, rpc_model = resolve_provider_client(
|
||||
main_provider, vision_model,
|
||||
api_mode=resolved_api_mode)
|
||||
if rpc_client is not None:
|
||||
logger.info(
|
||||
"Vision auto-detect: using main provider %s (%s)",
|
||||
main_provider, rpc_model or vision_model,
|
||||
)
|
||||
return _finalize(
|
||||
main_provider, rpc_client, rpc_model or vision_model)
|
||||
|
||||
# Fall back through aggregators (uses their dedicated vision model,
|
||||
# not the user's main model) when main provider has no client.
|
||||
@@ -2355,76 +2053,6 @@ _client_cache_lock = threading.Lock()
|
||||
_CLIENT_CACHE_MAX_SIZE = 64 # safety belt — evict oldest when exceeded
|
||||
|
||||
|
||||
def _client_cache_key(
|
||||
provider: str,
|
||||
*,
|
||||
async_mode: bool,
|
||||
base_url: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
api_mode: Optional[str] = None,
|
||||
main_runtime: Optional[Dict[str, Any]] = None,
|
||||
) -> tuple:
|
||||
runtime = _normalize_main_runtime(main_runtime)
|
||||
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
|
||||
return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
|
||||
|
||||
|
||||
def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
|
||||
with _client_cache_lock:
|
||||
old_entry = _client_cache.get(cache_key)
|
||||
if old_entry is not None and old_entry[0] is not client:
|
||||
_force_close_async_httpx(old_entry[0])
|
||||
try:
|
||||
close_fn = getattr(old_entry[0], "close", None)
|
||||
if callable(close_fn):
|
||||
close_fn()
|
||||
except Exception:
|
||||
pass
|
||||
_client_cache[cache_key] = (client, default_model, bound_loop)
|
||||
|
||||
|
||||
def _refresh_nous_auxiliary_client(
|
||||
*,
|
||||
cache_provider: str,
|
||||
model: Optional[str],
|
||||
async_mode: bool,
|
||||
base_url: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
api_mode: Optional[str] = None,
|
||||
main_runtime: Optional[Dict[str, Any]] = None,
|
||||
) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Refresh Nous runtime creds, rebuild the client, and replace the cache entry."""
|
||||
runtime = _resolve_nous_runtime_api(force_refresh=True)
|
||||
if runtime is None:
|
||||
return None, model
|
||||
|
||||
fresh_key, fresh_base_url = runtime
|
||||
sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url)
|
||||
final_model = model
|
||||
|
||||
current_loop = None
|
||||
if async_mode:
|
||||
try:
|
||||
import asyncio as _aio
|
||||
current_loop = _aio.get_event_loop()
|
||||
except RuntimeError:
|
||||
pass
|
||||
client, final_model = _to_async_client(sync_client, final_model or "")
|
||||
else:
|
||||
client = sync_client
|
||||
|
||||
cache_key = _client_cache_key(
|
||||
cache_provider,
|
||||
async_mode=async_mode,
|
||||
base_url=base_url,
|
||||
api_key=api_key,
|
||||
api_mode=api_mode,
|
||||
main_runtime=main_runtime,
|
||||
)
|
||||
_store_cached_client(cache_key, client, final_model, bound_loop=current_loop)
|
||||
return client, final_model
|
||||
|
||||
|
||||
def neuter_async_httpx_del() -> None:
|
||||
"""Monkey-patch ``AsyncHttpxClientWrapper.__del__`` to be a no-op.
|
||||
|
||||
@@ -2578,14 +2206,8 @@ def _get_cached_client(
|
||||
except RuntimeError:
|
||||
pass
|
||||
runtime = _normalize_main_runtime(main_runtime)
|
||||
cache_key = _client_cache_key(
|
||||
provider,
|
||||
async_mode=async_mode,
|
||||
base_url=base_url,
|
||||
api_key=api_key,
|
||||
api_mode=api_mode,
|
||||
main_runtime=main_runtime,
|
||||
)
|
||||
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
|
||||
cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
|
||||
with _client_cache_lock:
|
||||
if cache_key in _client_cache:
|
||||
cached_client, cached_default, cached_loop = _client_cache[cache_key]
|
||||
@@ -2822,8 +2444,8 @@ def _build_call_kwargs(
|
||||
temperature = fixed_temperature
|
||||
|
||||
# Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
|
||||
# drop here so auxiliary callers that hardcode temperature (e.g. 0 on
|
||||
# structured-JSON extraction) don't 400 the moment
|
||||
# drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
|
||||
# flush_memories, 0 on structured-JSON extraction) don't 400 the moment
|
||||
# the aux model is flipped to 4.7.
|
||||
if temperature is not None:
|
||||
from agent.anthropic_adapter import _forbids_sampling_params
|
||||
@@ -2911,7 +2533,7 @@ def call_llm(
|
||||
|
||||
Args:
|
||||
task: Auxiliary task name ("compression", "vision", "web_extract",
|
||||
"session_search", "skills_hub", "mcp", "title_generation").
|
||||
"session_search", "skills_hub", "mcp", "flush_memories").
|
||||
Reads provider:model from config/env. Ignored if provider is set.
|
||||
provider: Explicit provider override.
|
||||
model: Explicit model override.
|
||||
@@ -3014,45 +2636,13 @@ def call_llm(
|
||||
if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
|
||||
kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
|
||||
|
||||
# Handle unsupported temperature, max_tokens vs max_completion_tokens retry,
|
||||
# then payment fallback.
|
||||
# Handle max_tokens vs max_completion_tokens retry, then payment fallback.
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
|
||||
retry_kwargs = dict(kwargs)
|
||||
retry_kwargs.pop("temperature", None)
|
||||
logger.info(
|
||||
"Auxiliary %s: provider rejected temperature; retrying once without it",
|
||||
task or "call",
|
||||
)
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**retry_kwargs), task)
|
||||
except Exception as retry_err:
|
||||
retry_err_str = str(retry_err)
|
||||
# If retry still fails, fall through to the max_tokens /
|
||||
# payment / auth chains below using the temperature-stripped
|
||||
# kwargs. Re-raise only if the retry hit something those
|
||||
# chains won't handle.
|
||||
if not (
|
||||
_is_payment_error(retry_err)
|
||||
or _is_connection_error(retry_err)
|
||||
or _is_auth_error(retry_err)
|
||||
or "max_tokens" in retry_err_str
|
||||
or "unsupported_parameter" in retry_err_str
|
||||
):
|
||||
raise
|
||||
first_err = retry_err
|
||||
kwargs = retry_kwargs
|
||||
|
||||
err_str = str(first_err)
|
||||
if max_tokens is not None and (
|
||||
"max_tokens" in err_str
|
||||
or "unsupported_parameter" in err_str
|
||||
or _is_unsupported_parameter_error(first_err, "max_tokens")
|
||||
):
|
||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
try:
|
||||
@@ -3065,72 +2655,6 @@ def call_llm(
|
||||
raise
|
||||
first_err = retry_err
|
||||
|
||||
# ── Nous auth refresh parity with main agent ──────────────────
|
||||
client_is_nous = (
|
||||
resolved_provider == "nous"
|
||||
or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
|
||||
)
|
||||
if _is_auth_error(first_err) and client_is_nous:
|
||||
refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
|
||||
cache_provider=resolved_provider or "nous",
|
||||
model=final_model,
|
||||
async_mode=False,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
api_mode=resolved_api_mode,
|
||||
main_runtime=main_runtime,
|
||||
)
|
||||
if refreshed_client is not None:
|
||||
logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
|
||||
task or "call")
|
||||
if refreshed_model and refreshed_model != kwargs.get("model"):
|
||||
kwargs["model"] = refreshed_model
|
||||
return _validate_llm_response(
|
||||
refreshed_client.chat.completions.create(**kwargs), task)
|
||||
|
||||
# ── Auth refresh retry ───────────────────────────────────────
|
||||
if (_is_auth_error(first_err)
|
||||
and resolved_provider not in ("auto", "", None)
|
||||
and not client_is_nous):
|
||||
if _refresh_provider_credentials(resolved_provider):
|
||||
logger.info(
|
||||
"Auxiliary %s: refreshed %s credentials after auth error, retrying",
|
||||
task or "call", resolved_provider,
|
||||
)
|
||||
retry_client, retry_model = (
|
||||
resolve_vision_provider_client(
|
||||
provider=resolved_provider,
|
||||
model=final_model,
|
||||
async_mode=False,
|
||||
)[1:]
|
||||
if task == "vision"
|
||||
else _get_cached_client(
|
||||
resolved_provider,
|
||||
resolved_model,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
api_mode=resolved_api_mode,
|
||||
main_runtime=main_runtime,
|
||||
)
|
||||
)
|
||||
if retry_client is not None:
|
||||
retry_kwargs = _build_call_kwargs(
|
||||
resolved_provider,
|
||||
retry_model or final_model,
|
||||
messages,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
tools=tools,
|
||||
timeout=effective_timeout,
|
||||
extra_body=effective_extra_body,
|
||||
base_url=resolved_base_url,
|
||||
)
|
||||
_retry_base = str(getattr(retry_client, "base_url", "") or "")
|
||||
if _is_anthropic_compat_endpoint(resolved_provider, _retry_base):
|
||||
retry_kwargs["messages"] = _convert_openai_images_to_anthropic(retry_kwargs["messages"])
|
||||
return _validate_llm_response(
|
||||
retry_client.chat.completions.create(**retry_kwargs), task)
|
||||
|
||||
# ── Payment / credit exhaustion fallback ──────────────────────
|
||||
# When the resolved provider returns 402 or a credit-related error,
|
||||
# try alternative providers instead of giving up. This handles the
|
||||
@@ -3315,35 +2839,8 @@ async def async_call_llm(
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
|
||||
retry_kwargs = dict(kwargs)
|
||||
retry_kwargs.pop("temperature", None)
|
||||
logger.info(
|
||||
"Auxiliary %s (async): provider rejected temperature; retrying once without it",
|
||||
task or "call",
|
||||
)
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**retry_kwargs), task)
|
||||
except Exception as retry_err:
|
||||
retry_err_str = str(retry_err)
|
||||
if not (
|
||||
_is_payment_error(retry_err)
|
||||
or _is_connection_error(retry_err)
|
||||
or _is_auth_error(retry_err)
|
||||
or "max_tokens" in retry_err_str
|
||||
or "unsupported_parameter" in retry_err_str
|
||||
):
|
||||
raise
|
||||
first_err = retry_err
|
||||
kwargs = retry_kwargs
|
||||
|
||||
err_str = str(first_err)
|
||||
if max_tokens is not None and (
|
||||
"max_tokens" in err_str
|
||||
or "unsupported_parameter" in err_str
|
||||
or _is_unsupported_parameter_error(first_err, "max_tokens")
|
||||
):
|
||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
try:
|
||||
@@ -3356,70 +2853,6 @@ async def async_call_llm(
|
||||
raise
|
||||
first_err = retry_err
|
||||
|
||||
# ── Nous auth refresh parity with main agent ──────────────────
|
||||
client_is_nous = (
|
||||
resolved_provider == "nous"
|
||||
or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
|
||||
)
|
||||
if _is_auth_error(first_err) and client_is_nous:
|
||||
refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
|
||||
cache_provider=resolved_provider or "nous",
|
||||
model=final_model,
|
||||
async_mode=True,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
api_mode=resolved_api_mode,
|
||||
)
|
||||
if refreshed_client is not None:
|
||||
logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
|
||||
task or "call")
|
||||
if refreshed_model and refreshed_model != kwargs.get("model"):
|
||||
kwargs["model"] = refreshed_model
|
||||
return _validate_llm_response(
|
||||
await refreshed_client.chat.completions.create(**kwargs), task)
|
||||
|
||||
# ── Auth refresh retry (mirrors sync call_llm) ───────────────
|
||||
if (_is_auth_error(first_err)
|
||||
and resolved_provider not in ("auto", "", None)
|
||||
and not client_is_nous):
|
||||
if _refresh_provider_credentials(resolved_provider):
|
||||
logger.info(
|
||||
"Auxiliary %s (async): refreshed %s credentials after auth error, retrying",
|
||||
task or "call", resolved_provider,
|
||||
)
|
||||
if task == "vision":
|
||||
_, retry_client, retry_model = resolve_vision_provider_client(
|
||||
provider=resolved_provider,
|
||||
model=final_model,
|
||||
async_mode=True,
|
||||
)
|
||||
else:
|
||||
retry_client, retry_model = _get_cached_client(
|
||||
resolved_provider,
|
||||
resolved_model,
|
||||
async_mode=True,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
api_mode=resolved_api_mode,
|
||||
)
|
||||
if retry_client is not None:
|
||||
retry_kwargs = _build_call_kwargs(
|
||||
resolved_provider,
|
||||
retry_model or final_model,
|
||||
messages,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
tools=tools,
|
||||
timeout=effective_timeout,
|
||||
extra_body=effective_extra_body,
|
||||
base_url=resolved_base_url,
|
||||
)
|
||||
_retry_base = str(getattr(retry_client, "base_url", "") or "")
|
||||
if _is_anthropic_compat_endpoint(resolved_provider, _retry_base):
|
||||
retry_kwargs["messages"] = _convert_openai_images_to_anthropic(retry_kwargs["messages"])
|
||||
return _validate_llm_response(
|
||||
await retry_client.chat.completions.create(**retry_kwargs), task)
|
||||
|
||||
# ── Payment / connection fallback (mirrors sync call_llm) ─────
|
||||
should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
|
||||
is_auto = resolved_provider in ("auto", "", None)
|
||||
|
||||
+2
-130
@@ -87,114 +87,6 @@ def reset_client_cache():
|
||||
_bedrock_control_client_cache.clear()
|
||||
|
||||
|
||||
def invalidate_runtime_client(region: str) -> bool:
|
||||
"""Evict the cached ``bedrock-runtime`` client for a single region.
|
||||
|
||||
Per-region counterpart to :func:`reset_client_cache`. Used by the converse
|
||||
call wrappers to discard clients whose underlying HTTP connection has
|
||||
gone stale, so the next call allocates a fresh client (with a fresh
|
||||
connection pool) instead of reusing a dead socket.
|
||||
|
||||
Returns True if a cached entry was evicted, False if the region was not
|
||||
cached.
|
||||
"""
|
||||
existed = region in _bedrock_runtime_client_cache
|
||||
_bedrock_runtime_client_cache.pop(region, None)
|
||||
return existed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stale-connection detection
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# boto3 caches its HTTPS connection pool inside the client object. When a
|
||||
# pooled connection is killed out from under us (NAT timeout, VPN flap,
|
||||
# server-side TCP RST, proxy idle cull, etc.), the next use surfaces as
|
||||
# one of a handful of low-level exceptions — most commonly
|
||||
# ``botocore.exceptions.ConnectionClosedError`` or
|
||||
# ``urllib3.exceptions.ProtocolError``. urllib3 also trips an internal
|
||||
# ``assert`` in a couple of paths (connection pool state checks, chunked
|
||||
# response readers) which bubbles up as a bare ``AssertionError`` with an
|
||||
# empty ``str(exc)``.
|
||||
#
|
||||
# In all of these cases the client is the problem, not the request: retrying
|
||||
# with the same cached client reproduces the failure until the process
|
||||
# restarts. The fix is to evict the region's cached client so the next
|
||||
# attempt builds a new one.
|
||||
|
||||
_STALE_LIB_MODULE_PREFIXES = (
|
||||
"urllib3.",
|
||||
"botocore.",
|
||||
"boto3.",
|
||||
)
|
||||
|
||||
|
||||
def _traceback_frames_modules(exc: BaseException):
|
||||
"""Yield ``__name__``-style module strings for each frame in exc's traceback."""
|
||||
tb = getattr(exc, "__traceback__", None)
|
||||
while tb is not None:
|
||||
frame = tb.tb_frame
|
||||
module = frame.f_globals.get("__name__", "")
|
||||
yield module or ""
|
||||
tb = tb.tb_next
|
||||
|
||||
|
||||
def is_stale_connection_error(exc: BaseException) -> bool:
|
||||
"""Return True if ``exc`` indicates a dead/stale Bedrock HTTP connection.
|
||||
|
||||
Matches:
|
||||
* ``botocore.exceptions.ConnectionError`` and subclasses
|
||||
(``ConnectionClosedError``, ``EndpointConnectionError``,
|
||||
``ReadTimeoutError``, ``ConnectTimeoutError``).
|
||||
* ``urllib3.exceptions.ProtocolError`` / ``NewConnectionError`` /
|
||||
``ConnectionError`` (best-effort import — urllib3 is a transitive
|
||||
dependency of botocore so it is always available in practice).
|
||||
* Bare ``AssertionError`` raised from a frame inside urllib3, botocore,
|
||||
or boto3. These are internal-invariant failures (typically triggered
|
||||
by corrupted connection-pool state after a dropped socket) and are
|
||||
recoverable by swapping the client.
|
||||
|
||||
Non-library ``AssertionError``s (from application code or tests) are
|
||||
intentionally not matched — only library-internal asserts signal stale
|
||||
connection state.
|
||||
"""
|
||||
# botocore: the canonical signal — HTTPClientError is the umbrella for
|
||||
# ConnectionClosedError, ReadTimeoutError, EndpointConnectionError,
|
||||
# ConnectTimeoutError, and ProxyConnectionError. ConnectionError covers
|
||||
# the same family via a different branch of the hierarchy.
|
||||
try:
|
||||
from botocore.exceptions import (
|
||||
ConnectionError as BotoConnectionError,
|
||||
HTTPClientError,
|
||||
)
|
||||
botocore_errors: tuple = (BotoConnectionError, HTTPClientError)
|
||||
except ImportError: # pragma: no cover — botocore always present with boto3
|
||||
botocore_errors = ()
|
||||
if botocore_errors and isinstance(exc, botocore_errors):
|
||||
return True
|
||||
|
||||
# urllib3: low-level transport failures
|
||||
try:
|
||||
from urllib3.exceptions import (
|
||||
ProtocolError,
|
||||
NewConnectionError,
|
||||
ConnectionError as Urllib3ConnectionError,
|
||||
)
|
||||
urllib3_errors = (ProtocolError, NewConnectionError, Urllib3ConnectionError)
|
||||
except ImportError: # pragma: no cover
|
||||
urllib3_errors = ()
|
||||
if urllib3_errors and isinstance(exc, urllib3_errors):
|
||||
return True
|
||||
|
||||
# Library-internal AssertionError (urllib3 / botocore / boto3)
|
||||
if isinstance(exc, AssertionError):
|
||||
for module in _traceback_frames_modules(exc):
|
||||
if any(module.startswith(prefix) for prefix in _STALE_LIB_MODULE_PREFIXES):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AWS credential detection
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -895,17 +787,7 @@ def call_converse(
|
||||
guardrail_config=guardrail_config,
|
||||
)
|
||||
|
||||
try:
|
||||
response = client.converse(**kwargs)
|
||||
except Exception as exc:
|
||||
if is_stale_connection_error(exc):
|
||||
logger.warning(
|
||||
"bedrock: stale-connection error on converse(region=%s, model=%s): "
|
||||
"%s — evicting cached client so the next call reconnects.",
|
||||
region, model, type(exc).__name__,
|
||||
)
|
||||
invalidate_runtime_client(region)
|
||||
raise
|
||||
response = client.converse(**kwargs)
|
||||
return normalize_converse_response(response)
|
||||
|
||||
|
||||
@@ -937,17 +819,7 @@ def call_converse_stream(
|
||||
guardrail_config=guardrail_config,
|
||||
)
|
||||
|
||||
try:
|
||||
response = client.converse_stream(**kwargs)
|
||||
except Exception as exc:
|
||||
if is_stale_connection_error(exc):
|
||||
logger.warning(
|
||||
"bedrock: stale-connection error on converse_stream(region=%s, "
|
||||
"model=%s): %s — evicting cached client so the next call reconnects.",
|
||||
region, model, type(exc).__name__,
|
||||
)
|
||||
invalidate_runtime_client(region)
|
||||
raise
|
||||
response = client.converse_stream(**kwargs)
|
||||
return normalize_converse_stream_events(response)
|
||||
|
||||
|
||||
|
||||
@@ -23,52 +23,26 @@ from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Matches Codex/Harmony tool-call serialization that occasionally leaks into
|
||||
# assistant-message content when the model fails to emit a structured
|
||||
# ``function_call`` item. Accepts the common forms:
|
||||
#
|
||||
# to=functions.exec_command
|
||||
# assistant to=functions.exec_command
|
||||
# <|channel|>commentary to=functions.exec_command
|
||||
#
|
||||
# ``to=functions.<name>`` is the stable marker — the optional ``assistant`` or
|
||||
# Harmony channel prefix varies by degeneration mode. Case-insensitive to
|
||||
# cover lowercase/uppercase ``assistant`` variants.
|
||||
_TOOL_CALL_LEAK_PATTERN = re.compile(
|
||||
r"(?:^|[\s>|])to=functions\.[A-Za-z_][\w.]*",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Multimodal content helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> List[Dict[str, Any]]:
|
||||
def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
|
||||
"""Convert chat-style multimodal content to Responses API input parts.
|
||||
|
||||
Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
|
||||
Output: ``[{"type":"input_text"|"output_text"|"input_image", ...}]`` (Responses format)
|
||||
|
||||
The ``role`` parameter controls the text content type:
|
||||
- ``"user"`` (default) → ``"input_text"``
|
||||
- ``"assistant"`` → ``"output_text"``
|
||||
|
||||
The Responses API rejects ``input_text`` inside assistant messages and
|
||||
``output_text`` inside user messages, so callers MUST pass the correct
|
||||
role for the message being converted.
|
||||
Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
|
||||
|
||||
Returns an empty list when ``content`` is not a list or contains no
|
||||
recognized parts — callers fall back to the string path.
|
||||
"""
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
if not isinstance(content, list):
|
||||
return []
|
||||
converted: List[Dict[str, Any]] = []
|
||||
for part in content:
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
converted.append({"type": text_type, "text": part})
|
||||
converted.append({"type": "input_text", "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
@@ -76,7 +50,7 @@ def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> Lis
|
||||
if ptype in {"text", "input_text", "output_text"}:
|
||||
text = part.get("text")
|
||||
if isinstance(text, str) and text:
|
||||
converted.append({"type": text_type, "text": text})
|
||||
converted.append({"type": "input_text", "text": text})
|
||||
continue
|
||||
if ptype in {"image_url", "input_image"}:
|
||||
image_ref = part.get("image_url")
|
||||
@@ -227,23 +201,6 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
|
||||
# Message format conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_RESPONSE_MESSAGE_STATUSES = {"completed", "incomplete", "in_progress"}
|
||||
|
||||
|
||||
def _normalize_responses_message_status(value: Any, *, default: str = "completed") -> str:
|
||||
"""Normalize a Responses assistant message status for replay.
|
||||
|
||||
The API accepts completed/incomplete/in_progress on replayed assistant
|
||||
output messages. Preserve those exactly (modulo case/hyphen spelling) so
|
||||
incomplete Codex continuation turns don't get falsely marked completed.
|
||||
"""
|
||||
if isinstance(value, str):
|
||||
status = value.strip().lower().replace("-", "_").replace(" ", "_")
|
||||
if status in _RESPONSE_MESSAGE_STATUSES:
|
||||
return status
|
||||
return default
|
||||
|
||||
|
||||
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Convert internal chat-style messages to Responses input items."""
|
||||
items: List[Dict[str, Any]] = []
|
||||
@@ -259,10 +216,9 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
|
||||
if role in {"user", "assistant"}:
|
||||
content = msg.get("content", "")
|
||||
if isinstance(content, list):
|
||||
content_parts = _chat_content_to_responses_parts(content, role=role)
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
content_parts = _chat_content_to_responses_parts(content)
|
||||
content_text = "".join(
|
||||
p.get("text", "") for p in content_parts if p.get("type") == text_type
|
||||
p.get("text", "") for p in content_parts if p.get("type") == "input_text"
|
||||
)
|
||||
else:
|
||||
content_parts = []
|
||||
@@ -289,57 +245,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
|
||||
seen_item_ids.add(item_id)
|
||||
has_codex_reasoning = True
|
||||
|
||||
# Replay exact assistant message items (with id/phase) from
|
||||
# previous turns so the API can maintain prefix-cache hits.
|
||||
# OpenAI docs: "preserve and resend phase on all assistant
|
||||
# messages — dropping it can degrade performance."
|
||||
codex_message_items = msg.get("codex_message_items")
|
||||
replayed_message_items = 0
|
||||
if isinstance(codex_message_items, list):
|
||||
for raw_item in codex_message_items:
|
||||
if not isinstance(raw_item, dict):
|
||||
continue
|
||||
if raw_item.get("type") != "message" or raw_item.get("role") != "assistant":
|
||||
continue
|
||||
raw_content_parts = raw_item.get("content")
|
||||
if not isinstance(raw_content_parts, list):
|
||||
continue
|
||||
|
||||
normalized_content_parts = []
|
||||
for part in raw_content_parts:
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
part_type = str(part.get("type") or "").strip()
|
||||
if part_type not in {"output_text", "text"}:
|
||||
continue
|
||||
text = part.get("text", "")
|
||||
if text is None:
|
||||
text = ""
|
||||
if not isinstance(text, str):
|
||||
text = str(text)
|
||||
normalized_content_parts.append({"type": "output_text", "text": text})
|
||||
|
||||
if not normalized_content_parts:
|
||||
continue
|
||||
|
||||
replay_item = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(raw_item.get("status")),
|
||||
"content": normalized_content_parts,
|
||||
}
|
||||
item_id = raw_item.get("id")
|
||||
if isinstance(item_id, str) and item_id.strip():
|
||||
replay_item["id"] = item_id.strip()
|
||||
phase = raw_item.get("phase")
|
||||
if isinstance(phase, str) and phase.strip():
|
||||
replay_item["phase"] = phase.strip()
|
||||
items.append(replay_item)
|
||||
replayed_message_items += 1
|
||||
|
||||
if replayed_message_items > 0:
|
||||
pass
|
||||
elif content_parts:
|
||||
if content_parts:
|
||||
items.append({"role": "assistant", "content": content_parts})
|
||||
elif content_text.strip():
|
||||
items.append({"role": "assistant", "content": content_text})
|
||||
@@ -499,47 +405,6 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
normalized.append(reasoning_item)
|
||||
continue
|
||||
|
||||
if item_type == "message":
|
||||
role = item.get("role")
|
||||
if role != "assistant":
|
||||
raise ValueError(f"Codex Responses input[{idx}] message items must have role='assistant'.")
|
||||
content = item.get("content")
|
||||
if not isinstance(content, list):
|
||||
raise ValueError(f"Codex Responses input[{idx}] message item must have content list.")
|
||||
normalized_content = []
|
||||
for part_idx, part in enumerate(content):
|
||||
if not isinstance(part, dict):
|
||||
raise ValueError(
|
||||
f"Codex Responses input[{idx}] message content[{part_idx}] must be an object."
|
||||
)
|
||||
part_type = part.get("type")
|
||||
if part_type not in {"output_text", "text"}:
|
||||
raise ValueError(
|
||||
f"Codex Responses input[{idx}] message content[{part_idx}] has unsupported type {part_type!r}."
|
||||
)
|
||||
text = part.get("text", "")
|
||||
if text is None:
|
||||
text = ""
|
||||
if not isinstance(text, str):
|
||||
text = str(text)
|
||||
normalized_content.append({"type": "output_text", "text": text})
|
||||
if not normalized_content:
|
||||
raise ValueError(f"Codex Responses input[{idx}] message item must contain at least one text part.")
|
||||
normalized_item: Dict[str, Any] = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(item.get("status")),
|
||||
"content": normalized_content,
|
||||
}
|
||||
item_id = item.get("id")
|
||||
if isinstance(item_id, str) and item_id.strip():
|
||||
normalized_item["id"] = item_id.strip()
|
||||
phase = item.get("phase")
|
||||
if isinstance(phase, str) and phase.strip():
|
||||
normalized_item["phase"] = phase.strip()
|
||||
normalized.append(normalized_item)
|
||||
continue
|
||||
|
||||
role = item.get("role")
|
||||
if role in {"user", "assistant"}:
|
||||
content = item.get("content", "")
|
||||
@@ -547,16 +412,13 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
content = ""
|
||||
if isinstance(content, list):
|
||||
# Multimodal content from ``_chat_messages_to_responses_input``
|
||||
# is already in Responses format (``input_text`` / ``output_text``
|
||||
# / ``input_image``). Validate each part and pass through.
|
||||
# Use the correct text type for the role — ``output_text`` for
|
||||
# assistant messages, ``input_text`` for user messages.
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
# is already in Responses format (``input_text`` / ``input_image``).
|
||||
# Validate each part and pass through.
|
||||
validated: List[Dict[str, Any]] = []
|
||||
for part_idx, part in enumerate(content):
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
validated.append({"type": text_type, "text": part})
|
||||
validated.append({"type": "input_text", "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
raise ValueError(
|
||||
@@ -567,7 +429,7 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
text = part.get("text", "")
|
||||
if not isinstance(text, str):
|
||||
text = str(text or "")
|
||||
validated.append({"type": text_type, "text": text})
|
||||
validated.append({"type": "input_text", "text": text})
|
||||
elif ptype in {"input_image", "image_url"}:
|
||||
image_ref = part.get("image_url", "")
|
||||
detail = part.get("detail")
|
||||
@@ -824,7 +686,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
content_parts: List[str] = []
|
||||
reasoning_parts: List[str] = []
|
||||
reasoning_items_raw: List[Dict[str, Any]] = []
|
||||
message_items_raw: List[Dict[str, Any]] = []
|
||||
tool_calls: List[Any] = []
|
||||
has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
|
||||
saw_commentary_phase = False
|
||||
@@ -843,7 +704,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
|
||||
if item_type == "message":
|
||||
item_phase = getattr(item, "phase", None)
|
||||
normalized_phase = None
|
||||
if isinstance(item_phase, str):
|
||||
normalized_phase = item_phase.strip().lower()
|
||||
if normalized_phase in {"commentary", "analysis"}:
|
||||
@@ -853,18 +713,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
message_text = _extract_responses_message_text(item)
|
||||
if message_text:
|
||||
content_parts.append(message_text)
|
||||
raw_message_item: Dict[str, Any] = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(item_status),
|
||||
"content": [{"type": "output_text", "text": message_text}],
|
||||
}
|
||||
item_id = getattr(item, "id", None)
|
||||
if isinstance(item_id, str) and item_id:
|
||||
raw_message_item["id"] = item_id
|
||||
if normalized_phase:
|
||||
raw_message_item["phase"] = normalized_phase
|
||||
message_items_raw.append(raw_message_item)
|
||||
elif item_type == "reasoning":
|
||||
reasoning_text = _extract_responses_reasoning_text(item)
|
||||
if reasoning_text:
|
||||
@@ -939,37 +787,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
if isinstance(out_text, str):
|
||||
final_text = out_text.strip()
|
||||
|
||||
# ── Tool-call leak recovery ──────────────────────────────────
|
||||
# gpt-5.x on the Codex Responses API sometimes degenerates and emits
|
||||
# what should be a structured `function_call` item as plain assistant
|
||||
# text using the Harmony/Codex serialization (``to=functions.foo
|
||||
# {json}`` or ``assistant to=functions.foo {json}``). The model
|
||||
# intended to call a tool, but the intent never made it into
|
||||
# ``response.output`` as a ``function_call`` item, so ``tool_calls``
|
||||
# is empty here. If we pass this through, the parent sees a
|
||||
# confident-looking summary with no audit trail (empty ``tool_trace``)
|
||||
# and no tools actually ran — the Taiwan-embassy-email incident.
|
||||
#
|
||||
# Detection: leaked tokens always contain ``to=functions.<name>`` and
|
||||
# the assistant message has no real tool calls. Treat it as incomplete
|
||||
# so the existing Codex-incomplete continuation path (3 retries,
|
||||
# handled in run_agent.py) gets a chance to re-elicit a proper
|
||||
# ``function_call`` item. The existing loop already handles message
|
||||
# append, dedup, and retry budget.
|
||||
leaked_tool_call_text = False
|
||||
if final_text and not tool_calls and _TOOL_CALL_LEAK_PATTERN.search(final_text):
|
||||
leaked_tool_call_text = True
|
||||
logger.warning(
|
||||
"Codex response contains leaked tool-call text in assistant content "
|
||||
"(no structured function_call items). Treating as incomplete so the "
|
||||
"continuation path can re-elicit a proper tool call. Leaked snippet: %r",
|
||||
final_text[:300],
|
||||
)
|
||||
# Clear the text so downstream code doesn't surface the garbage as
|
||||
# a summary. The encrypted reasoning items (if any) are preserved
|
||||
# so the model keeps its chain-of-thought on the retry.
|
||||
final_text = ""
|
||||
|
||||
assistant_message = SimpleNamespace(
|
||||
content=final_text,
|
||||
tool_calls=tool_calls,
|
||||
@@ -977,13 +794,10 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
reasoning_content=None,
|
||||
reasoning_details=None,
|
||||
codex_reasoning_items=reasoning_items_raw or None,
|
||||
codex_message_items=message_items_raw or None,
|
||||
)
|
||||
|
||||
if tool_calls:
|
||||
finish_reason = "tool_calls"
|
||||
elif leaked_tool_call_text:
|
||||
finish_reason = "incomplete"
|
||||
elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
|
||||
finish_reason = "incomplete"
|
||||
elif reasoning_items_raw and not final_text:
|
||||
|
||||
@@ -64,47 +64,6 @@ _CHARS_PER_TOKEN = 4
|
||||
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
|
||||
|
||||
|
||||
def _content_text_for_contains(content: Any) -> str:
|
||||
"""Return a best-effort text view of message content.
|
||||
|
||||
Used only for substring checks when we need to know whether we've already
|
||||
appended a note to a message. Keeps multimodal lists intact elsewhere.
|
||||
"""
|
||||
if content is None:
|
||||
return ""
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
parts: list[str] = []
|
||||
for item in content:
|
||||
if isinstance(item, str):
|
||||
parts.append(item)
|
||||
elif isinstance(item, dict):
|
||||
text = item.get("text")
|
||||
if isinstance(text, str):
|
||||
parts.append(text)
|
||||
return "\n".join(part for part in parts if part)
|
||||
return str(content)
|
||||
|
||||
|
||||
def _append_text_to_content(content: Any, text: str, *, prepend: bool = False) -> Any:
|
||||
"""Append or prepend plain text to message content safely.
|
||||
|
||||
Compression sometimes needs to add a note or merge a summary into an
|
||||
existing message. Message content may be plain text or a multimodal list of
|
||||
blocks, so direct string concatenation is not always safe.
|
||||
"""
|
||||
if content is None:
|
||||
return text
|
||||
if isinstance(content, str):
|
||||
return text + content if prepend else content + text
|
||||
if isinstance(content, list):
|
||||
text_block = {"type": "text", "text": text}
|
||||
return [text_block, *content] if prepend else [*content, text_block]
|
||||
rendered = str(content)
|
||||
return text + rendered if prepend else rendered + text
|
||||
|
||||
|
||||
def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
|
||||
"""Shrink long string values inside a tool-call arguments JSON blob while
|
||||
preserving JSON validity.
|
||||
@@ -294,7 +253,6 @@ class ContextCompressor(ContextEngine):
|
||||
self._context_probed = False
|
||||
self._context_probe_persistable = False
|
||||
self._previous_summary = None
|
||||
self._last_summary_error = None
|
||||
self._last_compression_savings_pct = 100.0
|
||||
self._ineffective_compression_count = 0
|
||||
|
||||
@@ -318,13 +276,6 @@ class ContextCompressor(ContextEngine):
|
||||
int(context_length * self.threshold_percent),
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
)
|
||||
# Recalculate token budgets for the new context length so the
|
||||
# compressor stays calibrated after a model switch (e.g. 200K → 32K).
|
||||
target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
|
||||
self.tail_token_budget = target_tokens
|
||||
self.max_summary_tokens = min(
|
||||
int(context_length * 0.05), _SUMMARY_TOKENS_CEILING,
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -397,7 +348,6 @@ class ContextCompressor(ContextEngine):
|
||||
self._last_compression_savings_pct: float = 100.0
|
||||
self._ineffective_compression_count: int = 0
|
||||
self._summary_failure_cooldown_until: float = 0.0
|
||||
self._last_summary_error: Optional[str] = None
|
||||
|
||||
def update_from_response(self, usage: Dict[str, Any]):
|
||||
"""Update tracked token usage from API response."""
|
||||
@@ -821,12 +771,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
self._previous_summary = summary
|
||||
self._summary_failure_cooldown_until = 0.0
|
||||
self._summary_model_fallen_back = False
|
||||
self._last_summary_error = None
|
||||
return self._with_summary_prefix(summary)
|
||||
except RuntimeError:
|
||||
# No provider configured — long cooldown, unlikely to self-resolve
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
|
||||
self._last_summary_error = "no auxiliary LLM provider configured"
|
||||
logging.warning("Context compression: no provider available for "
|
||||
"summary. Middle turns will be dropped without summary "
|
||||
"for %d seconds.",
|
||||
@@ -859,15 +807,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
)
|
||||
self.summary_model = "" # empty = use main model
|
||||
self._summary_failure_cooldown_until = 0.0 # no cooldown
|
||||
return self._generate_summary(turns_to_summarize, focus_topic=focus_topic) # retry immediately
|
||||
return self._generate_summary(turns_to_summarize) # retry immediately
|
||||
|
||||
# Transient errors (timeout, rate limit, network) — shorter cooldown
|
||||
_transient_cooldown = 60
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
|
||||
err_text = str(e).strip() or e.__class__.__name__
|
||||
if len(err_text) > 220:
|
||||
err_text = err_text[:217].rstrip() + "..."
|
||||
self._last_summary_error = err_text
|
||||
logging.warning(
|
||||
"Failed to generate context summary: %s. "
|
||||
"Further summary attempts paused for %d seconds.",
|
||||
@@ -1114,21 +1058,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
|
||||
return max(cut_idx, head_end + 1)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# ContextEngine: manual /compress preflight
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
|
||||
"""Return True if there is a non-empty middle region to compact.
|
||||
|
||||
Overrides the ABC default so the gateway ``/compress`` guard can
|
||||
skip the LLM call when the transcript is still entirely inside
|
||||
the protected head/tail.
|
||||
"""
|
||||
compress_start = self._align_boundary_forward(messages, self.protect_first_n)
|
||||
compress_end = self._find_tail_cut_by_tokens(messages, compress_start)
|
||||
return compress_start < compress_end
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Main compression entry point
|
||||
# ------------------------------------------------------------------
|
||||
@@ -1215,13 +1144,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
for i in range(compress_start):
|
||||
msg = messages[i].copy()
|
||||
if i == 0 and msg.get("role") == "system":
|
||||
existing = msg.get("content")
|
||||
existing = msg.get("content") or ""
|
||||
_compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
|
||||
if _compression_note not in _content_text_for_contains(existing):
|
||||
msg["content"] = _append_text_to_content(
|
||||
existing,
|
||||
"\n\n" + _compression_note if isinstance(existing, str) and existing else _compression_note,
|
||||
)
|
||||
if _compression_note not in existing:
|
||||
msg["content"] = existing + "\n\n" + _compression_note
|
||||
compressed.append(msg)
|
||||
|
||||
# If LLM summary failed, insert a static fallback so the model
|
||||
@@ -1265,15 +1191,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
for i in range(compress_end, n_messages):
|
||||
msg = messages[i].copy()
|
||||
if _merge_summary_into_tail and i == compress_end:
|
||||
merged_prefix = (
|
||||
original = msg.get("content") or ""
|
||||
msg["content"] = (
|
||||
summary
|
||||
+ "\n\n--- END OF CONTEXT SUMMARY — "
|
||||
"respond to the message below, not the summary above ---\n\n"
|
||||
)
|
||||
msg["content"] = _append_text_to_content(
|
||||
msg.get("content"),
|
||||
merged_prefix,
|
||||
prepend=True,
|
||||
+ original
|
||||
)
|
||||
_merge_summary_into_tail = False
|
||||
compressed.append(msg)
|
||||
|
||||
@@ -78,7 +78,6 @@ class ContextEngine(ABC):
|
||||
self,
|
||||
messages: List[Dict[str, Any]],
|
||||
current_tokens: int = None,
|
||||
focus_topic: str = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Compact the message list and return the new message list.
|
||||
|
||||
@@ -87,12 +86,6 @@ class ContextEngine(ABC):
|
||||
context budget. The implementation is free to summarize, build a
|
||||
DAG, or do anything else — as long as the returned list is a valid
|
||||
OpenAI-format message sequence.
|
||||
|
||||
Args:
|
||||
focus_topic: Optional topic string from manual ``/compress <focus>``.
|
||||
Engines that support guided compression should prioritise
|
||||
preserving information related to this topic. Engines that
|
||||
don't support it may simply ignore this argument.
|
||||
"""
|
||||
|
||||
# -- Optional: pre-flight check ----------------------------------------
|
||||
@@ -105,21 +98,6 @@ class ContextEngine(ABC):
|
||||
"""
|
||||
return False
|
||||
|
||||
# -- Optional: manual /compress preflight ------------------------------
|
||||
|
||||
def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
|
||||
"""Quick check: is there anything in ``messages`` that can be compacted?
|
||||
|
||||
Used by the gateway ``/compress`` command as a preflight guard —
|
||||
returning False lets the gateway report "nothing to compress yet"
|
||||
without making an LLM call.
|
||||
|
||||
Default returns True (always attempt). Engines with a cheap way
|
||||
to introspect their own head/tail boundaries should override this
|
||||
to return False when the transcript is still entirely protected.
|
||||
"""
|
||||
return True
|
||||
|
||||
# -- Optional: session lifecycle ---------------------------------------
|
||||
|
||||
def on_session_start(self, session_id: str, **kwargs) -> None:
|
||||
|
||||
@@ -46,47 +46,6 @@ def _resolve_args() -> list[str]:
|
||||
return shlex.split(raw)
|
||||
|
||||
|
||||
def _resolve_home_dir() -> str:
|
||||
"""Return a stable HOME for child ACP processes."""
|
||||
|
||||
try:
|
||||
from hermes_constants import get_subprocess_home
|
||||
|
||||
profile_home = get_subprocess_home()
|
||||
if profile_home:
|
||||
return profile_home
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
home = os.environ.get("HOME", "").strip()
|
||||
if home:
|
||||
return home
|
||||
|
||||
expanded = os.path.expanduser("~")
|
||||
if expanded and expanded != "~":
|
||||
return expanded
|
||||
|
||||
try:
|
||||
import pwd
|
||||
|
||||
resolved = pwd.getpwuid(os.getuid()).pw_dir.strip()
|
||||
if resolved:
|
||||
return resolved
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Last resort: /tmp (writable on any POSIX system). Avoids crashing the
|
||||
# subprocess with no HOME; callers can set HERMES_HOME explicitly if they
|
||||
# need a different writable dir.
|
||||
return "/tmp"
|
||||
|
||||
|
||||
def _build_subprocess_env() -> dict[str, str]:
|
||||
env = os.environ.copy()
|
||||
env["HOME"] = _resolve_home_dir()
|
||||
return env
|
||||
|
||||
|
||||
def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
|
||||
return {
|
||||
"jsonrpc": "2.0",
|
||||
@@ -423,7 +382,6 @@ class CopilotACPClient:
|
||||
text=True,
|
||||
bufsize=1,
|
||||
cwd=self._acp_cwd,
|
||||
env=_build_subprocess_env(),
|
||||
)
|
||||
except FileNotFoundError as exc:
|
||||
raise RuntimeError(
|
||||
|
||||
+3
-108
@@ -455,61 +455,6 @@ class CredentialPool:
|
||||
logger.debug("Failed to sync from credentials file: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
|
||||
"""Sync a Nous pool entry from auth.json if tokens differ.
|
||||
|
||||
Nous OAuth refresh tokens are single-use. When another process
|
||||
(e.g. a concurrent cron) refreshes the token via
|
||||
``resolve_nous_runtime_credentials``, it writes fresh tokens to
|
||||
auth.json under ``_auth_store_lock``. The pool entry's tokens
|
||||
become stale. This method detects that and adopts the newer pair,
|
||||
avoiding a "refresh token reuse" revocation on the Nous Portal.
|
||||
"""
|
||||
if self.provider != "nous" or entry.source != "device_code":
|
||||
return entry
|
||||
try:
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
state = _load_provider_state(auth_store, "nous")
|
||||
if not state:
|
||||
return entry
|
||||
store_refresh = state.get("refresh_token", "")
|
||||
store_access = state.get("access_token", "")
|
||||
if store_refresh and store_refresh != entry.refresh_token:
|
||||
logger.debug(
|
||||
"Pool entry %s: syncing tokens from auth.json (Nous refresh token changed)",
|
||||
entry.id,
|
||||
)
|
||||
field_updates: Dict[str, Any] = {
|
||||
"access_token": store_access,
|
||||
"refresh_token": store_refresh,
|
||||
"last_status": None,
|
||||
"last_status_at": None,
|
||||
"last_error_code": None,
|
||||
}
|
||||
if state.get("expires_at"):
|
||||
field_updates["expires_at"] = state["expires_at"]
|
||||
if state.get("agent_key"):
|
||||
field_updates["agent_key"] = state["agent_key"]
|
||||
if state.get("agent_key_expires_at"):
|
||||
field_updates["agent_key_expires_at"] = state["agent_key_expires_at"]
|
||||
if state.get("inference_base_url"):
|
||||
field_updates["inference_base_url"] = state["inference_base_url"]
|
||||
extra_updates = dict(entry.extra)
|
||||
for extra_key in ("obtained_at", "expires_in", "agent_key_id",
|
||||
"agent_key_expires_in", "agent_key_reused",
|
||||
"agent_key_obtained_at"):
|
||||
val = state.get(extra_key)
|
||||
if val is not None:
|
||||
extra_updates[extra_key] = val
|
||||
updated = replace(entry, extra=extra_updates, **field_updates)
|
||||
self._replace_entry(entry, updated)
|
||||
self._persist()
|
||||
return updated
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to sync Nous entry from auth.json: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None:
|
||||
"""Write refreshed pool entry tokens back to auth.json providers.
|
||||
|
||||
@@ -616,9 +561,6 @@ class CredentialPool:
|
||||
last_refresh=refreshed.get("last_refresh"),
|
||||
)
|
||||
elif self.provider == "nous":
|
||||
synced = self._sync_nous_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
nous_state = {
|
||||
"access_token": entry.access_token,
|
||||
"refresh_token": entry.refresh_token,
|
||||
@@ -693,26 +635,6 @@ class CredentialPool:
|
||||
# Credentials file had a valid (non-expired) token — use it directly
|
||||
logger.debug("Credentials file has valid token, using without refresh")
|
||||
return synced
|
||||
# For nous: another process may have consumed the refresh token
|
||||
# between our proactive sync and the HTTP call. Re-sync from
|
||||
# auth.json and adopt the fresh tokens if available.
|
||||
if self.provider == "nous":
|
||||
synced = self._sync_nous_entry_from_auth_store(entry)
|
||||
if synced.refresh_token != entry.refresh_token:
|
||||
logger.debug("Nous refresh failed but auth.json has newer tokens — adopting")
|
||||
updated = replace(
|
||||
synced,
|
||||
last_status=STATUS_OK,
|
||||
last_status_at=None,
|
||||
last_error_code=None,
|
||||
last_error_reason=None,
|
||||
last_error_message=None,
|
||||
last_error_reset_at=None,
|
||||
)
|
||||
self._replace_entry(synced, updated)
|
||||
self._persist()
|
||||
self._sync_device_code_entry_to_auth_store(updated)
|
||||
return updated
|
||||
self._mark_exhausted(entry, None)
|
||||
return None
|
||||
|
||||
@@ -776,17 +698,6 @@ class CredentialPool:
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
# For nous entries, sync from auth.json before status checks.
|
||||
# Another process may have successfully refreshed via
|
||||
# resolve_nous_runtime_credentials(), making this entry's
|
||||
# exhausted status stale.
|
||||
if (self.provider == "nous"
|
||||
and entry.source == "device_code"
|
||||
and entry.last_status == STATUS_EXHAUSTED):
|
||||
synced = self._sync_nous_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
if entry.last_status == STATUS_EXHAUSTED:
|
||||
exhausted_until = _exhausted_until(entry)
|
||||
if exhausted_until is not None and now < exhausted_until:
|
||||
@@ -828,11 +739,8 @@ class CredentialPool:
|
||||
|
||||
if self._strategy == STRATEGY_LEAST_USED and len(available) > 1:
|
||||
entry = min(available, key=lambda e: e.request_count)
|
||||
# Increment usage counter so subsequent selections distribute load
|
||||
updated = replace(entry, request_count=entry.request_count + 1)
|
||||
self._replace_entry(entry, updated)
|
||||
self._current_id = entry.id
|
||||
return updated
|
||||
return entry
|
||||
|
||||
if self._strategy == STRATEGY_ROUND_ROBIN and len(available) > 1:
|
||||
entry = available[0]
|
||||
@@ -1148,18 +1056,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
"inference_base_url": state.get("inference_base_url"),
|
||||
"agent_key": state.get("agent_key"),
|
||||
"agent_key_expires_at": state.get("agent_key_expires_at"),
|
||||
# Carry the mint/refresh timestamps into the pool so
|
||||
# freshness-sensitive consumers (self-heal hooks, pool
|
||||
# pruning by age) can distinguish just-minted credentials
|
||||
# from stale ones. Without these, fresh device_code
|
||||
# entries get obtained_at=None and look older than they
|
||||
# are (#15099).
|
||||
"obtained_at": state.get("obtained_at"),
|
||||
"expires_in": state.get("expires_in"),
|
||||
"agent_key_id": state.get("agent_key_id"),
|
||||
"agent_key_expires_in": state.get("agent_key_expires_in"),
|
||||
"agent_key_reused": state.get("agent_key_reused"),
|
||||
"agent_key_obtained_at": state.get("agent_key_obtained_at"),
|
||||
"tls": state.get("tls") if isinstance(state.get("tls"), dict) else None,
|
||||
"label": seeded_label,
|
||||
},
|
||||
@@ -1170,10 +1066,9 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
# env vars (COPILOT_GITHUB_TOKEN / GH_TOKEN). They don't live in
|
||||
# the auth store or credential pool, so we resolve them here.
|
||||
try:
|
||||
from hermes_cli.copilot_auth import resolve_copilot_token, get_copilot_api_token
|
||||
from hermes_cli.copilot_auth import resolve_copilot_token
|
||||
token, source = resolve_copilot_token()
|
||||
if token:
|
||||
api_token = get_copilot_api_token(token)
|
||||
source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
|
||||
if not _is_suppressed(provider, source_name):
|
||||
active_sources.add(source_name)
|
||||
@@ -1185,7 +1080,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
{
|
||||
"source": source_name,
|
||||
"auth_type": AUTH_TYPE_API_KEY,
|
||||
"access_token": api_token,
|
||||
"access_token": token,
|
||||
"base_url": pconfig.inference_base_url if pconfig else "",
|
||||
"label": source,
|
||||
},
|
||||
|
||||
+10
-129
@@ -45,7 +45,6 @@ class FailoverReason(enum.Enum):
|
||||
|
||||
# Model
|
||||
model_not_found = "model_not_found" # 404 or invalid model — fallback to different model
|
||||
provider_policy_blocked = "provider_policy_blocked" # Aggregator (e.g. OpenRouter) blocked the only endpoint due to account data/privacy policy
|
||||
|
||||
# Request format
|
||||
format_error = "format_error" # 400 bad request — abort or strip + retry
|
||||
@@ -195,29 +194,6 @@ _MODEL_NOT_FOUND_PATTERNS = [
|
||||
"unsupported model",
|
||||
]
|
||||
|
||||
# OpenRouter aggregator policy-block patterns.
|
||||
#
|
||||
# When a user's OpenRouter account privacy setting (or a per-request
|
||||
# `provider.data_collection: deny` preference) excludes the only endpoint
|
||||
# serving a model, OpenRouter returns 404 with a *specific* message that is
|
||||
# distinct from "model not found":
|
||||
#
|
||||
# "No endpoints available matching your guardrail restrictions and
|
||||
# data policy. Configure: https://openrouter.ai/settings/privacy"
|
||||
#
|
||||
# We classify this as `provider_policy_blocked` rather than
|
||||
# `model_not_found` because:
|
||||
# - The model *exists* — model_not_found is misleading in logs
|
||||
# - Provider fallback won't help: the account-level setting applies to
|
||||
# every call on the same OpenRouter account
|
||||
# - The error body already contains the fix URL, so the user gets
|
||||
# actionable guidance without us rewriting the message
|
||||
_PROVIDER_POLICY_BLOCKED_PATTERNS = [
|
||||
"no endpoints available matching your guardrail",
|
||||
"no endpoints available matching your data policy",
|
||||
"no endpoints found matching your data policy",
|
||||
]
|
||||
|
||||
# Auth patterns (non-status-code signals)
|
||||
_AUTH_PATTERNS = [
|
||||
"invalid api key",
|
||||
@@ -244,25 +220,12 @@ _TRANSPORT_ERROR_TYPES = frozenset({
|
||||
"ConnectionAbortedError", "BrokenPipeError",
|
||||
"TimeoutError", "ReadError",
|
||||
"ServerDisconnectedError",
|
||||
# SSL/TLS transport errors — transient mid-stream handshake/record
|
||||
# failures that should retry rather than surface as a stalled session.
|
||||
# ssl.SSLError subclasses OSError (caught by isinstance) but we list
|
||||
# the type names here so provider-wrapped SSL errors (e.g. when the
|
||||
# SDK re-raises without preserving the exception chain) still classify
|
||||
# as transport rather than falling through to the unknown bucket.
|
||||
"SSLError", "SSLZeroReturnError", "SSLWantReadError",
|
||||
"SSLWantWriteError", "SSLEOFError", "SSLSyscallError",
|
||||
# OpenAI SDK errors (not subclasses of Python builtins)
|
||||
"APIConnectionError",
|
||||
"APITimeoutError",
|
||||
})
|
||||
|
||||
# Server disconnect patterns (no status code, but transport-level).
|
||||
# These are the "ambiguous" patterns — a plain connection close could be
|
||||
# transient transport hiccup OR server-side context overflow rejection
|
||||
# (common when the API gateway disconnects instead of returning an HTTP
|
||||
# error for oversized requests). A large session + one of these patterns
|
||||
# triggers the context-overflow-with-compression recovery path.
|
||||
# Server disconnect patterns (no status code, but transport-level)
|
||||
_SERVER_DISCONNECT_PATTERNS = [
|
||||
"server disconnected",
|
||||
"peer closed connection",
|
||||
@@ -273,40 +236,6 @@ _SERVER_DISCONNECT_PATTERNS = [
|
||||
"incomplete chunked read",
|
||||
]
|
||||
|
||||
# SSL/TLS transient failure patterns — intentionally distinct from
|
||||
# _SERVER_DISCONNECT_PATTERNS above.
|
||||
#
|
||||
# An SSL alert mid-stream is almost always a transport-layer hiccup
|
||||
# (flaky network, mid-session TLS renegotiation failure, load balancer
|
||||
# dropping the connection) — NOT a server-side context overflow signal.
|
||||
# So we want the retry path but NOT the compression path; lumping these
|
||||
# into _SERVER_DISCONNECT_PATTERNS would trigger unnecessary (and
|
||||
# expensive) context compression on any large-session SSL hiccup.
|
||||
#
|
||||
# The OpenSSL library constructs error codes by prepending a format string
|
||||
# to the uppercased alert reason; OpenSSL 3.x changed the separator
|
||||
# (e.g. `SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
|
||||
# which silently stopped matching anything explicit. Matching on the
|
||||
# stable substrings (`bad record mac`, `ssl alert`, `tls alert`, etc.)
|
||||
# survives future OpenSSL format churn without code changes.
|
||||
_SSL_TRANSIENT_PATTERNS = [
|
||||
# Space-separated (human-readable form, Python ssl module, most SDKs)
|
||||
"bad record mac",
|
||||
"ssl alert",
|
||||
"tls alert",
|
||||
"ssl handshake failure",
|
||||
"tlsv1 alert",
|
||||
"sslv3 alert",
|
||||
# Underscore-separated (OpenSSL error code tokens, e.g.
|
||||
# `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC`, `SSLV3_ALERT_BAD_RECORD_MAC`)
|
||||
"bad_record_mac",
|
||||
"ssl_alert",
|
||||
"tls_alert",
|
||||
"tls_alert_internal_error",
|
||||
# Python ssl module prefix, e.g. "[SSL: BAD_RECORD_MAC]"
|
||||
"[ssl:",
|
||||
]
|
||||
|
||||
|
||||
# ── Classification pipeline ─────────────────────────────────────────────
|
||||
|
||||
@@ -326,10 +255,9 @@ def classify_api_error(
|
||||
2. HTTP status code + message-aware refinement
|
||||
3. Error code classification (from body)
|
||||
4. Message pattern matching (billing vs rate_limit vs context vs auth)
|
||||
5. SSL/TLS transient alert patterns → retry as timeout
|
||||
5. Transport error heuristics
|
||||
6. Server disconnect + large session → context overflow
|
||||
7. Transport error heuristics
|
||||
8. Fallback: unknown (retryable with backoff)
|
||||
7. Fallback: unknown (retryable with backoff)
|
||||
|
||||
Args:
|
||||
error: The exception from the API call.
|
||||
@@ -343,11 +271,6 @@ def classify_api_error(
|
||||
"""
|
||||
status_code = _extract_status_code(error)
|
||||
error_type = type(error).__name__
|
||||
# Copilot/GitHub Models RateLimitError may not set .status_code; force 429
|
||||
# so downstream rate-limit handling (classifier reason, pool rotation,
|
||||
# fallback gating) fires correctly instead of misclassifying as generic.
|
||||
if status_code is None and error_type == "RateLimitError":
|
||||
status_code = 429
|
||||
body = _extract_error_body(error)
|
||||
error_code = _extract_error_code(body)
|
||||
|
||||
@@ -465,18 +388,7 @@ def classify_api_error(
|
||||
if classified is not None:
|
||||
return classified
|
||||
|
||||
# ── 5. SSL/TLS transient errors → retry as timeout (not compression) ──
|
||||
# SSL alerts mid-stream are transport hiccups, not server-side context
|
||||
# overflow signals. Classify before the disconnect check so a large
|
||||
# session doesn't incorrectly trigger context compression when the real
|
||||
# cause is a flaky TLS handshake. Also matches when the error is
|
||||
# wrapped in a generic exception whose message string carries the SSL
|
||||
# alert text but the type isn't ssl.SSLError (happens with some SDKs
|
||||
# that re-raise without chaining).
|
||||
if any(p in error_msg for p in _SSL_TRANSIENT_PATTERNS):
|
||||
return _result(FailoverReason.timeout, retryable=True)
|
||||
|
||||
# ── 6. Server disconnect + large session → context overflow ─────
|
||||
# ── 5. Server disconnect + large session → context overflow ─────
|
||||
# Must come BEFORE generic transport error catch — a disconnect on
|
||||
# a large session is more likely context overflow than a transient
|
||||
# transport hiccup. Without this ordering, RemoteProtocolError
|
||||
@@ -493,12 +405,12 @@ def classify_api_error(
|
||||
)
|
||||
return _result(FailoverReason.timeout, retryable=True)
|
||||
|
||||
# ── 7. Transport / timeout heuristics ───────────────────────────
|
||||
# ── 6. Transport / timeout heuristics ───────────────────────────
|
||||
|
||||
if error_type in _TRANSPORT_ERROR_TYPES or isinstance(error, (TimeoutError, ConnectionError, OSError)):
|
||||
return _result(FailoverReason.timeout, retryable=True)
|
||||
|
||||
# ── 8. Fallback: unknown ────────────────────────────────────────
|
||||
# ── 7. Fallback: unknown ────────────────────────────────────────
|
||||
|
||||
return _result(FailoverReason.unknown, retryable=True)
|
||||
|
||||
@@ -552,33 +464,17 @@ def _classify_by_status(
|
||||
return _classify_402(error_msg, result_fn)
|
||||
|
||||
if status_code == 404:
|
||||
# OpenRouter policy-block 404 — distinct from "model not found".
|
||||
# The model exists; the user's account privacy setting excludes the
|
||||
# only endpoint serving it. Falling back to another provider won't
|
||||
# help (same account setting applies). The error body already
|
||||
# contains the fix URL, so just surface it.
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
# Generic 404 with no "model not found" signal — could be a wrong
|
||||
# endpoint path (common with local llama.cpp / Ollama / vLLM when
|
||||
# the URL is slightly misconfigured), a proxy routing glitch, or
|
||||
# a transient backend issue. Classifying these as model_not_found
|
||||
# silently falls back to a different provider and tells the model
|
||||
# the model is missing, which is wrong and wastes a turn. Treat
|
||||
# as unknown so the retry loop surfaces the real error instead.
|
||||
# Generic 404 — could be model or endpoint
|
||||
return result_fn(
|
||||
FailoverReason.unknown,
|
||||
retryable=True,
|
||||
FailoverReason.model_not_found,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
if status_code == 413:
|
||||
@@ -680,12 +576,6 @@ def _classify_400(
|
||||
)
|
||||
|
||||
# Some providers return model-not-found as 400 instead of 404 (e.g. OpenRouter).
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
@@ -858,15 +748,6 @@ def _classify_by_message(
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Provider policy-block (aggregator-side guardrail) — check before
|
||||
# model_not_found so we don't mis-label as a missing model.
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
|
||||
# Model not found patterns
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
|
||||
@@ -44,97 +44,6 @@ def is_native_gemini_base_url(base_url: str) -> bool:
|
||||
return not normalized.endswith("/openai")
|
||||
|
||||
|
||||
def probe_gemini_tier(
|
||||
api_key: str,
|
||||
base_url: str = DEFAULT_GEMINI_BASE_URL,
|
||||
*,
|
||||
model: str = "gemini-2.5-flash",
|
||||
timeout: float = 10.0,
|
||||
) -> str:
|
||||
"""Probe a Google AI Studio API key and return its tier.
|
||||
|
||||
Returns one of:
|
||||
|
||||
- ``"free"`` -- key is on the free tier (unusable with Hermes)
|
||||
- ``"paid"`` -- key is on a paid tier
|
||||
- ``"unknown"`` -- probe failed; callers should proceed without blocking.
|
||||
"""
|
||||
key = (api_key or "").strip()
|
||||
if not key:
|
||||
return "unknown"
|
||||
|
||||
normalized_base = str(base_url or DEFAULT_GEMINI_BASE_URL).strip().rstrip("/")
|
||||
if not normalized_base:
|
||||
normalized_base = DEFAULT_GEMINI_BASE_URL
|
||||
if normalized_base.lower().endswith("/openai"):
|
||||
normalized_base = normalized_base[: -len("/openai")]
|
||||
|
||||
url = f"{normalized_base}/models/{model}:generateContent"
|
||||
payload = {
|
||||
"contents": [{"role": "user", "parts": [{"text": "hi"}]}],
|
||||
"generationConfig": {"maxOutputTokens": 1},
|
||||
}
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=timeout) as client:
|
||||
resp = client.post(
|
||||
url,
|
||||
params={"key": key},
|
||||
json=payload,
|
||||
headers={"Content-Type": "application/json"},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("probe_gemini_tier: network error: %s", exc)
|
||||
return "unknown"
|
||||
|
||||
headers_lower = {k.lower(): v for k, v in resp.headers.items()}
|
||||
rpd_header = headers_lower.get("x-ratelimit-limit-requests-per-day")
|
||||
if rpd_header:
|
||||
try:
|
||||
rpd_val = int(rpd_header)
|
||||
except (TypeError, ValueError):
|
||||
rpd_val = None
|
||||
# Published free-tier daily caps (Dec 2025):
|
||||
# gemini-2.5-pro: 100, gemini-2.5-flash: 250, flash-lite: 1000
|
||||
# Tier 1 starts at ~1500+ for Flash. We treat <= 1000 as free.
|
||||
if rpd_val is not None and rpd_val <= 1000:
|
||||
return "free"
|
||||
if rpd_val is not None and rpd_val > 1000:
|
||||
return "paid"
|
||||
|
||||
if resp.status_code == 429:
|
||||
body_text = ""
|
||||
try:
|
||||
body_text = resp.text or ""
|
||||
except Exception:
|
||||
body_text = ""
|
||||
if "free_tier" in body_text.lower():
|
||||
return "free"
|
||||
return "paid"
|
||||
|
||||
if 200 <= resp.status_code < 300:
|
||||
return "paid"
|
||||
|
||||
return "unknown"
|
||||
|
||||
|
||||
def is_free_tier_quota_error(error_message: str) -> bool:
|
||||
"""Return True when a Gemini 429 message indicates free-tier exhaustion."""
|
||||
if not error_message:
|
||||
return False
|
||||
return "free_tier" in error_message.lower()
|
||||
|
||||
|
||||
_FREE_TIER_GUIDANCE = (
|
||||
"\n\nYour Google API key is on the free tier (<= 250 requests/day for "
|
||||
"gemini-2.5-flash). Hermes typically makes 3-10 API calls per user turn, "
|
||||
"so the free tier is exhausted in a handful of messages and cannot sustain "
|
||||
"an agent session. Enable billing on your Google Cloud project and "
|
||||
"regenerate the key in a billing-enabled project: "
|
||||
"https://aistudio.google.com/apikey"
|
||||
)
|
||||
|
||||
|
||||
class GeminiAPIError(Exception):
|
||||
"""Error shape compatible with Hermes retry/error classification."""
|
||||
|
||||
@@ -741,12 +650,6 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
|
||||
else:
|
||||
message = f"Gemini returned HTTP {status}: {body_text[:500]}"
|
||||
|
||||
# Free-tier quota exhaustion -> append actionable guidance so users who
|
||||
# bypassed the setup wizard (direct GOOGLE_API_KEY in .env) still learn
|
||||
# that the free tier cannot sustain an agent session.
|
||||
if status == 429 and is_free_tier_quota_error(err_message or body_text):
|
||||
message = message + _FREE_TIER_GUIDANCE
|
||||
|
||||
return GeminiAPIError(
|
||||
message,
|
||||
code=code,
|
||||
@@ -801,13 +704,6 @@ class GeminiNativeClient:
|
||||
http_client: Optional[httpx.Client] = None,
|
||||
**_: Any,
|
||||
) -> None:
|
||||
if not (api_key or "").strip():
|
||||
raise RuntimeError(
|
||||
"Gemini native client requires an API key, but none was provided. "
|
||||
"Set GOOGLE_API_KEY or GEMINI_API_KEY in your environment / ~/.hermes/.env "
|
||||
"(get one at https://aistudio.google.com/app/apikey), or run `hermes setup` "
|
||||
"to configure the Google provider."
|
||||
)
|
||||
self.api_key = api_key
|
||||
normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/")
|
||||
if normalized_base.endswith("/openai"):
|
||||
|
||||
@@ -73,20 +73,6 @@ def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]:
|
||||
]
|
||||
continue
|
||||
cleaned[key] = value
|
||||
|
||||
# Gemini's Schema validator requires every ``enum`` entry to be a string,
|
||||
# even when the parent ``type`` is ``integer`` / ``number`` / ``boolean``.
|
||||
# OpenAI / OpenRouter / Anthropic accept typed enums (e.g. Discord's
|
||||
# ``auto_archive_duration: {type: integer, enum: [60, 1440, 4320, 10080]}``),
|
||||
# so we only drop the ``enum`` when it would collide with Gemini's rule.
|
||||
# Keeping ``type: integer`` plus the human-readable description gives the
|
||||
# model enough guidance; the tool handler still validates the value.
|
||||
enum_val = cleaned.get("enum")
|
||||
type_val = cleaned.get("type")
|
||||
if isinstance(enum_val, list) and type_val in {"integer", "number", "boolean"}:
|
||||
if any(not isinstance(item, str) for item in enum_val):
|
||||
cleaned.pop("enum", None)
|
||||
|
||||
return cleaned
|
||||
|
||||
|
||||
|
||||
@@ -1,242 +0,0 @@
|
||||
"""
|
||||
Image Generation Provider ABC
|
||||
=============================
|
||||
|
||||
Defines the pluggable-backend interface for image generation. Providers register
|
||||
instances via ``PluginContext.register_image_gen_provider()``; the active one
|
||||
(selected via ``image_gen.provider`` in ``config.yaml``) services every
|
||||
``image_generate`` tool call.
|
||||
|
||||
Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
|
||||
as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
|
||||
via ``plugins.enabled``).
|
||||
|
||||
Response shape
|
||||
--------------
|
||||
All providers return a dict that :func:`success_response` / :func:`error_response`
|
||||
produce. The tool wrapper JSON-serializes it. Keys:
|
||||
|
||||
success bool
|
||||
image str | None URL or absolute file path
|
||||
model str provider-specific model identifier
|
||||
prompt str echoed prompt
|
||||
aspect_ratio str "landscape" | "square" | "portrait"
|
||||
provider str provider name (for diagnostics)
|
||||
error str only when success=False
|
||||
error_type str only when success=False
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
import base64
|
||||
import datetime
|
||||
import logging
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait")
|
||||
DEFAULT_ASPECT_RATIO = "landscape"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ABC
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class ImageGenProvider(abc.ABC):
|
||||
"""Abstract base class for an image generation backend.
|
||||
|
||||
Subclasses must implement :meth:`generate`. Everything else has sane
|
||||
defaults — override only what your provider needs.
|
||||
"""
|
||||
|
||||
@property
|
||||
@abc.abstractmethod
|
||||
def name(self) -> str:
|
||||
"""Stable short identifier used in ``image_gen.provider`` config.
|
||||
|
||||
Lowercase, no spaces. Examples: ``fal``, ``openai``, ``replicate``.
|
||||
"""
|
||||
|
||||
@property
|
||||
def display_name(self) -> str:
|
||||
"""Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``."""
|
||||
return self.name.title()
|
||||
|
||||
def is_available(self) -> bool:
|
||||
"""Return True when this provider can service calls.
|
||||
|
||||
Typically checks for a required API key. Default: True
|
||||
(providers with no external dependencies are always available).
|
||||
"""
|
||||
return True
|
||||
|
||||
def list_models(self) -> List[Dict[str, Any]]:
|
||||
"""Return catalog entries for ``hermes tools`` model picker.
|
||||
|
||||
Each entry::
|
||||
|
||||
{
|
||||
"id": "gpt-image-1.5", # required
|
||||
"display": "GPT Image 1.5", # optional; defaults to id
|
||||
"speed": "~10s", # optional
|
||||
"strengths": "...", # optional
|
||||
"price": "$...", # optional
|
||||
}
|
||||
|
||||
Default: empty list (provider has no user-selectable models).
|
||||
"""
|
||||
return []
|
||||
|
||||
def get_setup_schema(self) -> Dict[str, Any]:
|
||||
"""Return provider metadata for the ``hermes tools`` picker.
|
||||
|
||||
Used by ``tools_config.py`` to inject this provider as a row in
|
||||
the Image Generation provider list. Shape::
|
||||
|
||||
{
|
||||
"name": "OpenAI", # picker label
|
||||
"badge": "paid", # optional short tag
|
||||
"tag": "One-line description...", # optional subtitle
|
||||
"env_vars": [ # keys to prompt for
|
||||
{"key": "OPENAI_API_KEY",
|
||||
"prompt": "OpenAI API key",
|
||||
"url": "https://platform.openai.com/api-keys"},
|
||||
],
|
||||
}
|
||||
|
||||
Default: minimal entry derived from ``display_name``. Override to
|
||||
expose API key prompts and custom badges.
|
||||
"""
|
||||
return {
|
||||
"name": self.display_name,
|
||||
"badge": "",
|
||||
"tag": "",
|
||||
"env_vars": [],
|
||||
}
|
||||
|
||||
def default_model(self) -> Optional[str]:
|
||||
"""Return the default model id, or None if not applicable."""
|
||||
models = self.list_models()
|
||||
if models:
|
||||
return models[0].get("id")
|
||||
return None
|
||||
|
||||
@abc.abstractmethod
|
||||
def generate(
|
||||
self,
|
||||
prompt: str,
|
||||
aspect_ratio: str = DEFAULT_ASPECT_RATIO,
|
||||
**kwargs: Any,
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate an image.
|
||||
|
||||
Implementations should return the dict from :func:`success_response`
|
||||
or :func:`error_response`. ``kwargs`` may contain forward-compat
|
||||
parameters future versions of the schema will expose — implementations
|
||||
should ignore unknown keys.
|
||||
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def resolve_aspect_ratio(value: Optional[str]) -> str:
|
||||
"""Clamp an aspect_ratio value to the valid set, defaulting to landscape.
|
||||
|
||||
Invalid values are coerced rather than rejected so the tool surface is
|
||||
forgiving of agent mistakes.
|
||||
"""
|
||||
if not isinstance(value, str):
|
||||
return DEFAULT_ASPECT_RATIO
|
||||
v = value.strip().lower()
|
||||
if v in VALID_ASPECT_RATIOS:
|
||||
return v
|
||||
return DEFAULT_ASPECT_RATIO
|
||||
|
||||
|
||||
def _images_cache_dir() -> Path:
|
||||
"""Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
path = get_hermes_home() / "cache" / "images"
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
return path
|
||||
|
||||
|
||||
def save_b64_image(
|
||||
b64_data: str,
|
||||
*,
|
||||
prefix: str = "image",
|
||||
extension: str = "png",
|
||||
) -> Path:
|
||||
"""Decode base64 image data and write it under ``$HERMES_HOME/cache/images/``.
|
||||
|
||||
Returns the absolute :class:`Path` to the saved file.
|
||||
|
||||
Filename format: ``<prefix>_<YYYYMMDD_HHMMSS>_<short-uuid>.<ext>``.
|
||||
"""
|
||||
raw = base64.b64decode(b64_data)
|
||||
ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
short = uuid.uuid4().hex[:8]
|
||||
path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"
|
||||
path.write_bytes(raw)
|
||||
return path
|
||||
|
||||
|
||||
def success_response(
|
||||
*,
|
||||
image: str,
|
||||
model: str,
|
||||
prompt: str,
|
||||
aspect_ratio: str,
|
||||
provider: str,
|
||||
extra: Optional[Dict[str, Any]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build a uniform success response dict.
|
||||
|
||||
``image`` may be an HTTP URL or an absolute filesystem path (for b64
|
||||
providers like OpenAI). Callers that need to pass through additional
|
||||
backend-specific fields can supply ``extra``.
|
||||
"""
|
||||
payload: Dict[str, Any] = {
|
||||
"success": True,
|
||||
"image": image,
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"aspect_ratio": aspect_ratio,
|
||||
"provider": provider,
|
||||
}
|
||||
if extra:
|
||||
for k, v in extra.items():
|
||||
payload.setdefault(k, v)
|
||||
return payload
|
||||
|
||||
|
||||
def error_response(
|
||||
*,
|
||||
error: str,
|
||||
error_type: str = "provider_error",
|
||||
provider: str = "",
|
||||
model: str = "",
|
||||
prompt: str = "",
|
||||
aspect_ratio: str = DEFAULT_ASPECT_RATIO,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build a uniform error response dict."""
|
||||
return {
|
||||
"success": False,
|
||||
"image": None,
|
||||
"error": error,
|
||||
"error_type": error_type,
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"aspect_ratio": aspect_ratio,
|
||||
"provider": provider,
|
||||
}
|
||||
@@ -1,120 +0,0 @@
|
||||
"""
|
||||
Image Generation Provider Registry
|
||||
==================================
|
||||
|
||||
Central map of registered providers. Populated by plugins at import-time via
|
||||
``PluginContext.register_image_gen_provider()``; consumed by the
|
||||
``image_generate`` tool to dispatch each call to the active backend.
|
||||
|
||||
Active selection
|
||||
----------------
|
||||
The active provider is chosen by ``image_gen.provider`` in ``config.yaml``.
|
||||
If unset, :func:`get_active_provider` applies fallback logic:
|
||||
|
||||
1. If exactly one provider is registered, use it.
|
||||
2. Otherwise if a provider named ``fal`` is registered, use it (legacy
|
||||
default — matches pre-plugin behavior).
|
||||
3. Otherwise return ``None`` (the tool surfaces a helpful error pointing
|
||||
the user at ``hermes tools``).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from agent.image_gen_provider import ImageGenProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_providers: Dict[str, ImageGenProvider] = {}
|
||||
_lock = threading.Lock()
|
||||
|
||||
|
||||
def register_provider(provider: ImageGenProvider) -> None:
|
||||
"""Register an image generation provider.
|
||||
|
||||
Re-registration (same ``name``) overwrites the previous entry and logs
|
||||
a debug message — this makes hot-reload scenarios (tests, dev loops)
|
||||
behave predictably.
|
||||
"""
|
||||
if not isinstance(provider, ImageGenProvider):
|
||||
raise TypeError(
|
||||
f"register_provider() expects an ImageGenProvider instance, "
|
||||
f"got {type(provider).__name__}"
|
||||
)
|
||||
name = provider.name
|
||||
if not isinstance(name, str) or not name.strip():
|
||||
raise ValueError("Image gen provider .name must be a non-empty string")
|
||||
with _lock:
|
||||
existing = _providers.get(name)
|
||||
_providers[name] = provider
|
||||
if existing is not None:
|
||||
logger.debug("Image gen provider '%s' re-registered (was %r)", name, type(existing).__name__)
|
||||
else:
|
||||
logger.debug("Registered image gen provider '%s' (%s)", name, type(provider).__name__)
|
||||
|
||||
|
||||
def list_providers() -> List[ImageGenProvider]:
|
||||
"""Return all registered providers, sorted by name."""
|
||||
with _lock:
|
||||
items = list(_providers.values())
|
||||
return sorted(items, key=lambda p: p.name)
|
||||
|
||||
|
||||
def get_provider(name: str) -> Optional[ImageGenProvider]:
|
||||
"""Return the provider registered under *name*, or None."""
|
||||
if not isinstance(name, str):
|
||||
return None
|
||||
with _lock:
|
||||
return _providers.get(name.strip())
|
||||
|
||||
|
||||
def get_active_provider() -> Optional[ImageGenProvider]:
|
||||
"""Resolve the currently-active provider.
|
||||
|
||||
Reads ``image_gen.provider`` from config.yaml; falls back per the
|
||||
module docstring.
|
||||
"""
|
||||
configured: Optional[str] = None
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
cfg = load_config()
|
||||
section = cfg.get("image_gen") if isinstance(cfg, dict) else None
|
||||
if isinstance(section, dict):
|
||||
raw = section.get("provider")
|
||||
if isinstance(raw, str) and raw.strip():
|
||||
configured = raw.strip()
|
||||
except Exception as exc:
|
||||
logger.debug("Could not read image_gen.provider from config: %s", exc)
|
||||
|
||||
with _lock:
|
||||
snapshot = dict(_providers)
|
||||
|
||||
if configured:
|
||||
provider = snapshot.get(configured)
|
||||
if provider is not None:
|
||||
return provider
|
||||
logger.debug(
|
||||
"image_gen.provider='%s' configured but not registered; falling back",
|
||||
configured,
|
||||
)
|
||||
|
||||
# Fallback: single-provider case
|
||||
if len(snapshot) == 1:
|
||||
return next(iter(snapshot.values()))
|
||||
|
||||
# Fallback: prefer legacy FAL for backward compat
|
||||
if "fal" in snapshot:
|
||||
return snapshot["fal"]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _reset_for_tests() -> None:
|
||||
"""Clear the registry. **Test-only.**"""
|
||||
with _lock:
|
||||
_providers.clear()
|
||||
+2
-43
@@ -31,7 +31,6 @@ from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import inspect
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.memory_provider import MemoryProvider
|
||||
@@ -313,39 +312,7 @@ class MemoryManager:
|
||||
)
|
||||
return "\n\n".join(parts)
|
||||
|
||||
@staticmethod
|
||||
def _provider_memory_write_metadata_mode(provider: MemoryProvider) -> str:
|
||||
"""Return how to pass metadata to a provider's memory-write hook."""
|
||||
try:
|
||||
signature = inspect.signature(provider.on_memory_write)
|
||||
except (TypeError, ValueError):
|
||||
return "keyword"
|
||||
|
||||
params = list(signature.parameters.values())
|
||||
if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params):
|
||||
return "keyword"
|
||||
if "metadata" in signature.parameters:
|
||||
return "keyword"
|
||||
|
||||
accepted = [
|
||||
p for p in params
|
||||
if p.kind in (
|
||||
inspect.Parameter.POSITIONAL_ONLY,
|
||||
inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
||||
inspect.Parameter.KEYWORD_ONLY,
|
||||
)
|
||||
]
|
||||
if len(accepted) >= 4:
|
||||
return "positional"
|
||||
return "legacy"
|
||||
|
||||
def on_memory_write(
|
||||
self,
|
||||
action: str,
|
||||
target: str,
|
||||
content: str,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
def on_memory_write(self, action: str, target: str, content: str) -> None:
|
||||
"""Notify external providers when the built-in memory tool writes.
|
||||
|
||||
Skips the builtin provider itself (it's the source of the write).
|
||||
@@ -354,15 +321,7 @@ class MemoryManager:
|
||||
if provider.name == "builtin":
|
||||
continue
|
||||
try:
|
||||
metadata_mode = self._provider_memory_write_metadata_mode(provider)
|
||||
if metadata_mode == "keyword":
|
||||
provider.on_memory_write(
|
||||
action, target, content, metadata=dict(metadata or {})
|
||||
)
|
||||
elif metadata_mode == "positional":
|
||||
provider.on_memory_write(action, target, content, dict(metadata or {}))
|
||||
else:
|
||||
provider.on_memory_write(action, target, content)
|
||||
provider.on_memory_write(action, target, content)
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"Memory provider '%s' on_memory_write failed: %s",
|
||||
|
||||
@@ -26,7 +26,7 @@ Optional hooks (override to opt in):
|
||||
on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
|
||||
on_session_end(messages) — end-of-session extraction
|
||||
on_pre_compress(messages) -> str — extract before context compression
|
||||
on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
|
||||
on_memory_write(action, target, content) — mirror built-in memory writes
|
||||
on_delegation(task, result, **kwargs) — parent-side observation of subagent work
|
||||
"""
|
||||
|
||||
@@ -34,7 +34,7 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -220,21 +220,12 @@ class MemoryProvider(ABC):
|
||||
should all have ``env_var`` set and this method stays no-op).
|
||||
"""
|
||||
|
||||
def on_memory_write(
|
||||
self,
|
||||
action: str,
|
||||
target: str,
|
||||
content: str,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
def on_memory_write(self, action: str, target: str, content: str) -> None:
|
||||
"""Called when the built-in memory tool writes an entry.
|
||||
|
||||
action: 'add', 'replace', or 'remove'
|
||||
target: 'memory' or 'user'
|
||||
content: the entry content
|
||||
metadata: structured provenance for the write, when available. Common
|
||||
keys include ``write_origin``, ``execution_context``, ``session_id``,
|
||||
``parent_session_id``, ``platform``, and ``tool_name``.
|
||||
|
||||
Use to mirror built-in memory writes to your backend.
|
||||
"""
|
||||
|
||||
+33
-290
@@ -4,9 +4,7 @@ Pure utility functions with no AIAgent dependency. Used by ContextCompressor
|
||||
and run_agent.py for pre-flight context checks.
|
||||
"""
|
||||
|
||||
import ipaddress
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
@@ -22,31 +20,12 @@ from hermes_constants import OPENROUTER_MODELS_URL
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _resolve_requests_verify() -> bool | str:
|
||||
"""Resolve SSL verify setting for `requests` calls from env vars.
|
||||
|
||||
The `requests` library only honours REQUESTS_CA_BUNDLE / CURL_CA_BUNDLE
|
||||
by default. Hermes also honours HERMES_CA_BUNDLE (its own convention)
|
||||
and SSL_CERT_FILE (used by the stdlib `ssl` module and by httpx), so
|
||||
that a single env var can cover both `requests` and `httpx` callsites
|
||||
inside the same process.
|
||||
|
||||
Returns either a filesystem path to a CA bundle, or True to defer to
|
||||
the requests default (certifi).
|
||||
"""
|
||||
for env_var in ("HERMES_CA_BUNDLE", "REQUESTS_CA_BUNDLE", "SSL_CERT_FILE"):
|
||||
val = os.getenv(env_var)
|
||||
if val and os.path.isfile(val):
|
||||
return val
|
||||
return True
|
||||
|
||||
# Provider names that can appear as a "provider:" prefix before a model ID.
|
||||
# Only these are stripped — Ollama-style "model:tag" colons (e.g. "qwen3.5:27b")
|
||||
# are preserved so the full model name reaches cache lookups and server queries.
|
||||
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
|
||||
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
|
||||
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
|
||||
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
|
||||
"qwen-oauth",
|
||||
"xiaomi",
|
||||
@@ -57,7 +36,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
|
||||
"github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
|
||||
"ollama",
|
||||
"stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
|
||||
"opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
|
||||
"mimo", "xiaomi-mimo",
|
||||
"arcee-ai", "arceeai",
|
||||
"xai", "x-ai", "x.ai", "grok",
|
||||
@@ -72,13 +51,6 @@ _OLLAMA_TAG_PATTERN = re.compile(
|
||||
)
|
||||
|
||||
|
||||
# Tailscale's CGNAT range (RFC 6598). `ipaddress.is_private` excludes this
|
||||
# block, so without an explicit check Ollama reached over Tailscale (e.g.
|
||||
# `http://100.77.243.5:11434`) wouldn't be treated as local and its stream
|
||||
# read / stale timeouts wouldn't get auto-bumped. Built once at import time.
|
||||
_TAILSCALE_CGNAT = ipaddress.IPv4Network("100.64.0.0/10")
|
||||
|
||||
|
||||
def _strip_provider_prefix(model: str) -> str:
|
||||
"""Strip a recognised provider prefix from a model string.
|
||||
|
||||
@@ -106,11 +78,9 @@ _endpoint_model_metadata_cache_time: Dict[str, float] = {}
|
||||
_ENDPOINT_MODEL_CACHE_TTL = 300
|
||||
|
||||
# Descending tiers for context length probing when the model is unknown.
|
||||
# We start at 256K (covers GPT-5.x, many current large-context models) and
|
||||
# step down on context-length errors until one works. Tier[0] is also the
|
||||
# default fallback when no detection method succeeds.
|
||||
# We start at 128K (a safe default for most modern models) and step down
|
||||
# on context-length errors until one works.
|
||||
CONTEXT_PROBE_TIERS = [
|
||||
256_000,
|
||||
128_000,
|
||||
64_000,
|
||||
32_000,
|
||||
@@ -145,11 +115,6 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"claude": 200000,
|
||||
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
|
||||
# Source: https://developers.openai.com/api/docs/models
|
||||
# GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
|
||||
# ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
|
||||
# provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
|
||||
# This hardcoded value is only reached when every probe misses.
|
||||
"gpt-5.5": 1050000,
|
||||
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
|
||||
@@ -160,22 +125,10 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
# Google
|
||||
"gemini": 1048576,
|
||||
# Gemma (open models served via AI Studio)
|
||||
"gemma-4": 256000, # Gemma 4 family
|
||||
"gemma4": 256000, # Ollama-style naming (e.g. gemma4:31b-cloud)
|
||||
"gemma-4-31b": 256000,
|
||||
"gemma-3": 131072,
|
||||
"gemma": 8192, # fallback for older gemma models
|
||||
# DeepSeek — V4 family ships with a 1M context window. The legacy
|
||||
# aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
|
||||
# mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
|
||||
# and inherit the same 1M window. The ``deepseek`` substring entry
|
||||
# below remains as a 128K fallback for older / unknown DeepSeek model
|
||||
# ids (e.g. via custom endpoints).
|
||||
# https://api-docs.deepseek.com/zh-cn/quick_start/pricing
|
||||
"deepseek-v4-pro": 1_000_000,
|
||||
"deepseek-v4-flash": 1_000_000,
|
||||
"deepseek-chat": 1_000_000,
|
||||
"deepseek-reasoner": 1_000_000,
|
||||
# DeepSeek
|
||||
"deepseek": 128000,
|
||||
# Meta
|
||||
"llama": 131072,
|
||||
@@ -220,12 +173,10 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"moonshotai/Kimi-K2.6": 262144,
|
||||
"moonshotai/Kimi-K2-Thinking": 262144,
|
||||
"MiniMaxAI/MiniMax-M2.5": 204800,
|
||||
"XiaomiMiMo/MiMo-V2-Flash": 262144,
|
||||
"mimo-v2-pro": 1048576,
|
||||
"mimo-v2.5-pro": 1048576,
|
||||
"mimo-v2.5": 1048576,
|
||||
"mimo-v2-omni": 262144,
|
||||
"mimo-v2-flash": 262144,
|
||||
"XiaomiMiMo/MiMo-V2-Flash": 256000,
|
||||
"mimo-v2-pro": 1000000,
|
||||
"mimo-v2-omni": 256000,
|
||||
"mimo-v2-flash": 256000,
|
||||
"zai-org/GLM-5": 202752,
|
||||
}
|
||||
|
||||
@@ -240,7 +191,6 @@ _CONTEXT_LENGTH_KEYS = (
|
||||
"max_seq_len",
|
||||
"n_ctx_train",
|
||||
"n_ctx",
|
||||
"ctx_size",
|
||||
)
|
||||
|
||||
_MAX_COMPLETION_KEYS = (
|
||||
@@ -284,12 +234,9 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
||||
"chatgpt.com": "openai",
|
||||
"api.anthropic.com": "anthropic",
|
||||
"api.z.ai": "zai",
|
||||
"open.bigmodel.cn": "zai",
|
||||
"api.moonshot.ai": "kimi-coding",
|
||||
"api.moonshot.cn": "kimi-coding-cn",
|
||||
"api.kimi.com": "kimi-coding",
|
||||
"api.stepfun.ai": "stepfun",
|
||||
"api.stepfun.com": "stepfun",
|
||||
"api.arcee.ai": "arcee",
|
||||
"api.minimax": "minimax",
|
||||
"dashscope.aliyuncs.com": "alibaba",
|
||||
@@ -334,15 +281,7 @@ def _is_known_provider_base_url(base_url: str) -> bool:
|
||||
|
||||
|
||||
def is_local_endpoint(base_url: str) -> bool:
|
||||
"""Return True if base_url points to a local machine.
|
||||
|
||||
Recognises loopback (``localhost``, ``127.0.0.0/8``, ``::1``),
|
||||
container-internal DNS names (``host.docker.internal`` et al.),
|
||||
RFC-1918 private ranges (``10/8``, ``172.16/12``, ``192.168/16``),
|
||||
link-local, and Tailscale CGNAT (``100.64.0.0/10``). Tailscale CGNAT
|
||||
is included so remote-but-trusted Ollama boxes reached over a
|
||||
Tailscale mesh get the same timeout auto-bumps as localhost Ollama.
|
||||
"""
|
||||
"""Return True if base_url points to a local machine (localhost / RFC-1918 / WSL)."""
|
||||
normalized = _normalize_base_url(base_url)
|
||||
if not normalized:
|
||||
return False
|
||||
@@ -357,17 +296,14 @@ def is_local_endpoint(base_url: str) -> bool:
|
||||
# Docker / Podman / Lima internal DNS names (e.g. host.docker.internal)
|
||||
if any(host.endswith(suffix) for suffix in _CONTAINER_LOCAL_SUFFIXES):
|
||||
return True
|
||||
# RFC-1918 private ranges, link-local, and Tailscale CGNAT
|
||||
# RFC-1918 private ranges and link-local
|
||||
import ipaddress
|
||||
try:
|
||||
addr = ipaddress.ip_address(host)
|
||||
if addr.is_private or addr.is_loopback or addr.is_link_local:
|
||||
return True
|
||||
if isinstance(addr, ipaddress.IPv4Address) and addr in _TAILSCALE_CGNAT:
|
||||
return True
|
||||
return addr.is_private or addr.is_loopback or addr.is_link_local
|
||||
except ValueError:
|
||||
pass
|
||||
# Bare IP that looks like a private range (e.g. 172.26.x.x for WSL)
|
||||
# or Tailscale CGNAT (100.64.x.x–100.127.x.x).
|
||||
parts = host.split(".")
|
||||
if len(parts) == 4:
|
||||
try:
|
||||
@@ -378,8 +314,6 @@ def is_local_endpoint(base_url: str) -> bool:
|
||||
return True
|
||||
if first == 192 and second == 168:
|
||||
return True
|
||||
if first == 100 and 64 <= second <= 127:
|
||||
return True
|
||||
except ValueError:
|
||||
pass
|
||||
return False
|
||||
@@ -528,7 +462,7 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
|
||||
return _model_metadata_cache
|
||||
|
||||
try:
|
||||
response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify())
|
||||
response = requests.get(OPENROUTER_MODELS_URL, timeout=10)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
@@ -595,7 +529,6 @@ def fetch_endpoint_model_metadata(
|
||||
server_url.rstrip("/") + "/api/v1/models",
|
||||
headers=headers,
|
||||
timeout=10,
|
||||
verify=_resolve_requests_verify(),
|
||||
)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
@@ -644,7 +577,7 @@ def fetch_endpoint_model_metadata(
|
||||
for candidate in candidates:
|
||||
url = candidate.rstrip("/") + "/models"
|
||||
try:
|
||||
response = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify())
|
||||
response = requests.get(url, headers=headers, timeout=10)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
cache: Dict[str, Dict[str, Any]] = {}
|
||||
@@ -675,10 +608,9 @@ def fetch_endpoint_model_metadata(
|
||||
try:
|
||||
# Try /v1/props first (current llama.cpp); fall back to /props for older builds
|
||||
base = candidate.rstrip("/").replace("/v1", "")
|
||||
_verify = _resolve_requests_verify()
|
||||
props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5, verify=_verify)
|
||||
props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5)
|
||||
if not props_resp.ok:
|
||||
props_resp = requests.get(base + "/props", headers=headers, timeout=5, verify=_verify)
|
||||
props_resp = requests.get(base + "/props", headers=headers, timeout=5)
|
||||
if props_resp.ok:
|
||||
props = props_resp.json()
|
||||
gen_settings = props.get("default_generation_settings", {})
|
||||
@@ -750,22 +682,6 @@ def get_cached_context_length(model: str, base_url: str) -> Optional[int]:
|
||||
return cache.get(key)
|
||||
|
||||
|
||||
def _invalidate_cached_context_length(model: str, base_url: str) -> None:
|
||||
"""Drop a stale cache entry so it gets re-resolved on the next lookup."""
|
||||
key = f"{model}@{base_url}"
|
||||
cache = _load_context_cache()
|
||||
if key not in cache:
|
||||
return
|
||||
del cache[key]
|
||||
path = _get_context_cache_path()
|
||||
try:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(path, "w") as f:
|
||||
yaml.dump({"context_lengths": cache}, f, default_flow_style=False)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to invalidate context length cache entry %s: %s", key, e)
|
||||
|
||||
|
||||
def get_next_probe_tier(current_length: int) -> Optional[int]:
|
||||
"""Return the next lower probe tier, or None if already at minimum."""
|
||||
for tier in CONTEXT_PROBE_TIERS:
|
||||
@@ -1043,7 +959,7 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
|
||||
"x-api-key": api_key,
|
||||
"anthropic-version": "2023-06-01",
|
||||
}
|
||||
resp = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify())
|
||||
resp = requests.get(url, headers=headers, timeout=10)
|
||||
if resp.status_code != 200:
|
||||
return None
|
||||
data = resp.json()
|
||||
@@ -1057,116 +973,6 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
|
||||
return None
|
||||
|
||||
|
||||
# Known ChatGPT Codex OAuth context windows (observed via live
|
||||
# chatgpt.com/backend-api/codex/models probe, Apr 2026). These are the
|
||||
# `context_window` values, which are what Codex actually enforces — the
|
||||
# direct OpenAI API has larger limits for the same slugs, but Codex OAuth
|
||||
# caps lower (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex).
|
||||
#
|
||||
# Used as a fallback when the live probe fails (no token, network error).
|
||||
# Longest keys first so substring match picks the most specific entry.
|
||||
_CODEX_OAUTH_CONTEXT_FALLBACK: Dict[str, int] = {
|
||||
"gpt-5.1-codex-max": 272_000,
|
||||
"gpt-5.1-codex-mini": 272_000,
|
||||
"gpt-5.3-codex": 272_000,
|
||||
"gpt-5.2-codex": 272_000,
|
||||
"gpt-5.4-mini": 272_000,
|
||||
"gpt-5.5": 272_000,
|
||||
"gpt-5.4": 272_000,
|
||||
"gpt-5.2": 272_000,
|
||||
"gpt-5": 272_000,
|
||||
}
|
||||
|
||||
|
||||
_codex_oauth_context_cache: Dict[str, int] = {}
|
||||
_codex_oauth_context_cache_time: float = 0.0
|
||||
_CODEX_OAUTH_CONTEXT_CACHE_TTL = 3600 # 1 hour
|
||||
|
||||
|
||||
def _fetch_codex_oauth_context_lengths(access_token: str) -> Dict[str, int]:
|
||||
"""Probe the ChatGPT Codex /models endpoint for per-slug context windows.
|
||||
|
||||
Codex OAuth imposes its own context limits that differ from the direct
|
||||
OpenAI API (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex). The
|
||||
`context_window` field in each model entry is the authoritative source.
|
||||
|
||||
Returns a ``{slug: context_window}`` dict. Empty on failure.
|
||||
"""
|
||||
global _codex_oauth_context_cache, _codex_oauth_context_cache_time
|
||||
now = time.time()
|
||||
if (
|
||||
_codex_oauth_context_cache
|
||||
and now - _codex_oauth_context_cache_time < _CODEX_OAUTH_CONTEXT_CACHE_TTL
|
||||
):
|
||||
return _codex_oauth_context_cache
|
||||
|
||||
try:
|
||||
resp = requests.get(
|
||||
"https://chatgpt.com/backend-api/codex/models?client_version=1.0.0",
|
||||
headers={"Authorization": f"Bearer {access_token}"},
|
||||
timeout=10,
|
||||
verify=_resolve_requests_verify(),
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
logger.debug(
|
||||
"Codex /models probe returned HTTP %s; falling back to hardcoded defaults",
|
||||
resp.status_code,
|
||||
)
|
||||
return {}
|
||||
data = resp.json()
|
||||
except Exception as exc:
|
||||
logger.debug("Codex /models probe failed: %s", exc)
|
||||
return {}
|
||||
|
||||
entries = data.get("models", []) if isinstance(data, dict) else []
|
||||
result: Dict[str, int] = {}
|
||||
for item in entries:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
slug = item.get("slug")
|
||||
ctx = item.get("context_window")
|
||||
if isinstance(slug, str) and isinstance(ctx, int) and ctx > 0:
|
||||
result[slug.strip()] = ctx
|
||||
|
||||
if result:
|
||||
_codex_oauth_context_cache = result
|
||||
_codex_oauth_context_cache_time = now
|
||||
return result
|
||||
|
||||
|
||||
def _resolve_codex_oauth_context_length(
|
||||
model: str, access_token: str = ""
|
||||
) -> Optional[int]:
|
||||
"""Resolve a Codex OAuth model's real context window.
|
||||
|
||||
Prefers a live probe of chatgpt.com/backend-api/codex/models (when we
|
||||
have a bearer token), then falls back to ``_CODEX_OAUTH_CONTEXT_FALLBACK``.
|
||||
"""
|
||||
model_bare = _strip_provider_prefix(model).strip()
|
||||
if not model_bare:
|
||||
return None
|
||||
|
||||
if access_token:
|
||||
live = _fetch_codex_oauth_context_lengths(access_token)
|
||||
if model_bare in live:
|
||||
return live[model_bare]
|
||||
# Case-insensitive match in case casing drifts
|
||||
model_lower = model_bare.lower()
|
||||
for slug, ctx in live.items():
|
||||
if slug.lower() == model_lower:
|
||||
return ctx
|
||||
|
||||
# Fallback: longest-key-first substring match over hardcoded defaults.
|
||||
model_lower = model_bare.lower()
|
||||
for slug, ctx in sorted(
|
||||
_CODEX_OAUTH_CONTEXT_FALLBACK.items(), key=lambda x: len(x[0]), reverse=True
|
||||
):
|
||||
if slug in model_lower:
|
||||
return ctx
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_nous_context_length(model: str) -> Optional[int]:
|
||||
"""Resolve Nous Portal model context length via OpenRouter metadata.
|
||||
|
||||
@@ -1206,14 +1012,12 @@ def get_model_context_length(
|
||||
api_key: str = "",
|
||||
config_context_length: int | None = None,
|
||||
provider: str = "",
|
||||
custom_providers: list | None = None,
|
||||
) -> int:
|
||||
"""Get the context length for a model.
|
||||
|
||||
Resolution order:
|
||||
0. Explicit config override (model.context_length or custom_providers per-model)
|
||||
1. Persistent cache (previously discovered via probing)
|
||||
1b. AWS Bedrock static table (must precede custom-endpoint probe)
|
||||
2. Active endpoint metadata (/models for explicit custom endpoints)
|
||||
3. Local server query (for local endpoints)
|
||||
4. Anthropic /v1/models API (API-key users only, not OAuth)
|
||||
@@ -1227,23 +1031,6 @@ def get_model_context_length(
|
||||
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
|
||||
return config_context_length
|
||||
|
||||
# 0b. custom_providers per-model override — check before any probe.
|
||||
# This closes the gap where /model switch and display paths used to fall
|
||||
# back to 128K despite the user having a per-model context_length set.
|
||||
# See #15779.
|
||||
if custom_providers and base_url and model:
|
||||
try:
|
||||
from hermes_cli.config import get_custom_provider_context_length
|
||||
cp_ctx = get_custom_provider_context_length(
|
||||
model=model,
|
||||
base_url=base_url,
|
||||
custom_providers=custom_providers,
|
||||
)
|
||||
if cp_ctx:
|
||||
return cp_ctx
|
||||
except Exception:
|
||||
pass # fall through to probing
|
||||
|
||||
# Normalise provider-prefixed model names (e.g. "local:model-name" →
|
||||
# "model-name") so cache lookups and server queries use the bare ID that
|
||||
# local servers actually know about. Ollama "model:tag" colons are preserved.
|
||||
@@ -1253,41 +1040,7 @@ def get_model_context_length(
|
||||
if base_url:
|
||||
cached = get_cached_context_length(model, base_url)
|
||||
if cached is not None:
|
||||
# Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds
|
||||
# resolved gpt-5.x to the direct-API value (e.g. 1.05M) via
|
||||
# models.dev and persisted it. Codex OAuth caps at 272K for every
|
||||
# slug, so any cached Codex entry at or above 400K is a leftover
|
||||
# from the old resolution path. Drop it and fall through to the
|
||||
# live /models probe in step 5 below.
|
||||
if provider == "openai-codex" and cached >= 400_000:
|
||||
logger.info(
|
||||
"Dropping stale Codex cache entry %s@%s -> %s (pre-fix value); "
|
||||
"re-resolving via live /models probe",
|
||||
model, base_url, f"{cached:,}",
|
||||
)
|
||||
_invalidate_cached_context_length(model, base_url)
|
||||
else:
|
||||
return cached
|
||||
|
||||
# 1b. AWS Bedrock — use static context length table.
|
||||
# Bedrock's ListFoundationModels API doesn't expose context window sizes,
|
||||
# so we maintain a curated table in bedrock_adapter.py that reflects
|
||||
# AWS-imposed limits (e.g. 200K for Claude models vs 1M on the native
|
||||
# Anthropic API). This must run BEFORE the custom-endpoint probe at
|
||||
# step 2 — bedrock-runtime.<region>.amazonaws.com is not in
|
||||
# _URL_TO_PROVIDER, so it would otherwise be treated as a custom endpoint,
|
||||
# fail the /models probe (Bedrock doesn't expose that shape), and fall
|
||||
# back to the 128K default before reaching the original step 4b branch.
|
||||
if provider == "bedrock" or (
|
||||
base_url
|
||||
and base_url_hostname(base_url).startswith("bedrock-runtime.")
|
||||
and base_url_host_matches(base_url, "amazonaws.com")
|
||||
):
|
||||
try:
|
||||
from agent.bedrock_adapter import get_bedrock_context_length
|
||||
return get_bedrock_context_length(model)
|
||||
except ImportError:
|
||||
pass # boto3 not installed — fall through to generic resolution
|
||||
return cached
|
||||
|
||||
# 2. Active endpoint metadata for truly custom/unknown endpoints.
|
||||
# Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their
|
||||
@@ -1334,7 +1087,19 @@ def get_model_context_length(
|
||||
if ctx:
|
||||
return ctx
|
||||
|
||||
# 4b. (Bedrock handled earlier at step 1b — before custom-endpoint probe.)
|
||||
# 4b. AWS Bedrock — use static context length table.
|
||||
# Bedrock's ListFoundationModels doesn't expose context window sizes,
|
||||
# so we maintain a curated table in bedrock_adapter.py.
|
||||
if provider == "bedrock" or (
|
||||
base_url
|
||||
and base_url_hostname(base_url).startswith("bedrock-runtime.")
|
||||
and base_url_host_matches(base_url, "amazonaws.com")
|
||||
):
|
||||
try:
|
||||
from agent.bedrock_adapter import get_bedrock_context_length
|
||||
return get_bedrock_context_length(model)
|
||||
except ImportError:
|
||||
pass # boto3 not installed — fall through to generic resolution
|
||||
|
||||
# 5. Provider-aware lookups (before generic OpenRouter cache)
|
||||
# These are provider-specific and take priority over the generic OR cache,
|
||||
@@ -1348,32 +1113,10 @@ def get_model_context_length(
|
||||
if inferred:
|
||||
effective_provider = inferred
|
||||
|
||||
# 5a. Copilot live /models API — max_prompt_tokens from the user's account.
|
||||
# This catches account-specific models (e.g. claude-opus-4.6-1m) that
|
||||
# don't exist in models.dev. For models that ARE in models.dev, this
|
||||
# returns the provider-enforced limit which is what users can actually use.
|
||||
if effective_provider in ("copilot", "copilot-acp", "github-copilot"):
|
||||
try:
|
||||
from hermes_cli.models import get_copilot_model_context
|
||||
ctx = get_copilot_model_context(model, api_key=api_key)
|
||||
if ctx:
|
||||
return ctx
|
||||
except Exception:
|
||||
pass # Fall through to models.dev
|
||||
|
||||
if effective_provider == "nous":
|
||||
ctx = _resolve_nous_context_length(model)
|
||||
if ctx:
|
||||
return ctx
|
||||
if effective_provider == "openai-codex":
|
||||
# Codex OAuth enforces lower context limits than the direct OpenAI
|
||||
# API for the same slug (e.g. gpt-5.5 is 1.05M on the API but 272K
|
||||
# on Codex). Authoritative source is Codex's own /models endpoint.
|
||||
codex_ctx = _resolve_codex_oauth_context_length(model, access_token=api_key or "")
|
||||
if codex_ctx:
|
||||
if base_url:
|
||||
save_context_length(model, base_url, codex_ctx)
|
||||
return codex_ctx
|
||||
if effective_provider:
|
||||
from agent.models_dev import lookup_models_dev_context
|
||||
ctx = lookup_models_dev_context(effective_provider, model)
|
||||
@@ -1383,7 +1126,7 @@ def get_model_context_length(
|
||||
# 6. OpenRouter live API metadata (provider-unaware fallback)
|
||||
metadata = fetch_model_metadata()
|
||||
if model in metadata:
|
||||
return metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)
|
||||
return metadata[model].get("context_length", 128000)
|
||||
|
||||
# 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
|
||||
# Only check `default_model in model` (is the key a substring of the input).
|
||||
|
||||
@@ -146,7 +146,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
|
||||
"openai-codex": "openai",
|
||||
"zai": "zai",
|
||||
"kimi-coding": "kimi-for-coding",
|
||||
"stepfun": "stepfun",
|
||||
"kimi-coding-cn": "kimi-for-coding",
|
||||
"minimax": "minimax",
|
||||
"minimax-cn": "minimax-cn",
|
||||
@@ -418,9 +417,6 @@ def list_provider_models(provider: str) -> List[str]:
|
||||
|
||||
Returns an empty list if the provider is unknown or has no data.
|
||||
"""
|
||||
from hermes_cli.models import normalize_provider
|
||||
provider = normalize_provider(provider) or provider
|
||||
|
||||
models = _get_provider_models(provider)
|
||||
if models is None:
|
||||
return []
|
||||
|
||||
@@ -1,190 +0,0 @@
|
||||
"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset.
|
||||
|
||||
Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI
|
||||
tool calling. Requests that violate it fail with HTTP 400:
|
||||
|
||||
tools.function.parameters is not a valid moonshot flavored json schema,
|
||||
details: <...>
|
||||
|
||||
Known rejection modes documented at
|
||||
https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102
|
||||
and MoonshotAI/kimi-cli#1595:
|
||||
|
||||
1. Every property schema must carry a ``type``. Standard JSON Schema allows
|
||||
type to be omitted (the value is then unconstrained); Moonshot refuses.
|
||||
2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
|
||||
the parent. Presence of both causes "type should be defined in anyOf
|
||||
items instead of the parent schema".
|
||||
|
||||
The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
|
||||
handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
|
||||
applies at MCP registration time for all providers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Keys whose values are maps of name → schema (not schemas themselves).
|
||||
# When we recurse, we walk the values of these maps as schemas, but we do
|
||||
# NOT apply the missing-type repair to the map itself.
|
||||
_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"})
|
||||
|
||||
# Keys whose values are lists of schemas.
|
||||
_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"})
|
||||
|
||||
# Keys whose values are a single nested schema.
|
||||
_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"})
|
||||
|
||||
|
||||
def _repair_schema(node: Any, is_schema: bool = True) -> Any:
|
||||
"""Recursively apply Moonshot repairs to a schema node.
|
||||
|
||||
``is_schema=True`` means this dict is a JSON Schema node and gets the
|
||||
missing-type + anyOf-parent repairs applied. ``is_schema=False`` means
|
||||
it's a container map (e.g. the value of ``properties``) and we only
|
||||
recurse into its values.
|
||||
"""
|
||||
if isinstance(node, list):
|
||||
# Lists only show up under schema-list keys (anyOf/oneOf/allOf), so
|
||||
# every element is itself a schema.
|
||||
return [_repair_schema(item, is_schema=True) for item in node]
|
||||
if not isinstance(node, dict):
|
||||
return node
|
||||
|
||||
# Walk the dict, deciding per-key whether recursion is into a schema
|
||||
# node, a container map, or a scalar.
|
||||
repaired: Dict[str, Any] = {}
|
||||
for key, value in node.items():
|
||||
if key in _SCHEMA_MAP_KEYS and isinstance(value, dict):
|
||||
# Map of name → schema. Don't treat the map itself as a schema
|
||||
# (it has no type / properties of its own), but each value is.
|
||||
repaired[key] = {
|
||||
sub_key: _repair_schema(sub_val, is_schema=True)
|
||||
for sub_key, sub_val in value.items()
|
||||
}
|
||||
elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
|
||||
repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
|
||||
elif key in _SCHEMA_NODE_KEYS:
|
||||
# items / not / additionalProperties: single nested schema.
|
||||
# additionalProperties can also be a bool — leave those alone.
|
||||
if isinstance(value, dict):
|
||||
repaired[key] = _repair_schema(value, is_schema=True)
|
||||
else:
|
||||
repaired[key] = value
|
||||
else:
|
||||
# Scalars (description, title, format, enum values, etc.) pass through.
|
||||
repaired[key] = value
|
||||
|
||||
if not is_schema:
|
||||
return repaired
|
||||
|
||||
# Rule 2: when anyOf is present, type belongs only on the children.
|
||||
if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
|
||||
repaired.pop("type", None)
|
||||
return repaired
|
||||
|
||||
# Rule 1: property schemas without type need one. $ref nodes are exempt
|
||||
# — their type comes from the referenced definition.
|
||||
if "$ref" in repaired:
|
||||
return repaired
|
||||
return _fill_missing_type(repaired)
|
||||
|
||||
|
||||
def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Infer a reasonable ``type`` if this schema node has none."""
|
||||
if "type" in node and node["type"] not in (None, ""):
|
||||
return node
|
||||
|
||||
# Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
|
||||
# → type of first enum value, else fall back to ``string`` (safest scalar).
|
||||
if "properties" in node or "required" in node or "additionalProperties" in node:
|
||||
inferred = "object"
|
||||
elif "items" in node or "prefixItems" in node:
|
||||
inferred = "array"
|
||||
elif "enum" in node and isinstance(node["enum"], list) and node["enum"]:
|
||||
sample = node["enum"][0]
|
||||
if isinstance(sample, bool):
|
||||
inferred = "boolean"
|
||||
elif isinstance(sample, int):
|
||||
inferred = "integer"
|
||||
elif isinstance(sample, float):
|
||||
inferred = "number"
|
||||
else:
|
||||
inferred = "string"
|
||||
else:
|
||||
inferred = "string"
|
||||
|
||||
return {**node, "type": inferred}
|
||||
|
||||
|
||||
def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]:
|
||||
"""Normalize tool parameters to a Moonshot-compatible object schema.
|
||||
|
||||
Returns a deep-copied schema with the two flavored-JSON-Schema repairs
|
||||
applied. Input is not mutated.
|
||||
"""
|
||||
if not isinstance(parameters, dict):
|
||||
return {"type": "object", "properties": {}}
|
||||
|
||||
repaired = _repair_schema(copy.deepcopy(parameters), is_schema=True)
|
||||
if not isinstance(repaired, dict):
|
||||
return {"type": "object", "properties": {}}
|
||||
|
||||
# Top-level must be an object schema
|
||||
if repaired.get("type") != "object":
|
||||
repaired["type"] = "object"
|
||||
if "properties" not in repaired:
|
||||
repaired["properties"] = {}
|
||||
|
||||
return repaired
|
||||
|
||||
|
||||
def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters."""
|
||||
if not tools:
|
||||
return tools
|
||||
|
||||
sanitized: List[Dict[str, Any]] = []
|
||||
any_change = False
|
||||
for tool in tools:
|
||||
if not isinstance(tool, dict):
|
||||
sanitized.append(tool)
|
||||
continue
|
||||
fn = tool.get("function")
|
||||
if not isinstance(fn, dict):
|
||||
sanitized.append(tool)
|
||||
continue
|
||||
params = fn.get("parameters")
|
||||
repaired = sanitize_moonshot_tool_parameters(params)
|
||||
if repaired is not params:
|
||||
any_change = True
|
||||
new_fn = {**fn, "parameters": repaired}
|
||||
sanitized.append({**tool, "function": new_fn})
|
||||
else:
|
||||
sanitized.append(tool)
|
||||
|
||||
return sanitized if any_change else tools
|
||||
|
||||
|
||||
def is_moonshot_model(model: str | None) -> bool:
|
||||
"""True for any Kimi / Moonshot model slug, regardless of aggregator prefix.
|
||||
|
||||
Matches bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) and aggregator-
|
||||
prefixed slugs (``nous/moonshotai/kimi-k2.6``, ``openrouter/moonshotai/...``).
|
||||
Detection by model name covers Nous / OpenRouter / other aggregators that
|
||||
route to Moonshot's inference, where the base URL is the aggregator's, not
|
||||
``api.moonshot.ai``.
|
||||
"""
|
||||
if not model:
|
||||
return False
|
||||
bare = model.strip().lower()
|
||||
# Last path segment (covers aggregator-prefixed slugs)
|
||||
tail = bare.rsplit("/", 1)[-1]
|
||||
if tail.startswith("kimi-") or tail == "kimi":
|
||||
return True
|
||||
# Vendor-prefixed forms commonly used on aggregators
|
||||
if "moonshot" in bare or "/kimi" in bare or bare.startswith("kimi"):
|
||||
return True
|
||||
return False
|
||||
@@ -180,145 +180,3 @@ def format_remaining(seconds: float) -> str:
|
||||
h, remainder = divmod(s, 3600)
|
||||
m = remainder // 60
|
||||
return f"{h}h {m}m" if m else f"{h}h"
|
||||
|
||||
|
||||
# Buckets with reset windows shorter than this are treated as transient
|
||||
# (upstream jitter, secondary throttling) rather than a genuine quota
|
||||
# exhaustion worth a cross-session breaker trip.
|
||||
_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
|
||||
|
||||
|
||||
def is_genuine_nous_rate_limit(
|
||||
*,
|
||||
headers: Optional[Mapping[str, str]] = None,
|
||||
last_known_state: Optional[Any] = None,
|
||||
) -> bool:
|
||||
"""Decide whether a 429 from Nous Portal is a real account rate limit.
|
||||
|
||||
Nous Portal multiplexes multiple upstream providers (DeepSeek, Kimi,
|
||||
MiMo, Hermes, ...) behind one endpoint. A 429 can mean either:
|
||||
|
||||
(a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is
|
||||
exhausted — a genuine rate limit that will last until the
|
||||
bucket resets.
|
||||
(b) The upstream provider is out of capacity for a specific model
|
||||
— transient, clears in seconds, and has nothing to do with
|
||||
the caller's quota on Nous.
|
||||
|
||||
Tripping the cross-session breaker on (b) blocks ALL Nous requests
|
||||
(and all models, since Nous is one provider key) for minutes even
|
||||
though the caller's account is healthy and a different model would
|
||||
have worked. That's the bug users hit when DeepSeek V4 Pro 429s
|
||||
trigger a breaker that then blocks Kimi 2.6 and MiMo V2.5 Pro.
|
||||
|
||||
We tell the two apart by looking at:
|
||||
|
||||
1. The 429 response's own ``x-ratelimit-*`` headers. Nous emits
|
||||
the full suite on every response including 429s. An exhausted
|
||||
bucket (``remaining == 0`` with a reset window >= 60s) is
|
||||
proof of (a).
|
||||
2. The last-known-good rate-limit state captured by
|
||||
``_capture_rate_limits()`` on the previous successful
|
||||
response. If any bucket there was already near-exhausted with
|
||||
a substantial reset window, the current 429 is almost
|
||||
certainly (a) continuing from that condition.
|
||||
|
||||
If neither signal fires, we treat the 429 as (b): fail the single
|
||||
request, let the retry loop or model-switch proceed, and do NOT
|
||||
write the cross-session breaker file.
|
||||
|
||||
Returns True when the evidence points at (a).
|
||||
"""
|
||||
# Signal 1: current 429 response headers.
|
||||
state = _parse_buckets_from_headers(headers)
|
||||
if _has_exhausted_bucket(state):
|
||||
return True
|
||||
|
||||
# Signal 2: last-known-good state from a recent successful response.
|
||||
# Accepts either a RateLimitState (dataclass from rate_limit_tracker)
|
||||
# or a dict of bucket snapshots.
|
||||
if last_known_state is not None and _has_exhausted_bucket_in_object(last_known_state):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _parse_buckets_from_headers(
|
||||
headers: Optional[Mapping[str, str]],
|
||||
) -> dict[str, tuple[Optional[int], Optional[float]]]:
|
||||
"""Extract (remaining, reset_seconds) per bucket from x-ratelimit-* headers.
|
||||
|
||||
Returns empty dict when no rate-limit headers are present.
|
||||
"""
|
||||
if not headers:
|
||||
return {}
|
||||
|
||||
lowered = {k.lower(): v for k, v in headers.items()}
|
||||
if not any(k.startswith("x-ratelimit-") for k in lowered):
|
||||
return {}
|
||||
|
||||
def _maybe_int(raw: Optional[str]) -> Optional[int]:
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
return int(float(raw))
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
def _maybe_float(raw: Optional[str]) -> Optional[float]:
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
return float(raw)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
result: dict[str, tuple[Optional[int], Optional[float]]] = {}
|
||||
for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
|
||||
remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
|
||||
reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
|
||||
if remaining is not None or reset is not None:
|
||||
result[tag] = (remaining, reset)
|
||||
return result
|
||||
|
||||
|
||||
def _has_exhausted_bucket(
|
||||
buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
|
||||
) -> bool:
|
||||
"""Return True when any bucket has remaining == 0 AND a meaningful reset window."""
|
||||
for remaining, reset in buckets.values():
|
||||
if remaining is None or remaining > 0:
|
||||
continue
|
||||
if reset is None:
|
||||
continue
|
||||
if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _has_exhausted_bucket_in_object(state: Any) -> bool:
|
||||
"""Check a RateLimitState-like object for an exhausted bucket.
|
||||
|
||||
Accepts the dataclass from ``agent.rate_limit_tracker`` (buckets
|
||||
exposed as attributes ``requests_min``, ``requests_hour``,
|
||||
``tokens_min``, ``tokens_hour``) and falls back gracefully for any
|
||||
object missing those attributes.
|
||||
"""
|
||||
for attr in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"):
|
||||
bucket = getattr(state, attr, None)
|
||||
if bucket is None:
|
||||
continue
|
||||
limit = getattr(bucket, "limit", 0) or 0
|
||||
remaining = getattr(bucket, "remaining", 0) or 0
|
||||
# Prefer the adjusted "remaining_seconds_now" property when present;
|
||||
# fall back to raw reset_seconds.
|
||||
reset = getattr(bucket, "remaining_seconds_now", None)
|
||||
if reset is None:
|
||||
reset = getattr(bucket, "reset_seconds", 0.0) or 0.0
|
||||
if limit <= 0:
|
||||
continue
|
||||
if remaining > 0:
|
||||
continue
|
||||
if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -1,144 +0,0 @@
|
||||
"""
|
||||
Contextual first-touch onboarding hints.
|
||||
|
||||
Instead of blocking first-run questionnaires, show a one-time hint the *first*
|
||||
time a user hits a behavior fork — message-while-running, first long-running
|
||||
tool, etc. Each hint is shown once per install (tracked in ``config.yaml`` under
|
||||
``onboarding.seen.<flag>``) and then never again.
|
||||
|
||||
Keep this module tiny and dependency-free so both the CLI and gateway can import
|
||||
it without pulling in heavy modules.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Mapping, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Flag names (stable — used as config.yaml keys under onboarding.seen)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
BUSY_INPUT_FLAG = "busy_input_prompt"
|
||||
TOOL_PROGRESS_FLAG = "tool_progress_prompt"
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Hint content
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def busy_input_hint_gateway(mode: str) -> str:
|
||||
"""Hint shown the first time a user messages while the agent is busy.
|
||||
|
||||
``mode`` is the effective busy_input_mode that was just applied, so the
|
||||
message matches reality ("I just interrupted…" vs "I just queued…").
|
||||
"""
|
||||
if mode == "queue":
|
||||
return (
|
||||
"💡 First-time tip — I queued your message instead of interrupting. "
|
||||
"Send `/busy interrupt` to make new messages stop the current task "
|
||||
"immediately, or `/busy status` to check. This notice won't appear again."
|
||||
)
|
||||
return (
|
||||
"💡 First-time tip — I just interrupted my current task to answer you. "
|
||||
"Send `/busy queue` to queue follow-ups for after the current task instead, "
|
||||
"or `/busy status` to check. This notice won't appear again."
|
||||
)
|
||||
|
||||
|
||||
def busy_input_hint_cli(mode: str) -> str:
|
||||
"""CLI version of the busy-input hint (plain text, no markdown)."""
|
||||
if mode == "queue":
|
||||
return (
|
||||
"(tip) Your message was queued for the next turn. "
|
||||
"Use /busy interrupt to make Enter stop the current run instead. "
|
||||
"This tip only shows once."
|
||||
)
|
||||
return (
|
||||
"(tip) Your message interrupted the current run. "
|
||||
"Use /busy queue to queue messages for the next turn instead. "
|
||||
"This tip only shows once."
|
||||
)
|
||||
|
||||
|
||||
def tool_progress_hint_gateway() -> str:
|
||||
return (
|
||||
"💡 First-time tip — that tool took a while and I'm streaming every step. "
|
||||
"If the progress messages feel noisy, send `/verbose` to cycle modes "
|
||||
"(all → new → off). This notice won't appear again."
|
||||
)
|
||||
|
||||
|
||||
def tool_progress_hint_cli() -> str:
|
||||
return (
|
||||
"(tip) That tool ran for a while. Use /verbose to cycle tool-progress "
|
||||
"display modes (all -> new -> off -> verbose). This tip only shows once."
|
||||
)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# State read / write
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
|
||||
onboarding = config.get("onboarding") if isinstance(config, Mapping) else None
|
||||
if not isinstance(onboarding, Mapping):
|
||||
return {}
|
||||
seen = onboarding.get("seen")
|
||||
return seen if isinstance(seen, Mapping) else {}
|
||||
|
||||
|
||||
def is_seen(config: Mapping[str, Any], flag: str) -> bool:
|
||||
"""Return True if the user has already been shown this first-touch hint."""
|
||||
return bool(_get_seen_dict(config).get(flag))
|
||||
|
||||
|
||||
def mark_seen(config_path: Path, flag: str) -> bool:
|
||||
"""Persist ``onboarding.seen.<flag> = True`` to ``config_path``.
|
||||
|
||||
Uses the atomic YAML writer so a concurrent process can't observe a
|
||||
partially-written file. Returns True on success, False on any error
|
||||
(including the config file being absent — onboarding is best-effort).
|
||||
"""
|
||||
try:
|
||||
import yaml
|
||||
from utils import atomic_yaml_write
|
||||
except Exception as e: # pragma: no cover — dependency issue
|
||||
logger.debug("onboarding: failed to import yaml/utils: %s", e)
|
||||
return False
|
||||
|
||||
try:
|
||||
cfg: dict = {}
|
||||
if config_path.exists():
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
cfg = yaml.safe_load(f) or {}
|
||||
if not isinstance(cfg.get("onboarding"), dict):
|
||||
cfg["onboarding"] = {}
|
||||
seen = cfg["onboarding"].get("seen")
|
||||
if not isinstance(seen, dict):
|
||||
seen = {}
|
||||
cfg["onboarding"]["seen"] = seen
|
||||
if seen.get(flag) is True:
|
||||
return True # already marked — nothing to do
|
||||
seen[flag] = True
|
||||
atomic_yaml_write(config_path, cfg)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.debug("onboarding: failed to mark flag %s: %s", flag, e)
|
||||
return False
|
||||
|
||||
|
||||
__all__ = [
|
||||
"BUSY_INPUT_FLAG",
|
||||
"TOOL_PROGRESS_FLAG",
|
||||
"busy_input_hint_gateway",
|
||||
"busy_input_hint_cli",
|
||||
"tool_progress_hint_gateway",
|
||||
"tool_progress_hint_cli",
|
||||
"is_seen",
|
||||
"mark_seen",
|
||||
]
|
||||
+1
-91
@@ -176,64 +176,6 @@ SKILLS_GUIDANCE = (
|
||||
"Skills that aren't maintained become liabilities."
|
||||
)
|
||||
|
||||
KANBAN_GUIDANCE = (
|
||||
"# You are a Kanban worker\n"
|
||||
"You were spawned by the Hermes Kanban dispatcher to execute ONE task from "
|
||||
"the shared board at `~/.hermes/kanban.db`. Your task id is in "
|
||||
"`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. "
|
||||
"The `kanban_*` tools in your schema are your primary coordination surface — "
|
||||
"they write directly to the shared SQLite DB and work regardless of terminal "
|
||||
"backend (local/docker/modal/ssh).\n"
|
||||
"\n"
|
||||
"## Lifecycle\n"
|
||||
"\n"
|
||||
"1. **Orient.** Call `kanban_show()` first (no args — it defaults to your "
|
||||
"task). The response includes title, body, parent-task handoffs (summary + "
|
||||
"metadata), any prior attempts on this task if you're a retry, the full "
|
||||
"comment thread, and a pre-formatted `worker_context` you can treat as "
|
||||
"ground truth.\n"
|
||||
"2. **Work inside the workspace.** `cd $HERMES_KANBAN_WORKSPACE` before "
|
||||
"any file operations. The workspace is yours for this run. Don't modify "
|
||||
"files outside it unless the task explicitly asks.\n"
|
||||
"3. **Heartbeat on long operations.** Call `kanban_heartbeat(note=...)` "
|
||||
"every few minutes during long subprocesses (training, encoding, crawling). "
|
||||
"Skip heartbeats for short tasks.\n"
|
||||
"4. **Block on genuine ambiguity.** If you need a human decision you cannot "
|
||||
"infer (missing credentials, UX choice, paywalled source, peer output you "
|
||||
"need first), call `kanban_block(reason=\"...\")` and stop. Don't guess. "
|
||||
"The user will unblock with context and the dispatcher will respawn you.\n"
|
||||
"5. **Complete with structured handoff.** Call `kanban_complete(summary=..., "
|
||||
"metadata=...)`. `summary` is 1–3 human-readable sentences naming concrete "
|
||||
"artifacts. `metadata` is machine-readable facts "
|
||||
"(`{changed_files: [...], tests_run: N, decisions: [...]}`). Downstream "
|
||||
"workers read both via their own `kanban_show`. Never put secrets / "
|
||||
"tokens / raw PII in either field — run rows are durable forever.\n"
|
||||
"6. **If follow-up work appears, create it; don't do it.** Use "
|
||||
"`kanban_create(title=..., assignee=<right-profile>, parents=[your-task-id])` "
|
||||
"to spawn a child task for the appropriate specialist profile instead of "
|
||||
"scope-creeping into the next thing.\n"
|
||||
"\n"
|
||||
"## Orchestrator mode\n"
|
||||
"\n"
|
||||
"If your task is itself a decomposition task (e.g. a planner profile given "
|
||||
"a high-level goal), use `kanban_create` to fan out into child tasks — one "
|
||||
"per specialist, each with an explicit `assignee` and `parents=[...]` to "
|
||||
"express dependencies. Then `kanban_complete` your own task with a summary "
|
||||
"of the decomposition. Do NOT execute the work yourself; your job is "
|
||||
"routing, not implementation.\n"
|
||||
"\n"
|
||||
"## Do NOT\n"
|
||||
"\n"
|
||||
"- Do not shell out to `hermes kanban <verb>` for board operations. Use "
|
||||
"the `kanban_*` tools — they work across all terminal backends.\n"
|
||||
"- Do not complete a task you didn't actually finish. Block it.\n"
|
||||
"- Do not assign follow-up work to yourself. Assign it to the right "
|
||||
"specialist profile.\n"
|
||||
"- Do not call `delegate_task` as a board substitute. `delegate_task` is "
|
||||
"for short reasoning subtasks inside your own run; board tasks are for "
|
||||
"cross-agent handoffs that outlive one API loop."
|
||||
)
|
||||
|
||||
TOOL_USE_ENFORCEMENT_GUIDANCE = (
|
||||
"# Tool-use enforcement\n"
|
||||
"You MUST use your tools to take action — do not describe what you would do "
|
||||
@@ -408,13 +350,7 @@ PLATFORM_HINTS = {
|
||||
),
|
||||
"cli": (
|
||||
"You are a CLI AI Agent. Try not to use markdown but simple text "
|
||||
"renderable inside a terminal. "
|
||||
"File delivery: there is no attachment channel — the user reads your "
|
||||
"response directly in their terminal. Do NOT emit MEDIA:/path tags "
|
||||
"(those are only intercepted on messaging platforms like Telegram, "
|
||||
"Discord, Slack, etc.; on the CLI they render as literal text). "
|
||||
"When referring to a file you created or changed, just state its "
|
||||
"absolute path in plain text; the user can open it from there."
|
||||
"renderable inside a terminal."
|
||||
),
|
||||
"sms": (
|
||||
"You are communicating via SMS. Keep responses concise and use plain text "
|
||||
@@ -428,32 +364,6 @@ PLATFORM_HINTS = {
|
||||
"MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
|
||||
".heic) appear as photos and other files arrive as attachments."
|
||||
),
|
||||
"mattermost": (
|
||||
"You are in a Mattermost workspace communicating with your user. "
|
||||
"Mattermost renders standard Markdown — headings, bold, italic, code "
|
||||
"blocks, and tables all work. "
|
||||
"You can send media files natively: include MEDIA:/absolute/path/to/file "
|
||||
"in your response. Images (.jpg, .png, .webp) are uploaded as photo "
|
||||
"attachments, audio and video as file attachments. "
|
||||
"Image URLs in markdown format  are rendered as inline previews automatically."
|
||||
),
|
||||
"matrix": (
|
||||
"You are in a Matrix room communicating with your user. "
|
||||
"Matrix renders Markdown — bold, italic, code blocks, and links work; "
|
||||
"the adapter converts your Markdown to HTML for rich display. "
|
||||
"You can send media files natively: include MEDIA:/absolute/path/to/file "
|
||||
"in your response. Images (.jpg, .png, .webp) are sent as inline photos, "
|
||||
"audio (.ogg, .mp3) as voice/audio messages, video (.mp4) inline, "
|
||||
"and other files as downloadable attachments."
|
||||
),
|
||||
"feishu": (
|
||||
"You are in a Feishu (Lark) workspace communicating with your user. "
|
||||
"Feishu renders Markdown in messages — bold, italic, code blocks, and "
|
||||
"links are supported. "
|
||||
"You can send media files natively: include MEDIA:/absolute/path/to/file "
|
||||
"in your response. Images (.jpg, .png, .webp) are uploaded and displayed "
|
||||
"inline, audio files as voice messages, and other files as attachments."
|
||||
),
|
||||
"weixin": (
|
||||
"You are on Weixin/WeChat. Markdown formatting is supported, so you may use it when "
|
||||
"it improves readability, but keep the message compact and chat-friendly. You can send media files natively: "
|
||||
|
||||
+135
-12
@@ -1,29 +1,154 @@
|
||||
"""Shared slash command helpers for skills.
|
||||
"""Shared slash command helpers for skills and built-in prompt-style modes.
|
||||
|
||||
Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
|
||||
can invoke skills via /skill-name commands.
|
||||
can invoke skills via /skill-name commands and prompt-only built-ins like
|
||||
/plan.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from hermes_constants import display_hermes_home
|
||||
from agent.skill_preprocessing import (
|
||||
expand_inline_shell as _expand_inline_shell,
|
||||
load_skills_config as _load_skills_config,
|
||||
substitute_template_vars as _substitute_template_vars,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_skill_commands: Dict[str, Dict[str, Any]] = {}
|
||||
_PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
|
||||
# Patterns for sanitizing skill names into clean hyphen-separated slugs.
|
||||
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
|
||||
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
|
||||
|
||||
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
|
||||
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
|
||||
# left as-is so the user can debug them.
|
||||
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
|
||||
|
||||
# Matches inline shell snippets like: !`date +%Y-%m-%d`
|
||||
# Non-greedy, single-line only — no newlines inside the backticks.
|
||||
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
|
||||
|
||||
# Cap inline-shell output so a runaway command can't blow out the context.
|
||||
_INLINE_SHELL_MAX_OUTPUT = 4000
|
||||
|
||||
|
||||
def _load_skills_config() -> dict:
|
||||
"""Load the ``skills`` section of config.yaml (best-effort)."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
cfg = load_config() or {}
|
||||
skills_cfg = cfg.get("skills")
|
||||
if isinstance(skills_cfg, dict):
|
||||
return skills_cfg
|
||||
except Exception:
|
||||
logger.debug("Could not read skills config", exc_info=True)
|
||||
return {}
|
||||
|
||||
|
||||
def _substitute_template_vars(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
session_id: str | None,
|
||||
) -> str:
|
||||
"""Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
|
||||
|
||||
Only substitutes tokens for which a concrete value is available —
|
||||
unresolved tokens are left in place so the author can spot them.
|
||||
"""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
skill_dir_str = str(skill_dir) if skill_dir else None
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
token = match.group(1)
|
||||
if token == "HERMES_SKILL_DIR" and skill_dir_str:
|
||||
return skill_dir_str
|
||||
if token == "HERMES_SESSION_ID" and session_id:
|
||||
return str(session_id)
|
||||
return match.group(0)
|
||||
|
||||
return _SKILL_TEMPLATE_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
|
||||
"""Execute a single inline-shell snippet and return its stdout (trimmed).
|
||||
|
||||
Failures return a short ``[inline-shell error: ...]`` marker instead of
|
||||
raising, so one bad snippet can't wreck the whole skill message.
|
||||
"""
|
||||
try:
|
||||
completed = subprocess.run(
|
||||
["bash", "-c", command],
|
||||
cwd=str(cwd) if cwd else None,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=max(1, int(timeout)),
|
||||
check=False,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
return f"[inline-shell timeout after {timeout}s: {command}]"
|
||||
except FileNotFoundError:
|
||||
return f"[inline-shell error: bash not found]"
|
||||
except Exception as exc:
|
||||
return f"[inline-shell error: {exc}]"
|
||||
|
||||
output = (completed.stdout or "").rstrip("\n")
|
||||
if not output and completed.stderr:
|
||||
output = completed.stderr.rstrip("\n")
|
||||
if len(output) > _INLINE_SHELL_MAX_OUTPUT:
|
||||
output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
|
||||
return output
|
||||
|
||||
|
||||
def _expand_inline_shell(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
timeout: int,
|
||||
) -> str:
|
||||
"""Replace every !`cmd` snippet in ``content`` with its stdout.
|
||||
|
||||
Runs each snippet with the skill directory as CWD so relative paths in
|
||||
the snippet work the way the author expects.
|
||||
"""
|
||||
if "!`" not in content:
|
||||
return content
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
cmd = match.group(1).strip()
|
||||
if not cmd:
|
||||
return ""
|
||||
return _run_inline_shell(cmd, skill_dir, timeout)
|
||||
|
||||
return _INLINE_SHELL_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def build_plan_path(
|
||||
user_instruction: str = "",
|
||||
*,
|
||||
now: datetime | None = None,
|
||||
) -> Path:
|
||||
"""Return the default workspace-relative markdown path for a /plan invocation.
|
||||
|
||||
Relative paths are intentional: file tools are task/backend-aware and resolve
|
||||
them against the active working directory for local, docker, ssh, modal,
|
||||
daytona, and similar terminal backends. That keeps the plan with the active
|
||||
workspace instead of the Hermes host's global home directory.
|
||||
"""
|
||||
slug_source = (user_instruction or "").strip().splitlines()[0] if user_instruction else ""
|
||||
slug = _PLAN_SLUG_RE.sub("-", slug_source.lower()).strip("-")
|
||||
if slug:
|
||||
slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-")
|
||||
slug = slug or "conversation-plan"
|
||||
timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S")
|
||||
return Path(".hermes") / "plans" / f"{timestamp}-{slug}.md"
|
||||
|
||||
|
||||
def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
|
||||
"""Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
|
||||
raw_identifier = (skill_identifier or "").strip()
|
||||
@@ -42,9 +167,7 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
|
||||
else:
|
||||
normalized = raw_identifier.lstrip("/")
|
||||
|
||||
loaded_skill = json.loads(
|
||||
skill_view(normalized, task_id=task_id, preprocess=False)
|
||||
)
|
||||
loaded_skill = json.loads(skill_view(normalized, task_id=task_id))
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
@@ -222,7 +345,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
_skill_commands = {}
|
||||
try:
|
||||
from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
|
||||
from agent.skill_utils import get_external_skills_dirs, iter_skill_index_files
|
||||
from agent.skill_utils import get_external_skills_dirs
|
||||
disabled = _get_disabled_skill_names()
|
||||
seen_names: set = set()
|
||||
|
||||
@@ -233,7 +356,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
dirs_to_scan.extend(get_external_skills_dirs())
|
||||
|
||||
for scan_dir in dirs_to_scan:
|
||||
for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
|
||||
for skill_md in scan_dir.rglob("SKILL.md"):
|
||||
if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
|
||||
continue
|
||||
try:
|
||||
|
||||
@@ -1,131 +0,0 @@
|
||||
"""Shared SKILL.md preprocessing helpers."""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
|
||||
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
|
||||
# left as-is so the user can debug them.
|
||||
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
|
||||
|
||||
# Matches inline shell snippets like: !`date +%Y-%m-%d`
|
||||
# Non-greedy, single-line only -- no newlines inside the backticks.
|
||||
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
|
||||
|
||||
# Cap inline-shell output so a runaway command can't blow out the context.
|
||||
_INLINE_SHELL_MAX_OUTPUT = 4000
|
||||
|
||||
|
||||
def load_skills_config() -> dict:
|
||||
"""Load the ``skills`` section of config.yaml (best-effort)."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
cfg = load_config() or {}
|
||||
skills_cfg = cfg.get("skills")
|
||||
if isinstance(skills_cfg, dict):
|
||||
return skills_cfg
|
||||
except Exception:
|
||||
logger.debug("Could not read skills config", exc_info=True)
|
||||
return {}
|
||||
|
||||
|
||||
def substitute_template_vars(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
session_id: str | None,
|
||||
) -> str:
|
||||
"""Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
|
||||
|
||||
Only substitutes tokens for which a concrete value is available --
|
||||
unresolved tokens are left in place so the author can spot them.
|
||||
"""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
skill_dir_str = str(skill_dir) if skill_dir else None
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
token = match.group(1)
|
||||
if token == "HERMES_SKILL_DIR" and skill_dir_str:
|
||||
return skill_dir_str
|
||||
if token == "HERMES_SESSION_ID" and session_id:
|
||||
return str(session_id)
|
||||
return match.group(0)
|
||||
|
||||
return _SKILL_TEMPLATE_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
|
||||
"""Execute a single inline-shell snippet and return its stdout (trimmed).
|
||||
|
||||
Failures return a short ``[inline-shell error: ...]`` marker instead of
|
||||
raising, so one bad snippet can't wreck the whole skill message.
|
||||
"""
|
||||
try:
|
||||
completed = subprocess.run(
|
||||
["bash", "-c", command],
|
||||
cwd=str(cwd) if cwd else None,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=max(1, int(timeout)),
|
||||
check=False,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
return f"[inline-shell timeout after {timeout}s: {command}]"
|
||||
except FileNotFoundError:
|
||||
return "[inline-shell error: bash not found]"
|
||||
except Exception as exc:
|
||||
return f"[inline-shell error: {exc}]"
|
||||
|
||||
output = (completed.stdout or "").rstrip("\n")
|
||||
if not output and completed.stderr:
|
||||
output = completed.stderr.rstrip("\n")
|
||||
if len(output) > _INLINE_SHELL_MAX_OUTPUT:
|
||||
output = output[:_INLINE_SHELL_MAX_OUTPUT] + "...[truncated]"
|
||||
return output
|
||||
|
||||
|
||||
def expand_inline_shell(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
timeout: int,
|
||||
) -> str:
|
||||
"""Replace every !`cmd` snippet in ``content`` with its stdout.
|
||||
|
||||
Runs each snippet with the skill directory as CWD so relative paths in
|
||||
the snippet work the way the author expects.
|
||||
"""
|
||||
if "!`" not in content:
|
||||
return content
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
cmd = match.group(1).strip()
|
||||
if not cmd:
|
||||
return ""
|
||||
return run_inline_shell(cmd, skill_dir, timeout)
|
||||
|
||||
return _INLINE_SHELL_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def preprocess_skill_content(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
session_id: str | None = None,
|
||||
skills_cfg: dict | None = None,
|
||||
) -> str:
|
||||
"""Apply configured SKILL.md template and inline-shell preprocessing."""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
cfg = skills_cfg if isinstance(skills_cfg, dict) else load_skills_config()
|
||||
if cfg.get("template_vars", True):
|
||||
content = substitute_template_vars(content, skill_dir, session_id)
|
||||
if cfg.get("inline_shell", False):
|
||||
timeout = int(cfg.get("inline_shell_timeout", 10) or 10)
|
||||
content = expand_inline_shell(content, skill_dir, timeout)
|
||||
return content
|
||||
@@ -435,7 +435,7 @@ def iter_skill_index_files(skills_dir: Path, filename: str):
|
||||
Excludes ``.git``, ``.github``, ``.hub`` directories.
|
||||
"""
|
||||
matches = []
|
||||
for root, dirs, files in os.walk(skills_dir, followlinks=True):
|
||||
for root, dirs, files in os.walk(skills_dir):
|
||||
dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS]
|
||||
if filename in files:
|
||||
matches.append(Path(root) / filename)
|
||||
|
||||
@@ -38,7 +38,7 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =
|
||||
response = call_llm(
|
||||
task="title_generation",
|
||||
messages=messages,
|
||||
max_tokens=500,
|
||||
max_tokens=30,
|
||||
temperature=0.3,
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
@@ -23,14 +23,9 @@ def get_transport(api_mode: str):
|
||||
This allows gradual migration — call sites can check for None
|
||||
and fall back to the legacy code path.
|
||||
"""
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
if cls is None:
|
||||
# The registry can be partially populated when a specific transport
|
||||
# module was imported directly (for example chat_completions before
|
||||
# codex). Discover on misses, not only when the registry is empty, so
|
||||
# test/order-dependent imports do not make valid api_modes unavailable.
|
||||
if not _REGISTRY:
|
||||
_discover_transports()
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
if cls is None:
|
||||
return None
|
||||
return cls()
|
||||
@@ -42,15 +37,3 @@ def _discover_transports() -> None:
|
||||
import agent.transports.anthropic # noqa: F401
|
||||
except ImportError:
|
||||
pass
|
||||
try:
|
||||
import agent.transports.codex # noqa: F401
|
||||
except ImportError:
|
||||
pass
|
||||
try:
|
||||
import agent.transports.chat_completions # noqa: F401
|
||||
except ImportError:
|
||||
pass
|
||||
try:
|
||||
import agent.transports.bedrock # noqa: F401
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
@@ -78,71 +78,23 @@ class AnthropicTransport(ProviderTransport):
|
||||
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
|
||||
"""Normalize Anthropic response to NormalizedResponse.
|
||||
|
||||
Parses content blocks (text, thinking, tool_use), maps stop_reason
|
||||
to OpenAI finish_reason, and collects reasoning_details in provider_data.
|
||||
kwargs:
|
||||
strip_tool_prefix: bool — strip 'mcp_mcp_' prefixes from tool names.
|
||||
"""
|
||||
import json
|
||||
from agent.anthropic_adapter import _to_plain_data
|
||||
from agent.transports.types import ToolCall
|
||||
from agent.anthropic_adapter import normalize_anthropic_response_v2
|
||||
|
||||
strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
|
||||
_MCP_PREFIX = "mcp_"
|
||||
|
||||
text_parts = []
|
||||
reasoning_parts = []
|
||||
reasoning_details = []
|
||||
tool_calls = []
|
||||
|
||||
for block in response.content:
|
||||
if block.type == "text":
|
||||
text_parts.append(block.text)
|
||||
elif block.type == "thinking":
|
||||
reasoning_parts.append(block.thinking)
|
||||
block_dict = _to_plain_data(block)
|
||||
if isinstance(block_dict, dict):
|
||||
reasoning_details.append(block_dict)
|
||||
elif block.type == "tool_use":
|
||||
name = block.name
|
||||
if strip_tool_prefix and name.startswith(_MCP_PREFIX):
|
||||
name = name[len(_MCP_PREFIX):]
|
||||
tool_calls.append(
|
||||
ToolCall(
|
||||
id=block.id,
|
||||
name=name,
|
||||
arguments=json.dumps(block.input),
|
||||
)
|
||||
)
|
||||
|
||||
finish_reason = self._STOP_REASON_MAP.get(response.stop_reason, "stop")
|
||||
|
||||
provider_data = {}
|
||||
if reasoning_details:
|
||||
provider_data["reasoning_details"] = reasoning_details
|
||||
|
||||
return NormalizedResponse(
|
||||
content="\n".join(text_parts) if text_parts else None,
|
||||
tool_calls=tool_calls or None,
|
||||
finish_reason=finish_reason,
|
||||
reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
|
||||
usage=None,
|
||||
provider_data=provider_data or None,
|
||||
)
|
||||
return normalize_anthropic_response_v2(response, strip_tool_prefix=strip_tool_prefix)
|
||||
|
||||
def validate_response(self, response: Any) -> bool:
|
||||
"""Check Anthropic response structure is valid.
|
||||
|
||||
An empty content list is legitimate when ``stop_reason == "end_turn"``
|
||||
— the model's canonical way of signalling "nothing more to add" after
|
||||
a tool turn that already delivered the user-facing text. Treating it
|
||||
as invalid falsely retries a completed response.
|
||||
"""
|
||||
"""Check Anthropic response structure is valid."""
|
||||
if response is None:
|
||||
return False
|
||||
content_blocks = getattr(response, "content", None)
|
||||
if not isinstance(content_blocks, list):
|
||||
return False
|
||||
if not content_blocks:
|
||||
return getattr(response, "stop_reason", None) == "end_turn"
|
||||
return False
|
||||
return True
|
||||
|
||||
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
|
||||
|
||||
@@ -1,154 +0,0 @@
|
||||
"""AWS Bedrock Converse API transport.
|
||||
|
||||
Delegates to the existing adapter functions in agent/bedrock_adapter.py.
|
||||
Bedrock uses its own boto3 client (not the OpenAI SDK), so the transport
|
||||
owns format conversion and normalization, while client construction and
|
||||
boto3 calls stay on AIAgent.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
|
||||
|
||||
class BedrockTransport(ProviderTransport):
|
||||
"""Transport for api_mode='bedrock_converse'."""
|
||||
|
||||
@property
|
||||
def api_mode(self) -> str:
|
||||
return "bedrock_converse"
|
||||
|
||||
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
|
||||
"""Convert OpenAI messages to Bedrock Converse format."""
|
||||
from agent.bedrock_adapter import convert_messages_to_converse
|
||||
return convert_messages_to_converse(messages)
|
||||
|
||||
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
|
||||
"""Convert OpenAI tool schemas to Bedrock Converse toolConfig."""
|
||||
from agent.bedrock_adapter import convert_tools_to_converse
|
||||
return convert_tools_to_converse(tools)
|
||||
|
||||
def build_kwargs(
|
||||
self,
|
||||
model: str,
|
||||
messages: List[Dict[str, Any]],
|
||||
tools: Optional[List[Dict[str, Any]]] = None,
|
||||
**params,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build Bedrock converse() kwargs.
|
||||
|
||||
Calls convert_messages and convert_tools internally.
|
||||
|
||||
params:
|
||||
max_tokens: int — output token limit (default 4096)
|
||||
temperature: float | None
|
||||
guardrail_config: dict | None — Bedrock guardrails
|
||||
region: str — AWS region (default 'us-east-1')
|
||||
"""
|
||||
from agent.bedrock_adapter import build_converse_kwargs
|
||||
|
||||
region = params.get("region", "us-east-1")
|
||||
guardrail = params.get("guardrail_config")
|
||||
|
||||
kwargs = build_converse_kwargs(
|
||||
model=model,
|
||||
messages=messages,
|
||||
tools=tools,
|
||||
max_tokens=params.get("max_tokens", 4096),
|
||||
temperature=params.get("temperature"),
|
||||
guardrail_config=guardrail,
|
||||
)
|
||||
# Sentinel keys for dispatch — agent pops these before the boto3 call
|
||||
kwargs["__bedrock_converse__"] = True
|
||||
kwargs["__bedrock_region__"] = region
|
||||
return kwargs
|
||||
|
||||
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
|
||||
"""Normalize Bedrock response to NormalizedResponse.
|
||||
|
||||
Handles two shapes:
|
||||
1. Raw boto3 dict (from direct converse() calls)
|
||||
2. Already-normalized SimpleNamespace with .choices (from dispatch site)
|
||||
"""
|
||||
from agent.bedrock_adapter import normalize_converse_response
|
||||
|
||||
# Normalize to OpenAI-compatible SimpleNamespace
|
||||
if hasattr(response, "choices") and response.choices:
|
||||
# Already normalized at dispatch site
|
||||
ns = response
|
||||
else:
|
||||
# Raw boto3 dict
|
||||
ns = normalize_converse_response(response)
|
||||
|
||||
choice = ns.choices[0]
|
||||
msg = choice.message
|
||||
finish_reason = choice.finish_reason or "stop"
|
||||
|
||||
tool_calls = None
|
||||
if msg.tool_calls:
|
||||
tool_calls = [
|
||||
ToolCall(
|
||||
id=tc.id,
|
||||
name=tc.function.name,
|
||||
arguments=tc.function.arguments,
|
||||
)
|
||||
for tc in msg.tool_calls
|
||||
]
|
||||
|
||||
usage = None
|
||||
if hasattr(ns, "usage") and ns.usage:
|
||||
u = ns.usage
|
||||
usage = Usage(
|
||||
prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
|
||||
completion_tokens=getattr(u, "completion_tokens", 0) or 0,
|
||||
total_tokens=getattr(u, "total_tokens", 0) or 0,
|
||||
)
|
||||
|
||||
reasoning = getattr(msg, "reasoning", None) or getattr(msg, "reasoning_content", None)
|
||||
|
||||
return NormalizedResponse(
|
||||
content=msg.content,
|
||||
tool_calls=tool_calls,
|
||||
finish_reason=finish_reason,
|
||||
reasoning=reasoning,
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
def validate_response(self, response: Any) -> bool:
|
||||
"""Check Bedrock response structure.
|
||||
|
||||
After normalize_converse_response, the response has OpenAI-compatible
|
||||
.choices — same check as chat_completions.
|
||||
"""
|
||||
if response is None:
|
||||
return False
|
||||
# Raw Bedrock dict response — check for 'output' key
|
||||
if isinstance(response, dict):
|
||||
return "output" in response
|
||||
# Already-normalized SimpleNamespace
|
||||
if hasattr(response, "choices"):
|
||||
return bool(response.choices)
|
||||
return False
|
||||
|
||||
def map_finish_reason(self, raw_reason: str) -> str:
|
||||
"""Map Bedrock stop reason to OpenAI finish_reason.
|
||||
|
||||
The adapter already does this mapping inside normalize_converse_response,
|
||||
so this is only used for direct access to raw responses.
|
||||
"""
|
||||
_MAP = {
|
||||
"end_turn": "stop",
|
||||
"tool_use": "tool_calls",
|
||||
"max_tokens": "length",
|
||||
"stop_sequence": "stop",
|
||||
"guardrail_intervened": "content_filter",
|
||||
"content_filtered": "content_filter",
|
||||
}
|
||||
return _MAP.get(raw_reason, "stop")
|
||||
|
||||
|
||||
# Auto-register on import
|
||||
from agent.transports import register_transport # noqa: E402
|
||||
|
||||
register_transport("bedrock_converse", BedrockTransport)
|
||||
@@ -1,394 +0,0 @@
|
||||
"""OpenAI Chat Completions transport.
|
||||
|
||||
Handles the default api_mode ('chat_completions') used by ~16 OpenAI-compatible
|
||||
providers (OpenRouter, Nous, NVIDIA, Qwen, Ollama, DeepSeek, xAI, Kimi, etc.).
|
||||
|
||||
Messages and tools are already in OpenAI format — convert_messages and
|
||||
convert_tools are near-identity. The complexity lives in build_kwargs
|
||||
which has provider-specific conditionals for max_tokens defaults,
|
||||
reasoning configuration, temperature handling, and extra_body assembly.
|
||||
"""
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
|
||||
from agent.prompt_builder import DEVELOPER_ROLE_MODELS
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
|
||||
|
||||
class ChatCompletionsTransport(ProviderTransport):
|
||||
"""Transport for api_mode='chat_completions'.
|
||||
|
||||
The default path for OpenAI-compatible providers.
|
||||
"""
|
||||
|
||||
@property
|
||||
def api_mode(self) -> str:
|
||||
return "chat_completions"
|
||||
|
||||
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
|
||||
"""Messages are already in OpenAI format — sanitize Codex leaks only.
|
||||
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` /
|
||||
``codex_message_items`` on the message, ``call_id``/``response_item_id``
|
||||
on tool_calls) that strict chat-completions providers reject with 400/422.
|
||||
"""
|
||||
needs_sanitize = False
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
if "codex_reasoning_items" in msg or "codex_message_items" in msg:
|
||||
needs_sanitize = True
|
||||
break
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
|
||||
needs_sanitize = True
|
||||
break
|
||||
if needs_sanitize:
|
||||
break
|
||||
|
||||
if not needs_sanitize:
|
||||
return messages
|
||||
|
||||
sanitized = copy.deepcopy(messages)
|
||||
for msg in sanitized:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
msg.pop("codex_reasoning_items", None)
|
||||
msg.pop("codex_message_items", None)
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
if isinstance(tc, dict):
|
||||
tc.pop("call_id", None)
|
||||
tc.pop("response_item_id", None)
|
||||
return sanitized
|
||||
|
||||
def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Tools are already in OpenAI format — identity."""
|
||||
return tools
|
||||
|
||||
def build_kwargs(
|
||||
self,
|
||||
model: str,
|
||||
messages: List[Dict[str, Any]],
|
||||
tools: Optional[List[Dict[str, Any]]] = None,
|
||||
**params,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build chat.completions.create() kwargs.
|
||||
|
||||
This is the most complex transport method — it handles ~16 providers
|
||||
via params rather than subclasses.
|
||||
|
||||
params:
|
||||
timeout: float — API call timeout
|
||||
max_tokens: int | None — user-configured max tokens
|
||||
ephemeral_max_output_tokens: int | None — one-shot override (error recovery)
|
||||
max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
|
||||
reasoning_config: dict | None
|
||||
request_overrides: dict | None
|
||||
session_id: str | None
|
||||
qwen_session_metadata: dict | None — {sessionId, promptId} precomputed
|
||||
model_lower: str — lowercase model name for pattern matching
|
||||
# Provider detection flags (all optional, default False)
|
||||
is_openrouter: bool
|
||||
is_nous: bool
|
||||
is_qwen_portal: bool
|
||||
is_github_models: bool
|
||||
is_nvidia_nim: bool
|
||||
is_kimi: bool
|
||||
is_custom_provider: bool
|
||||
ollama_num_ctx: int | None
|
||||
# Provider routing
|
||||
provider_preferences: dict | None
|
||||
# Qwen-specific
|
||||
qwen_prepare_fn: callable | None — runs AFTER codex sanitization
|
||||
qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
|
||||
# Temperature
|
||||
fixed_temperature: Any — from _fixed_temperature_for_model()
|
||||
omit_temperature: bool
|
||||
# Reasoning
|
||||
supports_reasoning: bool
|
||||
github_reasoning_extra: dict | None
|
||||
# Claude on OpenRouter/Nous max output
|
||||
anthropic_max_output: int | None
|
||||
# Extra
|
||||
extra_body_additions: dict | None — pre-built extra_body entries
|
||||
"""
|
||||
# Codex sanitization: drop reasoning_items / call_id / response_item_id
|
||||
sanitized = self.convert_messages(messages)
|
||||
|
||||
# Qwen portal prep AFTER codex sanitization. If sanitize already
|
||||
# deepcopied, reuse that copy via the in-place variant to avoid a
|
||||
# second deepcopy.
|
||||
is_qwen = params.get("is_qwen_portal", False)
|
||||
if is_qwen:
|
||||
qwen_prep = params.get("qwen_prepare_fn")
|
||||
qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
|
||||
if sanitized is messages:
|
||||
if qwen_prep is not None:
|
||||
sanitized = qwen_prep(sanitized)
|
||||
else:
|
||||
# Already deepcopied — transform in place
|
||||
if qwen_prep_inplace is not None:
|
||||
qwen_prep_inplace(sanitized)
|
||||
elif qwen_prep is not None:
|
||||
sanitized = qwen_prep(sanitized)
|
||||
|
||||
# Developer role swap for GPT-5/Codex models
|
||||
model_lower = params.get("model_lower", (model or "").lower())
|
||||
if (
|
||||
sanitized
|
||||
and isinstance(sanitized[0], dict)
|
||||
and sanitized[0].get("role") == "system"
|
||||
and any(p in model_lower for p in DEVELOPER_ROLE_MODELS)
|
||||
):
|
||||
sanitized = list(sanitized)
|
||||
sanitized[0] = {**sanitized[0], "role": "developer"}
|
||||
|
||||
api_kwargs: Dict[str, Any] = {
|
||||
"model": model,
|
||||
"messages": sanitized,
|
||||
}
|
||||
|
||||
timeout = params.get("timeout")
|
||||
if timeout is not None:
|
||||
api_kwargs["timeout"] = timeout
|
||||
|
||||
# Temperature
|
||||
fixed_temp = params.get("fixed_temperature")
|
||||
omit_temp = params.get("omit_temperature", False)
|
||||
if omit_temp:
|
||||
api_kwargs.pop("temperature", None)
|
||||
elif fixed_temp is not None:
|
||||
api_kwargs["temperature"] = fixed_temp
|
||||
|
||||
# Qwen metadata (caller precomputes {sessionId, promptId})
|
||||
qwen_meta = params.get("qwen_session_metadata")
|
||||
if qwen_meta and is_qwen:
|
||||
api_kwargs["metadata"] = qwen_meta
|
||||
|
||||
# Tools
|
||||
if tools:
|
||||
# Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting
|
||||
# tool parameters here keeps aggregator routes (Nous, OpenRouter,
|
||||
# etc.) compatible, in addition to direct moonshot.ai endpoints.
|
||||
if is_moonshot_model(model):
|
||||
tools = sanitize_moonshot_tools(tools)
|
||||
api_kwargs["tools"] = tools
|
||||
|
||||
# max_tokens resolution — priority: ephemeral > user > provider default
|
||||
max_tokens_fn = params.get("max_tokens_param_fn")
|
||||
ephemeral = params.get("ephemeral_max_output_tokens")
|
||||
max_tokens = params.get("max_tokens")
|
||||
anthropic_max_out = params.get("anthropic_max_output")
|
||||
is_nvidia_nim = params.get("is_nvidia_nim", False)
|
||||
is_kimi = params.get("is_kimi", False)
|
||||
reasoning_config = params.get("reasoning_config")
|
||||
|
||||
if ephemeral is not None and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(ephemeral))
|
||||
elif max_tokens is not None and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(max_tokens))
|
||||
elif is_nvidia_nim and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(16384))
|
||||
elif is_qwen and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(65536))
|
||||
elif is_kimi and max_tokens_fn:
|
||||
# Kimi/Moonshot: 32000 matches Kimi CLI's default
|
||||
api_kwargs.update(max_tokens_fn(32000))
|
||||
elif anthropic_max_out is not None:
|
||||
api_kwargs["max_tokens"] = anthropic_max_out
|
||||
|
||||
# Kimi: top-level reasoning_effort (unless thinking disabled)
|
||||
if is_kimi:
|
||||
_kimi_thinking_off = bool(
|
||||
reasoning_config
|
||||
and isinstance(reasoning_config, dict)
|
||||
and reasoning_config.get("enabled") is False
|
||||
)
|
||||
if not _kimi_thinking_off:
|
||||
_kimi_effort = "medium"
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
_e = (reasoning_config.get("effort") or "").strip().lower()
|
||||
if _e in ("low", "medium", "high"):
|
||||
_kimi_effort = _e
|
||||
api_kwargs["reasoning_effort"] = _kimi_effort
|
||||
|
||||
# extra_body assembly
|
||||
extra_body: Dict[str, Any] = {}
|
||||
|
||||
is_openrouter = params.get("is_openrouter", False)
|
||||
is_nous = params.get("is_nous", False)
|
||||
is_github_models = params.get("is_github_models", False)
|
||||
|
||||
provider_prefs = params.get("provider_preferences")
|
||||
if provider_prefs and is_openrouter:
|
||||
extra_body["provider"] = provider_prefs
|
||||
|
||||
# Kimi extra_body.thinking
|
||||
if is_kimi:
|
||||
_kimi_thinking_enabled = True
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
if reasoning_config.get("enabled") is False:
|
||||
_kimi_thinking_enabled = False
|
||||
extra_body["thinking"] = {
|
||||
"type": "enabled" if _kimi_thinking_enabled else "disabled",
|
||||
}
|
||||
|
||||
# Reasoning
|
||||
if params.get("supports_reasoning", False):
|
||||
if is_github_models:
|
||||
gh_reasoning = params.get("github_reasoning_extra")
|
||||
if gh_reasoning is not None:
|
||||
extra_body["reasoning"] = gh_reasoning
|
||||
else:
|
||||
if reasoning_config is not None:
|
||||
rc = dict(reasoning_config)
|
||||
if is_nous and rc.get("enabled") is False:
|
||||
pass # omit for Nous when disabled
|
||||
else:
|
||||
extra_body["reasoning"] = rc
|
||||
else:
|
||||
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
|
||||
|
||||
if is_nous:
|
||||
extra_body["tags"] = ["product=hermes-agent"]
|
||||
|
||||
# Ollama num_ctx
|
||||
ollama_ctx = params.get("ollama_num_ctx")
|
||||
if ollama_ctx:
|
||||
options = extra_body.get("options", {})
|
||||
options["num_ctx"] = ollama_ctx
|
||||
extra_body["options"] = options
|
||||
|
||||
# Ollama/custom think=false
|
||||
if params.get("is_custom_provider", False):
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
_effort = (reasoning_config.get("effort") or "").strip().lower()
|
||||
_enabled = reasoning_config.get("enabled", True)
|
||||
if _effort == "none" or _enabled is False:
|
||||
extra_body["think"] = False
|
||||
|
||||
if is_qwen:
|
||||
extra_body["vl_high_resolution_images"] = True
|
||||
|
||||
# Merge any pre-built extra_body additions
|
||||
additions = params.get("extra_body_additions")
|
||||
if additions:
|
||||
extra_body.update(additions)
|
||||
|
||||
if extra_body:
|
||||
api_kwargs["extra_body"] = extra_body
|
||||
|
||||
# Request overrides last (service_tier etc.)
|
||||
overrides = params.get("request_overrides")
|
||||
if overrides:
|
||||
api_kwargs.update(overrides)
|
||||
|
||||
return api_kwargs
|
||||
|
||||
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
|
||||
"""Normalize OpenAI ChatCompletion to NormalizedResponse.
|
||||
|
||||
For chat_completions, this is near-identity — the response is already
|
||||
in OpenAI format. extra_content on tool_calls (Gemini thought_signature)
|
||||
is preserved via ToolCall.provider_data. reasoning_details (OpenRouter
|
||||
unified format) and reasoning_content (DeepSeek/Moonshot) are also
|
||||
preserved for downstream replay.
|
||||
"""
|
||||
choice = response.choices[0]
|
||||
msg = choice.message
|
||||
finish_reason = choice.finish_reason or "stop"
|
||||
|
||||
tool_calls = None
|
||||
if msg.tool_calls:
|
||||
tool_calls = []
|
||||
for tc in msg.tool_calls:
|
||||
# Preserve provider-specific extras on the tool call.
|
||||
# Gemini 3 thinking models attach extra_content with
|
||||
# thought_signature — without replay on the next turn the API
|
||||
# rejects the request with 400.
|
||||
tc_provider_data: Dict[str, Any] = {}
|
||||
extra = getattr(tc, "extra_content", None)
|
||||
if extra is None and hasattr(tc, "model_extra"):
|
||||
extra = (tc.model_extra or {}).get("extra_content")
|
||||
if extra is not None:
|
||||
if hasattr(extra, "model_dump"):
|
||||
try:
|
||||
extra = extra.model_dump()
|
||||
except Exception:
|
||||
pass
|
||||
tc_provider_data["extra_content"] = extra
|
||||
tool_calls.append(ToolCall(
|
||||
id=tc.id,
|
||||
name=tc.function.name,
|
||||
arguments=tc.function.arguments,
|
||||
provider_data=tc_provider_data or None,
|
||||
))
|
||||
|
||||
usage = None
|
||||
if hasattr(response, "usage") and response.usage:
|
||||
u = response.usage
|
||||
usage = Usage(
|
||||
prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
|
||||
completion_tokens=getattr(u, "completion_tokens", 0) or 0,
|
||||
total_tokens=getattr(u, "total_tokens", 0) or 0,
|
||||
)
|
||||
|
||||
# Preserve reasoning fields separately. DeepSeek/Moonshot use
|
||||
# ``reasoning_content``; others use ``reasoning``. Downstream code
|
||||
# (_extract_reasoning, thinking-prefill retry) reads both distinctly,
|
||||
# so keep them apart in provider_data rather than merging.
|
||||
reasoning = getattr(msg, "reasoning", None)
|
||||
reasoning_content = getattr(msg, "reasoning_content", None)
|
||||
|
||||
provider_data: Dict[str, Any] = {}
|
||||
if reasoning_content:
|
||||
provider_data["reasoning_content"] = reasoning_content
|
||||
rd = getattr(msg, "reasoning_details", None)
|
||||
if rd:
|
||||
provider_data["reasoning_details"] = rd
|
||||
|
||||
return NormalizedResponse(
|
||||
content=msg.content,
|
||||
tool_calls=tool_calls,
|
||||
finish_reason=finish_reason,
|
||||
reasoning=reasoning,
|
||||
usage=usage,
|
||||
provider_data=provider_data or None,
|
||||
)
|
||||
|
||||
def validate_response(self, response: Any) -> bool:
|
||||
"""Check that response has valid choices."""
|
||||
if response is None:
|
||||
return False
|
||||
if not hasattr(response, "choices") or response.choices is None:
|
||||
return False
|
||||
if not response.choices:
|
||||
return False
|
||||
return True
|
||||
|
||||
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
|
||||
"""Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
|
||||
usage = getattr(response, "usage", None)
|
||||
if usage is None:
|
||||
return None
|
||||
details = getattr(usage, "prompt_tokens_details", None)
|
||||
if details is None:
|
||||
return None
|
||||
cached = getattr(details, "cached_tokens", 0) or 0
|
||||
written = getattr(details, "cache_write_tokens", 0) or 0
|
||||
if cached or written:
|
||||
return {"cached_tokens": cached, "creation_tokens": written}
|
||||
return None
|
||||
|
||||
|
||||
# Auto-register on import
|
||||
from agent.transports import register_transport # noqa: E402
|
||||
|
||||
register_transport("chat_completions", ChatCompletionsTransport)
|
||||
@@ -1,237 +0,0 @@
|
||||
"""OpenAI Responses API (Codex) transport.
|
||||
|
||||
Delegates to the existing adapter functions in agent/codex_responses_adapter.py.
|
||||
This transport owns format conversion and normalization — NOT client lifecycle,
|
||||
streaming, or the _run_codex_stream() call path.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
|
||||
|
||||
class ResponsesApiTransport(ProviderTransport):
|
||||
"""Transport for api_mode='codex_responses'.
|
||||
|
||||
Wraps the functions extracted into codex_responses_adapter.py (PR 1).
|
||||
"""
|
||||
|
||||
@property
|
||||
def api_mode(self) -> str:
|
||||
return "codex_responses"
|
||||
|
||||
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
|
||||
"""Convert OpenAI chat messages to Responses API input items."""
|
||||
from agent.codex_responses_adapter import _chat_messages_to_responses_input
|
||||
return _chat_messages_to_responses_input(messages)
|
||||
|
||||
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
|
||||
"""Convert OpenAI tool schemas to Responses API function definitions."""
|
||||
from agent.codex_responses_adapter import _responses_tools
|
||||
return _responses_tools(tools)
|
||||
|
||||
def build_kwargs(
|
||||
self,
|
||||
model: str,
|
||||
messages: List[Dict[str, Any]],
|
||||
tools: Optional[List[Dict[str, Any]]] = None,
|
||||
**params,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build Responses API kwargs.
|
||||
|
||||
Calls convert_messages and convert_tools internally.
|
||||
|
||||
params:
|
||||
instructions: str — system prompt (extracted from messages[0] if not given)
|
||||
reasoning_config: dict | None — {effort, enabled}
|
||||
session_id: str | None — used for prompt_cache_key + xAI conv header
|
||||
max_tokens: int | None — max_output_tokens
|
||||
request_overrides: dict | None — extra kwargs merged in
|
||||
provider: str | None — provider name for backend-specific logic
|
||||
base_url: str | None — endpoint URL
|
||||
base_url_hostname: str | None — hostname for backend detection
|
||||
is_github_responses: bool — Copilot/GitHub models backend
|
||||
is_codex_backend: bool — chatgpt.com/backend-api/codex
|
||||
is_xai_responses: bool — xAI/Grok backend
|
||||
github_reasoning_extra: dict | None — Copilot reasoning params
|
||||
"""
|
||||
from agent.codex_responses_adapter import (
|
||||
_chat_messages_to_responses_input,
|
||||
_responses_tools,
|
||||
)
|
||||
|
||||
from run_agent import DEFAULT_AGENT_IDENTITY
|
||||
|
||||
instructions = params.get("instructions", "")
|
||||
payload_messages = messages
|
||||
if not instructions:
|
||||
if messages and messages[0].get("role") == "system":
|
||||
instructions = str(messages[0].get("content") or "").strip()
|
||||
payload_messages = messages[1:]
|
||||
if not instructions:
|
||||
instructions = DEFAULT_AGENT_IDENTITY
|
||||
|
||||
is_github_responses = params.get("is_github_responses", False)
|
||||
is_codex_backend = params.get("is_codex_backend", False)
|
||||
is_xai_responses = params.get("is_xai_responses", False)
|
||||
|
||||
# Resolve reasoning effort
|
||||
reasoning_effort = "medium"
|
||||
reasoning_enabled = True
|
||||
reasoning_config = params.get("reasoning_config")
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
if reasoning_config.get("enabled") is False:
|
||||
reasoning_enabled = False
|
||||
elif reasoning_config.get("effort"):
|
||||
reasoning_effort = reasoning_config["effort"]
|
||||
|
||||
_effort_clamp = {"minimal": "low"}
|
||||
reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)
|
||||
|
||||
kwargs = {
|
||||
"model": model,
|
||||
"instructions": instructions,
|
||||
"input": _chat_messages_to_responses_input(payload_messages),
|
||||
"tools": _responses_tools(tools),
|
||||
"tool_choice": "auto",
|
||||
"parallel_tool_calls": True,
|
||||
"store": False,
|
||||
}
|
||||
|
||||
session_id = params.get("session_id")
|
||||
if not is_github_responses and session_id:
|
||||
kwargs["prompt_cache_key"] = session_id
|
||||
|
||||
if reasoning_enabled and is_xai_responses:
|
||||
kwargs["include"] = ["reasoning.encrypted_content"]
|
||||
elif reasoning_enabled:
|
||||
if is_github_responses:
|
||||
github_reasoning = params.get("github_reasoning_extra")
|
||||
if github_reasoning is not None:
|
||||
kwargs["reasoning"] = github_reasoning
|
||||
else:
|
||||
kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
|
||||
kwargs["include"] = ["reasoning.encrypted_content"]
|
||||
elif not is_github_responses and not is_xai_responses:
|
||||
kwargs["include"] = []
|
||||
|
||||
request_overrides = params.get("request_overrides")
|
||||
if request_overrides:
|
||||
kwargs.update(request_overrides)
|
||||
|
||||
if is_codex_backend:
|
||||
prompt_cache_key = kwargs.get("prompt_cache_key")
|
||||
cache_scope_id = str(prompt_cache_key or session_id or "").strip()
|
||||
if cache_scope_id:
|
||||
existing_extra_headers = kwargs.get("extra_headers")
|
||||
merged_extra_headers: Dict[str, str] = {}
|
||||
if isinstance(existing_extra_headers, dict):
|
||||
merged_extra_headers.update(
|
||||
{
|
||||
str(key): str(value)
|
||||
for key, value in existing_extra_headers.items()
|
||||
if key and value is not None
|
||||
}
|
||||
)
|
||||
merged_extra_headers["session_id"] = cache_scope_id
|
||||
merged_extra_headers["x-client-request-id"] = cache_scope_id
|
||||
kwargs["extra_headers"] = merged_extra_headers
|
||||
|
||||
max_tokens = params.get("max_tokens")
|
||||
if max_tokens is not None and not is_codex_backend:
|
||||
kwargs["max_output_tokens"] = max_tokens
|
||||
|
||||
if is_xai_responses and session_id:
|
||||
kwargs["extra_headers"] = {"x-grok-conv-id": session_id}
|
||||
|
||||
return kwargs
|
||||
|
||||
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
|
||||
"""Normalize Codex Responses API response to NormalizedResponse."""
|
||||
from agent.codex_responses_adapter import (
|
||||
_normalize_codex_response,
|
||||
_extract_responses_message_text,
|
||||
_extract_responses_reasoning_text,
|
||||
)
|
||||
|
||||
# _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
|
||||
msg, finish_reason = _normalize_codex_response(response)
|
||||
|
||||
tool_calls = None
|
||||
if msg and msg.tool_calls:
|
||||
tool_calls = []
|
||||
for tc in msg.tool_calls:
|
||||
provider_data = {}
|
||||
if hasattr(tc, "call_id") and tc.call_id:
|
||||
provider_data["call_id"] = tc.call_id
|
||||
if hasattr(tc, "response_item_id") and tc.response_item_id:
|
||||
provider_data["response_item_id"] = tc.response_item_id
|
||||
tool_calls.append(ToolCall(
|
||||
id=tc.id if hasattr(tc, "id") else (tc.function.name if hasattr(tc, "function") else None),
|
||||
name=tc.function.name if hasattr(tc, "function") else getattr(tc, "name", ""),
|
||||
arguments=tc.function.arguments if hasattr(tc, "function") else getattr(tc, "arguments", "{}"),
|
||||
provider_data=provider_data or None,
|
||||
))
|
||||
|
||||
# Extract reasoning items for provider_data
|
||||
provider_data = {}
|
||||
if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
|
||||
provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
|
||||
if msg and hasattr(msg, "codex_message_items") and msg.codex_message_items:
|
||||
provider_data["codex_message_items"] = msg.codex_message_items
|
||||
if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
|
||||
provider_data["reasoning_details"] = msg.reasoning_details
|
||||
|
||||
return NormalizedResponse(
|
||||
content=msg.content if msg else None,
|
||||
tool_calls=tool_calls,
|
||||
finish_reason=finish_reason or "stop",
|
||||
reasoning=msg.reasoning if msg and hasattr(msg, "reasoning") else None,
|
||||
usage=None, # Codex usage is extracted separately in normalize_usage()
|
||||
provider_data=provider_data or None,
|
||||
)
|
||||
|
||||
def validate_response(self, response: Any) -> bool:
|
||||
"""Check Codex Responses API response has valid output structure.
|
||||
|
||||
Returns True only if response.output is a non-empty list.
|
||||
Does NOT check output_text fallback — the caller handles that
|
||||
with diagnostic logging for stream backfill recovery.
|
||||
"""
|
||||
if response is None:
|
||||
return False
|
||||
output = getattr(response, "output", None)
|
||||
if not isinstance(output, list) or not output:
|
||||
return False
|
||||
return True
|
||||
|
||||
def preflight_kwargs(self, api_kwargs: Any, *, allow_stream: bool = False) -> dict:
|
||||
"""Validate and sanitize Codex API kwargs before the call.
|
||||
|
||||
Normalizes input items, strips unsupported fields, validates structure.
|
||||
"""
|
||||
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
|
||||
return _preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream)
|
||||
|
||||
def map_finish_reason(self, raw_reason: str) -> str:
|
||||
"""Map Codex response.status to OpenAI finish_reason.
|
||||
|
||||
Codex uses response.status ('completed', 'incomplete') +
|
||||
response.incomplete_details.reason for granular mapping.
|
||||
This method handles the simple status string; the caller
|
||||
should check incomplete_details separately for 'max_output_tokens'.
|
||||
"""
|
||||
_MAP = {
|
||||
"completed": "stop",
|
||||
"incomplete": "length",
|
||||
"failed": "stop",
|
||||
"cancelled": "stop",
|
||||
}
|
||||
return _MAP.get(raw_reason, "stop")
|
||||
|
||||
|
||||
# Auto-register on import
|
||||
from agent.transports import register_transport # noqa: E402
|
||||
|
||||
register_transport("codex_responses", ResponsesApiTransport)
|
||||
@@ -37,44 +37,6 @@ class ToolCall:
|
||||
arguments: str # JSON string
|
||||
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
|
||||
|
||||
# ── Backward compatibility ──────────────────────────────────
|
||||
# The agent loop reads tc.function.name / tc.function.arguments
|
||||
# throughout run_agent.py (45+ sites). These properties let
|
||||
# NormalizedResponse pass through without the _nr_to_assistant_message
|
||||
# shim, while keeping ToolCall's canonical fields flat.
|
||||
@property
|
||||
def type(self) -> str:
|
||||
return "function"
|
||||
|
||||
@property
|
||||
def function(self) -> "ToolCall":
|
||||
"""Return self so tc.function.name / tc.function.arguments work."""
|
||||
return self
|
||||
|
||||
@property
|
||||
def call_id(self) -> Optional[str]:
|
||||
"""Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
|
||||
return (self.provider_data or {}).get("call_id")
|
||||
|
||||
@property
|
||||
def response_item_id(self) -> Optional[str]:
|
||||
"""Codex response_item_id from provider_data."""
|
||||
return (self.provider_data or {}).get("response_item_id")
|
||||
|
||||
@property
|
||||
def extra_content(self) -> Optional[Dict[str, Any]]:
|
||||
"""Gemini extra_content (thought_signature) from provider_data.
|
||||
|
||||
Gemini 3 thinking models attach ``extra_content`` with a
|
||||
``thought_signature`` to each tool call. This signature must be
|
||||
replayed on subsequent API calls — without it the API rejects the
|
||||
request with HTTP 400. The chat_completions transport stores this
|
||||
in ``provider_data["extra_content"]``; this property exposes it so
|
||||
``_build_assistant_message`` can ``getattr(tc, "extra_content")``
|
||||
uniformly.
|
||||
"""
|
||||
return (self.provider_data or {}).get("extra_content")
|
||||
|
||||
|
||||
@dataclass
|
||||
class Usage:
|
||||
@@ -97,7 +59,7 @@ class NormalizedResponse:
|
||||
Response-level ``provider_data`` examples:
|
||||
|
||||
* Anthropic: ``{"reasoning_details": [...]}``
|
||||
* Codex: ``{"codex_reasoning_items": [...], "codex_message_items": [...]}``
|
||||
* Codex: ``{"codex_reasoning_items": [...]}``
|
||||
* Others: ``None``
|
||||
"""
|
||||
|
||||
@@ -108,29 +70,6 @@ class NormalizedResponse:
|
||||
usage: Optional[Usage] = None
|
||||
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
|
||||
|
||||
# ── Backward compatibility ──────────────────────────────────
|
||||
# The shim _nr_to_assistant_message() mapped these from provider_data.
|
||||
# These properties let NormalizedResponse pass through directly.
|
||||
@property
|
||||
def reasoning_content(self) -> Optional[str]:
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("reasoning_content")
|
||||
|
||||
@property
|
||||
def reasoning_details(self):
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("reasoning_details")
|
||||
|
||||
@property
|
||||
def codex_reasoning_items(self):
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("codex_reasoning_items")
|
||||
|
||||
@property
|
||||
def codex_message_items(self):
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("codex_message_items")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Factory helpers
|
||||
|
||||
@@ -533,22 +533,10 @@ def normalize_usage(
|
||||
prompt_total = _to_int(getattr(response_usage, "prompt_tokens", 0))
|
||||
output_tokens = _to_int(getattr(response_usage, "completion_tokens", 0))
|
||||
details = getattr(response_usage, "prompt_tokens_details", None)
|
||||
# Primary: OpenAI-style prompt_tokens_details. Fallback: Anthropic-style
|
||||
# top-level fields that some OpenAI-compatible proxies (OpenRouter, Vercel
|
||||
# AI Gateway, Cline) expose when routing Claude models — without this
|
||||
# fallback, cache writes are undercounted as 0 and cache reads can be
|
||||
# missed when the proxy only surfaces them at the top level.
|
||||
# Port of cline/cline#10266.
|
||||
cache_read_tokens = _to_int(getattr(details, "cached_tokens", 0) if details else 0)
|
||||
if not cache_read_tokens:
|
||||
cache_read_tokens = _to_int(getattr(response_usage, "cache_read_input_tokens", 0))
|
||||
cache_write_tokens = _to_int(
|
||||
getattr(details, "cache_write_tokens", 0) if details else 0
|
||||
)
|
||||
if not cache_write_tokens:
|
||||
cache_write_tokens = _to_int(
|
||||
getattr(response_usage, "cache_creation_input_tokens", 0)
|
||||
)
|
||||
input_tokens = max(0, prompt_total - cache_read_tokens - cache_write_tokens)
|
||||
|
||||
reasoning_tokens = 0
|
||||
|
||||
+6
-2
@@ -951,9 +951,13 @@ class BatchRunner:
|
||||
root_logger.setLevel(original_level)
|
||||
|
||||
# Aggregate all batch statistics and update checkpoint
|
||||
all_completed_prompts = list(completed_prompts_set)
|
||||
total_reasoning_stats = {"total_assistant_turns": 0, "turns_with_reasoning": 0, "turns_without_reasoning": 0}
|
||||
|
||||
|
||||
for batch_result in results:
|
||||
# Add newly completed prompts
|
||||
all_completed_prompts.extend(batch_result.get("completed_prompts", []))
|
||||
|
||||
# Aggregate tool stats
|
||||
for tool_name, stats in batch_result.get("tool_stats", {}).items():
|
||||
if tool_name not in total_tool_stats:
|
||||
@@ -973,7 +977,7 @@ class BatchRunner:
|
||||
|
||||
# Save final checkpoint (best-effort; incremental writes already happened)
|
||||
try:
|
||||
checkpoint_data["completed_prompts"] = sorted(completed_prompts_set)
|
||||
checkpoint_data["completed_prompts"] = all_completed_prompts
|
||||
self._save_checkpoint(checkpoint_data, lock=checkpoint_lock)
|
||||
except Exception as ckpt_err:
|
||||
print(f"âš ï¸ Warning: Failed to save final checkpoint: {ckpt_err}")
|
||||
|
||||
+10
-52
@@ -326,16 +326,6 @@ compression:
|
||||
# To pin a specific model/provider for compression summaries, use the
|
||||
# auxiliary section below (auxiliary.compression.provider / model).
|
||||
|
||||
# =============================================================================
|
||||
# Anthropic prompt caching TTL
|
||||
# =============================================================================
|
||||
# When prompt caching is active (Claude via OpenRouter or native Anthropic),
|
||||
# Anthropic supports two TTL tiers for cached prefixes: "5m" (default) and
|
||||
# "1h". Other values are ignored and "5m" is used.
|
||||
#
|
||||
prompt_caching:
|
||||
cache_ttl: "5m" # use "1h" for long sessions with pauses between turns
|
||||
|
||||
# =============================================================================
|
||||
# Auxiliary Models (Advanced — Experimental)
|
||||
# =============================================================================
|
||||
@@ -517,13 +507,6 @@ agent:
|
||||
# finish, then interrupts anything still running after this timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
# restart_drain_timeout: 60
|
||||
|
||||
# Max app-level retry attempts for API errors (connection drops, provider
|
||||
# timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this
|
||||
# to 1 if you use fallback providers and want fast failover on flaky
|
||||
# primaries (default 3). The OpenAI SDK does its own low-level retries
|
||||
# underneath this wrapper — this is the Hermes-level loop.
|
||||
# api_max_retries: 3
|
||||
|
||||
# Enable verbose logging
|
||||
verbose: false
|
||||
@@ -787,20 +770,10 @@ code_execution:
|
||||
# Subagent Delegation
|
||||
# =============================================================================
|
||||
# The delegate_task tool spawns child agents with isolated context.
|
||||
# Supports single tasks and batch mode (default 3 parallel, configurable).
|
||||
# Supports single tasks and batch mode (up to 3 parallel).
|
||||
delegation:
|
||||
max_iterations: 50 # Max tool-calling turns per child (default: 50)
|
||||
# max_concurrent_children: 3 # Max parallel child agents per batch (default: 3, floor: 1, no ceiling).
|
||||
# WARNING: values above 10 multiply API cost linearly.
|
||||
# max_spawn_depth: 1 # Delegation tree depth cap (range: 1-3, default: 1 = flat).
|
||||
# Raise to 2 to allow workers to spawn their own subagents.
|
||||
# Requires role="orchestrator" on intermediate agents.
|
||||
# orchestrator_enabled: true # Kill switch for role="orchestrator" children (default: true).
|
||||
# subagent_auto_approve: false # When a subagent hits a dangerous-command approval prompt, auto-deny (default: false)
|
||||
# or auto-approve "once" (true) instead of blocking on stdin.
|
||||
# The parent TUI owns stdin, so blocking would deadlock; non-interactive resolution is required.
|
||||
# Both choices emit a logger.warning audit line. Flip to true only for cron/batch pipelines.
|
||||
# inherit_mcp_toolsets: true # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
|
||||
default_toolsets: ["terminal", "file", "web"] # Default toolsets for subagents
|
||||
# model: "google/gemini-3-flash-preview" # Override model for subagents (empty = inherit parent)
|
||||
# provider: "openrouter" # Override provider for subagents (empty = inherit parent)
|
||||
# # Resolves full credentials (base_url, api_key) automatically.
|
||||
@@ -824,9 +797,7 @@ delegation:
|
||||
# Display
|
||||
# =============================================================================
|
||||
display:
|
||||
# Use compact banner mode (hides the ASCII-art banner, shows a single line).
|
||||
# true: Compact single-line banner
|
||||
# false: Full ASCII banner with tool/skill summary (default)
|
||||
# Use compact banner mode
|
||||
compact: false
|
||||
|
||||
# Tool progress display level (CLI and gateway)
|
||||
@@ -840,15 +811,12 @@ display:
|
||||
# Gateway-only natural mid-turn assistant updates.
|
||||
# When true, completed assistant status messages are sent as separate chat
|
||||
# messages. This is independent of tool_progress and gateway streaming.
|
||||
# true: Send mid-turn assistant updates as separate messages (default)
|
||||
# false: Only send the final response
|
||||
interim_assistant_messages: true
|
||||
|
||||
# What Enter does when Hermes is already busy (CLI and gateway platforms).
|
||||
# What Enter does when Hermes is already busy in the CLI.
|
||||
# interrupt: Interrupt the current run and redirect Hermes (default)
|
||||
# queue: Queue your message for the next turn
|
||||
# Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
|
||||
# Toggle at runtime with /busy_input_mode <interrupt|queue>.
|
||||
# Ctrl+C always interrupts regardless of this setting.
|
||||
busy_input_mode: interrupt
|
||||
|
||||
# Background process notifications (gateway/messaging only).
|
||||
@@ -864,22 +832,17 @@ display:
|
||||
# Play terminal bell when agent finishes a response.
|
||||
# Useful for long-running tasks — your terminal will ding when the agent is done.
|
||||
# Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
|
||||
# true: Ring the terminal bell on each response
|
||||
# false: Silent (default)
|
||||
bell_on_complete: false
|
||||
|
||||
# Show model reasoning/thinking before each response.
|
||||
# When enabled, a dim box shows the model's thought process above the response.
|
||||
# Toggle at runtime with /reasoning show or /reasoning hide.
|
||||
# true: Show the reasoning box
|
||||
# false: Hide reasoning (default)
|
||||
show_reasoning: false
|
||||
|
||||
# Stream tokens to the terminal as they arrive instead of waiting for the
|
||||
# full response. The response box opens on first token and text appears
|
||||
# line-by-line. Tool calls are still captured silently.
|
||||
# true: Stream tokens as they arrive (default)
|
||||
# false: Wait for the full response before rendering
|
||||
# Stream tokens to the terminal in real-time. Disable to wait for full responses.
|
||||
streaming: true
|
||||
|
||||
# ───────────────────────────────────────────────────────────────────────────
|
||||
@@ -889,15 +852,10 @@ display:
|
||||
# response box label, and branding text. Change at runtime with /skin <name>.
|
||||
#
|
||||
# Built-in skins:
|
||||
# default — Classic Hermes gold/kawaii
|
||||
# ares — Crimson/bronze war-god theme with spinner wings
|
||||
# mono — Clean grayscale monochrome
|
||||
# slate — Cool blue developer-focused
|
||||
# daylight — Bright light-mode theme
|
||||
# warm-lightmode — Warm paper-tone light-mode theme
|
||||
# poseidon — Sea-green/teal Olympian theme
|
||||
# sisyphus — Earthy stone-and-moss theme
|
||||
# charizard — Fiery orange dragon theme
|
||||
# default — Classic Hermes gold/kawaii
|
||||
# ares — Crimson/bronze war-god theme with spinner wings
|
||||
# mono — Clean grayscale monochrome
|
||||
# slate — Cool blue developer-focused
|
||||
#
|
||||
# Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
|
||||
# Schema (all fields optional, missing values inherit from default):
|
||||
|
||||
+1
-72
@@ -16,7 +16,7 @@ import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from hermes_constants import get_hermes_home
|
||||
from typing import Optional, Dict, List, Any, Union
|
||||
from typing import Optional, Dict, List, Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -371,39 +371,6 @@ def save_jobs(jobs: List[Dict[str, Any]]):
|
||||
raise
|
||||
|
||||
|
||||
def _normalize_workdir(workdir: Optional[str]) -> Optional[str]:
|
||||
"""Normalize and validate a cron job workdir.
|
||||
|
||||
Rules:
|
||||
- Empty / None → None (feature off, preserves old behaviour).
|
||||
- ``~`` is expanded. Relative paths are rejected — cron jobs run detached
|
||||
from any shell cwd, so relative paths have no stable meaning.
|
||||
- The path must exist and be a directory at create/update time. We do
|
||||
NOT re-check at run time (a user might briefly unmount the dir; the
|
||||
scheduler will just fall back to old behaviour with a logged warning).
|
||||
|
||||
Returns the absolute path string, or None when disabled.
|
||||
Raises ValueError on invalid input.
|
||||
"""
|
||||
if workdir is None:
|
||||
return None
|
||||
raw = str(workdir).strip()
|
||||
if not raw:
|
||||
return None
|
||||
expanded = Path(raw).expanduser()
|
||||
if not expanded.is_absolute():
|
||||
raise ValueError(
|
||||
f"Cron workdir must be an absolute path (got {raw!r}). "
|
||||
f"Cron jobs run detached from any shell cwd, so relative paths are ambiguous."
|
||||
)
|
||||
resolved = expanded.resolve()
|
||||
if not resolved.exists():
|
||||
raise ValueError(f"Cron workdir does not exist: {resolved}")
|
||||
if not resolved.is_dir():
|
||||
raise ValueError(f"Cron workdir is not a directory: {resolved}")
|
||||
return str(resolved)
|
||||
|
||||
|
||||
def create_job(
|
||||
prompt: str,
|
||||
schedule: str,
|
||||
@@ -417,9 +384,6 @@ def create_job(
|
||||
provider: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
script: Optional[str] = None,
|
||||
context_from: Optional[Union[str, List[str]]] = None,
|
||||
enabled_toolsets: Optional[List[str]] = None,
|
||||
workdir: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Create a new cron job.
|
||||
@@ -439,18 +403,6 @@ def create_job(
|
||||
script: Optional path to a Python script whose stdout is injected into the
|
||||
prompt each run. The script runs before the agent turn, and its output
|
||||
is prepended as context. Useful for data collection / change detection.
|
||||
context_from: Optional job ID (or list of job IDs) whose most recent output
|
||||
is injected into the prompt as context before each run.
|
||||
Useful for chaining cron jobs: job A finds data, job B processes it.
|
||||
enabled_toolsets: Optional list of toolset names to restrict the agent to.
|
||||
When set, only tools from these toolsets are loaded, reducing
|
||||
token overhead. When omitted, all default tools are loaded.
|
||||
workdir: Optional absolute path. When set, the job runs as if launched
|
||||
from that directory: AGENTS.md / CLAUDE.md / .cursorrules from
|
||||
that directory are injected into the system prompt, and the
|
||||
terminal/file/code_exec tools use it as their working directory
|
||||
(via TERMINAL_CWD). When unset, the old behaviour is preserved
|
||||
(no context files injected, tools use the scheduler's cwd).
|
||||
|
||||
Returns:
|
||||
The created job dict
|
||||
@@ -481,17 +433,6 @@ def create_job(
|
||||
normalized_base_url = normalized_base_url or None
|
||||
normalized_script = str(script).strip() if isinstance(script, str) else None
|
||||
normalized_script = normalized_script or None
|
||||
normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None
|
||||
normalized_toolsets = normalized_toolsets or None
|
||||
normalized_workdir = _normalize_workdir(workdir)
|
||||
|
||||
# Normalize context_from: accept str or list of str, store as list or None
|
||||
if isinstance(context_from, str):
|
||||
context_from = [context_from.strip()] if context_from.strip() else None
|
||||
elif isinstance(context_from, list):
|
||||
context_from = [str(j).strip() for j in context_from if str(j).strip()] or None
|
||||
else:
|
||||
context_from = None
|
||||
|
||||
label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
|
||||
job = {
|
||||
@@ -504,7 +445,6 @@ def create_job(
|
||||
"provider": normalized_provider,
|
||||
"base_url": normalized_base_url,
|
||||
"script": normalized_script,
|
||||
"context_from": context_from,
|
||||
"schedule": parsed_schedule,
|
||||
"schedule_display": parsed_schedule.get("display", schedule),
|
||||
"repeat": {
|
||||
@@ -524,8 +464,6 @@ def create_job(
|
||||
# Delivery configuration
|
||||
"deliver": deliver,
|
||||
"origin": origin, # Tracks where job was created for "origin" delivery
|
||||
"enabled_toolsets": normalized_toolsets,
|
||||
"workdir": normalized_workdir,
|
||||
}
|
||||
|
||||
jobs = load_jobs()
|
||||
@@ -559,15 +497,6 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]
|
||||
if job["id"] != job_id:
|
||||
continue
|
||||
|
||||
# Validate / normalize workdir if present in updates. Empty string or
|
||||
# None both mean "clear the field" (restore old behaviour).
|
||||
if "workdir" in updates:
|
||||
_wd = updates["workdir"]
|
||||
if _wd in (None, "", False):
|
||||
updates["workdir"] = None
|
||||
else:
|
||||
updates["workdir"] = _normalize_workdir(_wd)
|
||||
|
||||
updated = _apply_skill_fields({**job, **updates})
|
||||
schedule_changed = "schedule" in updates
|
||||
|
||||
|
||||
+11
-170
@@ -40,37 +40,6 @@ from hermes_time import now as _hermes_now
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None:
|
||||
"""Resolve the toolset list for a cron job.
|
||||
|
||||
Precedence:
|
||||
1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update).
|
||||
Keeps the agent's job-scoped toolset override intact — #6130.
|
||||
2. Per-platform ``hermes tools`` config for the ``cron`` platform.
|
||||
Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``)
|
||||
so users can gate cron toolsets globally without recreating every job.
|
||||
3. ``None`` on any lookup failure — AIAgent loads the full default set
|
||||
(legacy behavior before this change, preserved as the safety net).
|
||||
|
||||
_DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by
|
||||
``_get_platform_tools`` for unconfigured platforms, so fresh installs
|
||||
get cron WITHOUT ``moa`` by default (issue reported by Norbert —
|
||||
surprise $4.63 run).
|
||||
"""
|
||||
per_job = job.get("enabled_toolsets")
|
||||
if per_job:
|
||||
return per_job
|
||||
try:
|
||||
from hermes_cli.tools_config import _get_platform_tools # lazy: avoid heavy import at cron module load
|
||||
return sorted(_get_platform_tools(cfg or {}, "cron"))
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Cron toolset resolution failed, falling back to full default toolset: %s",
|
||||
exc,
|
||||
)
|
||||
return None
|
||||
|
||||
# Valid delivery platforms — used to validate user-supplied platform names
|
||||
# in cron delivery targets, preventing env var enumeration via crafted names.
|
||||
_KNOWN_DELIVERY_PLATFORMS = frozenset({
|
||||
@@ -283,11 +252,7 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
|
||||
coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)
|
||||
|
||||
future = asyncio.run_coroutine_threadsafe(coro, loop)
|
||||
try:
|
||||
result = future.result(timeout=30)
|
||||
except TimeoutError:
|
||||
future.cancel()
|
||||
raise
|
||||
result = future.result(timeout=30)
|
||||
if result and not getattr(result, "success", True):
|
||||
logger.warning(
|
||||
"Job '%s': media send failed for %s: %s",
|
||||
@@ -417,11 +382,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
|
||||
loop,
|
||||
)
|
||||
try:
|
||||
send_result = future.result(timeout=60)
|
||||
except TimeoutError:
|
||||
future.cancel()
|
||||
raise
|
||||
send_result = future.result(timeout=60)
|
||||
if send_result and not getattr(send_result, "success", True):
|
||||
err = getattr(send_result, "error", "unknown")
|
||||
logger.warning(
|
||||
@@ -671,47 +632,6 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
|
||||
f"{prompt}"
|
||||
)
|
||||
|
||||
# Inject output from referenced cron jobs as context.
|
||||
context_from = job.get("context_from")
|
||||
if context_from:
|
||||
from cron.jobs import OUTPUT_DIR
|
||||
if isinstance(context_from, str):
|
||||
context_from = [context_from]
|
||||
for source_job_id in context_from:
|
||||
# Guard against path traversal — valid job IDs are 12-char hex strings
|
||||
if not source_job_id or not all(c in "0123456789abcdef" for c in source_job_id):
|
||||
logger.warning("context_from: skipping invalid job_id %r", source_job_id)
|
||||
continue
|
||||
try:
|
||||
job_output_dir = OUTPUT_DIR / source_job_id
|
||||
if not job_output_dir.exists():
|
||||
continue # silent skip — no output yet
|
||||
output_files = sorted(
|
||||
job_output_dir.glob("*.md"),
|
||||
key=lambda f: f.stat().st_mtime,
|
||||
reverse=True,
|
||||
)
|
||||
if not output_files:
|
||||
continue # silent skip — no output yet
|
||||
latest_output = output_files[0].read_text(encoding="utf-8").strip()
|
||||
# Truncate to 8K characters to avoid prompt bloat
|
||||
_MAX_CONTEXT_CHARS = 8000
|
||||
if len(latest_output) > _MAX_CONTEXT_CHARS:
|
||||
latest_output = latest_output[:_MAX_CONTEXT_CHARS] + "\n\n[... output truncated ...]"
|
||||
if latest_output:
|
||||
prompt = (
|
||||
f"## Output from job '{source_job_id}'\n"
|
||||
"The following is the most recent output from a preceding "
|
||||
"cron job. Use it as context for your analysis.\n\n"
|
||||
f"```\n{latest_output}\n```\n\n"
|
||||
f"{prompt}"
|
||||
)
|
||||
else:
|
||||
continue # silent skip — empty output
|
||||
except (OSError, PermissionError) as e:
|
||||
logger.warning("context_from: failed to read output for job %r: %s", source_job_id, e)
|
||||
# silent skip — do not pollute the prompt with error messages
|
||||
|
||||
# Always prepend cron execution guidance so the agent knows how
|
||||
# delivery works and can suppress delivery when appropriate.
|
||||
cron_hint = (
|
||||
@@ -836,30 +756,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
chat_name=origin.get("chat_name", "") if origin else "",
|
||||
)
|
||||
|
||||
# Per-job working directory. When set (and validated at create/update
|
||||
# time), we point TERMINAL_CWD at it so:
|
||||
# - build_context_files_prompt() picks up AGENTS.md / CLAUDE.md /
|
||||
# .cursorrules from the job's project dir, AND
|
||||
# - the terminal, file, and code-exec tools run commands from there.
|
||||
#
|
||||
# tick() serializes workdir-jobs outside the parallel pool, so mutating
|
||||
# os.environ["TERMINAL_CWD"] here is safe for those jobs. For workdir-less
|
||||
# jobs we leave TERMINAL_CWD untouched — preserves the original behaviour
|
||||
# (skip_context_files=True, tools use whatever cwd the scheduler has).
|
||||
_job_workdir = (job.get("workdir") or "").strip() or None
|
||||
if _job_workdir and not Path(_job_workdir).is_dir():
|
||||
# Directory was removed between create-time validation and now. Log
|
||||
# and drop back to old behaviour rather than crashing the job.
|
||||
logger.warning(
|
||||
"Job '%s': configured workdir %r no longer exists — running without it",
|
||||
job_id, _job_workdir,
|
||||
)
|
||||
_job_workdir = None
|
||||
_prior_terminal_cwd = os.environ.get("TERMINAL_CWD", "_UNSET_")
|
||||
if _job_workdir:
|
||||
os.environ["TERMINAL_CWD"] = _job_workdir
|
||||
logger.info("Job '%s': using workdir %s", job_id, _job_workdir)
|
||||
|
||||
try:
|
||||
# Re-read .env and config.yaml fresh every run so provider/key
|
||||
# changes take effect without a gateway restart.
|
||||
@@ -936,7 +832,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
resolve_runtime_provider,
|
||||
format_runtime_provider_error,
|
||||
)
|
||||
from hermes_cli.auth import AuthError
|
||||
try:
|
||||
runtime_kwargs = {
|
||||
"requested": job.get("provider") or os.getenv("HERMES_INFERENCE_PROVIDER"),
|
||||
@@ -944,28 +839,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
if job.get("base_url"):
|
||||
runtime_kwargs["explicit_base_url"] = job.get("base_url")
|
||||
runtime = resolve_runtime_provider(**runtime_kwargs)
|
||||
except AuthError as auth_exc:
|
||||
# Primary provider auth failed — try fallback chain before giving up.
|
||||
logger.warning("Job '%s': primary auth failed (%s), trying fallback", job_id, auth_exc)
|
||||
fb = _cfg.get("fallback_providers") or _cfg.get("fallback_model")
|
||||
fb_list = (fb if isinstance(fb, list) else [fb]) if fb else []
|
||||
runtime = None
|
||||
for entry in fb_list:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
try:
|
||||
fb_kwargs = {"requested": entry.get("provider")}
|
||||
if entry.get("base_url"):
|
||||
fb_kwargs["explicit_base_url"] = entry["base_url"]
|
||||
if entry.get("api_key"):
|
||||
fb_kwargs["explicit_api_key"] = entry["api_key"]
|
||||
runtime = resolve_runtime_provider(**fb_kwargs)
|
||||
logger.info("Job '%s': fallback resolved to %s", job_id, runtime.get("provider"))
|
||||
break
|
||||
except Exception as fb_exc:
|
||||
logger.debug("Job '%s': fallback %s failed: %s", job_id, entry.get("provider"), fb_exc)
|
||||
if runtime is None:
|
||||
raise RuntimeError(format_runtime_provider_error(auth_exc)) from auth_exc
|
||||
except Exception as exc:
|
||||
message = format_runtime_provider_error(exc)
|
||||
raise RuntimeError(message) from exc
|
||||
@@ -1005,13 +878,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
providers_ignored=pr.get("ignore"),
|
||||
providers_order=pr.get("order"),
|
||||
provider_sort=pr.get("sort"),
|
||||
enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
|
||||
disabled_toolsets=["cronjob", "messaging", "clarify"],
|
||||
quiet_mode=True,
|
||||
# When a workdir is configured, inject AGENTS.md / CLAUDE.md /
|
||||
# .cursorrules from that directory; otherwise preserve the old
|
||||
# behaviour (don't inject SOUL.md/AGENTS.md from the scheduler cwd).
|
||||
skip_context_files=not bool(_job_workdir),
|
||||
skip_context_files=True, # Don't inject SOUL.md/AGENTS.md from scheduler cwd
|
||||
skip_memory=True, # Cron system prompts would corrupt user representations
|
||||
platform="cron",
|
||||
session_id=_cron_session_id,
|
||||
@@ -1095,12 +964,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
f"— last activity: {_last_desc}"
|
||||
)
|
||||
|
||||
# Guard against non-dict returns from run_conversation under error conditions
|
||||
if not isinstance(result, dict):
|
||||
raise RuntimeError(
|
||||
f"agent.run_conversation returned {type(result).__name__} instead of dict: {result!r}"
|
||||
)
|
||||
|
||||
final_response = result.get("final_response", "") or ""
|
||||
# Strip leaked placeholder text that upstream may inject on empty completions.
|
||||
if final_response.strip() == "(No response generated)":
|
||||
@@ -1150,14 +1013,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
return False, output, "", error_msg
|
||||
|
||||
finally:
|
||||
# Restore TERMINAL_CWD to whatever it was before this job ran. We
|
||||
# only ever mutate it when the job has a workdir; see the setup block
|
||||
# at the top of run_job for the serialization guarantee.
|
||||
if _job_workdir:
|
||||
if _prior_terminal_cwd == "_UNSET_":
|
||||
os.environ.pop("TERMINAL_CWD", None)
|
||||
else:
|
||||
os.environ["TERMINAL_CWD"] = _prior_terminal_cwd
|
||||
# Clean up ContextVar session/delivery state for this job.
|
||||
clear_session_vars(_ctx_tokens)
|
||||
if _session_db:
|
||||
@@ -1285,28 +1140,14 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
|
||||
mark_job_run(job["id"], False, str(e))
|
||||
return False
|
||||
|
||||
# Partition due jobs: those with a per-job workdir mutate
|
||||
# os.environ["TERMINAL_CWD"] inside run_job, which is process-global —
|
||||
# so they MUST run sequentially to avoid corrupting each other. Jobs
|
||||
# without a workdir leave env untouched and stay parallel-safe.
|
||||
workdir_jobs = [j for j in due_jobs if (j.get("workdir") or "").strip()]
|
||||
parallel_jobs = [j for j in due_jobs if not (j.get("workdir") or "").strip()]
|
||||
|
||||
_results: list = []
|
||||
|
||||
# Sequential pass for workdir jobs.
|
||||
for job in workdir_jobs:
|
||||
_ctx = contextvars.copy_context()
|
||||
_results.append(_ctx.run(_process_job, job))
|
||||
|
||||
# Parallel pass for the rest — same behaviour as before.
|
||||
if parallel_jobs:
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
|
||||
_futures = []
|
||||
for job in parallel_jobs:
|
||||
_ctx = contextvars.copy_context()
|
||||
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
|
||||
_results.extend(f.result() for f in _futures)
|
||||
# Run all due jobs concurrently, each in its own ContextVar copy
|
||||
# so session/delivery state stays isolated per-thread.
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
|
||||
_futures = []
|
||||
for job in due_jobs:
|
||||
_ctx = contextvars.copy_context()
|
||||
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
|
||||
_results = [f.result() for f in _futures]
|
||||
|
||||
return sum(_results)
|
||||
finally:
|
||||
|
||||
@@ -1,52 +0,0 @@
|
||||
#
|
||||
# docker-compose.yml for Hermes Agent
|
||||
#
|
||||
# Usage:
|
||||
# HERMES_UID=$(id -u) HERMES_GID=$(id -g) docker compose up -d
|
||||
#
|
||||
# Set HERMES_UID / HERMES_GID to the host user that owns ~/.hermes so
|
||||
# files created inside the container stay readable/writable on the host.
|
||||
# The entrypoint remaps the internal `hermes` user to these values via
|
||||
# usermod/groupmod + gosu.
|
||||
#
|
||||
# Security notes:
|
||||
# - The dashboard service binds to 127.0.0.1 by default. It stores API
|
||||
# keys; exposing it on LAN without auth is unsafe. If you want remote
|
||||
# access, use an SSH tunnel or put it behind a reverse proxy that
|
||||
# adds authentication — do NOT pass --insecure --host 0.0.0.0.
|
||||
# - The gateway's API server is off unless you uncomment API_SERVER_KEY
|
||||
# and API_SERVER_HOST. See docs/user-guide/api-server.md before doing
|
||||
# this on an internet-facing host.
|
||||
#
|
||||
services:
|
||||
gateway:
|
||||
build: .
|
||||
image: hermes-agent
|
||||
container_name: hermes
|
||||
restart: unless-stopped
|
||||
network_mode: host
|
||||
volumes:
|
||||
- ~/.hermes:/opt/data
|
||||
environment:
|
||||
- HERMES_UID=${HERMES_UID:-10000}
|
||||
- HERMES_GID=${HERMES_GID:-10000}
|
||||
# To expose the OpenAI-compatible API server beyond localhost,
|
||||
# uncomment BOTH lines (API_SERVER_KEY is mandatory for auth):
|
||||
# - API_SERVER_HOST=0.0.0.0
|
||||
# - API_SERVER_KEY=${API_SERVER_KEY}
|
||||
command: ["gateway", "run"]
|
||||
|
||||
dashboard:
|
||||
image: hermes-agent
|
||||
container_name: hermes-dashboard
|
||||
restart: unless-stopped
|
||||
network_mode: host
|
||||
depends_on:
|
||||
- gateway
|
||||
volumes:
|
||||
- ~/.hermes:/opt/data
|
||||
environment:
|
||||
- HERMES_UID=${HERMES_UID:-10000}
|
||||
- HERMES_GID=${HERMES_GID:-10000}
|
||||
# Localhost-only. For remote access, tunnel via `ssh -L 9119:localhost:9119`.
|
||||
command: ["dashboard", "--host", "127.0.0.1", "--no-open"]
|
||||
+2
-35
@@ -22,18 +22,9 @@ if [ "$(id -u)" = "0" ]; then
|
||||
groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Fix ownership of the data volume. When HERMES_UID remaps the hermes user,
|
||||
# files created by previous runs (under the old UID) become inaccessible.
|
||||
# Always chown -R when UID was remapped; otherwise only if top-level is wrong.
|
||||
actual_hermes_uid=$(id -u hermes)
|
||||
needs_chown=false
|
||||
if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "10000" ]; then
|
||||
needs_chown=true
|
||||
elif [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
|
||||
needs_chown=true
|
||||
fi
|
||||
if [ "$needs_chown" = true ]; then
|
||||
echo "Fixing ownership of $HERMES_HOME to hermes ($actual_hermes_uid)"
|
||||
if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
|
||||
echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing"
|
||||
# In rootless Podman the container's "root" is mapped to an unprivileged
|
||||
# host UID — chown will fail. That's fine: the volume is already owned
|
||||
# by the mapped user on the host side.
|
||||
@@ -41,15 +32,6 @@ if [ "$(id -u)" = "0" ]; then
|
||||
echo "Warning: chown failed (rootless container?) — continuing anyway"
|
||||
fi
|
||||
|
||||
# Ensure config.yaml is readable by the hermes runtime user even if it was
|
||||
# edited on the host after initial ownership setup. Must run here (as root)
|
||||
# rather than after the gosu drop, otherwise a non-root caller like
|
||||
# `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
|
||||
if [ -f "$HERMES_HOME/config.yaml" ]; then
|
||||
chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
echo "Dropping root privileges"
|
||||
exec gosu hermes "$0" "$@"
|
||||
fi
|
||||
@@ -86,19 +68,4 @@ if [ -d "$INSTALL_DIR/skills" ]; then
|
||||
python3 "$INSTALL_DIR/tools/skills_sync.py"
|
||||
fi
|
||||
|
||||
# Final exec: two supported invocation patterns.
|
||||
#
|
||||
# docker run <image> -> exec `hermes` with no args (legacy default)
|
||||
# docker run <image> chat -q "..." -> exec `hermes chat -q "..."` (legacy wrap)
|
||||
# docker run <image> sleep infinity -> exec `sleep infinity` directly
|
||||
# docker run <image> bash -> exec `bash` directly
|
||||
#
|
||||
# If the first positional arg resolves to an executable on PATH, we assume the
|
||||
# caller wants to run it directly (needed by the launcher which runs long-lived
|
||||
# `sleep infinity` sandbox containers — see tools/environments/docker.py).
|
||||
# Otherwise we treat the args as a hermes subcommand and wrap with `hermes`,
|
||||
# preserving the documented `docker run <image> <subcommand>` behavior.
|
||||
if [ $# -gt 0 ] && command -v "$1" >/dev/null 2>&1; then
|
||||
exec "$@"
|
||||
fi
|
||||
exec hermes "$@"
|
||||
|
||||
Binary file not shown.
+3
-10
@@ -135,7 +135,7 @@ class SessionResetPolicy:
|
||||
mode=mode if mode is not None else "both",
|
||||
at_hour=at_hour if at_hour is not None else 4,
|
||||
idle_minutes=idle_minutes if idle_minutes is not None else 1440,
|
||||
notify=_coerce_bool(notify, True),
|
||||
notify=notify if notify is not None else True,
|
||||
notify_exclude_platforms=tuple(exclude) if exclude is not None else ("api_server", "webhook"),
|
||||
)
|
||||
|
||||
@@ -178,7 +178,7 @@ class PlatformConfig:
|
||||
home_channel = HomeChannel.from_dict(data["home_channel"])
|
||||
|
||||
return cls(
|
||||
enabled=_coerce_bool(data.get("enabled"), False),
|
||||
enabled=data.get("enabled", False),
|
||||
token=data.get("token"),
|
||||
api_key=data.get("api_key"),
|
||||
home_channel=home_channel,
|
||||
@@ -435,7 +435,7 @@ class GatewayConfig:
|
||||
reset_triggers=data.get("reset_triggers", ["/new", "/reset"]),
|
||||
quick_commands=quick_commands,
|
||||
sessions_dir=sessions_dir,
|
||||
always_log_local=_coerce_bool(data.get("always_log_local"), True),
|
||||
always_log_local=data.get("always_log_local", True),
|
||||
stt_enabled=_coerce_bool(stt_enabled, True),
|
||||
group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
|
||||
thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
|
||||
@@ -616,8 +616,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
if isinstance(frc, list):
|
||||
frc = ",".join(str(v) for v in frc)
|
||||
os.environ["SLACK_FREE_RESPONSE_CHANNELS"] = str(frc)
|
||||
if "reactions" in slack_cfg and not os.getenv("SLACK_REACTIONS"):
|
||||
os.environ["SLACK_REACTIONS"] = str(slack_cfg["reactions"]).lower()
|
||||
|
||||
# Discord settings → env vars (env vars take precedence)
|
||||
discord_cfg = yaml_cfg.get("discord", {})
|
||||
@@ -687,11 +685,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
|
||||
if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
|
||||
os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
|
||||
if "group_allowed_chats" in telegram_cfg and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
|
||||
gac = telegram_cfg["group_allowed_chats"]
|
||||
if isinstance(gac, list):
|
||||
gac = ",".join(str(v) for v in gac)
|
||||
os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(gac)
|
||||
if "disable_link_previews" in telegram_cfg:
|
||||
plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
|
||||
if not isinstance(plat_data, dict):
|
||||
|
||||
+14
-60
@@ -21,7 +21,6 @@ Errors in hooks are caught and logged but never block the main pipeline.
|
||||
|
||||
import asyncio
|
||||
import importlib.util
|
||||
import sys
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
import yaml
|
||||
@@ -104,28 +103,16 @@ class HookRegistry:
|
||||
print(f"[hooks] Skipping {hook_name}: no events declared", flush=True)
|
||||
continue
|
||||
|
||||
# Dynamically load the handler module.
|
||||
# Register in sys.modules BEFORE exec_module so Pydantic /
|
||||
# dataclasses / typing introspection can resolve forward
|
||||
# references (triggered by `from __future__ import annotations`
|
||||
# in the handler). Without this, a handler that declares a
|
||||
# Pydantic BaseModel for webhook/event payloads fails at first
|
||||
# dispatch with "TypeAdapter ... is not fully defined".
|
||||
module_name = f"hermes_hook_{hook_name}"
|
||||
# Dynamically load the handler module
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
module_name, handler_path
|
||||
f"hermes_hook_{hook_name}", handler_path
|
||||
)
|
||||
if spec is None or spec.loader is None:
|
||||
print(f"[hooks] Skipping {hook_name}: could not load handler.py", flush=True)
|
||||
continue
|
||||
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
sys.modules[module_name] = module
|
||||
try:
|
||||
spec.loader.exec_module(module)
|
||||
except Exception:
|
||||
sys.modules.pop(module_name, None)
|
||||
raise
|
||||
spec.loader.exec_module(module)
|
||||
|
||||
handle_fn = getattr(module, "handle", None)
|
||||
if handle_fn is None:
|
||||
@@ -148,22 +135,9 @@ class HookRegistry:
|
||||
except Exception as e:
|
||||
print(f"[hooks] Error loading hook {hook_dir.name}: {e}", flush=True)
|
||||
|
||||
def _resolve_handlers(self, event_type: str) -> List[Callable]:
|
||||
"""Return all handlers that should fire for ``event_type``.
|
||||
|
||||
Exact matches fire first, followed by wildcard matches (e.g.
|
||||
``command:*`` matches ``command:reset``).
|
||||
"""
|
||||
handlers = list(self._handlers.get(event_type, []))
|
||||
if ":" in event_type:
|
||||
base = event_type.split(":")[0]
|
||||
wildcard_key = f"{base}:*"
|
||||
handlers.extend(self._handlers.get(wildcard_key, []))
|
||||
return handlers
|
||||
|
||||
async def emit(self, event_type: str, context: Optional[Dict[str, Any]] = None) -> None:
|
||||
"""
|
||||
Fire all handlers registered for an event, discarding return values.
|
||||
Fire all handlers registered for an event.
|
||||
|
||||
Supports wildcard matching: handlers registered for "command:*" will
|
||||
fire for any "command:..." event. Handlers registered for a base type
|
||||
@@ -177,7 +151,16 @@ class HookRegistry:
|
||||
if context is None:
|
||||
context = {}
|
||||
|
||||
for fn in self._resolve_handlers(event_type):
|
||||
# Collect handlers: exact match + wildcard match
|
||||
handlers = list(self._handlers.get(event_type, []))
|
||||
|
||||
# Check for wildcard patterns (e.g., "command:*" matches "command:reset")
|
||||
if ":" in event_type:
|
||||
base = event_type.split(":")[0]
|
||||
wildcard_key = f"{base}:*"
|
||||
handlers.extend(self._handlers.get(wildcard_key, []))
|
||||
|
||||
for fn in handlers:
|
||||
try:
|
||||
result = fn(event_type, context)
|
||||
# Support both sync and async handlers
|
||||
@@ -185,32 +168,3 @@ class HookRegistry:
|
||||
await result
|
||||
except Exception as e:
|
||||
print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
|
||||
|
||||
async def emit_collect(
|
||||
self,
|
||||
event_type: str,
|
||||
context: Optional[Dict[str, Any]] = None,
|
||||
) -> List[Any]:
|
||||
"""Fire handlers and return their non-None return values in order.
|
||||
|
||||
Like :meth:`emit` but captures each handler's return value. Used for
|
||||
decision-style hooks (e.g. ``command:<name>`` policies that want to
|
||||
allow/deny/rewrite the command before normal dispatch).
|
||||
|
||||
Exceptions from individual handlers are logged but do not abort the
|
||||
remaining handlers.
|
||||
"""
|
||||
if context is None:
|
||||
context = {}
|
||||
|
||||
results: List[Any] = []
|
||||
for fn in self._resolve_handlers(event_type):
|
||||
try:
|
||||
result = fn(event_type, context)
|
||||
if asyncio.iscoroutine(result):
|
||||
result = await result
|
||||
if result is not None:
|
||||
results.append(result)
|
||||
except Exception as e:
|
||||
print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
|
||||
return results
|
||||
|
||||
+22
-150
@@ -9,7 +9,6 @@ Exposes an HTTP server with endpoints:
|
||||
- GET /v1/models — lists hermes-agent as an available model
|
||||
- POST /v1/runs — start a run, returns run_id immediately (202)
|
||||
- GET /v1/runs/{run_id}/events — SSE stream of structured lifecycle events
|
||||
- POST /v1/runs/{run_id}/stop — interrupt a running agent
|
||||
- GET /health — health check
|
||||
- GET /health/detailed — rich status for cross-container dashboard probing
|
||||
|
||||
@@ -587,9 +586,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
self._run_streams: Dict[str, "asyncio.Queue[Optional[Dict]]"] = {}
|
||||
# Creation timestamps for orphaned-run TTL sweep
|
||||
self._run_streams_created: Dict[str, float] = {}
|
||||
# Active run agent/task references for stop support
|
||||
self._active_run_agents: Dict[str, Any] = {}
|
||||
self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
|
||||
self._session_db: Optional[Any] = None # Lazy-init SessionDB for session continuity
|
||||
|
||||
@staticmethod
|
||||
@@ -1208,12 +1204,10 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
If the client disconnects mid-stream, ``agent.interrupt()`` is
|
||||
called so the agent stops issuing upstream LLM calls, then the
|
||||
asyncio task is cancelled. When ``store=True`` an initial
|
||||
``in_progress`` snapshot is persisted immediately after
|
||||
``response.created`` and disconnects update it to an
|
||||
``incomplete`` snapshot so GET /v1/responses/{id} and
|
||||
``previous_response_id`` chaining still have something to
|
||||
recover from.
|
||||
asyncio task is cancelled. When ``store=True`` the full response
|
||||
is persisted to the ResponseStore in a ``finally`` block so GET
|
||||
/v1/responses/{id} and ``previous_response_id`` chaining work the
|
||||
same as the batch path.
|
||||
"""
|
||||
import queue as _q
|
||||
|
||||
@@ -1275,60 +1269,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
final_response_text = ""
|
||||
agent_error: Optional[str] = None
|
||||
usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
|
||||
terminal_snapshot_persisted = False
|
||||
|
||||
def _persist_response_snapshot(
|
||||
response_env: Dict[str, Any],
|
||||
*,
|
||||
conversation_history_snapshot: Optional[List[Dict[str, Any]]] = None,
|
||||
) -> None:
|
||||
if not store:
|
||||
return
|
||||
if conversation_history_snapshot is None:
|
||||
conversation_history_snapshot = list(conversation_history)
|
||||
conversation_history_snapshot.append({"role": "user", "content": user_message})
|
||||
self._response_store.put(response_id, {
|
||||
"response": response_env,
|
||||
"conversation_history": conversation_history_snapshot,
|
||||
"instructions": instructions,
|
||||
"session_id": session_id,
|
||||
})
|
||||
if conversation:
|
||||
self._response_store.set_conversation(conversation, response_id)
|
||||
|
||||
def _persist_incomplete_if_needed() -> None:
|
||||
"""Persist an ``incomplete`` snapshot if no terminal one was written.
|
||||
|
||||
Called from both the client-disconnect (``ConnectionResetError``)
|
||||
and server-cancellation (``asyncio.CancelledError``) paths so
|
||||
GET /v1/responses/{id} and ``previous_response_id`` chaining keep
|
||||
working after abrupt stream termination.
|
||||
"""
|
||||
if not store or terminal_snapshot_persisted:
|
||||
return
|
||||
incomplete_text = "".join(final_text_parts) or final_response_text
|
||||
incomplete_items: List[Dict[str, Any]] = list(emitted_items)
|
||||
if incomplete_text:
|
||||
incomplete_items.append({
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "output_text", "text": incomplete_text}],
|
||||
})
|
||||
incomplete_env = _envelope("incomplete")
|
||||
incomplete_env["output"] = incomplete_items
|
||||
incomplete_env["usage"] = {
|
||||
"input_tokens": usage.get("input_tokens", 0),
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
incomplete_history = list(conversation_history)
|
||||
incomplete_history.append({"role": "user", "content": user_message})
|
||||
if incomplete_text:
|
||||
incomplete_history.append({"role": "assistant", "content": incomplete_text})
|
||||
_persist_response_snapshot(
|
||||
incomplete_env,
|
||||
conversation_history_snapshot=incomplete_history,
|
||||
)
|
||||
|
||||
try:
|
||||
# response.created — initial envelope, status=in_progress
|
||||
@@ -1338,7 +1278,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"type": "response.created",
|
||||
"response": created_env,
|
||||
})
|
||||
_persist_response_snapshot(created_env)
|
||||
last_activity = time.monotonic()
|
||||
|
||||
async def _open_message_item() -> None:
|
||||
@@ -1595,18 +1534,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
_failed_history = list(conversation_history)
|
||||
_failed_history.append({"role": "user", "content": user_message})
|
||||
if final_response_text or agent_error:
|
||||
_failed_history.append({
|
||||
"role": "assistant",
|
||||
"content": final_response_text or agent_error,
|
||||
})
|
||||
_persist_response_snapshot(
|
||||
failed_env,
|
||||
conversation_history_snapshot=_failed_history,
|
||||
)
|
||||
terminal_snapshot_persisted = True
|
||||
await _write_event("response.failed", {
|
||||
"type": "response.failed",
|
||||
"response": failed_env,
|
||||
@@ -1619,24 +1546,30 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
full_history = list(conversation_history)
|
||||
full_history.append({"role": "user", "content": user_message})
|
||||
if isinstance(result, dict) and result.get("messages"):
|
||||
full_history.extend(result["messages"])
|
||||
else:
|
||||
full_history.append({"role": "assistant", "content": final_response_text})
|
||||
_persist_response_snapshot(
|
||||
completed_env,
|
||||
conversation_history_snapshot=full_history,
|
||||
)
|
||||
terminal_snapshot_persisted = True
|
||||
await _write_event("response.completed", {
|
||||
"type": "response.completed",
|
||||
"response": completed_env,
|
||||
})
|
||||
|
||||
# Persist for future chaining / GET retrieval, mirroring
|
||||
# the batch path behavior.
|
||||
if store:
|
||||
full_history = list(conversation_history)
|
||||
full_history.append({"role": "user", "content": user_message})
|
||||
if isinstance(result, dict) and result.get("messages"):
|
||||
full_history.extend(result["messages"])
|
||||
else:
|
||||
full_history.append({"role": "assistant", "content": final_response_text})
|
||||
self._response_store.put(response_id, {
|
||||
"response": completed_env,
|
||||
"conversation_history": full_history,
|
||||
"instructions": instructions,
|
||||
"session_id": session_id,
|
||||
})
|
||||
if conversation:
|
||||
self._response_store.set_conversation(conversation, response_id)
|
||||
|
||||
except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError):
|
||||
_persist_incomplete_if_needed()
|
||||
# Client disconnected — interrupt the agent so it stops
|
||||
# making upstream LLM calls, then cancel the task.
|
||||
agent = agent_ref[0] if agent_ref else None
|
||||
@@ -1652,22 +1585,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
except (asyncio.CancelledError, Exception):
|
||||
pass
|
||||
logger.info("SSE client disconnected; interrupted agent task %s", response_id)
|
||||
except asyncio.CancelledError:
|
||||
# Server-side cancellation (e.g. shutdown, request timeout) —
|
||||
# persist an incomplete snapshot so GET /v1/responses/{id} and
|
||||
# previous_response_id chaining still work, then re-raise so the
|
||||
# runtime's cancellation semantics are respected.
|
||||
_persist_incomplete_if_needed()
|
||||
agent = agent_ref[0] if agent_ref else None
|
||||
if agent is not None:
|
||||
try:
|
||||
agent.interrupt("SSE task cancelled")
|
||||
except Exception:
|
||||
pass
|
||||
if not agent_task.done():
|
||||
agent_task.cancel()
|
||||
logger.info("SSE task cancelled; persisted incomplete snapshot for %s", response_id)
|
||||
raise
|
||||
|
||||
return response
|
||||
|
||||
@@ -2445,7 +2362,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
stream_delta_callback=_text_cb,
|
||||
tool_progress_callback=event_cb,
|
||||
)
|
||||
self._active_run_agents[run_id] = agent
|
||||
def _run_sync():
|
||||
r = agent.run_conversation(
|
||||
user_message=user_message,
|
||||
@@ -2485,11 +2401,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
q.put_nowait(None)
|
||||
except Exception:
|
||||
pass
|
||||
self._active_run_agents.pop(run_id, None)
|
||||
self._active_run_tasks.pop(run_id, None)
|
||||
|
||||
task = asyncio.create_task(_run_and_close())
|
||||
self._active_run_tasks[run_id] = task
|
||||
try:
|
||||
self._background_tasks.add(task)
|
||||
except TypeError:
|
||||
@@ -2548,44 +2461,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
return response
|
||||
|
||||
async def _handle_stop_run(self, request: "web.Request") -> "web.Response":
|
||||
"""POST /v1/runs/{run_id}/stop — interrupt a running agent."""
|
||||
auth_err = self._check_auth(request)
|
||||
if auth_err:
|
||||
return auth_err
|
||||
|
||||
run_id = request.match_info["run_id"]
|
||||
agent = self._active_run_agents.get(run_id)
|
||||
task = self._active_run_tasks.get(run_id)
|
||||
|
||||
if agent is None and task is None:
|
||||
return web.json_response(_openai_error(f"Run not found: {run_id}", code="run_not_found"), status=404)
|
||||
|
||||
if agent is not None:
|
||||
try:
|
||||
agent.interrupt("Stop requested via API")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if task is not None and not task.done():
|
||||
task.cancel()
|
||||
# Bounded wait: run_conversation() executes in the default
|
||||
# executor thread which task.cancel() cannot preempt — we rely on
|
||||
# agent.interrupt() above to break the loop. Cap the wait so a
|
||||
# slow/unresponsive interrupt can't hang this handler.
|
||||
try:
|
||||
await asyncio.wait_for(asyncio.shield(task), timeout=5.0)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(
|
||||
"[api_server] stop for run %s timed out after 5s; "
|
||||
"agent may still be finishing the current step",
|
||||
run_id,
|
||||
)
|
||||
except (asyncio.CancelledError, Exception):
|
||||
pass
|
||||
|
||||
return web.json_response({"run_id": run_id, "status": "stopping"})
|
||||
|
||||
async def _sweep_orphaned_runs(self) -> None:
|
||||
"""Periodically clean up run streams that were never consumed."""
|
||||
while True:
|
||||
@@ -2600,8 +2475,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
logger.debug("[api_server] sweeping orphaned run %s", run_id)
|
||||
self._run_streams.pop(run_id, None)
|
||||
self._run_streams_created.pop(run_id, None)
|
||||
self._active_run_agents.pop(run_id, None)
|
||||
self._active_run_tasks.pop(run_id, None)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# BasePlatformAdapter interface
|
||||
@@ -2637,7 +2510,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
# Structured event streaming
|
||||
self._app.router.add_post("/v1/runs", self._handle_runs)
|
||||
self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
|
||||
self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
|
||||
# Start background sweep to clean up orphaned (unconsumed) run streams
|
||||
sweep_task = asyncio.create_task(self._sweep_orphaned_runs())
|
||||
try:
|
||||
|
||||
+31
-421
@@ -19,8 +19,6 @@ import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
from utils import normalize_proxy_url
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -148,102 +146,7 @@ def _detect_macos_system_proxy() -> str | None:
|
||||
return None
|
||||
|
||||
|
||||
def _split_host_port(value: str) -> tuple[str, int | None]:
|
||||
raw = str(value or "").strip()
|
||||
if not raw:
|
||||
return "", None
|
||||
if "://" in raw:
|
||||
parsed = urlsplit(raw)
|
||||
return (parsed.hostname or "").lower().rstrip("."), parsed.port
|
||||
if raw.startswith("[") and "]" in raw:
|
||||
host, _, rest = raw[1:].partition("]")
|
||||
port = None
|
||||
if rest.startswith(":") and rest[1:].isdigit():
|
||||
port = int(rest[1:])
|
||||
return host.lower().rstrip("."), port
|
||||
if raw.count(":") == 1:
|
||||
host, _, maybe_port = raw.rpartition(":")
|
||||
if maybe_port.isdigit():
|
||||
return host.lower().rstrip("."), int(maybe_port)
|
||||
return raw.lower().strip("[]").rstrip("."), None
|
||||
|
||||
|
||||
def _no_proxy_entries() -> list[str]:
|
||||
entries: list[str] = []
|
||||
for key in ("NO_PROXY", "no_proxy"):
|
||||
raw = os.environ.get(key, "")
|
||||
entries.extend(part.strip() for part in raw.split(",") if part.strip())
|
||||
return entries
|
||||
|
||||
|
||||
def _no_proxy_entry_matches(entry: str, host: str, port: int | None = None) -> bool:
|
||||
token = str(entry or "").strip().lower()
|
||||
if not token:
|
||||
return False
|
||||
if token == "*":
|
||||
return True
|
||||
|
||||
token_host, token_port = _split_host_port(token)
|
||||
if token_port is not None and port is not None and token_port != port:
|
||||
return False
|
||||
if token_port is not None and port is None:
|
||||
return False
|
||||
if not token_host:
|
||||
return False
|
||||
|
||||
try:
|
||||
network = ipaddress.ip_network(token_host, strict=False)
|
||||
try:
|
||||
return ipaddress.ip_address(host) in network
|
||||
except ValueError:
|
||||
return False
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
try:
|
||||
token_ip = ipaddress.ip_address(token_host)
|
||||
try:
|
||||
return ipaddress.ip_address(host) == token_ip
|
||||
except ValueError:
|
||||
return False
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if token_host.startswith("*."):
|
||||
suffix = token_host[1:]
|
||||
return host.endswith(suffix)
|
||||
if token_host.startswith("."):
|
||||
return host == token_host[1:] or host.endswith(token_host)
|
||||
return host == token_host or host.endswith(f".{token_host}")
|
||||
|
||||
|
||||
def should_bypass_proxy(target_hosts: str | list[str] | tuple[str, ...] | set[str] | None) -> bool:
|
||||
"""Return True when NO_PROXY/no_proxy matches at least one target host.
|
||||
|
||||
Supports exact hosts, domain suffixes, wildcard suffixes, IP literals,
|
||||
CIDR ranges, optional host:port entries, and ``*``.
|
||||
"""
|
||||
entries = _no_proxy_entries()
|
||||
if not entries or not target_hosts:
|
||||
return False
|
||||
if isinstance(target_hosts, str):
|
||||
candidates = [target_hosts]
|
||||
else:
|
||||
candidates = list(target_hosts)
|
||||
for candidate in candidates:
|
||||
host, port = _split_host_port(str(candidate))
|
||||
if not host:
|
||||
continue
|
||||
if any(_no_proxy_entry_matches(entry, host, port) for entry in entries):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def resolve_proxy_url(
|
||||
platform_env_var: str | None = None,
|
||||
*,
|
||||
target_hosts: str | list[str] | tuple[str, ...] | set[str] | None = None,
|
||||
) -> str | None:
|
||||
def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
|
||||
"""Return a proxy URL from env vars, or macOS system proxy.
|
||||
|
||||
Check order:
|
||||
@@ -251,26 +154,18 @@ def resolve_proxy_url(
|
||||
1. HTTPS_PROXY / HTTP_PROXY / ALL_PROXY (and lowercase variants)
|
||||
2. macOS system proxy via ``scutil --proxy`` (auto-detect)
|
||||
|
||||
Returns *None* if no proxy is found, or if NO_PROXY/no_proxy matches one
|
||||
of ``target_hosts``.
|
||||
Returns *None* if no proxy is found.
|
||||
"""
|
||||
if platform_env_var:
|
||||
value = (os.environ.get(platform_env_var) or "").strip()
|
||||
if value:
|
||||
if should_bypass_proxy(target_hosts):
|
||||
return None
|
||||
return normalize_proxy_url(value)
|
||||
return value
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy"):
|
||||
value = (os.environ.get(key) or "").strip()
|
||||
if value:
|
||||
if should_bypass_proxy(target_hosts):
|
||||
return None
|
||||
return normalize_proxy_url(value)
|
||||
detected = normalize_proxy_url(_detect_macos_system_proxy())
|
||||
if detected and should_bypass_proxy(target_hosts):
|
||||
return None
|
||||
return detected
|
||||
return value
|
||||
return _detect_macos_system_proxy()
|
||||
|
||||
|
||||
def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
|
||||
@@ -855,10 +750,7 @@ class MessageEvent:
|
||||
if not self.is_command():
|
||||
return self.text
|
||||
parts = self.text.split(maxsplit=1)
|
||||
args = parts[1] if len(parts) > 1 else ""
|
||||
# iOS auto-corrects -- to — (em dash) and - to – (en dash)
|
||||
args = args.replace("\u2014\u2014", "--").replace("\u2014", "--").replace("\u2013", "-")
|
||||
return args
|
||||
return parts[1] if len(parts) > 1 else ""
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -1003,16 +895,10 @@ class BasePlatformAdapter(ABC):
|
||||
self._fatal_error_retryable = True
|
||||
self._fatal_error_handler: Optional[Callable[["BasePlatformAdapter"], Awaitable[None] | None]] = None
|
||||
|
||||
# Track active message handlers per session for interrupt support.
|
||||
# _active_sessions stores the per-session interrupt Event; _session_tasks
|
||||
# maps session → the specific Task currently processing it so that
|
||||
# session-terminating commands (/stop, /new, /reset) can cancel the
|
||||
# right task and release the adapter-level guard deterministically.
|
||||
# Without the owner-task map, an old task's finally block could delete
|
||||
# a newer task's guard, leaving stale busy state.
|
||||
# Track active message handlers per session for interrupt support
|
||||
# Key: session_key (e.g., chat_id), Value: (event, asyncio.Event for interrupt)
|
||||
self._active_sessions: Dict[str, asyncio.Event] = {}
|
||||
self._pending_messages: Dict[str, MessageEvent] = {}
|
||||
self._session_tasks: Dict[str, asyncio.Task] = {}
|
||||
# Background message-processing tasks spawned by handle_message().
|
||||
# Gateway shutdown cancels these so an old gateway instance doesn't keep
|
||||
# working on a task after --replace or manual restarts.
|
||||
@@ -1025,20 +911,7 @@ class BasePlatformAdapter(ABC):
|
||||
self._post_delivery_callbacks: Dict[str, Any] = {}
|
||||
self._expected_cancelled_tasks: set[asyncio.Task] = set()
|
||||
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
|
||||
# Auto-TTS on voice input: ``_auto_tts_default`` is the global default
|
||||
# (``voice.auto_tts`` in config.yaml, pushed by GatewayRunner on connect).
|
||||
# Per-chat overrides live in two sets populated from ``_voice_mode``:
|
||||
# - ``_auto_tts_enabled_chats``: chat explicitly opted in via ``/voice on``
|
||||
# or ``/voice tts`` (mode is ``voice_only`` or ``all``). Fires even when
|
||||
# the global default is False.
|
||||
# - ``_auto_tts_disabled_chats``: chat explicitly opted out via
|
||||
# ``/voice off`` (mode is ``off``). Suppresses auto-TTS even when the
|
||||
# global default is True.
|
||||
# The gate in _process_message() is:
|
||||
# fire if chat in _auto_tts_enabled_chats
|
||||
# OR (_auto_tts_default and chat not in _auto_tts_disabled_chats)
|
||||
self._auto_tts_default: bool = False
|
||||
self._auto_tts_enabled_chats: set = set()
|
||||
# Chats where auto-TTS on voice input is disabled (set by /voice off)
|
||||
self._auto_tts_disabled_chats: set = set()
|
||||
# Chats where typing indicator is paused (e.g. during approval waits).
|
||||
# _keep_typing skips send_typing when the chat_id is in this set.
|
||||
@@ -1060,21 +933,6 @@ class BasePlatformAdapter(ABC):
|
||||
def fatal_error_retryable(self) -> bool:
|
||||
return self._fatal_error_retryable
|
||||
|
||||
def _should_auto_tts_for_chat(self, chat_id: str) -> bool:
|
||||
"""Whether auto-TTS on voice input should fire for ``chat_id``.
|
||||
|
||||
Decision layers (Issue #16007):
|
||||
1. Explicit ``/voice on`` or ``/voice tts`` → always fire (even if
|
||||
``voice.auto_tts`` is False).
|
||||
2. Explicit ``/voice off`` → never fire.
|
||||
3. Fall back to the global ``voice.auto_tts`` config default.
|
||||
"""
|
||||
if chat_id in self._auto_tts_enabled_chats:
|
||||
return True
|
||||
if chat_id in self._auto_tts_disabled_chats:
|
||||
return False
|
||||
return bool(self._auto_tts_default)
|
||||
|
||||
def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None:
|
||||
self._fatal_error_handler = handler
|
||||
|
||||
@@ -1483,7 +1341,7 @@ class BasePlatformAdapter(ABC):
|
||||
# Extract MEDIA:<path> tags, allowing optional whitespace after the colon
|
||||
# and quoted/backticked paths for LLM-formatted outputs.
|
||||
media_pattern = re.compile(
|
||||
r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
|
||||
r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
|
||||
)
|
||||
for match in media_pattern.finditer(content):
|
||||
path = match.group("path").strip()
|
||||
@@ -1817,222 +1675,6 @@ class BasePlatformAdapter(ABC):
|
||||
return f"{existing_text}\n\n{new_text}".strip()
|
||||
return existing_text
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Session task + guard ownership helpers
|
||||
# ------------------------------------------------------------------
|
||||
# These were introduced together with the _session_tasks owner map to
|
||||
# make session lifecycle reconciliation deterministic across (a) the
|
||||
# normal completion path, (b) /stop/ /new/ /reset bypass commands,
|
||||
# and (c) stale-lock self-heal on the next inbound message.
|
||||
|
||||
def _release_session_guard(
|
||||
self,
|
||||
session_key: str,
|
||||
*,
|
||||
guard: Optional[asyncio.Event] = None,
|
||||
) -> None:
|
||||
"""Release the adapter-level guard for a session.
|
||||
|
||||
When ``guard`` is provided, only release the entry if it still points
|
||||
at that exact Event. This lets reset-like commands swap in a temporary
|
||||
guard while the old processing task unwinds, without having the old
|
||||
task's cleanup accidentally clear the replacement guard.
|
||||
"""
|
||||
current_guard = self._active_sessions.get(session_key)
|
||||
if current_guard is None:
|
||||
return
|
||||
if guard is not None and current_guard is not guard:
|
||||
return
|
||||
del self._active_sessions[session_key]
|
||||
|
||||
def _session_task_is_stale(self, session_key: str) -> bool:
|
||||
"""Return True if the owner task for ``session_key`` is done/cancelled.
|
||||
|
||||
A lock is "stale" when the adapter still has ``_active_sessions[key]``
|
||||
AND a known owner task in ``_session_tasks`` that has already exited.
|
||||
When there is no owner task at all, that usually means the guard was
|
||||
installed by some path other than handle_message() (tests sometimes
|
||||
install guards directly) — don't treat that as stale. The on-entry
|
||||
self-heal only needs to handle the production split-brain case where
|
||||
an owner task was recorded, then exited without clearing its guard.
|
||||
"""
|
||||
task = self._session_tasks.get(session_key)
|
||||
if task is None:
|
||||
return False
|
||||
done = getattr(task, "done", None)
|
||||
return bool(done and done())
|
||||
|
||||
def _heal_stale_session_lock(self, session_key: str) -> bool:
|
||||
"""Clear a stale session lock if the owner task is already gone.
|
||||
|
||||
Returns True if a stale lock was healed. Returns False if there is
|
||||
no lock, or the owner task is still alive (the normal busy case).
|
||||
|
||||
This is the on-entry safety net sidbin's issue #11016 analysis calls
|
||||
for: without it, a split-brain — adapter still thinks the session is
|
||||
active, but nothing is actually processing — traps the chat in
|
||||
infinite "Interrupting current task..." until the gateway is
|
||||
restarted.
|
||||
"""
|
||||
if session_key not in self._active_sessions:
|
||||
return False
|
||||
if not self._session_task_is_stale(session_key):
|
||||
return False
|
||||
logger.warning(
|
||||
"[%s] Healing stale session lock for %s (owner task is done/absent)",
|
||||
self.name,
|
||||
session_key,
|
||||
)
|
||||
self._active_sessions.pop(session_key, None)
|
||||
self._pending_messages.pop(session_key, None)
|
||||
self._session_tasks.pop(session_key, None)
|
||||
return True
|
||||
|
||||
def _start_session_processing(
|
||||
self,
|
||||
event: MessageEvent,
|
||||
session_key: str,
|
||||
*,
|
||||
interrupt_event: Optional[asyncio.Event] = None,
|
||||
) -> bool:
|
||||
"""Spawn a background processing task under the given session guard.
|
||||
|
||||
Returns True on success. If the runtime stubs ``create_task`` with a
|
||||
non-Task sentinel (some tests do this), the guard is rolled back and
|
||||
False is returned so the caller isn't left holding a half-installed
|
||||
session lock.
|
||||
"""
|
||||
guard = interrupt_event or asyncio.Event()
|
||||
self._active_sessions[session_key] = guard
|
||||
|
||||
task = asyncio.create_task(self._process_message_background(event, session_key))
|
||||
self._session_tasks[session_key] = task
|
||||
try:
|
||||
self._background_tasks.add(task)
|
||||
except TypeError:
|
||||
# Tests stub create_task() with lightweight sentinels that are not
|
||||
# hashable and do not support lifecycle callbacks.
|
||||
self._session_tasks.pop(session_key, None)
|
||||
self._release_session_guard(session_key, guard=guard)
|
||||
return False
|
||||
if hasattr(task, "add_done_callback"):
|
||||
task.add_done_callback(self._background_tasks.discard)
|
||||
task.add_done_callback(self._expected_cancelled_tasks.discard)
|
||||
return True
|
||||
|
||||
async def cancel_session_processing(
|
||||
self,
|
||||
session_key: str,
|
||||
*,
|
||||
release_guard: bool = True,
|
||||
discard_pending: bool = True,
|
||||
) -> None:
|
||||
"""Cancel in-flight processing for a single session.
|
||||
|
||||
``release_guard=False`` keeps the adapter-level session guard in place
|
||||
so reset-like commands can finish atomically before follow-up messages
|
||||
are allowed to start a fresh background task.
|
||||
"""
|
||||
task = self._session_tasks.pop(session_key, None)
|
||||
if task is not None and not task.done():
|
||||
logger.debug(
|
||||
"[%s] Cancelling active processing for session %s",
|
||||
self.name,
|
||||
session_key,
|
||||
)
|
||||
self._expected_cancelled_tasks.add(task)
|
||||
task.cancel()
|
||||
try:
|
||||
await task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"[%s] Session cancellation raised while unwinding %s",
|
||||
self.name,
|
||||
session_key,
|
||||
exc_info=True,
|
||||
)
|
||||
if discard_pending:
|
||||
self._pending_messages.pop(session_key, None)
|
||||
if release_guard:
|
||||
self._release_session_guard(session_key)
|
||||
|
||||
async def _drain_pending_after_session_command(
|
||||
self,
|
||||
session_key: str,
|
||||
command_guard: asyncio.Event,
|
||||
) -> None:
|
||||
"""Resume the latest queued follow-up once a session command completes.
|
||||
|
||||
Called at the tail of /stop, /new, and /reset dispatch. Releases the
|
||||
command-scoped guard, then — if a follow-up message landed while the
|
||||
command was running — spawns a fresh processing task for it.
|
||||
"""
|
||||
pending_event = self._pending_messages.pop(session_key, None)
|
||||
self._release_session_guard(session_key, guard=command_guard)
|
||||
if pending_event is None:
|
||||
return
|
||||
self._start_session_processing(pending_event, session_key)
|
||||
|
||||
async def _dispatch_active_session_command(
|
||||
self,
|
||||
event: MessageEvent,
|
||||
session_key: str,
|
||||
cmd: str,
|
||||
) -> None:
|
||||
"""Dispatch a reset-like bypass command while preserving guard ordering.
|
||||
|
||||
/stop, /new, and /reset must:
|
||||
1. Keep the session guard installed while the runner processes the
|
||||
command (so a racing follow-up message stays queued, not
|
||||
dispatched as a second parallel run).
|
||||
2. Cancel the old in-flight adapter task only AFTER the runner has
|
||||
finished handling the command (so the runner sees consistent
|
||||
state and its response is sent in order).
|
||||
3. Release the command-scoped guard and drain the latest queued
|
||||
follow-up exactly once, after 1 and 2 complete.
|
||||
"""
|
||||
logger.debug(
|
||||
"[%s] Command '/%s' bypassing active-session guard for %s",
|
||||
self.name,
|
||||
cmd,
|
||||
session_key,
|
||||
)
|
||||
|
||||
current_guard = self._active_sessions.get(session_key)
|
||||
command_guard = asyncio.Event()
|
||||
self._active_sessions[session_key] = command_guard
|
||||
thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
|
||||
|
||||
try:
|
||||
response = await self._message_handler(event)
|
||||
# Old adapter task (if any) is cancelled AFTER the runner has
|
||||
# fully handled the command — keeps ordering deterministic.
|
||||
await self.cancel_session_processing(
|
||||
session_key,
|
||||
release_guard=False,
|
||||
discard_pending=False,
|
||||
)
|
||||
if response:
|
||||
await self._send_with_retry(
|
||||
chat_id=event.source.chat_id,
|
||||
content=response,
|
||||
reply_to=event.message_id,
|
||||
metadata=thread_meta,
|
||||
)
|
||||
except Exception:
|
||||
# On failure, restore the original guard if one still exists so
|
||||
# we don't leave the session in a half-reset state.
|
||||
if self._active_sessions.get(session_key) is command_guard:
|
||||
if session_key in self._session_tasks and current_guard is not None:
|
||||
self._active_sessions[session_key] = current_guard
|
||||
else:
|
||||
self._release_session_guard(session_key, guard=command_guard)
|
||||
raise
|
||||
|
||||
await self._drain_pending_after_session_command(session_key, command_guard)
|
||||
|
||||
async def handle_message(self, event: MessageEvent) -> None:
|
||||
"""
|
||||
Process an incoming message.
|
||||
@@ -2049,15 +1691,7 @@ class BasePlatformAdapter(ABC):
|
||||
group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
|
||||
thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
|
||||
)
|
||||
|
||||
# On-entry self-heal: if the adapter still has an _active_sessions
|
||||
# entry for this key but the owner task has already exited (done or
|
||||
# cancelled), the lock is stale. Clear it and fall through to
|
||||
# normal dispatch so the user isn't trapped behind a dead guard —
|
||||
# this is the split-brain tail described in issue #11016.
|
||||
if session_key in self._active_sessions:
|
||||
self._heal_stale_session_lock(session_key)
|
||||
|
||||
|
||||
# Check if there's already an active handler for this session
|
||||
if session_key in self._active_sessions:
|
||||
# Certain commands must bypass the active-session guard and be
|
||||
@@ -2074,23 +1708,6 @@ class BasePlatformAdapter(ABC):
|
||||
from hermes_cli.commands import should_bypass_active_session
|
||||
|
||||
if should_bypass_active_session(cmd):
|
||||
# /stop, /new, /reset must cancel the in-flight adapter task
|
||||
# and preserve ordering of queued follow-ups. Route those
|
||||
# through the dedicated handoff path that serializes
|
||||
# cancellation + runner response + pending drain.
|
||||
if cmd in ("stop", "new", "reset"):
|
||||
try:
|
||||
await self._dispatch_active_session_command(event, session_key, cmd)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"[%s] Command '/%s' dispatch failed: %s",
|
||||
self.name, cmd, e, exc_info=True,
|
||||
)
|
||||
return
|
||||
|
||||
# Other bypass commands (/approve, /deny, /status,
|
||||
# /background, /restart) just need direct dispatch — they
|
||||
# don't cancel the running task.
|
||||
logger.debug(
|
||||
"[%s] Command '/%s' bypassing active-session guard for %s",
|
||||
self.name, cmd, session_key,
|
||||
@@ -2136,9 +1753,19 @@ class BasePlatformAdapter(ABC):
|
||||
# starts would also pass the _active_sessions check and spawn a
|
||||
# duplicate task. (grammY sequentialize / aiogram EventIsolation
|
||||
# pattern — set the guard synchronously, not inside the task.)
|
||||
# _start_session_processing installs the guard AND the owner-task
|
||||
# mapping atomically so stale-lock detection works.
|
||||
self._start_session_processing(event, session_key)
|
||||
self._active_sessions[session_key] = asyncio.Event()
|
||||
|
||||
# Spawn background task to process this message
|
||||
task = asyncio.create_task(self._process_message_background(event, session_key))
|
||||
try:
|
||||
self._background_tasks.add(task)
|
||||
except TypeError:
|
||||
# Some tests stub create_task() with lightweight sentinels that are not
|
||||
# hashable and do not support lifecycle callbacks.
|
||||
return
|
||||
if hasattr(task, "add_done_callback"):
|
||||
task.add_done_callback(self._background_tasks.discard)
|
||||
task.add_done_callback(self._expected_cancelled_tasks.discard)
|
||||
|
||||
@staticmethod
|
||||
def _get_human_delay() -> float:
|
||||
@@ -2242,14 +1869,12 @@ class BasePlatformAdapter(ABC):
|
||||
logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
|
||||
|
||||
# Auto-TTS: if voice message, generate audio FIRST (before sending text)
|
||||
# Gated via ``_should_auto_tts_for_chat``: fires when the chat has
|
||||
# an explicit ``/voice on|tts`` opt-in OR when ``voice.auto_tts`` is
|
||||
# True globally and no ``/voice off`` has been issued.
|
||||
# Skipped when the chat has voice mode disabled (/voice off)
|
||||
_tts_path = None
|
||||
if (self._should_auto_tts_for_chat(event.source.chat_id)
|
||||
and event.message_type == MessageType.VOICE
|
||||
if (event.message_type == MessageType.VOICE
|
||||
and text_content
|
||||
and not media_files):
|
||||
and not media_files
|
||||
and event.source.chat_id not in self._auto_tts_disabled_chats):
|
||||
try:
|
||||
from tools.tts_tool import text_to_speech_tool, check_tts_requirements
|
||||
if check_tts_requirements():
|
||||
@@ -2500,9 +2125,6 @@ class BasePlatformAdapter(ABC):
|
||||
drain_task = asyncio.create_task(
|
||||
self._process_message_background(late_pending, session_key)
|
||||
)
|
||||
# Hand ownership of the session to the drain task so stale-lock
|
||||
# detection keeps working while it runs.
|
||||
self._session_tasks[session_key] = drain_task
|
||||
try:
|
||||
self._background_tasks.add(drain_task)
|
||||
drain_task.add_done_callback(self._background_tasks.discard)
|
||||
@@ -2512,14 +2134,9 @@ class BasePlatformAdapter(ABC):
|
||||
# Leave _active_sessions[session_key] populated — the drain
|
||||
# task's own lifecycle will clean it up.
|
||||
else:
|
||||
# Clean up session tracking. Guard-match both deletes so a
|
||||
# reset-like command that already swapped in its own
|
||||
# command_guard (and cancelled us) can't be accidentally
|
||||
# cleared by our unwind. The command owns the session now.
|
||||
current_task = asyncio.current_task()
|
||||
if current_task is not None and self._session_tasks.get(session_key) is current_task:
|
||||
del self._session_tasks[session_key]
|
||||
self._release_session_guard(session_key, guard=interrupt_event)
|
||||
# Clean up session tracking
|
||||
if session_key in self._active_sessions:
|
||||
del self._active_sessions[session_key]
|
||||
|
||||
async def cancel_background_tasks(self) -> None:
|
||||
"""Cancel any in-flight background message-processing tasks.
|
||||
@@ -2549,7 +2166,6 @@ class BasePlatformAdapter(ABC):
|
||||
# will be in self._background_tasks now. Re-check.
|
||||
self._background_tasks.clear()
|
||||
self._expected_cancelled_tasks.clear()
|
||||
self._session_tasks.clear()
|
||||
self._pending_messages.clear()
|
||||
self._active_sessions.clear()
|
||||
|
||||
@@ -2573,9 +2189,6 @@ class BasePlatformAdapter(ABC):
|
||||
user_id_alt: Optional[str] = None,
|
||||
chat_id_alt: Optional[str] = None,
|
||||
is_bot: bool = False,
|
||||
guild_id: Optional[str] = None,
|
||||
parent_chat_id: Optional[str] = None,
|
||||
message_id: Optional[str] = None,
|
||||
) -> SessionSource:
|
||||
"""Helper to build a SessionSource for this platform."""
|
||||
# Normalize empty topic to None
|
||||
@@ -2593,9 +2206,6 @@ class BasePlatformAdapter(ABC):
|
||||
user_id_alt=user_id_alt,
|
||||
chat_id_alt=chat_id_alt,
|
||||
is_bot=is_bot,
|
||||
guild_id=str(guild_id) if guild_id else None,
|
||||
parent_chat_id=str(parent_chat_id) if parent_chat_id else None,
|
||||
message_id=str(message_id) if message_id else None,
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
|
||||
@@ -99,7 +99,6 @@ def _normalize_server_url(raw: str) -> str:
|
||||
|
||||
class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
platform = Platform.BLUEBUBBLES
|
||||
SUPPORTS_MESSAGE_EDITING = False
|
||||
MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
@@ -392,13 +391,6 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
# Text sending
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def truncate_message(content: str, max_length: int = MAX_TEXT_LENGTH) -> List[str]:
|
||||
# Use the base splitter but skip pagination indicators — iMessage
|
||||
# bubbles flow naturally without "(1/3)" suffixes.
|
||||
chunks = BasePlatformAdapter.truncate_message(content, max_length)
|
||||
return [re.sub(r"\s*\(\d+/\d+\)$", "", c) for c in chunks]
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -406,19 +398,10 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
text = self.format_message(content)
|
||||
text = strip_markdown(content or "")
|
||||
if not text:
|
||||
return SendResult(success=False, error="BlueBubbles send requires text")
|
||||
# Split on paragraph breaks first (double newlines) so each thought
|
||||
# becomes its own iMessage bubble, then truncate any that are still
|
||||
# too long.
|
||||
paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()]
|
||||
chunks: List[str] = []
|
||||
for para in (paragraphs or [text]):
|
||||
if len(para) <= self.MAX_MESSAGE_LENGTH:
|
||||
chunks.append(para)
|
||||
else:
|
||||
chunks.extend(self.truncate_message(para, max_length=self.MAX_MESSAGE_LENGTH))
|
||||
chunks = self.truncate_message(text, max_length=self.MAX_MESSAGE_LENGTH)
|
||||
last = SendResult(success=True)
|
||||
for chunk in chunks:
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
|
||||
+52
-296
@@ -23,7 +23,6 @@ from typing import Callable, Dict, Optional, Any
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
|
||||
_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}
|
||||
|
||||
try:
|
||||
import discord
|
||||
@@ -528,7 +527,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
# Reply threading mode: "off" (no replies), "first" (reply on first
|
||||
# chunk only, default), "all" (reply-reference on every chunk).
|
||||
self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
|
||||
self._slash_commands: bool = self.config.extra.get("slash_commands", True)
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Discord and start receiving events."""
|
||||
@@ -746,8 +744,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
)
|
||||
|
||||
# Register slash commands
|
||||
if self._slash_commands:
|
||||
self._register_slash_commands()
|
||||
self._register_slash_commands()
|
||||
|
||||
# Start the bot in background
|
||||
self._bot_task = asyncio.create_task(self._client.start(self.config.token))
|
||||
@@ -803,27 +800,8 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if not self._client:
|
||||
return
|
||||
try:
|
||||
sync_policy = self._get_discord_command_sync_policy()
|
||||
if sync_policy == "off":
|
||||
logger.info("[%s] Skipping Discord slash command sync (policy=off)", self.name)
|
||||
return
|
||||
|
||||
if sync_policy == "bulk":
|
||||
synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
|
||||
logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
|
||||
return
|
||||
|
||||
summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
|
||||
logger.info(
|
||||
"[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
|
||||
self.name,
|
||||
summary["total"],
|
||||
summary["unchanged"],
|
||||
summary["updated"],
|
||||
summary["recreated"],
|
||||
summary["created"],
|
||||
summary["deleted"],
|
||||
)
|
||||
synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
|
||||
logger.info("[%s] Synced %d slash command(s)", self.name, len(synced))
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning("[%s] Slash command sync timed out after 30s", self.name)
|
||||
except asyncio.CancelledError:
|
||||
@@ -831,183 +809,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
except Exception as e: # pragma: no cover - defensive logging
|
||||
logger.warning("[%s] Slash command sync failed: %s", self.name, e, exc_info=True)
|
||||
|
||||
def _get_discord_command_sync_policy(self) -> str:
|
||||
raw = str(os.getenv("DISCORD_COMMAND_SYNC_POLICY", "safe") or "").strip().lower()
|
||||
if raw in _DISCORD_COMMAND_SYNC_POLICIES:
|
||||
return raw
|
||||
if raw:
|
||||
logger.warning(
|
||||
"[%s] Invalid DISCORD_COMMAND_SYNC_POLICY=%r; falling back to 'safe'",
|
||||
self.name,
|
||||
raw,
|
||||
)
|
||||
return "safe"
|
||||
|
||||
def _canonicalize_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Reduce command payloads to the semantic fields Hermes manages."""
|
||||
contexts = payload.get("contexts")
|
||||
integration_types = payload.get("integration_types")
|
||||
return {
|
||||
"type": int(payload.get("type", 1) or 1),
|
||||
"name": str(payload.get("name", "") or ""),
|
||||
"description": str(payload.get("description", "") or ""),
|
||||
"default_member_permissions": self._normalize_permissions(
|
||||
payload.get("default_member_permissions")
|
||||
),
|
||||
"dm_permission": bool(payload.get("dm_permission", True)),
|
||||
"nsfw": bool(payload.get("nsfw", False)),
|
||||
"contexts": sorted(int(c) for c in contexts) if contexts else None,
|
||||
"integration_types": (
|
||||
sorted(int(i) for i in integration_types) if integration_types else None
|
||||
),
|
||||
"options": [
|
||||
self._canonicalize_app_command_option(item)
|
||||
for item in payload.get("options", []) or []
|
||||
if isinstance(item, dict)
|
||||
],
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _normalize_permissions(value: Any) -> Optional[str]:
|
||||
"""Discord emits default_member_permissions as str server-side but discord.py
|
||||
sets it as int locally. Normalize to str-or-None so the comparison is stable."""
|
||||
if value is None:
|
||||
return None
|
||||
return str(value)
|
||||
|
||||
def _existing_command_to_payload(self, command: Any) -> Dict[str, Any]:
|
||||
"""Build a canonical-ready dict from an AppCommand.
|
||||
|
||||
discord.py's AppCommand.to_dict() does NOT include nsfw,
|
||||
dm_permission, or default_member_permissions (they live only on the
|
||||
attributes). Pull them from the attributes so the canonicalizer sees
|
||||
the real server-side values instead of defaults — otherwise any
|
||||
command using non-default permissions would diff on every startup.
|
||||
"""
|
||||
payload = dict(command.to_dict())
|
||||
nsfw = getattr(command, "nsfw", None)
|
||||
if nsfw is not None:
|
||||
payload["nsfw"] = bool(nsfw)
|
||||
guild_only = getattr(command, "guild_only", None)
|
||||
if guild_only is not None:
|
||||
payload["dm_permission"] = not bool(guild_only)
|
||||
default_permissions = getattr(command, "default_member_permissions", None)
|
||||
if default_permissions is not None:
|
||||
payload["default_member_permissions"] = getattr(
|
||||
default_permissions, "value", default_permissions
|
||||
)
|
||||
return payload
|
||||
|
||||
def _canonicalize_app_command_option(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return {
|
||||
"type": int(payload.get("type", 0) or 0),
|
||||
"name": str(payload.get("name", "") or ""),
|
||||
"description": str(payload.get("description", "") or ""),
|
||||
"required": bool(payload.get("required", False)),
|
||||
"autocomplete": bool(payload.get("autocomplete", False)),
|
||||
"choices": [
|
||||
{
|
||||
"name": str(choice.get("name", "") or ""),
|
||||
"value": choice.get("value"),
|
||||
}
|
||||
for choice in payload.get("choices", []) or []
|
||||
if isinstance(choice, dict)
|
||||
],
|
||||
"channel_types": list(payload.get("channel_types", []) or []),
|
||||
"min_value": payload.get("min_value"),
|
||||
"max_value": payload.get("max_value"),
|
||||
"min_length": payload.get("min_length"),
|
||||
"max_length": payload.get("max_length"),
|
||||
"options": [
|
||||
self._canonicalize_app_command_option(item)
|
||||
for item in payload.get("options", []) or []
|
||||
if isinstance(item, dict)
|
||||
],
|
||||
}
|
||||
|
||||
def _patchable_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Fields supported by discord.py's edit_global_command route."""
|
||||
canonical = self._canonicalize_app_command_payload(payload)
|
||||
return {
|
||||
"name": canonical["name"],
|
||||
"description": canonical["description"],
|
||||
"options": canonical["options"],
|
||||
}
|
||||
|
||||
async def _safe_sync_slash_commands(self) -> Dict[str, int]:
|
||||
"""Diff existing global commands and only mutate the commands that changed."""
|
||||
if not self._client:
|
||||
return {
|
||||
"total": 0,
|
||||
"unchanged": 0,
|
||||
"updated": 0,
|
||||
"recreated": 0,
|
||||
"created": 0,
|
||||
"deleted": 0,
|
||||
}
|
||||
|
||||
tree = self._client.tree
|
||||
app_id = getattr(self._client, "application_id", None) or getattr(getattr(self._client, "user", None), "id", None)
|
||||
if not app_id:
|
||||
raise RuntimeError("Discord application ID is unavailable for slash command sync")
|
||||
|
||||
desired_payloads = [command.to_dict(tree) for command in tree.get_commands()]
|
||||
desired_by_key = {
|
||||
(int(payload.get("type", 1) or 1), str(payload.get("name", "") or "").lower()): payload
|
||||
for payload in desired_payloads
|
||||
}
|
||||
existing_commands = await tree.fetch_commands()
|
||||
existing_by_key = {
|
||||
(
|
||||
int(getattr(getattr(command, "type", None), "value", getattr(command, "type", 1)) or 1),
|
||||
str(command.name or "").lower(),
|
||||
): command
|
||||
for command in existing_commands
|
||||
}
|
||||
|
||||
unchanged = 0
|
||||
updated = 0
|
||||
recreated = 0
|
||||
created = 0
|
||||
deleted = 0
|
||||
http = self._client.http
|
||||
|
||||
for key, desired in desired_by_key.items():
|
||||
current = existing_by_key.pop(key, None)
|
||||
if current is None:
|
||||
await http.upsert_global_command(app_id, desired)
|
||||
created += 1
|
||||
continue
|
||||
|
||||
current_existing_payload = self._existing_command_to_payload(current)
|
||||
current_payload = self._canonicalize_app_command_payload(current_existing_payload)
|
||||
desired_payload = self._canonicalize_app_command_payload(desired)
|
||||
if current_payload == desired_payload:
|
||||
unchanged += 1
|
||||
continue
|
||||
|
||||
if self._patchable_app_command_payload(current_existing_payload) == self._patchable_app_command_payload(desired):
|
||||
await http.delete_global_command(app_id, current.id)
|
||||
await http.upsert_global_command(app_id, desired)
|
||||
recreated += 1
|
||||
continue
|
||||
|
||||
await http.edit_global_command(app_id, current.id, desired)
|
||||
updated += 1
|
||||
|
||||
for current in existing_by_key.values():
|
||||
await http.delete_global_command(app_id, current.id)
|
||||
deleted += 1
|
||||
|
||||
return {
|
||||
"total": len(desired_payloads),
|
||||
"unchanged": unchanged,
|
||||
"updated": updated,
|
||||
"recreated": recreated,
|
||||
"created": created,
|
||||
"deleted": deleted,
|
||||
}
|
||||
|
||||
async def _add_reaction(self, message: Any, emoji: str) -> bool:
|
||||
"""Add an emoji reaction to a Discord message."""
|
||||
if not message or not hasattr(message, "add_reaction"):
|
||||
@@ -2246,6 +2047,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
async def slash_usage(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, "/usage")
|
||||
|
||||
@tree.command(name="provider", description="Show available providers")
|
||||
async def slash_provider(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, "/provider")
|
||||
|
||||
@tree.command(name="help", description="Show available commands")
|
||||
async def slash_help(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, "/help")
|
||||
@@ -2315,46 +2120,19 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
async def slash_background(interaction: discord.Interaction, prompt: str):
|
||||
await self._run_simple_slash(interaction, f"/background {prompt}", "Background task started~")
|
||||
|
||||
@tree.command(name="btw", description="Ephemeral side question using session context")
|
||||
@discord.app_commands.describe(question="Your side question (no tools, not persisted)")
|
||||
async def slash_btw(interaction: discord.Interaction, question: str):
|
||||
await self._run_simple_slash(interaction, f"/btw {question}")
|
||||
|
||||
# ── Auto-register any gateway-available commands not yet on the tree ──
|
||||
# This ensures new commands added to COMMAND_REGISTRY in
|
||||
# hermes_cli/commands.py automatically appear as Discord slash
|
||||
# commands without needing a manual entry here.
|
||||
def _build_auto_slash_command(_name: str, _description: str, _args_hint: str = ""):
|
||||
"""Build a discord.app_commands.Command that proxies to _run_simple_slash."""
|
||||
discord_name = _name.lower()[:32]
|
||||
desc = (_description or f"Run /{_name}")[:100]
|
||||
has_args = bool(_args_hint)
|
||||
|
||||
if has_args:
|
||||
def _make_args_handler(__name: str, __hint: str):
|
||||
@discord.app_commands.describe(args=f"Arguments: {__hint}"[:100])
|
||||
async def _handler(interaction: discord.Interaction, args: str = ""):
|
||||
await self._run_simple_slash(
|
||||
interaction, f"/{__name} {args}".strip()
|
||||
)
|
||||
_handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
|
||||
return _handler
|
||||
|
||||
handler = _make_args_handler(_name, _args_hint)
|
||||
else:
|
||||
def _make_simple_handler(__name: str):
|
||||
async def _handler(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, f"/{__name}")
|
||||
_handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
|
||||
return _handler
|
||||
|
||||
handler = _make_simple_handler(_name)
|
||||
|
||||
return discord.app_commands.Command(
|
||||
name=discord_name,
|
||||
description=desc,
|
||||
callback=handler,
|
||||
)
|
||||
|
||||
already_registered: set[str] = set()
|
||||
try:
|
||||
from hermes_cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates
|
||||
|
||||
already_registered = set()
|
||||
try:
|
||||
already_registered = {cmd.name for cmd in tree.get_commands()}
|
||||
except Exception:
|
||||
@@ -2369,10 +2147,38 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
discord_name = cmd_def.name.lower()[:32]
|
||||
if discord_name in already_registered:
|
||||
continue
|
||||
auto_cmd = _build_auto_slash_command(
|
||||
cmd_def.name,
|
||||
cmd_def.description,
|
||||
cmd_def.args_hint,
|
||||
# Skip aliases that overlap with already-registered names
|
||||
# (aliases for explicitly registered commands are handled above).
|
||||
desc = (cmd_def.description or f"Run /{cmd_def.name}")[:100]
|
||||
has_args = bool(cmd_def.args_hint)
|
||||
|
||||
if has_args:
|
||||
# Command takes optional arguments — create handler with
|
||||
# an optional ``args`` string parameter.
|
||||
def _make_args_handler(_name: str, _hint: str):
|
||||
@discord.app_commands.describe(args=f"Arguments: {_hint}"[:100])
|
||||
async def _handler(interaction: discord.Interaction, args: str = ""):
|
||||
await self._run_simple_slash(
|
||||
interaction, f"/{_name} {args}".strip()
|
||||
)
|
||||
_handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
|
||||
return _handler
|
||||
|
||||
handler = _make_args_handler(cmd_def.name, cmd_def.args_hint)
|
||||
else:
|
||||
# Parameterless command.
|
||||
def _make_simple_handler(_name: str):
|
||||
async def _handler(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, f"/{_name}")
|
||||
_handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
|
||||
return _handler
|
||||
|
||||
handler = _make_simple_handler(cmd_def.name)
|
||||
|
||||
auto_cmd = discord.app_commands.Command(
|
||||
name=discord_name,
|
||||
description=desc,
|
||||
callback=handler,
|
||||
)
|
||||
try:
|
||||
tree.add_command(auto_cmd)
|
||||
@@ -2389,35 +2195,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
except Exception as e:
|
||||
logger.warning("Discord auto-register from COMMAND_REGISTRY failed: %s", e)
|
||||
|
||||
# ── Plugin-registered slash commands ──
|
||||
# Plugins register via PluginContext.register_command(); we mirror
|
||||
# those into Discord's native slash picker so users get the same
|
||||
# autocomplete UX as for built-in commands. No per-platform plugin
|
||||
# API needed — plugin commands are platform-agnostic.
|
||||
try:
|
||||
from hermes_cli.commands import _iter_plugin_command_entries
|
||||
|
||||
for plugin_name, plugin_desc, plugin_args_hint in _iter_plugin_command_entries():
|
||||
discord_name = plugin_name.lower()[:32]
|
||||
if discord_name in already_registered:
|
||||
continue
|
||||
auto_cmd = _build_auto_slash_command(
|
||||
plugin_name,
|
||||
plugin_desc,
|
||||
plugin_args_hint,
|
||||
)
|
||||
try:
|
||||
tree.add_command(auto_cmd)
|
||||
already_registered.add(discord_name)
|
||||
except Exception:
|
||||
# Silently skip commands that fail registration (e.g.
|
||||
# name conflict with a subcommand group).
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Discord auto-register from plugin commands failed: %s", e
|
||||
)
|
||||
|
||||
# Register skills under a single /skill command group with category
|
||||
# subcommand groups. This uses 1 top-level slot instead of N,
|
||||
# supporting up to 25 categories × 25 skills = 625 skills.
|
||||
@@ -2710,12 +2487,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no", "off")
|
||||
|
||||
def _discord_free_response_channels(self) -> set:
|
||||
"""Return Discord channel IDs where no bot mention is required.
|
||||
|
||||
A single ``"*"`` entry (either from a list or a comma-separated
|
||||
string) is preserved in the returned set so callers can short-circuit
|
||||
on wildcard membership, consistent with ``allowed_channels``.
|
||||
"""
|
||||
"""Return Discord channel IDs where no bot mention is required."""
|
||||
raw = self.config.extra.get("free_response_channels")
|
||||
if raw is None:
|
||||
raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "")
|
||||
@@ -3208,14 +2980,14 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
allowed_channels_raw = os.getenv("DISCORD_ALLOWED_CHANNELS", "")
|
||||
if allowed_channels_raw:
|
||||
allowed_channels = {ch.strip() for ch in allowed_channels_raw.split(",") if ch.strip()}
|
||||
if "*" not in allowed_channels and not (channel_ids & allowed_channels):
|
||||
if not (channel_ids & allowed_channels):
|
||||
logger.debug("[%s] Ignoring message in non-allowed channel: %s", self.name, channel_ids)
|
||||
return
|
||||
|
||||
# Check ignored channels - never respond even when mentioned
|
||||
ignored_channels_raw = os.getenv("DISCORD_IGNORED_CHANNELS", "")
|
||||
ignored_channels = {ch.strip() for ch in ignored_channels_raw.split(",") if ch.strip()}
|
||||
if "*" in ignored_channels or (channel_ids & ignored_channels):
|
||||
if channel_ids & ignored_channels:
|
||||
logger.debug("[%s] Ignoring message in ignored channel: %s", self.name, channel_ids)
|
||||
return
|
||||
|
||||
@@ -3229,11 +3001,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
voice_linked_ids = {str(ch_id) for ch_id in self._voice_text_channels.values()}
|
||||
current_channel_id = str(message.channel.id)
|
||||
is_voice_linked_channel = current_channel_id in voice_linked_ids
|
||||
is_free_channel = (
|
||||
"*" in free_channels
|
||||
or bool(channel_ids & free_channels)
|
||||
or is_voice_linked_channel
|
||||
)
|
||||
is_free_channel = bool(channel_ids & free_channels) or is_voice_linked_channel
|
||||
|
||||
# Skip the mention check if the message is in a thread where
|
||||
# the bot has previously participated (auto-created or replied in).
|
||||
@@ -3256,7 +3024,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
|
||||
thread = await self._auto_create_thread(message)
|
||||
if thread:
|
||||
parent_channel_id = str(message.channel.id)
|
||||
is_thread = True
|
||||
thread_id = str(thread.id)
|
||||
auto_threaded_channel = thread
|
||||
@@ -3316,9 +3083,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
thread_id=thread_id,
|
||||
chat_topic=chat_topic,
|
||||
is_bot=getattr(message.author, "bot", False),
|
||||
guild_id=str(message.guild.id) if message.guild else None,
|
||||
parent_chat_id=parent_channel_id,
|
||||
message_id=str(message.id),
|
||||
)
|
||||
|
||||
# Build media URLs -- download image attachments to local cache so the
|
||||
@@ -3870,15 +3634,6 @@ if DISCORD_AVAILABLE:
|
||||
|
||||
self.resolved = True
|
||||
model_id = interaction.data["values"][0]
|
||||
self.clear_items()
|
||||
await interaction.response.edit_message(
|
||||
embed=discord.Embed(
|
||||
title="⚙ Switching Model",
|
||||
description=f"Switching to `{model_id}`...",
|
||||
color=discord.Color.blue(),
|
||||
),
|
||||
view=None,
|
||||
)
|
||||
|
||||
try:
|
||||
result_text = await self.on_model_selected(
|
||||
@@ -3889,13 +3644,14 @@ if DISCORD_AVAILABLE:
|
||||
except Exception as exc:
|
||||
result_text = f"Error switching model: {exc}"
|
||||
|
||||
await interaction.edit_original_response(
|
||||
self.clear_items()
|
||||
await interaction.response.edit_message(
|
||||
embed=discord.Embed(
|
||||
title="⚙ Model Switched",
|
||||
description=result_text,
|
||||
color=discord.Color.green(),
|
||||
),
|
||||
view=None,
|
||||
view=self,
|
||||
)
|
||||
|
||||
async def _on_back(self, interaction: discord.Interaction):
|
||||
|
||||
@@ -545,7 +545,6 @@ class EmailAdapter(BasePlatformAdapter):
|
||||
caption: Optional[str] = None,
|
||||
file_name: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> SendResult:
|
||||
"""Send a file as an email attachment."""
|
||||
try:
|
||||
|
||||
+80
-349
@@ -14,35 +14,6 @@ Supports:
|
||||
- Interactive card button-click events routed as synthetic COMMAND events
|
||||
- Webhook anomaly tracking (matches openclaw createWebhookAnomalyTracker)
|
||||
- Verification token validation as second auth layer (matches openclaw)
|
||||
|
||||
Feishu identity model
|
||||
---------------------
|
||||
Feishu uses three user-ID tiers (official docs:
|
||||
https://open.feishu.cn/document/home/user-identity-introduction/introduction):
|
||||
|
||||
open_id (ou_xxx) — **App-scoped**. The same person gets a different
|
||||
open_id under each Feishu app. Always available in
|
||||
event payloads without extra permissions.
|
||||
user_id (u_xxx) — **Tenant-scoped**. Stable within a company but
|
||||
requires the ``contact:user.employee_id:readonly``
|
||||
scope. May not be present.
|
||||
union_id (on_xxx) — **Developer-scoped**. Same across all apps owned by
|
||||
one developer/ISV. Best cross-app stable ID.
|
||||
|
||||
For bots specifically:
|
||||
|
||||
app_id — The application's canonical credential identifier.
|
||||
bot open_id — Returned by ``/bot/v3/info``. This is the bot's own
|
||||
open_id *within its app context* and is what Feishu
|
||||
puts in ``mentions[].id.open_id`` when someone
|
||||
@-mentions the bot. Used for mention gating only.
|
||||
|
||||
In single-bot mode (what Hermes currently supports), open_id works as a
|
||||
de-facto unique user identifier since there is only one app context.
|
||||
|
||||
Session-key participant isolation prefers ``union_id`` (via user_id_alt)
|
||||
over ``open_id`` (via user_id) so that sessions stay stable if the same
|
||||
user is seen through different apps in the future.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -64,7 +35,7 @@ from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Optional, Sequence
|
||||
from typing import Any, Dict, List, Optional
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlencode
|
||||
from urllib.request import Request, urlopen
|
||||
@@ -102,9 +73,7 @@ try:
|
||||
UpdateMessageRequest,
|
||||
UpdateMessageRequestBody,
|
||||
)
|
||||
from lark_oapi.core import AccessTokenType, HttpMethod
|
||||
from lark_oapi.core.const import FEISHU_DOMAIN, LARK_DOMAIN
|
||||
from lark_oapi.core.model import BaseRequest
|
||||
from lark_oapi.event.callback.model.p2_card_action_trigger import (
|
||||
CallBackCard,
|
||||
P2CardActionTriggerResponse,
|
||||
@@ -265,8 +234,6 @@ FALLBACK_ATTACHMENT_TEXT = "[Attachment]"
|
||||
_PREFERRED_LOCALES = ("zh_cn", "en_us")
|
||||
_MARKDOWN_SPECIAL_CHARS_RE = re.compile(r"([\\`*_{}\[\]()#+\-!|>~])")
|
||||
_MENTION_PLACEHOLDER_RE = re.compile(r"@_user_\d+")
|
||||
_MENTION_BOUNDARY_CHARS = frozenset(" \t\n\r.,;:!?、,。;:!?()[]{}<>\"'`")
|
||||
_TRAILING_TERMINAL_PUNCT = frozenset(" \t\n\r.!?。!?")
|
||||
_WHITESPACE_RE = re.compile(r"\s+")
|
||||
_SUPPORTED_CARD_TEXT_KEYS = (
|
||||
"title",
|
||||
@@ -310,36 +277,12 @@ class FeishuPostMediaRef:
|
||||
resource_type: str = "file"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class FeishuMentionRef:
|
||||
name: str = ""
|
||||
open_id: str = ""
|
||||
is_all: bool = False
|
||||
is_self: bool = False
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class _FeishuBotIdentity:
|
||||
open_id: str = ""
|
||||
user_id: str = ""
|
||||
name: str = ""
|
||||
|
||||
def matches(self, *, open_id: str, user_id: str, name: str) -> bool:
|
||||
# Precedence: open_id > user_id > name. IDs are authoritative when both
|
||||
# sides have them; the next tier is only considered when either side
|
||||
# lacks the current one.
|
||||
if open_id and self.open_id:
|
||||
return open_id == self.open_id
|
||||
if user_id and self.user_id:
|
||||
return user_id == self.user_id
|
||||
return bool(self.name) and name == self.name
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class FeishuPostParseResult:
|
||||
text_content: str
|
||||
image_keys: List[str] = field(default_factory=list)
|
||||
media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
|
||||
mentioned_ids: List[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -349,14 +292,14 @@ class FeishuNormalizedMessage:
|
||||
preferred_message_type: str = "text"
|
||||
image_keys: List[str] = field(default_factory=list)
|
||||
media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
|
||||
mentions: List[FeishuMentionRef] = field(default_factory=list)
|
||||
mentioned_ids: List[str] = field(default_factory=list)
|
||||
relation_kind: str = "plain"
|
||||
metadata: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class FeishuAdapterSettings:
|
||||
app_id: str # Canonical bot/app identifier (credential, not from event payloads)
|
||||
app_id: str
|
||||
app_secret: str
|
||||
domain_name: str
|
||||
connection_mode: str
|
||||
@@ -364,11 +307,7 @@ class FeishuAdapterSettings:
|
||||
verification_token: str
|
||||
group_policy: str
|
||||
allowed_group_users: frozenset[str]
|
||||
# Bot's own open_id (app-scoped) — returned by /bot/v3/info. Used only for
|
||||
# @mention matching: Feishu puts this value in mentions[].id.open_id when
|
||||
# a user @-mentions the bot in a group chat.
|
||||
bot_open_id: str
|
||||
# Bot's user_id (tenant-scoped) — optional, used as fallback mention match.
|
||||
bot_user_id: str
|
||||
bot_name: str
|
||||
dedup_cache_size: int
|
||||
@@ -566,17 +505,14 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
|
||||
return rows or [[{"tag": "md", "text": content}]]
|
||||
|
||||
|
||||
def parse_feishu_post_payload(
|
||||
payload: Any,
|
||||
*,
|
||||
mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
|
||||
) -> FeishuPostParseResult:
|
||||
def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
|
||||
resolved = _resolve_post_payload(payload)
|
||||
if not resolved:
|
||||
return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT)
|
||||
|
||||
image_keys: List[str] = []
|
||||
media_refs: List[FeishuPostMediaRef] = []
|
||||
mentioned_ids: List[str] = []
|
||||
parts: List[str] = []
|
||||
|
||||
title = _normalize_feishu_text(str(resolved.get("title", "")).strip())
|
||||
@@ -587,10 +523,7 @@ def parse_feishu_post_payload(
|
||||
if not isinstance(row, list):
|
||||
continue
|
||||
row_text = _normalize_feishu_text(
|
||||
"".join(
|
||||
_render_post_element(item, image_keys, media_refs, mentions_map)
|
||||
for item in row
|
||||
)
|
||||
"".join(_render_post_element(item, image_keys, media_refs, mentioned_ids) for item in row)
|
||||
)
|
||||
if row_text:
|
||||
parts.append(row_text)
|
||||
@@ -599,6 +532,7 @@ def parse_feishu_post_payload(
|
||||
text_content="\n".join(parts).strip() or FALLBACK_POST_TEXT,
|
||||
image_keys=image_keys,
|
||||
media_refs=media_refs,
|
||||
mentioned_ids=mentioned_ids,
|
||||
)
|
||||
|
||||
|
||||
@@ -650,7 +584,7 @@ def _render_post_element(
|
||||
element: Any,
|
||||
image_keys: List[str],
|
||||
media_refs: List[FeishuPostMediaRef],
|
||||
mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
|
||||
mentioned_ids: List[str],
|
||||
) -> str:
|
||||
if isinstance(element, str):
|
||||
return element
|
||||
@@ -668,21 +602,19 @@ def _render_post_element(
|
||||
escaped_label = _escape_markdown_text(label)
|
||||
return f"[{escaped_label}]({href})" if href else escaped_label
|
||||
if tag == "at":
|
||||
# Post <at>.user_id is a placeholder ("@_user_N" or "@_all"); look up
|
||||
# the real ref in mentions_map for the display name.
|
||||
placeholder = str(element.get("user_id", "")).strip()
|
||||
if placeholder == "@_all":
|
||||
# Feishu SDK sometimes omits @_all from the top-level mentions
|
||||
# payload; record it here so the caller's mention list stays complete.
|
||||
if mentions_map is not None and "@_all" not in mentions_map:
|
||||
mentions_map["@_all"] = FeishuMentionRef(is_all=True)
|
||||
return "@all"
|
||||
ref = (mentions_map or {}).get(placeholder)
|
||||
if ref is not None:
|
||||
display_name = ref.name or ref.open_id or "user"
|
||||
else:
|
||||
display_name = str(element.get("user_name", "")).strip() or "user"
|
||||
return f"@{_escape_markdown_text(display_name)}"
|
||||
mentioned_id = (
|
||||
str(element.get("open_id", "")).strip()
|
||||
or str(element.get("user_id", "")).strip()
|
||||
)
|
||||
if mentioned_id and mentioned_id not in mentioned_ids:
|
||||
mentioned_ids.append(mentioned_id)
|
||||
display_name = (
|
||||
str(element.get("user_name", "")).strip()
|
||||
or str(element.get("name", "")).strip()
|
||||
or str(element.get("text", "")).strip()
|
||||
or mentioned_id
|
||||
)
|
||||
return f"@{_escape_markdown_text(display_name)}" if display_name else "@"
|
||||
if tag in {"img", "image"}:
|
||||
image_key = str(element.get("image_key", "")).strip()
|
||||
if image_key and image_key not in image_keys:
|
||||
@@ -720,7 +652,8 @@ def _render_post_element(
|
||||
|
||||
nested_parts: List[str] = []
|
||||
for key in ("text", "title", "content", "children", "elements"):
|
||||
extracted = _render_nested_post(element.get(key), image_keys, media_refs, mentions_map)
|
||||
value = element.get(key)
|
||||
extracted = _render_nested_post(value, image_keys, media_refs, mentioned_ids)
|
||||
if extracted:
|
||||
nested_parts.append(extracted)
|
||||
return " ".join(part for part in nested_parts if part)
|
||||
@@ -730,7 +663,7 @@ def _render_nested_post(
|
||||
value: Any,
|
||||
image_keys: List[str],
|
||||
media_refs: List[FeishuPostMediaRef],
|
||||
mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
|
||||
mentioned_ids: List[str],
|
||||
) -> str:
|
||||
if isinstance(value, str):
|
||||
return _escape_markdown_text(value)
|
||||
@@ -738,17 +671,17 @@ def _render_nested_post(
|
||||
return " ".join(
|
||||
part
|
||||
for item in value
|
||||
for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
|
||||
for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
|
||||
if part
|
||||
)
|
||||
if isinstance(value, dict):
|
||||
direct = _render_post_element(value, image_keys, media_refs, mentions_map)
|
||||
direct = _render_post_element(value, image_keys, media_refs, mentioned_ids)
|
||||
if direct:
|
||||
return direct
|
||||
return " ".join(
|
||||
part
|
||||
for item in value.values()
|
||||
for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
|
||||
for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
|
||||
if part
|
||||
)
|
||||
return ""
|
||||
@@ -759,48 +692,31 @@ def _render_nested_post(
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def normalize_feishu_message(
|
||||
*,
|
||||
message_type: str,
|
||||
raw_content: str,
|
||||
mentions: Optional[Sequence[Any]] = None,
|
||||
bot: _FeishuBotIdentity = _FeishuBotIdentity(),
|
||||
) -> FeishuNormalizedMessage:
|
||||
def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNormalizedMessage:
|
||||
normalized_type = str(message_type or "").strip().lower()
|
||||
payload = _load_feishu_payload(raw_content)
|
||||
mentions_map = _build_mentions_map(mentions, bot)
|
||||
|
||||
if normalized_type == "text":
|
||||
text = str(payload.get("text", "") or "")
|
||||
# Feishu SDK sometimes omits @_all from the mentions payload even when
|
||||
# the text literal contains it (confirmed via im.v1.message.get).
|
||||
if "@_all" in text and "@_all" not in mentions_map:
|
||||
mentions_map["@_all"] = FeishuMentionRef(is_all=True)
|
||||
return FeishuNormalizedMessage(
|
||||
raw_type=normalized_type,
|
||||
text_content=_normalize_feishu_text(text, mentions_map),
|
||||
mentions=list(mentions_map.values()),
|
||||
text_content=_normalize_feishu_text(str(payload.get("text", "") or "")),
|
||||
)
|
||||
if normalized_type == "post":
|
||||
# The walker writes back to mentions_map if it encounters
|
||||
# <at user_id="@_all">, so reading .values() after parsing is enough.
|
||||
parsed_post = parse_feishu_post_payload(payload, mentions_map=mentions_map)
|
||||
parsed_post = parse_feishu_post_payload(payload)
|
||||
return FeishuNormalizedMessage(
|
||||
raw_type=normalized_type,
|
||||
text_content=parsed_post.text_content,
|
||||
image_keys=list(parsed_post.image_keys),
|
||||
media_refs=list(parsed_post.media_refs),
|
||||
mentions=list(mentions_map.values()),
|
||||
mentioned_ids=list(parsed_post.mentioned_ids),
|
||||
relation_kind="post",
|
||||
)
|
||||
mention_refs = list(mentions_map.values())
|
||||
if normalized_type == "image":
|
||||
image_key = str(payload.get("image_key", "") or "").strip()
|
||||
alt_text = _normalize_feishu_text(
|
||||
str(payload.get("text", "") or "")
|
||||
or str(payload.get("alt", "") or "")
|
||||
or FALLBACK_IMAGE_TEXT,
|
||||
mentions_map,
|
||||
or FALLBACK_IMAGE_TEXT
|
||||
)
|
||||
return FeishuNormalizedMessage(
|
||||
raw_type=normalized_type,
|
||||
@@ -808,7 +724,6 @@ def normalize_feishu_message(
|
||||
preferred_message_type="photo",
|
||||
image_keys=[image_key] if image_key else [],
|
||||
relation_kind="image",
|
||||
mentions=mention_refs,
|
||||
)
|
||||
if normalized_type in {"file", "audio", "media"}:
|
||||
media_ref = _build_media_ref_from_payload(payload, resource_type=normalized_type)
|
||||
@@ -820,7 +735,6 @@ def normalize_feishu_message(
|
||||
media_refs=[media_ref] if media_ref.file_key else [],
|
||||
relation_kind=normalized_type,
|
||||
metadata={"placeholder_text": placeholder},
|
||||
mentions=mention_refs,
|
||||
)
|
||||
if normalized_type == "merge_forward":
|
||||
return _normalize_merge_forward_message(payload)
|
||||
@@ -1095,20 +1009,8 @@ def _first_non_empty_text(*values: Any) -> str:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _normalize_feishu_text(
|
||||
text: str,
|
||||
mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
|
||||
) -> str:
|
||||
def _sub(match: "re.Match[str]") -> str:
|
||||
key = match.group(0)
|
||||
ref = (mentions_map or {}).get(key)
|
||||
if ref is None:
|
||||
return " "
|
||||
name = ref.name or ref.open_id or "user"
|
||||
return f"@{name}"
|
||||
|
||||
cleaned = _MENTION_PLACEHOLDER_RE.sub(_sub, text or "")
|
||||
cleaned = cleaned.replace("@_all", "@all")
|
||||
def _normalize_feishu_text(text: str) -> str:
|
||||
cleaned = _MENTION_PLACEHOLDER_RE.sub(" ", text or "")
|
||||
cleaned = cleaned.replace("\r\n", "\n").replace("\r", "\n")
|
||||
cleaned = "\n".join(_WHITESPACE_RE.sub(" ", line).strip() for line in cleaned.split("\n"))
|
||||
cleaned = "\n".join(line for line in cleaned.split("\n") if line)
|
||||
@@ -1127,117 +1029,6 @@ def _unique_lines(lines: List[str]) -> List[str]:
|
||||
return unique
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Mention helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _extract_mention_ids(mention: Any) -> tuple[str, str]:
|
||||
# Returns (open_id, user_id). im.v1.message.get hands back id as a string
|
||||
# plus id_type discriminator; event payloads hand back a nested UserId
|
||||
# object carrying both fields.
|
||||
mention_id = getattr(mention, "id", None)
|
||||
if isinstance(mention_id, str):
|
||||
id_type = str(getattr(mention, "id_type", "") or "").lower()
|
||||
if id_type == "open_id":
|
||||
return mention_id, ""
|
||||
if id_type == "user_id":
|
||||
return "", mention_id
|
||||
return "", ""
|
||||
if mention_id is None:
|
||||
return "", ""
|
||||
return (
|
||||
str(getattr(mention_id, "open_id", "") or ""),
|
||||
str(getattr(mention_id, "user_id", "") or ""),
|
||||
)
|
||||
|
||||
|
||||
def _build_mentions_map(
|
||||
mentions: Optional[Sequence[Any]],
|
||||
bot: _FeishuBotIdentity,
|
||||
) -> Dict[str, FeishuMentionRef]:
|
||||
result: Dict[str, FeishuMentionRef] = {}
|
||||
for mention in mentions or []:
|
||||
key = str(getattr(mention, "key", "") or "")
|
||||
if not key:
|
||||
continue
|
||||
if key == "@_all":
|
||||
result[key] = FeishuMentionRef(is_all=True)
|
||||
continue
|
||||
open_id, user_id = _extract_mention_ids(mention)
|
||||
name = str(getattr(mention, "name", "") or "").strip()
|
||||
result[key] = FeishuMentionRef(
|
||||
name=name,
|
||||
open_id=open_id,
|
||||
is_self=bot.matches(open_id=open_id, user_id=user_id, name=name),
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def _build_mention_hint(mentions: Sequence[FeishuMentionRef]) -> str:
|
||||
parts: List[str] = []
|
||||
seen: set = set()
|
||||
for ref in mentions:
|
||||
if ref.is_self:
|
||||
continue
|
||||
signature = (ref.is_all, ref.open_id, ref.name)
|
||||
if signature in seen:
|
||||
continue
|
||||
seen.add(signature)
|
||||
if ref.is_all:
|
||||
parts.append("@all")
|
||||
elif ref.open_id:
|
||||
parts.append(f"{ref.name or 'unknown'} (open_id={ref.open_id})")
|
||||
else:
|
||||
parts.append(ref.name or "unknown")
|
||||
return f"[Mentioned: {', '.join(parts)}]" if parts else ""
|
||||
|
||||
|
||||
def _strip_edge_self_mentions(
|
||||
text: str,
|
||||
mentions: Sequence[FeishuMentionRef],
|
||||
) -> str:
|
||||
# Leading: strip consecutive self-mentions unconditionally.
|
||||
# Trailing: strip only when followed by whitespace/terminal punct, so
|
||||
# mid-sentence references ("don't @Bot again") stay intact.
|
||||
# Leading word-boundary prevents @Al from eating @Alice.
|
||||
if not text:
|
||||
return text
|
||||
self_names = [
|
||||
f"@{ref.name or ref.open_id or 'user'}"
|
||||
for ref in mentions
|
||||
if ref.is_self
|
||||
]
|
||||
if not self_names:
|
||||
return text
|
||||
|
||||
remaining = text.lstrip()
|
||||
while True:
|
||||
for nm in self_names:
|
||||
if not remaining.startswith(nm):
|
||||
continue
|
||||
after = remaining[len(nm):]
|
||||
if after and after[0] not in _MENTION_BOUNDARY_CHARS:
|
||||
continue
|
||||
remaining = after.lstrip()
|
||||
break
|
||||
else:
|
||||
break
|
||||
|
||||
while True:
|
||||
i = len(remaining)
|
||||
while i > 0 and remaining[i - 1] in _TRAILING_TERMINAL_PUNCT:
|
||||
i -= 1
|
||||
body = remaining[:i]
|
||||
tail = remaining[i:]
|
||||
for nm in self_names:
|
||||
if body.endswith(nm):
|
||||
remaining = body[: -len(nm)].rstrip() + tail
|
||||
break
|
||||
else:
|
||||
return remaining
|
||||
|
||||
|
||||
def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None:
|
||||
"""Run the official Lark WS client in its own thread-local event loop."""
|
||||
import lark_oapi.ws.client as ws_client_module
|
||||
@@ -1700,7 +1491,6 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
if not self._client:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
content = self.format_message(content)
|
||||
try:
|
||||
msg_type, payload = self._build_outbound_payload(content)
|
||||
body = self._build_update_message_body(msg_type=msg_type, content=payload)
|
||||
@@ -2680,22 +2470,13 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
chat_type: str,
|
||||
message_id: str,
|
||||
) -> None:
|
||||
text, inbound_type, media_urls, media_types, mentions = await self._extract_message_content(message)
|
||||
|
||||
if inbound_type == MessageType.TEXT:
|
||||
text = _strip_edge_self_mentions(text, mentions)
|
||||
if text.startswith("/"):
|
||||
inbound_type = MessageType.COMMAND
|
||||
|
||||
# Guard runs post-strip so a pure "@Bot" message (stripped to "") is dropped.
|
||||
text, inbound_type, media_urls, media_types = await self._extract_message_content(message)
|
||||
if inbound_type == MessageType.TEXT and not text and not media_urls:
|
||||
logger.debug("[Feishu] Ignoring empty text message id=%s", message_id)
|
||||
logger.debug("[Feishu] Ignoring unsupported or empty message type: %s", getattr(message, "message_type", ""))
|
||||
return
|
||||
|
||||
if inbound_type != MessageType.COMMAND:
|
||||
hint = _build_mention_hint(mentions)
|
||||
if hint:
|
||||
text = f"{hint}\n\n{text}" if text else hint
|
||||
if inbound_type == MessageType.TEXT and text.startswith("/"):
|
||||
inbound_type = MessageType.COMMAND
|
||||
|
||||
reply_to_message_id = (
|
||||
getattr(message, "parent_id", None)
|
||||
@@ -3154,20 +2935,14 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
# Message content extraction and resource download
|
||||
# =========================================================================
|
||||
|
||||
async def _extract_message_content(
|
||||
self, message: Any
|
||||
) -> tuple[str, MessageType, List[str], List[str], List[FeishuMentionRef]]:
|
||||
async def _extract_message_content(self, message: Any) -> tuple[str, MessageType, List[str], List[str]]:
|
||||
"""Extract text and cached media from a normalized Feishu message."""
|
||||
raw_content = getattr(message, "content", "") or ""
|
||||
raw_type = getattr(message, "message_type", "") or ""
|
||||
message_id = str(getattr(message, "message_id", "") or "")
|
||||
logger.info("[Feishu] Received raw message type=%s message_id=%s", raw_type, message_id)
|
||||
|
||||
normalized = normalize_feishu_message(
|
||||
message_type=raw_type,
|
||||
raw_content=raw_content,
|
||||
mentions=getattr(message, "mentions", None),
|
||||
bot=self._bot_identity(),
|
||||
)
|
||||
normalized = normalize_feishu_message(message_type=raw_type, raw_content=raw_content)
|
||||
media_urls, media_types = await self._download_feishu_message_resources(
|
||||
message_id=message_id,
|
||||
normalized=normalized,
|
||||
@@ -3184,7 +2959,7 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
if injected:
|
||||
text = injected
|
||||
|
||||
return text, inbound_type, media_urls, media_types, list(normalized.mentions)
|
||||
return text, inbound_type, media_urls, media_types
|
||||
|
||||
async def _download_feishu_message_resources(
|
||||
self,
|
||||
@@ -3448,22 +3223,10 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
return "group"
|
||||
|
||||
async def _resolve_sender_profile(self, sender_id: Any) -> Dict[str, Optional[str]]:
|
||||
"""Map Feishu's three-tier user IDs onto Hermes' SessionSource fields.
|
||||
|
||||
Preference order for the primary ``user_id`` field:
|
||||
1. user_id (tenant-scoped, most stable — requires permission scope)
|
||||
2. open_id (app-scoped, always available — different per bot app)
|
||||
|
||||
``user_id_alt`` carries the union_id (developer-scoped, stable across
|
||||
all apps by the same developer). Session-key generation prefers
|
||||
user_id_alt when present, so participant isolation stays stable even
|
||||
if the primary ID is the app-scoped open_id.
|
||||
"""
|
||||
open_id = getattr(sender_id, "open_id", None) or None
|
||||
user_id = getattr(sender_id, "user_id", None) or None
|
||||
union_id = getattr(sender_id, "union_id", None) or None
|
||||
# Prefer tenant-scoped user_id; fall back to app-scoped open_id.
|
||||
primary_id = user_id or open_id
|
||||
primary_id = open_id or user_id
|
||||
display_name = await self._resolve_sender_name_from_api(primary_id or union_id)
|
||||
return {
|
||||
"user_id": primary_id,
|
||||
@@ -3545,31 +3308,15 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
body = getattr(parent, "body", None)
|
||||
msg_type = getattr(parent, "msg_type", "") or ""
|
||||
raw_content = getattr(body, "content", "") or ""
|
||||
parent_mentions = getattr(parent, "mentions", None) if parent else None
|
||||
text = self._extract_text_from_raw_content(
|
||||
msg_type=msg_type,
|
||||
raw_content=raw_content,
|
||||
mentions=parent_mentions,
|
||||
)
|
||||
text = self._extract_text_from_raw_content(msg_type=msg_type, raw_content=raw_content)
|
||||
self._message_text_cache[message_id] = text
|
||||
return text
|
||||
except Exception:
|
||||
logger.warning("[Feishu] Failed to fetch parent message %s", message_id, exc_info=True)
|
||||
return None
|
||||
|
||||
def _extract_text_from_raw_content(
|
||||
self,
|
||||
*,
|
||||
msg_type: str,
|
||||
raw_content: str,
|
||||
mentions: Optional[Sequence[Any]] = None,
|
||||
) -> Optional[str]:
|
||||
normalized = normalize_feishu_message(
|
||||
message_type=msg_type,
|
||||
raw_content=raw_content,
|
||||
mentions=mentions,
|
||||
bot=self._bot_identity(),
|
||||
)
|
||||
def _extract_text_from_raw_content(self, *, msg_type: str, raw_content: str) -> Optional[str]:
|
||||
normalized = normalize_feishu_message(message_type=msg_type, raw_content=raw_content)
|
||||
if normalized.text_content:
|
||||
return normalized.text_content
|
||||
placeholder = normalized.metadata.get("placeholder_text") if isinstance(normalized.metadata, dict) else None
|
||||
@@ -3639,10 +3386,10 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
normalized = normalize_feishu_message(
|
||||
message_type=getattr(message, "message_type", "") or "",
|
||||
raw_content=raw_content,
|
||||
mentions=getattr(message, "mentions", None),
|
||||
bot=self._bot_identity(),
|
||||
)
|
||||
return self._post_mentions_bot(normalized.mentions)
|
||||
if normalized.mentioned_ids:
|
||||
return self._post_mentions_bot(normalized.mentioned_ids)
|
||||
return False
|
||||
|
||||
def _is_self_sent_bot_message(self, event: Any) -> bool:
|
||||
"""Return True only for Feishu events emitted by this Hermes bot."""
|
||||
@@ -3662,37 +3409,30 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
return False
|
||||
|
||||
def _message_mentions_bot(self, mentions: List[Any]) -> bool:
|
||||
# IDs trump names: when both sides have open_id (or both user_id),
|
||||
# match requires equal IDs. Name fallback only when either side
|
||||
# lacks an ID.
|
||||
"""Check whether any mention targets the configured or inferred bot identity."""
|
||||
for mention in mentions:
|
||||
mention_id = getattr(mention, "id", None)
|
||||
mention_open_id = (getattr(mention_id, "open_id", None) or "").strip()
|
||||
mention_user_id = (getattr(mention_id, "user_id", None) or "").strip()
|
||||
mention_open_id = getattr(mention_id, "open_id", None)
|
||||
mention_user_id = getattr(mention_id, "user_id", None)
|
||||
mention_name = (getattr(mention, "name", None) or "").strip()
|
||||
|
||||
if mention_open_id and self._bot_open_id:
|
||||
if mention_open_id == self._bot_open_id:
|
||||
return True
|
||||
continue # IDs differ — not the bot; skip name fallback.
|
||||
if mention_user_id and self._bot_user_id:
|
||||
if mention_user_id == self._bot_user_id:
|
||||
return True
|
||||
continue
|
||||
if self._bot_open_id and mention_open_id == self._bot_open_id:
|
||||
return True
|
||||
if self._bot_user_id and mention_user_id == self._bot_user_id:
|
||||
return True
|
||||
if self._bot_name and mention_name == self._bot_name:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def _post_mentions_bot(self, mentions: List[FeishuMentionRef]) -> bool:
|
||||
return any(m.is_self for m in mentions)
|
||||
|
||||
def _bot_identity(self) -> _FeishuBotIdentity:
|
||||
return _FeishuBotIdentity(
|
||||
open_id=self._bot_open_id,
|
||||
user_id=self._bot_user_id,
|
||||
name=self._bot_name,
|
||||
)
|
||||
def _post_mentions_bot(self, mentioned_ids: List[str]) -> bool:
|
||||
if not mentioned_ids:
|
||||
return False
|
||||
if self._bot_open_id and self._bot_open_id in mentioned_ids:
|
||||
return True
|
||||
if self._bot_user_id and self._bot_user_id in mentioned_ids:
|
||||
return True
|
||||
return False
|
||||
|
||||
async def _hydrate_bot_identity(self) -> None:
|
||||
"""Best-effort discovery of bot identity for precise group mention gating
|
||||
@@ -3717,15 +3457,14 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
# uses via probe_bot().
|
||||
if not self._bot_open_id or not self._bot_name:
|
||||
try:
|
||||
req = (
|
||||
BaseRequest.builder()
|
||||
.http_method(HttpMethod.GET)
|
||||
.uri("/open-apis/bot/v3/info")
|
||||
.token_types({AccessTokenType.TENANT})
|
||||
.build()
|
||||
resp = await asyncio.to_thread(
|
||||
self._client.request,
|
||||
method="GET",
|
||||
url="/open-apis/bot/v3/info",
|
||||
body=None,
|
||||
raw_response=True,
|
||||
)
|
||||
resp = await asyncio.to_thread(self._client.request, req)
|
||||
content = getattr(getattr(resp, "raw", None), "content", None)
|
||||
content = getattr(resp, "content", None)
|
||||
if content:
|
||||
payload = json.loads(content)
|
||||
parsed = _parse_bot_response(payload) or {}
|
||||
@@ -4473,9 +4212,6 @@ def probe_bot(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
|
||||
|
||||
Uses lark_oapi SDK when available, falls back to raw HTTP otherwise.
|
||||
Returns {"bot_name": ..., "bot_open_id": ...} on success, None on failure.
|
||||
|
||||
Note: ``bot_open_id`` here is the bot's app-scoped open_id — the same ID
|
||||
that Feishu puts in @mention payloads. It is NOT the app_id.
|
||||
"""
|
||||
if FEISHU_AVAILABLE:
|
||||
return _probe_bot_sdk(app_id, app_secret, domain)
|
||||
@@ -4496,12 +4232,12 @@ def _build_onboard_client(app_id: str, app_secret: str, domain: str) -> Any:
|
||||
|
||||
|
||||
def _parse_bot_response(data: dict) -> Optional[dict]:
|
||||
# /bot/v3/info returns bot.app_name; legacy paths used bot_name — accept both.
|
||||
"""Extract bot_name and bot_open_id from a /bot/v3/info response."""
|
||||
if data.get("code") != 0:
|
||||
return None
|
||||
bot = data.get("bot") or data.get("data", {}).get("bot") or {}
|
||||
return {
|
||||
"bot_name": bot.get("app_name") or bot.get("bot_name"),
|
||||
"bot_name": bot.get("bot_name"),
|
||||
"bot_open_id": bot.get("open_id"),
|
||||
}
|
||||
|
||||
@@ -4510,18 +4246,13 @@ def _probe_bot_sdk(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
|
||||
"""Probe bot info using lark_oapi SDK."""
|
||||
try:
|
||||
client = _build_onboard_client(app_id, app_secret, domain)
|
||||
req = (
|
||||
BaseRequest.builder()
|
||||
.http_method(HttpMethod.GET)
|
||||
.uri("/open-apis/bot/v3/info")
|
||||
.token_types({AccessTokenType.TENANT})
|
||||
.build()
|
||||
resp = client.request(
|
||||
method="GET",
|
||||
url="/open-apis/bot/v3/info",
|
||||
body=None,
|
||||
raw_response=True,
|
||||
)
|
||||
resp = client.request(req)
|
||||
content = getattr(getattr(resp, "raw", None), "content", None)
|
||||
if content is None:
|
||||
return None
|
||||
return _parse_bot_response(json.loads(content))
|
||||
return _parse_bot_response(json.loads(resp.content))
|
||||
except Exception as exc:
|
||||
logger.debug("[Feishu onboard] SDK probe failed: %s", exc)
|
||||
return None
|
||||
|
||||
@@ -532,20 +532,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
)
|
||||
await crypto_store.open()
|
||||
|
||||
# Bind the store to the runtime device_id before any
|
||||
# put_account() runs. PgCryptoStore defaults _device_id
|
||||
# to "" and its crypto_account UPSERT never updates the
|
||||
# device_id column on conflict — so once put_account
|
||||
# writes blank, it stays blank forever. That breaks
|
||||
# every downstream device-scoped olm operation: peer
|
||||
# to-device ciphertext can't find our identity key and
|
||||
# no megolm sessions ever land. Setting _device_id here
|
||||
# (in-memory; the on-disk row may not exist yet) makes
|
||||
# the first put_account write the correct value.
|
||||
# DeviceID is a NewType(str) so plain str works at runtime.
|
||||
if client.device_id:
|
||||
await crypto_store.put_device_id(client.device_id)
|
||||
|
||||
crypto_state = _CryptoStateStore(state_store, self._joined_rooms)
|
||||
olm = OlmMachine(client, crypto_store, crypto_state)
|
||||
|
||||
|
||||
@@ -26,8 +26,9 @@ from .adapter import ( # noqa: F401
|
||||
# -- Onboard (QR-code scan-to-configure) -----------------------------------
|
||||
from .onboard import ( # noqa: F401
|
||||
BindStatus,
|
||||
create_bind_task,
|
||||
poll_bind_result,
|
||||
build_connect_url,
|
||||
qr_register,
|
||||
)
|
||||
from .crypto import decrypt_secret, generate_bind_key # noqa: F401
|
||||
|
||||
@@ -43,8 +44,9 @@ __all__ = [
|
||||
"_ssrf_redirect_guard",
|
||||
# onboard
|
||||
"BindStatus",
|
||||
"create_bind_task",
|
||||
"poll_bind_result",
|
||||
"build_connect_url",
|
||||
"qr_register",
|
||||
# crypto
|
||||
"decrypt_secret",
|
||||
"generate_bind_key",
|
||||
|
||||
@@ -535,9 +535,6 @@ class QQAdapter(BasePlatformAdapter):
|
||||
quick_disconnect_count = 0
|
||||
else:
|
||||
backoff_idx += 1
|
||||
if backoff_idx >= MAX_RECONNECT_ATTEMPTS:
|
||||
logger.error("[%s] Max reconnect attempts reached (QQCloseError)", self._log_tag)
|
||||
return
|
||||
|
||||
except Exception as exc:
|
||||
if not self._running:
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
"""
|
||||
QQBot scan-to-configure (QR code onboard) module.
|
||||
|
||||
Mirrors the Feishu onboarding pattern: synchronous HTTP + a single public
|
||||
entry-point ``qr_register()`` that handles the full flow (create task →
|
||||
display QR code → poll → decrypt credentials).
|
||||
|
||||
Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to
|
||||
generate a QR-code URL and poll for scan completion. On success the caller
|
||||
receives the bot's *app_id*, *client_secret* (decrypted locally), and the
|
||||
@@ -16,20 +12,18 @@ Reference: https://bot.q.qq.com/wiki/develop/api-v2/
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from enum import IntEnum
|
||||
from typing import Optional, Tuple
|
||||
from typing import Tuple
|
||||
from urllib.parse import quote
|
||||
|
||||
from .constants import (
|
||||
ONBOARD_API_TIMEOUT,
|
||||
ONBOARD_CREATE_PATH,
|
||||
ONBOARD_POLL_INTERVAL,
|
||||
ONBOARD_POLL_PATH,
|
||||
PORTAL_HOST,
|
||||
QR_URL_TEMPLATE,
|
||||
)
|
||||
from .crypto import decrypt_secret, generate_bind_key
|
||||
from .crypto import generate_bind_key
|
||||
from .utils import get_api_headers
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -41,7 +35,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BindStatus(IntEnum):
|
||||
"""Status codes returned by ``_poll_bind_result``."""
|
||||
"""Status codes returned by ``poll_bind_result``."""
|
||||
|
||||
NONE = 0
|
||||
PENDING = 1
|
||||
@@ -50,40 +44,18 @@ class BindStatus(IntEnum):
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# QR rendering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
try:
|
||||
import qrcode as _qrcode_mod
|
||||
except (ImportError, TypeError):
|
||||
_qrcode_mod = None # type: ignore[assignment]
|
||||
|
||||
|
||||
def _render_qr(url: str) -> bool:
|
||||
"""Try to render a QR code in the terminal. Returns True if successful."""
|
||||
if _qrcode_mod is None:
|
||||
return False
|
||||
try:
|
||||
qr = _qrcode_mod.QRCode(
|
||||
error_correction=_qrcode_mod.constants.ERROR_CORRECT_M,
|
||||
border=2,
|
||||
)
|
||||
qr.add_data(url)
|
||||
qr.make(fit=True)
|
||||
qr.print_ascii(invert=True)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Synchronous HTTP helpers (mirrors Feishu _post_registration pattern)
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
|
||||
async def create_bind_task(
|
||||
timeout: float = ONBOARD_API_TIMEOUT,
|
||||
) -> Tuple[str, str]:
|
||||
"""Create a bind task and return *(task_id, aes_key_base64)*.
|
||||
|
||||
The AES key is generated locally and sent to the server so it can
|
||||
encrypt the bot credentials before returning them.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the API returns a non-zero ``retcode``.
|
||||
"""
|
||||
@@ -92,8 +64,8 @@ def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
|
||||
url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}"
|
||||
key = generate_bind_key()
|
||||
|
||||
with httpx.Client(timeout=timeout, follow_redirects=True) as client:
|
||||
resp = client.post(url, json={"key": key}, headers=get_api_headers())
|
||||
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
|
||||
resp = await client.post(url, json={"key": key}, headers=get_api_headers())
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
@@ -108,7 +80,7 @@ def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
|
||||
return task_id, key
|
||||
|
||||
|
||||
def _poll_bind_result(
|
||||
async def poll_bind_result(
|
||||
task_id: str,
|
||||
timeout: float = ONBOARD_API_TIMEOUT,
|
||||
) -> Tuple[BindStatus, str, str, str]:
|
||||
@@ -117,6 +89,12 @@ def _poll_bind_result(
|
||||
Returns:
|
||||
A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``.
|
||||
|
||||
* ``bot_encrypt_secret`` is AES-256-GCM encrypted — decrypt it with
|
||||
:func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the
|
||||
key from :func:`create_bind_task`.
|
||||
* ``user_openid`` is the OpenID of the person who scanned the code
|
||||
(available when ``status == COMPLETED``).
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the API returns a non-zero ``retcode``.
|
||||
"""
|
||||
@@ -124,8 +102,8 @@ def _poll_bind_result(
|
||||
|
||||
url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}"
|
||||
|
||||
with httpx.Client(timeout=timeout, follow_redirects=True) as client:
|
||||
resp = client.post(url, json={"task_id": task_id}, headers=get_api_headers())
|
||||
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
|
||||
resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers())
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
@@ -144,77 +122,3 @@ def _poll_bind_result(
|
||||
def build_connect_url(task_id: str) -> str:
|
||||
"""Build the QR-code target URL for a given *task_id*."""
|
||||
return QR_URL_TEMPLATE.format(task_id=quote(task_id))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public entry-point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_MAX_REFRESHES = 3
|
||||
|
||||
|
||||
def qr_register(timeout_seconds: int = 600) -> Optional[dict]:
|
||||
"""Run the QQBot scan-to-configure QR registration flow.
|
||||
|
||||
Mirrors ``feishu.qr_register()``: handles create → display → poll →
|
||||
decrypt in one call. Unexpected errors propagate to the caller.
|
||||
|
||||
:returns:
|
||||
``{"app_id": ..., "client_secret": ..., "user_openid": ...}`` on
|
||||
success, or ``None`` on failure / expiry / cancellation.
|
||||
"""
|
||||
deadline = time.monotonic() + timeout_seconds
|
||||
|
||||
for refresh_count in range(_MAX_REFRESHES + 1):
|
||||
# ── Create bind task ──
|
||||
try:
|
||||
task_id, aes_key = _create_bind_task()
|
||||
except Exception as exc:
|
||||
logger.warning("[QQBot onboard] Failed to create bind task: %s", exc)
|
||||
return None
|
||||
|
||||
url = build_connect_url(task_id)
|
||||
|
||||
# ── Display QR code + URL ──
|
||||
print()
|
||||
if _render_qr(url):
|
||||
print(f" Scan the QR code above, or open this URL directly:\n {url}")
|
||||
else:
|
||||
print(f" Open this URL in QQ on your phone:\n {url}")
|
||||
print(" Tip: pip install qrcode to display a scannable QR code here")
|
||||
print()
|
||||
|
||||
# ── Poll loop ──
|
||||
while time.monotonic() < deadline:
|
||||
try:
|
||||
status, app_id, encrypted_secret, user_openid = _poll_bind_result(task_id)
|
||||
except Exception:
|
||||
time.sleep(ONBOARD_POLL_INTERVAL)
|
||||
continue
|
||||
|
||||
if status == BindStatus.COMPLETED:
|
||||
client_secret = decrypt_secret(encrypted_secret, aes_key)
|
||||
print()
|
||||
print(f" QR scan complete! (App ID: {app_id})")
|
||||
if user_openid:
|
||||
print(f" Scanner's OpenID: {user_openid}")
|
||||
return {
|
||||
"app_id": app_id,
|
||||
"client_secret": client_secret,
|
||||
"user_openid": user_openid,
|
||||
}
|
||||
|
||||
if status == BindStatus.EXPIRED:
|
||||
if refresh_count >= _MAX_REFRESHES:
|
||||
logger.warning("[QQBot onboard] QR code expired %d times — giving up", _MAX_REFRESHES)
|
||||
return None
|
||||
print(f"\n QR code expired, refreshing... ({refresh_count + 1}/{_MAX_REFRESHES})")
|
||||
break # next for-loop iteration creates a new task
|
||||
|
||||
time.sleep(ONBOARD_POLL_INTERVAL)
|
||||
else:
|
||||
# deadline reached without completing
|
||||
logger.warning("[QQBot onboard] Poll timed out after %ds", timeout_seconds)
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
@@ -38,7 +38,6 @@ from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
ProcessingOutcome,
|
||||
SendResult,
|
||||
SUPPORTED_DOCUMENT_TYPES,
|
||||
safe_url_for_log,
|
||||
@@ -114,11 +113,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
# Cache for _fetch_thread_context results: cache_key → _ThreadContextCache
|
||||
self._thread_context_cache: Dict[str, _ThreadContextCache] = {}
|
||||
self._THREAD_CACHE_TTL = 60.0
|
||||
# Track message IDs that should get reaction lifecycle (DMs / @mentions).
|
||||
self._reacting_message_ids: set = set()
|
||||
# Track active assistant thread status indicators so stop_typing can
|
||||
# clear them (chat_id → thread_ts).
|
||||
self._active_status_threads: Dict[str, str] = {}
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Slack via Socket Mode."""
|
||||
@@ -368,7 +362,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
if not thread_ts:
|
||||
return # Can only set status in a thread context
|
||||
|
||||
self._active_status_threads[chat_id] = thread_ts
|
||||
try:
|
||||
await self._get_client(chat_id).assistant_threads_setStatus(
|
||||
channel_id=chat_id,
|
||||
@@ -380,22 +373,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
# in an assistant-enabled context. Falls back to reactions.
|
||||
logger.debug("[Slack] assistant.threads.setStatus failed: %s", e)
|
||||
|
||||
async def stop_typing(self, chat_id: str) -> None:
|
||||
"""Clear the assistant thread status indicator."""
|
||||
if not self._app:
|
||||
return
|
||||
thread_ts = self._active_status_threads.pop(chat_id, None)
|
||||
if not thread_ts:
|
||||
return
|
||||
try:
|
||||
await self._get_client(chat_id).assistant_threads_setStatus(
|
||||
channel_id=chat_id,
|
||||
thread_ts=thread_ts,
|
||||
status="",
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("[Slack] assistant.threads.setStatus clear failed: %s", e)
|
||||
|
||||
def _dm_top_level_threads_as_sessions(self) -> bool:
|
||||
"""Whether top-level Slack DMs get per-message session threads.
|
||||
|
||||
@@ -607,38 +584,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
logger.debug("[Slack] reactions.remove failed (%s): %s", emoji, e)
|
||||
return False
|
||||
|
||||
def _reactions_enabled(self) -> bool:
|
||||
"""Check if message reactions are enabled via config/env."""
|
||||
return os.getenv("SLACK_REACTIONS", "true").lower() not in ("false", "0", "no")
|
||||
|
||||
async def on_processing_start(self, event: MessageEvent) -> None:
|
||||
"""Add an in-progress reaction when message processing begins."""
|
||||
if not self._reactions_enabled():
|
||||
return
|
||||
ts = getattr(event, "message_id", None)
|
||||
if not ts or ts not in self._reacting_message_ids:
|
||||
return
|
||||
channel_id = getattr(event.source, "chat_id", None)
|
||||
if channel_id:
|
||||
await self._add_reaction(channel_id, ts, "eyes")
|
||||
|
||||
async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None:
|
||||
"""Swap the in-progress reaction for a final success/failure reaction."""
|
||||
if not self._reactions_enabled():
|
||||
return
|
||||
ts = getattr(event, "message_id", None)
|
||||
if not ts or ts not in self._reacting_message_ids:
|
||||
return
|
||||
self._reacting_message_ids.discard(ts)
|
||||
channel_id = getattr(event.source, "chat_id", None)
|
||||
if not channel_id:
|
||||
return
|
||||
await self._remove_reaction(channel_id, ts, "eyes")
|
||||
if outcome == ProcessingOutcome.SUCCESS:
|
||||
await self._add_reaction(channel_id, ts, "white_check_mark")
|
||||
elif outcome == ProcessingOutcome.FAILURE:
|
||||
await self._add_reaction(channel_id, ts, "x")
|
||||
|
||||
# ----- User identity resolution -----
|
||||
|
||||
async def _resolve_user_name(self, user_id: str, chat_id: str = "") -> str:
|
||||
@@ -1268,12 +1213,17 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
# Only react when bot is directly addressed (DM or @mention).
|
||||
# In listen-all channels (require_mention=false), reacting to every
|
||||
# casual message would be noisy.
|
||||
_should_react = (is_dm or is_mentioned) and self._reactions_enabled()
|
||||
_should_react = is_dm or is_mentioned
|
||||
|
||||
if _should_react:
|
||||
self._reacting_message_ids.add(ts)
|
||||
await self._add_reaction(channel_id, ts, "eyes")
|
||||
|
||||
await self.handle_message(msg_event)
|
||||
|
||||
if _should_react:
|
||||
await self._remove_reaction(channel_id, ts, "eyes")
|
||||
await self._add_reaction(channel_id, ts, "white_check_mark")
|
||||
|
||||
# ----- Approval button support (Block Kit) -----
|
||||
|
||||
async def send_exec_approval(
|
||||
|
||||
@@ -703,6 +703,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
"write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
|
||||
}
|
||||
|
||||
proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
|
||||
disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
|
||||
fallback_ips = self._fallback_ips()
|
||||
if not fallback_ips:
|
||||
@@ -713,8 +714,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
", ".join(fallback_ips),
|
||||
)
|
||||
|
||||
proxy_targets = ["api.telegram.org", *fallback_ips]
|
||||
proxy_url = resolve_proxy_url("TELEGRAM_PROXY", target_hosts=proxy_targets)
|
||||
if fallback_ips and not proxy_url and not disable_fallback:
|
||||
logger.info(
|
||||
"[%s] Telegram fallback IPs active: %s",
|
||||
@@ -795,28 +794,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
# Telegram pushes updates to our HTTP endpoint. This
|
||||
# enables cloud platforms (Fly.io, Railway) to auto-wake
|
||||
# suspended machines on inbound HTTP traffic.
|
||||
#
|
||||
# SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it,
|
||||
# python-telegram-bot passes secret_token=None and the
|
||||
# webhook endpoint accepts any HTTP POST — attackers can
|
||||
# inject forged updates as if from Telegram. Refuse to
|
||||
# start rather than silently run in fail-open mode.
|
||||
# See GHSA-3vpc-7q5r-276h.
|
||||
webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443"))
|
||||
webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip()
|
||||
if not webhook_secret:
|
||||
raise RuntimeError(
|
||||
"TELEGRAM_WEBHOOK_SECRET is required when "
|
||||
"TELEGRAM_WEBHOOK_URL is set. Without it, the "
|
||||
"webhook endpoint accepts forged updates from "
|
||||
"anyone who can reach it — see "
|
||||
"https://github.com/NousResearch/hermes-agent/"
|
||||
"security/advisories/GHSA-3vpc-7q5r-276h.\n\n"
|
||||
"Generate a secret and set it in your .env:\n"
|
||||
" export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n"
|
||||
"Then register it with Telegram when setting the "
|
||||
"webhook via setWebhook's secret_token parameter."
|
||||
)
|
||||
webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None
|
||||
from urllib.parse import urlparse
|
||||
webhook_path = urlparse(webhook_url).path or "/telegram"
|
||||
|
||||
|
||||
@@ -43,10 +43,10 @@ _DOH_PROVIDERS: list[dict] = [
|
||||
_SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]
|
||||
|
||||
|
||||
def _resolve_proxy_url(target_hosts=None) -> str | None:
|
||||
def _resolve_proxy_url() -> str | None:
|
||||
# Delegate to shared implementation (env vars + macOS system proxy detection)
|
||||
from gateway.platforms.base import resolve_proxy_url
|
||||
return resolve_proxy_url("TELEGRAM_PROXY", target_hosts=target_hosts)
|
||||
return resolve_proxy_url("TELEGRAM_PROXY")
|
||||
|
||||
|
||||
class TelegramFallbackTransport(httpx.AsyncBaseTransport):
|
||||
@@ -60,7 +60,7 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):
|
||||
|
||||
def __init__(self, fallback_ips: Iterable[str], **transport_kwargs):
|
||||
self._fallback_ips = [ip for ip in dict.fromkeys(_normalize_fallback_ips(fallback_ips))]
|
||||
proxy_url = _resolve_proxy_url(target_hosts=[_TELEGRAM_API_HOST, *self._fallback_ips])
|
||||
proxy_url = _resolve_proxy_url()
|
||||
if proxy_url and "proxy" not in transport_kwargs:
|
||||
transport_kwargs["proxy"] = proxy_url
|
||||
self._primary = httpx.AsyncHTTPTransport(**transport_kwargs)
|
||||
|
||||
@@ -508,11 +508,6 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
self._remember_chat_req_id(chat_id, self._payload_req_id(payload))
|
||||
|
||||
text, reply_text = self._extract_text(body)
|
||||
# Strip leading @mention in group chats so slash commands like
|
||||
# "@BotName /approve" are correctly recognized as "/approve".
|
||||
# Mirrors what the Telegram adapter does (re.sub @botname).
|
||||
if is_group and text:
|
||||
text = re.sub(r"^@\S+\s*", "", text).strip()
|
||||
media_urls, media_types = await self._extract_media(body)
|
||||
message_type = self._derive_message_type(body, text, media_types)
|
||||
has_reply_context = bool(reply_text and (text or media_urls))
|
||||
@@ -1469,134 +1464,3 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
"name": chat_id,
|
||||
"type": "group" if chat_id and chat_id.lower().startswith("group") else "dm",
|
||||
}
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# QR code scan flow for obtaining bot credentials
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
_QR_GENERATE_URL = "https://work.weixin.qq.com/ai/qc/generate"
|
||||
_QR_QUERY_URL = "https://work.weixin.qq.com/ai/qc/query_result"
|
||||
_QR_CODE_PAGE = "https://work.weixin.qq.com/ai/qc/gen?source=hermes&scode="
|
||||
_QR_POLL_INTERVAL = 3 # seconds
|
||||
_QR_POLL_TIMEOUT = 300 # 5 minutes
|
||||
|
||||
|
||||
def qr_scan_for_bot_info(
|
||||
*,
|
||||
timeout_seconds: int = _QR_POLL_TIMEOUT,
|
||||
) -> Optional[Dict[str, str]]:
|
||||
"""Run the WeCom QR scan flow to obtain bot_id and secret.
|
||||
|
||||
Fetches a QR code from WeCom, renders it in the terminal, and polls
|
||||
until the user scans it or the timeout expires.
|
||||
|
||||
Returns ``{"bot_id": ..., "secret": ...}`` on success, ``None`` on
|
||||
failure or timeout.
|
||||
|
||||
Note: the ``work.weixin.qq.com/ai/qc/{generate,query_result}`` endpoints
|
||||
used here are not part of WeCom's public developer API — they back the
|
||||
admin-console web UI's bot-creation flow and may change without notice.
|
||||
The same pattern is used by the feishu/dingtalk QR setup wizards.
|
||||
"""
|
||||
try:
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
except ImportError: # pragma: no cover
|
||||
logger.error("urllib is required for WeCom QR scan")
|
||||
return None
|
||||
|
||||
generate_url = f"{_QR_GENERATE_URL}?source=hermes"
|
||||
|
||||
# ── Step 1: Fetch QR code ──
|
||||
print(" Connecting to WeCom...", end="", flush=True)
|
||||
try:
|
||||
req = urllib.request.Request(generate_url, headers={"User-Agent": "HermesAgent/1.0"})
|
||||
with urllib.request.urlopen(req, timeout=15) as resp:
|
||||
raw = json.loads(resp.read().decode("utf-8"))
|
||||
except Exception as exc:
|
||||
logger.error("WeCom QR: failed to fetch QR code: %s", exc)
|
||||
print(f" failed: {exc}")
|
||||
return None
|
||||
|
||||
data = raw.get("data") or {}
|
||||
scode = str(data.get("scode") or "").strip()
|
||||
auth_url = str(data.get("auth_url") or "").strip()
|
||||
|
||||
if not scode or not auth_url:
|
||||
logger.error("WeCom QR: unexpected response format: %s", raw)
|
||||
print(" failed: unexpected response format")
|
||||
return None
|
||||
|
||||
print(" done.")
|
||||
|
||||
# ── Step 2: Render QR code in terminal ──
|
||||
print()
|
||||
qr_rendered = False
|
||||
try:
|
||||
import qrcode as _qrcode
|
||||
qr = _qrcode.QRCode()
|
||||
qr.add_data(auth_url)
|
||||
qr.make(fit=True)
|
||||
qr.print_ascii(invert=True)
|
||||
qr_rendered = True
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
page_url = f"{_QR_CODE_PAGE}{urllib.parse.quote(scode)}"
|
||||
if qr_rendered:
|
||||
print(f"\n Scan the QR code above, or open this URL directly:\n {page_url}")
|
||||
else:
|
||||
print(f" Open this URL in WeCom on your phone:\n\n {page_url}\n")
|
||||
print(" Tip: pip install qrcode to display a scannable QR code here next time")
|
||||
print()
|
||||
print(" Fetching configuration results...", end="", flush=True)
|
||||
|
||||
# ── Step 3: Poll for result ──
|
||||
import time
|
||||
deadline = time.time() + timeout_seconds
|
||||
query_url = f"{_QR_QUERY_URL}?scode={urllib.parse.quote(scode)}"
|
||||
poll_count = 0
|
||||
|
||||
while time.time() < deadline:
|
||||
try:
|
||||
req = urllib.request.Request(query_url, headers={"User-Agent": "HermesAgent/1.0"})
|
||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||
result = json.loads(resp.read().decode("utf-8"))
|
||||
except Exception as exc:
|
||||
logger.debug("WeCom QR poll error: %s", exc)
|
||||
time.sleep(_QR_POLL_INTERVAL)
|
||||
continue
|
||||
|
||||
poll_count += 1
|
||||
# Print a dot on every poll so progress is visible within 3s.
|
||||
print(".", end="", flush=True)
|
||||
|
||||
result_data = result.get("data") or {}
|
||||
status = str(result_data.get("status") or "").lower()
|
||||
|
||||
if status == "success":
|
||||
print() # newline after "Fetching configuration results..." dots
|
||||
bot_info = result_data.get("bot_info") or {}
|
||||
bot_id = str(bot_info.get("botid") or bot_info.get("bot_id") or "").strip()
|
||||
secret = str(bot_info.get("secret") or "").strip()
|
||||
if bot_id and secret:
|
||||
return {"bot_id": bot_id, "secret": secret}
|
||||
logger.warning(
|
||||
"WeCom QR: scan reported success but bot_info missing or incomplete: %s",
|
||||
result_data,
|
||||
)
|
||||
print(
|
||||
" QR scan reported success but no bot credentials were returned.\n"
|
||||
" This usually means the bot was not actually created on the WeCom side.\n"
|
||||
" Falling back to manual credential entry."
|
||||
)
|
||||
return None
|
||||
|
||||
time.sleep(_QR_POLL_INTERVAL)
|
||||
|
||||
print() # newline after dots
|
||||
print(f" QR scan timed out ({timeout_seconds // 60} minutes). Please try again.")
|
||||
return None
|
||||
|
||||
+624
-1096
File diff suppressed because it is too large
Load Diff
+17
-105
@@ -60,10 +60,6 @@ from .config import (
|
||||
SessionResetPolicy, # noqa: F401 — re-exported via gateway/__init__.py
|
||||
HomeChannel,
|
||||
)
|
||||
from .whatsapp_identity import (
|
||||
canonical_whatsapp_identifier,
|
||||
normalize_whatsapp_identifier,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -84,12 +80,9 @@ class SessionSource:
|
||||
user_name: Optional[str] = None
|
||||
thread_id: Optional[str] = None # For forum topics, Discord threads, etc.
|
||||
chat_topic: Optional[str] = None # Channel topic/description (Discord, Slack)
|
||||
user_id_alt: Optional[str] = None # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
|
||||
user_id_alt: Optional[str] = None # Signal UUID (alternative to phone number)
|
||||
chat_id_alt: Optional[str] = None # Signal group internal ID
|
||||
is_bot: bool = False # True when the message author is a bot/webhook (Discord)
|
||||
guild_id: Optional[str] = None # Discord guild / Slack workspace / Matrix server scope
|
||||
parent_chat_id: Optional[str] = None # Parent channel when chat_id refers to a thread
|
||||
message_id: Optional[str] = None # ID of the triggering message (for pin/reply/react)
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
@@ -127,14 +120,8 @@ class SessionSource:
|
||||
d["user_id_alt"] = self.user_id_alt
|
||||
if self.chat_id_alt:
|
||||
d["chat_id_alt"] = self.chat_id_alt
|
||||
if self.guild_id:
|
||||
d["guild_id"] = self.guild_id
|
||||
if self.parent_chat_id:
|
||||
d["parent_chat_id"] = self.parent_chat_id
|
||||
if self.message_id:
|
||||
d["message_id"] = self.message_id
|
||||
return d
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
|
||||
return cls(
|
||||
@@ -148,9 +135,6 @@ class SessionSource:
|
||||
chat_topic=data.get("chat_topic"),
|
||||
user_id_alt=data.get("user_id_alt"),
|
||||
chat_id_alt=data.get("chat_id_alt"),
|
||||
guild_id=data.get("guild_id"),
|
||||
parent_chat_id=data.get("parent_chat_id"),
|
||||
message_id=data.get("message_id"),
|
||||
)
|
||||
|
||||
|
||||
@@ -202,31 +186,6 @@ that requires raw IDs). Discord is excluded because mentions use ``<@user_id>``
|
||||
and the LLM needs the real ID to tag users."""
|
||||
|
||||
|
||||
def _discord_tools_loaded() -> bool:
|
||||
"""True iff the agent will actually have Discord tools this session.
|
||||
|
||||
Two conditions must hold:
|
||||
1. The `discord` or `discord_admin` toolset is enabled for the
|
||||
Discord platform via `hermes tools` (opt-in, default OFF).
|
||||
2. `DISCORD_BOT_TOKEN` is set — the tool's `check_fn` gates on it
|
||||
at registry time, so the toolset being enabled in config is not
|
||||
enough if the token isn't configured.
|
||||
|
||||
Returns False (safe default — keeps the stale-API disclaimer) on any
|
||||
error so a bad config can't silently promise tools the agent lacks.
|
||||
"""
|
||||
if not (os.environ.get("DISCORD_BOT_TOKEN") or "").strip():
|
||||
return False
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.tools_config import _get_platform_tools
|
||||
cfg = load_config()
|
||||
enabled = _get_platform_tools(cfg, "discord", include_default_mcp_servers=False)
|
||||
return "discord" in enabled or "discord_admin" in enabled
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def build_session_context_prompt(
|
||||
context: SessionContext,
|
||||
*,
|
||||
@@ -314,44 +273,13 @@ def build_session_context_prompt(
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
elif context.source.platform == Platform.DISCORD:
|
||||
# Inject the Discord IDs block only when the agent actually has
|
||||
# Discord tools loaded this session — i.e. the user opted into
|
||||
# `discord` / `discord_admin` via `hermes tools` AND the bot
|
||||
# token is configured. Otherwise keep the stale-API disclaimer
|
||||
# honest so we never promise tools the agent lacks.
|
||||
if _discord_tools_loaded():
|
||||
src = context.source
|
||||
id_lines = ["", "**Discord IDs (for the `discord` / `discord_admin` tools):**"]
|
||||
if src.guild_id:
|
||||
id_lines.append(f" - Guild: `{src.guild_id}`")
|
||||
if src.thread_id and src.parent_chat_id:
|
||||
id_lines.append(f" - Parent channel: `{src.parent_chat_id}`")
|
||||
id_lines.append(f" - Thread: `{src.thread_id}` (use as `channel_id` for fetch_messages etc.)")
|
||||
else:
|
||||
id_lines.append(f" - Channel: `{src.chat_id}`")
|
||||
if src.message_id:
|
||||
id_lines.append(f" - Triggering message: `{src.message_id}`")
|
||||
lines.extend(id_lines)
|
||||
else:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"**Platform notes:** You are running inside Discord. "
|
||||
"You do NOT have access to Discord-specific APIs — you cannot search "
|
||||
"channel history, pin messages, manage roles, or list server members. "
|
||||
"Do not promise to perform these actions. If the user asks, explain "
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
elif context.source.platform == Platform.BLUEBUBBLES:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"**Platform notes:** You are responding via iMessage. "
|
||||
"Keep responses short and conversational — think texts, not essays. "
|
||||
"Structure longer replies as separate short thoughts, each separated "
|
||||
"by a blank line (double newline). Each block between blank lines "
|
||||
"will be delivered as its own iMessage bubble, so write accordingly: "
|
||||
"one idea per bubble, 1–3 sentences each. "
|
||||
"If the user needs a detailed answer, give the short version first "
|
||||
"and offer to elaborate."
|
||||
"**Platform notes:** You are running inside Discord. "
|
||||
"You do NOT have access to Discord-specific APIs — you cannot search "
|
||||
"channel history, pin messages, manage roles, or list server members. "
|
||||
"Do not promise to perform these actions. If the user asks, explain "
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
|
||||
# Connected platforms
|
||||
@@ -439,11 +367,11 @@ class SessionEntry:
|
||||
auto_reset_reason: Optional[str] = None # "idle" or "daily"
|
||||
reset_had_activity: bool = False # whether the expired session had any messages
|
||||
|
||||
# Set by the background expiry watcher after it finalizes an expired
|
||||
# session (invoking on_session_finalize hooks and evicting the cached
|
||||
# agent). Persisted to sessions.json so the flag survives gateway
|
||||
# restarts — prevents redundant finalization runs.
|
||||
expiry_finalized: bool = False
|
||||
# Set by the background expiry watcher after it successfully flushes
|
||||
# memories for this session. Persisted to sessions.json so the flag
|
||||
# survives gateway restarts (the old in-memory _pre_flushed_sessions
|
||||
# set was lost on restart, causing redundant re-flushes).
|
||||
memory_flushed: bool = False
|
||||
|
||||
# When True the next call to get_or_create_session() will auto-reset
|
||||
# this session (create a new session_id) so the user starts fresh.
|
||||
@@ -479,7 +407,7 @@ class SessionEntry:
|
||||
"last_prompt_tokens": self.last_prompt_tokens,
|
||||
"estimated_cost_usd": self.estimated_cost_usd,
|
||||
"cost_status": self.cost_status,
|
||||
"expiry_finalized": self.expiry_finalized,
|
||||
"memory_flushed": self.memory_flushed,
|
||||
"suspended": self.suspended,
|
||||
"resume_pending": self.resume_pending,
|
||||
"resume_reason": self.resume_reason,
|
||||
@@ -531,7 +459,7 @@ class SessionEntry:
|
||||
last_prompt_tokens=data.get("last_prompt_tokens", 0),
|
||||
estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
|
||||
cost_status=data.get("cost_status", "unknown"),
|
||||
expiry_finalized=data.get("expiry_finalized", data.get("memory_flushed", False)),
|
||||
memory_flushed=data.get("memory_flushed", False),
|
||||
suspended=data.get("suspended", False),
|
||||
resume_pending=data.get("resume_pending", False),
|
||||
resume_reason=data.get("resume_reason"),
|
||||
@@ -590,24 +518,15 @@ def build_session_key(
|
||||
"""
|
||||
platform = source.platform.value
|
||||
if source.chat_type == "dm":
|
||||
dm_chat_id = source.chat_id
|
||||
if source.platform == Platform.WHATSAPP:
|
||||
dm_chat_id = canonical_whatsapp_identifier(source.chat_id)
|
||||
|
||||
if dm_chat_id:
|
||||
if source.chat_id:
|
||||
if source.thread_id:
|
||||
return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm:{dm_chat_id}"
|
||||
return f"agent:main:{platform}:dm:{source.chat_id}:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm:{source.chat_id}"
|
||||
if source.thread_id:
|
||||
return f"agent:main:{platform}:dm:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm"
|
||||
|
||||
participant_id = source.user_id_alt or source.user_id
|
||||
if participant_id and source.platform == Platform.WHATSAPP:
|
||||
# Same JID/LID-flip bug as the DM case: without canonicalisation, a
|
||||
# single group member gets two isolated per-user sessions when the
|
||||
# bridge reshuffles alias forms.
|
||||
participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
|
||||
key_parts = ["agent:main", platform, source.chat_type]
|
||||
|
||||
if source.chat_id:
|
||||
@@ -1228,11 +1147,6 @@ class SessionStore:
|
||||
tool_name=message.get("tool_name"),
|
||||
tool_calls=message.get("tool_calls"),
|
||||
tool_call_id=message.get("tool_call_id"),
|
||||
reasoning=message.get("reasoning") if message.get("role") == "assistant" else None,
|
||||
reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
|
||||
reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
|
||||
codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
|
||||
codex_message_items=message.get("codex_message_items") if message.get("role") == "assistant" else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Session DB operation failed: %s", e)
|
||||
@@ -1262,10 +1176,8 @@ class SessionStore:
|
||||
tool_calls=msg.get("tool_calls"),
|
||||
tool_call_id=msg.get("tool_call_id"),
|
||||
reasoning=msg.get("reasoning") if role == "assistant" else None,
|
||||
reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
|
||||
reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
|
||||
codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
|
||||
codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to rewrite transcript in DB: %s", e)
|
||||
|
||||
+30
-208
@@ -22,18 +22,11 @@ from pathlib import Path
|
||||
from hermes_constants import get_hermes_home
|
||||
from typing import Any, Optional
|
||||
|
||||
if sys.platform == "win32":
|
||||
import msvcrt
|
||||
else:
|
||||
import fcntl
|
||||
|
||||
_GATEWAY_KIND = "hermes-gateway"
|
||||
_RUNTIME_STATUS_FILE = "gateway_state.json"
|
||||
_LOCKS_DIRNAME = "gateway-locks"
|
||||
_IS_WINDOWS = sys.platform == "win32"
|
||||
_UNSET = object()
|
||||
_GATEWAY_LOCK_FILENAME = "gateway.lock"
|
||||
_gateway_lock_handle = None
|
||||
|
||||
|
||||
def _get_pid_path() -> Path:
|
||||
@@ -42,14 +35,6 @@ def _get_pid_path() -> Path:
|
||||
return home / "gateway.pid"
|
||||
|
||||
|
||||
def _get_gateway_lock_path(pid_path: Optional[Path] = None) -> Path:
|
||||
"""Return the path to the runtime gateway lock file."""
|
||||
if pid_path is not None:
|
||||
return pid_path.with_name(_GATEWAY_LOCK_FILENAME)
|
||||
home = get_hermes_home()
|
||||
return home / _GATEWAY_LOCK_FILENAME
|
||||
|
||||
|
||||
def _get_runtime_status_path() -> Path:
|
||||
"""Return the persisted runtime health/status file path."""
|
||||
return _get_pid_path().with_name(_RUNTIME_STATUS_FILE)
|
||||
@@ -113,11 +98,6 @@ def _get_process_start_time(pid: int) -> Optional[int]:
|
||||
return None
|
||||
|
||||
|
||||
def get_process_start_time(pid: int) -> Optional[int]:
|
||||
"""Public wrapper for retrieving a process start time when available."""
|
||||
return _get_process_start_time(pid)
|
||||
|
||||
|
||||
def _read_process_cmdline(pid: int) -> Optional[str]:
|
||||
"""Return the process command line as a space-separated string."""
|
||||
cmdline_path = Path(f"/proc/{pid}/cmdline")
|
||||
@@ -141,7 +121,6 @@ def _looks_like_gateway_process(pid: int) -> bool:
|
||||
"hermes_cli.main gateway",
|
||||
"hermes_cli/main.py gateway",
|
||||
"hermes gateway",
|
||||
"hermes-gateway",
|
||||
"gateway/run.py",
|
||||
)
|
||||
return any(pattern in cmdline for pattern in patterns)
|
||||
@@ -233,135 +212,16 @@ def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
|
||||
return None
|
||||
|
||||
|
||||
def _read_gateway_lock_record(lock_path: Optional[Path] = None) -> Optional[dict[str, Any]]:
|
||||
return _read_pid_record(lock_path or _get_gateway_lock_path())
|
||||
|
||||
|
||||
def _pid_from_record(record: Optional[dict[str, Any]]) -> Optional[int]:
|
||||
if not record:
|
||||
return None
|
||||
try:
|
||||
return int(record["pid"])
|
||||
except (KeyError, TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:
|
||||
"""Delete a stale gateway PID file (and its sibling lock metadata).
|
||||
|
||||
Called from ``get_running_pid()`` after the runtime lock has already been
|
||||
confirmed inactive, so the on-disk metadata is known to belong to a dead
|
||||
process. Unlike ``remove_pid_file()`` (which defensively refuses to delete
|
||||
a PID file whose ``pid`` field differs from ``os.getpid()`` to protect
|
||||
``--replace`` handoffs), this path force-unlinks both files so the next
|
||||
startup sees a clean slate.
|
||||
"""
|
||||
if not cleanup_stale:
|
||||
return
|
||||
try:
|
||||
pid_path.unlink(missing_ok=True)
|
||||
if pid_path == _get_pid_path():
|
||||
remove_pid_file()
|
||||
else:
|
||||
pid_path.unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
_get_gateway_lock_path(pid_path).unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _write_gateway_lock_record(handle) -> None:
|
||||
handle.seek(0)
|
||||
handle.truncate()
|
||||
json.dump(_build_pid_record(), handle)
|
||||
handle.flush()
|
||||
try:
|
||||
os.fsync(handle.fileno())
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def _try_acquire_file_lock(handle) -> bool:
|
||||
try:
|
||||
if _IS_WINDOWS:
|
||||
handle.seek(0, os.SEEK_END)
|
||||
if handle.tell() == 0:
|
||||
handle.write("\n")
|
||||
handle.flush()
|
||||
handle.seek(0)
|
||||
msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
|
||||
else:
|
||||
fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
return True
|
||||
except (BlockingIOError, OSError):
|
||||
return False
|
||||
|
||||
|
||||
def _release_file_lock(handle) -> None:
|
||||
try:
|
||||
if _IS_WINDOWS:
|
||||
handle.seek(0)
|
||||
msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
|
||||
else:
|
||||
fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def acquire_gateway_runtime_lock() -> bool:
|
||||
"""Claim the cross-process runtime lock for the gateway.
|
||||
|
||||
Unlike the PID file, the lock is owned by the live process itself. If the
|
||||
process dies abruptly, the OS releases the lock automatically.
|
||||
"""
|
||||
global _gateway_lock_handle
|
||||
if _gateway_lock_handle is not None:
|
||||
return True
|
||||
|
||||
path = _get_gateway_lock_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
handle = open(path, "a+", encoding="utf-8")
|
||||
if not _try_acquire_file_lock(handle):
|
||||
handle.close()
|
||||
return False
|
||||
_write_gateway_lock_record(handle)
|
||||
_gateway_lock_handle = handle
|
||||
return True
|
||||
|
||||
|
||||
def release_gateway_runtime_lock() -> None:
|
||||
"""Release the gateway runtime lock when owned by this process."""
|
||||
global _gateway_lock_handle
|
||||
handle = _gateway_lock_handle
|
||||
if handle is None:
|
||||
return
|
||||
_gateway_lock_handle = None
|
||||
_release_file_lock(handle)
|
||||
try:
|
||||
handle.close()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def is_gateway_runtime_lock_active(lock_path: Optional[Path] = None) -> bool:
|
||||
"""Return True when some process currently owns the gateway runtime lock."""
|
||||
global _gateway_lock_handle
|
||||
resolved_lock_path = lock_path or _get_gateway_lock_path()
|
||||
if _gateway_lock_handle is not None and resolved_lock_path == _get_gateway_lock_path():
|
||||
return True
|
||||
|
||||
if not resolved_lock_path.exists():
|
||||
return False
|
||||
|
||||
handle = open(resolved_lock_path, "a+", encoding="utf-8")
|
||||
try:
|
||||
if _try_acquire_file_lock(handle):
|
||||
_release_file_lock(handle)
|
||||
return False
|
||||
return True
|
||||
finally:
|
||||
try:
|
||||
handle.close()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def write_pid_file() -> None:
|
||||
@@ -501,8 +361,7 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
|
||||
if not stale:
|
||||
try:
|
||||
os.kill(existing_pid, 0)
|
||||
except (ProcessLookupError, PermissionError, OSError):
|
||||
# Windows raises OSError with WinError 87 for invalid pid check
|
||||
except (ProcessLookupError, PermissionError):
|
||||
stale = True
|
||||
else:
|
||||
current_start = _get_process_start_time(existing_pid)
|
||||
@@ -567,43 +426,17 @@ def release_scoped_lock(scope: str, identity: str) -> None:
|
||||
pass
|
||||
|
||||
|
||||
def release_all_scoped_locks(
|
||||
*,
|
||||
owner_pid: Optional[int] = None,
|
||||
owner_start_time: Optional[int] = None,
|
||||
) -> int:
|
||||
"""Remove scoped lock files in the lock directory.
|
||||
def release_all_scoped_locks() -> int:
|
||||
"""Remove all scoped lock files in the lock directory.
|
||||
|
||||
Called during --replace to clean up stale locks left by stopped/killed
|
||||
gateway processes that did not release their locks gracefully. When an
|
||||
``owner_pid`` is provided, only lock records belonging to that gateway
|
||||
process are removed. ``owner_start_time`` further narrows the match to
|
||||
protect against PID reuse.
|
||||
|
||||
When no owner is provided, preserves the legacy behavior and removes every
|
||||
scoped lock file in the directory.
|
||||
|
||||
gateway processes that did not release their locks gracefully.
|
||||
Returns the number of lock files removed.
|
||||
"""
|
||||
lock_dir = _get_lock_dir()
|
||||
removed = 0
|
||||
if lock_dir.exists():
|
||||
for lock_file in lock_dir.glob("*.lock"):
|
||||
if owner_pid is not None:
|
||||
record = _read_json_file(lock_file)
|
||||
if not isinstance(record, dict):
|
||||
continue
|
||||
try:
|
||||
record_pid = int(record.get("pid"))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if record_pid != owner_pid:
|
||||
continue
|
||||
if (
|
||||
owner_start_time is not None
|
||||
and record.get("start_time") != owner_start_time
|
||||
):
|
||||
continue
|
||||
try:
|
||||
lock_file.unlink(missing_ok=True)
|
||||
removed += 1
|
||||
@@ -750,46 +583,35 @@ def get_running_pid(
|
||||
Cleans up stale PID files automatically.
|
||||
"""
|
||||
resolved_pid_path = pid_path or _get_pid_path()
|
||||
resolved_lock_path = _get_gateway_lock_path(resolved_pid_path)
|
||||
lock_active = is_gateway_runtime_lock_active(resolved_lock_path)
|
||||
if not lock_active:
|
||||
record = _read_pid_record(resolved_pid_path)
|
||||
if not record:
|
||||
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
|
||||
return None
|
||||
|
||||
primary_record = _read_pid_record(resolved_pid_path)
|
||||
fallback_record = _read_gateway_lock_record(resolved_lock_path)
|
||||
try:
|
||||
pid = int(record["pid"])
|
||||
except (KeyError, TypeError, ValueError):
|
||||
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
|
||||
return None
|
||||
|
||||
for record in (primary_record, fallback_record):
|
||||
pid = _pid_from_record(record)
|
||||
if pid is None:
|
||||
continue
|
||||
try:
|
||||
os.kill(pid, 0) # signal 0 = existence check, no actual signal sent
|
||||
except (ProcessLookupError, PermissionError):
|
||||
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
|
||||
return None
|
||||
|
||||
try:
|
||||
os.kill(pid, 0) # signal 0 = existence check, no actual signal sent
|
||||
except ProcessLookupError:
|
||||
continue
|
||||
except PermissionError:
|
||||
# The process exists but belongs to another user/service scope.
|
||||
# With the runtime lock still held, prefer keeping it visible
|
||||
# rather than deleting the PID file as "stale".
|
||||
if _record_looks_like_gateway(record):
|
||||
return pid
|
||||
continue
|
||||
except OSError:
|
||||
# Windows raises OSError with WinError 87 for an invalid pid
|
||||
# (process is definitely gone). Treat as "process doesn't exist".
|
||||
continue
|
||||
recorded_start = record.get("start_time")
|
||||
current_start = _get_process_start_time(pid)
|
||||
if recorded_start is not None and current_start is not None and current_start != recorded_start:
|
||||
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
|
||||
return None
|
||||
|
||||
recorded_start = record.get("start_time")
|
||||
current_start = _get_process_start_time(pid)
|
||||
if recorded_start is not None and current_start is not None and current_start != recorded_start:
|
||||
continue
|
||||
if not _looks_like_gateway_process(pid):
|
||||
if not _record_looks_like_gateway(record):
|
||||
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
|
||||
return None
|
||||
|
||||
if _looks_like_gateway_process(pid) or _record_looks_like_gateway(record):
|
||||
return pid
|
||||
|
||||
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
|
||||
return None
|
||||
return pid
|
||||
|
||||
|
||||
def is_gateway_running(
|
||||
|
||||
@@ -1,135 +0,0 @@
|
||||
"""Shared helpers for canonicalising WhatsApp sender identity.
|
||||
|
||||
WhatsApp's bridge can surface the same human under two different JID shapes
|
||||
within a single conversation:
|
||||
|
||||
- LID form: ``999999999999999@lid``
|
||||
- Phone form: ``15551234567@s.whatsapp.net``
|
||||
|
||||
Both the authorisation path (:mod:`gateway.run`) and the session-key path
|
||||
(:mod:`gateway.session`) need to collapse these aliases to a single stable
|
||||
identity. This module is the single source of truth for that resolution so
|
||||
the two paths can never drift apart.
|
||||
|
||||
Public helpers:
|
||||
|
||||
- :func:`normalize_whatsapp_identifier` — strip JID/LID/device/plus syntax
|
||||
down to the bare numeric identifier.
|
||||
- :func:`canonical_whatsapp_identifier` — walk the bridge's
|
||||
``lid-mapping-*.json`` files and return a stable canonical identity
|
||||
across phone/LID variants.
|
||||
- :func:`expand_whatsapp_aliases` — return the full alias set for an
|
||||
identifier. Used by authorisation code that needs to match any known
|
||||
form of a sender against an allow-list.
|
||||
|
||||
Plugins that need per-sender behaviour on WhatsApp (role-based routing,
|
||||
per-contact authorisation, policy gating in a gateway hook) should use
|
||||
``canonical_whatsapp_identifier`` so their bookkeeping lines up with
|
||||
Hermes' own session keys.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Set
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
def normalize_whatsapp_identifier(value: str) -> str:
|
||||
"""Strip WhatsApp JID/LID syntax down to its stable numeric identifier.
|
||||
|
||||
Accepts any of the identifier shapes the WhatsApp bridge may emit:
|
||||
``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``,
|
||||
``"60123456789@lid"``, or a bare ``"+601****6789"`` / ``"60123456789"``.
|
||||
Returns just the numeric identifier (``"60123456789"``) suitable for
|
||||
equality comparisons.
|
||||
|
||||
Useful for plugins that want to match sender IDs against
|
||||
user-supplied config (phone numbers in ``config.yaml``) without
|
||||
worrying about which variant the bridge happens to deliver.
|
||||
"""
|
||||
return (
|
||||
str(value or "")
|
||||
.strip()
|
||||
.replace("+", "", 1)
|
||||
.split(":", 1)[0]
|
||||
.split("@", 1)[0]
|
||||
)
|
||||
|
||||
|
||||
def expand_whatsapp_aliases(identifier: str) -> Set[str]:
|
||||
"""Resolve WhatsApp phone/LID aliases via bridge session mapping files.
|
||||
|
||||
Returns the set of all identifiers transitively reachable through the
|
||||
bridge's ``$HERMES_HOME/whatsapp/session/lid-mapping-*.json`` files,
|
||||
starting from ``identifier``. The result always includes the
|
||||
normalized input itself, so callers can safely ``in`` check against
|
||||
the return value without a separate fallback branch.
|
||||
|
||||
Returns an empty set if ``identifier`` normalizes to empty.
|
||||
"""
|
||||
normalized = normalize_whatsapp_identifier(identifier)
|
||||
if not normalized:
|
||||
return set()
|
||||
|
||||
session_dir = get_hermes_home() / "whatsapp" / "session"
|
||||
resolved: Set[str] = set()
|
||||
queue = [normalized]
|
||||
|
||||
while queue:
|
||||
current = queue.pop(0)
|
||||
if not current or current in resolved:
|
||||
continue
|
||||
|
||||
resolved.add(current)
|
||||
for suffix in ("", "_reverse"):
|
||||
mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
|
||||
if not mapping_path.exists():
|
||||
continue
|
||||
try:
|
||||
mapped = normalize_whatsapp_identifier(
|
||||
json.loads(mapping_path.read_text(encoding="utf-8"))
|
||||
)
|
||||
except Exception:
|
||||
continue
|
||||
if mapped and mapped not in resolved:
|
||||
queue.append(mapped)
|
||||
|
||||
return resolved
|
||||
|
||||
|
||||
def canonical_whatsapp_identifier(identifier: str) -> str:
|
||||
"""Return a stable WhatsApp sender identity across phone-JID/LID variants.
|
||||
|
||||
WhatsApp may surface the same person under either a phone-format JID
|
||||
(``60123456789@s.whatsapp.net``) or a LID (``1234567890@lid``). This
|
||||
applies to a DM ``chat_id`` *and* to the ``participant_id`` of a
|
||||
member inside a group chat — both represent a user identity, and the
|
||||
bridge may flip between the two for the same human.
|
||||
|
||||
This helper reads the bridge's ``whatsapp/session/lid-mapping-*.json``
|
||||
files, walks the mapping transitively, and picks the shortest
|
||||
(numeric-preferred) alias as the canonical identity.
|
||||
:func:`gateway.session.build_session_key` uses this for both WhatsApp
|
||||
DM chat_ids and WhatsApp group participant_ids, so callers get the
|
||||
same session-key identity Hermes itself uses.
|
||||
|
||||
Plugins that need per-sender behaviour (role-based routing,
|
||||
authorisation, per-contact policy) should use this so their
|
||||
bookkeeping lines up with Hermes' session bookkeeping even when
|
||||
the bridge reshuffles aliases.
|
||||
|
||||
Returns an empty string if ``identifier`` normalizes to empty. If no
|
||||
mapping files exist yet (fresh bridge install), returns the
|
||||
normalized input unchanged.
|
||||
"""
|
||||
normalized = normalize_whatsapp_identifier(identifier)
|
||||
if not normalized:
|
||||
return ""
|
||||
|
||||
# expand_whatsapp_aliases always includes `normalized` itself in the
|
||||
# returned set, so the min() below degrades gracefully to `normalized`
|
||||
# when no lid-mapping files are present.
|
||||
aliases = expand_whatsapp_aliases(normalized)
|
||||
return min(aliases, key=lambda candidate: (len(candidate), candidate))
|
||||
@@ -11,5 +11,5 @@ Provides subcommands for:
|
||||
- hermes cron - Manage cron jobs
|
||||
"""
|
||||
|
||||
__version__ = "0.11.0"
|
||||
__release_date__ = "2026.4.23"
|
||||
__version__ = "0.10.0"
|
||||
__release_date__ = "2026.4.16"
|
||||
|
||||
+107
-976
File diff suppressed because it is too large
Load Diff
@@ -110,40 +110,18 @@ def _display_source(source: str) -> str:
|
||||
return source.split(":", 1)[1] if source.startswith("manual:") else source
|
||||
|
||||
|
||||
def _classify_exhausted_status(entry) -> tuple[str, bool]:
|
||||
code = getattr(entry, "last_error_code", None)
|
||||
reason = str(getattr(entry, "last_error_reason", "") or "").strip().lower()
|
||||
message = str(getattr(entry, "last_error_message", "") or "").strip().lower()
|
||||
|
||||
if code == 429 or any(token in reason for token in ("rate_limit", "usage_limit", "quota", "exhausted")) or any(
|
||||
token in message for token in ("rate limit", "usage limit", "quota", "too many requests")
|
||||
):
|
||||
return "rate-limited", True
|
||||
|
||||
if code in {401, 403} or any(token in reason for token in ("invalid_token", "invalid_grant", "unauthorized", "forbidden", "auth")) or any(
|
||||
token in message for token in ("unauthorized", "forbidden", "expired", "revoked", "invalid token", "authentication")
|
||||
):
|
||||
return "auth failed", False
|
||||
|
||||
return "exhausted", True
|
||||
|
||||
|
||||
|
||||
def _format_exhausted_status(entry) -> str:
|
||||
if entry.last_status != STATUS_EXHAUSTED:
|
||||
return ""
|
||||
label, show_retry_window = _classify_exhausted_status(entry)
|
||||
reason = getattr(entry, "last_error_reason", None)
|
||||
reason_text = f" {reason}" if isinstance(reason, str) and reason.strip() else ""
|
||||
code = f" ({entry.last_error_code})" if entry.last_error_code else ""
|
||||
if not show_retry_window:
|
||||
return f" {label}{reason_text}{code} (re-auth may be required)"
|
||||
exhausted_until = _exhausted_until(entry)
|
||||
if exhausted_until is None:
|
||||
return f" {label}{reason_text}{code}"
|
||||
return f" exhausted{reason_text}{code}"
|
||||
remaining = max(0, int(math.ceil(exhausted_until - time.time())))
|
||||
if remaining <= 0:
|
||||
return f" {label}{reason_text}{code} (ready to retry)"
|
||||
return f" exhausted{reason_text}{code} (ready to retry)"
|
||||
minutes, seconds = divmod(remaining, 60)
|
||||
hours, minutes = divmod(minutes, 60)
|
||||
days, hours = divmod(hours, 24)
|
||||
@@ -155,7 +133,7 @@ def _format_exhausted_status(entry) -> str:
|
||||
wait = f"{minutes}m {seconds}s"
|
||||
else:
|
||||
wait = f"{seconds}s"
|
||||
return f" {label}{reason_text}{code} ({wait} left)"
|
||||
return f" exhausted{reason_text}{code} ({wait} left)"
|
||||
|
||||
|
||||
def auth_add_command(args) -> None:
|
||||
@@ -408,44 +386,6 @@ def auth_reset_command(args) -> None:
|
||||
print(f"Reset status on {count} {provider} credentials")
|
||||
|
||||
|
||||
def auth_status_command(args) -> None:
|
||||
provider = _normalize_provider(getattr(args, "provider", "") or "")
|
||||
if not provider:
|
||||
raise SystemExit("Provider is required. Example: `hermes auth status spotify`.")
|
||||
status = auth_mod.get_auth_status(provider)
|
||||
if not status.get("logged_in"):
|
||||
reason = status.get("error")
|
||||
if reason:
|
||||
print(f"{provider}: logged out ({reason})")
|
||||
else:
|
||||
print(f"{provider}: logged out")
|
||||
return
|
||||
|
||||
print(f"{provider}: logged in")
|
||||
for key in ("auth_type", "client_id", "redirect_uri", "scope", "expires_at", "api_base_url"):
|
||||
value = status.get(key)
|
||||
if value:
|
||||
print(f" {key}: {value}")
|
||||
|
||||
|
||||
def auth_logout_command(args) -> None:
|
||||
auth_mod.logout_command(SimpleNamespace(provider=getattr(args, "provider", None)))
|
||||
|
||||
|
||||
def auth_spotify_command(args) -> None:
|
||||
action = str(getattr(args, "spotify_action", "") or "login").strip().lower()
|
||||
if action in {"", "login"}:
|
||||
auth_mod.login_spotify_command(args)
|
||||
return
|
||||
if action == "status":
|
||||
auth_status_command(SimpleNamespace(provider="spotify"))
|
||||
return
|
||||
if action == "logout":
|
||||
auth_logout_command(SimpleNamespace(provider="spotify"))
|
||||
return
|
||||
raise SystemExit(f"Unknown Spotify auth action: {action}")
|
||||
|
||||
|
||||
def _interactive_auth() -> None:
|
||||
"""Interactive credential pool management when `hermes auth` is called bare."""
|
||||
# Show current pool status first
|
||||
@@ -643,14 +583,5 @@ def auth_command(args) -> None:
|
||||
if action == "reset":
|
||||
auth_reset_command(args)
|
||||
return
|
||||
if action == "status":
|
||||
auth_status_command(args)
|
||||
return
|
||||
if action == "logout":
|
||||
auth_logout_command(args)
|
||||
return
|
||||
if action == "spotify":
|
||||
auth_spotify_command(args)
|
||||
return
|
||||
# No subcommand — launch interactive mode
|
||||
_interactive_auth()
|
||||
|
||||
@@ -1,300 +0,0 @@
|
||||
"""Azure Foundry endpoint auto-detection.
|
||||
|
||||
Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine:
|
||||
- API transport (OpenAI-style ``chat_completions`` vs
|
||||
Anthropic-style ``anthropic_messages``)
|
||||
- Available models (best effort — Azure does not expose a deployment
|
||||
listing via the inference API key, but Azure OpenAI v1 endpoints
|
||||
return the resource's model catalog via ``GET /models``)
|
||||
- Context length for each discovered/entered model, via the existing
|
||||
:func:`agent.model_metadata.get_model_context_length` resolver.
|
||||
|
||||
Rationale:
|
||||
|
||||
Azure has no pure-API-key deployment-listing endpoint — per Microsoft,
|
||||
deployment enumeration requires ARM management-plane auth. Azure
|
||||
OpenAI v1 endpoints ``{resource}.openai.azure.com/openai/v1`` do return
|
||||
a ``/models`` list, but it reflects the resource's *available* models
|
||||
rather than the user's *deployed* deployment names. In practice it is
|
||||
still a useful hint — the user picks a familiar model name and we look
|
||||
up its context length from the catalog.
|
||||
|
||||
The detector never crashes on errors (every HTTP call is wrapped in a
|
||||
broad try/except). Callers get a :class:`DetectionResult` with whatever
|
||||
information could be gathered, and fall back to manual entry for the
|
||||
rest.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
from urllib import request as urllib_request
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Default Azure OpenAI ``api-version`` to probe with. The v1 GA endpoint
|
||||
# accepts requests without ``api-version`` entirely, so this is only used
|
||||
# as a fallback for pre-v1 resources that still require it.
|
||||
_AZURE_OPENAI_PROBE_API_VERSIONS = (
|
||||
"2025-04-01-preview",
|
||||
"2024-10-21", # oldest GA that supports /models
|
||||
)
|
||||
|
||||
# Default Azure Anthropic ``api-version``. Matches the value used by
|
||||
# ``agent/anthropic_adapter.py`` when building the Anthropic client.
|
||||
_AZURE_ANTHROPIC_API_VERSION = "2025-04-15"
|
||||
|
||||
|
||||
@dataclass
|
||||
class DetectionResult:
|
||||
"""Everything auto-detection could gather from a base URL + API key."""
|
||||
|
||||
#: Detected API transport: ``"chat_completions"``,
|
||||
#: ``"anthropic_messages"``, or ``None`` when detection failed.
|
||||
api_mode: Optional[str] = None
|
||||
|
||||
#: Deployment / model IDs returned by ``/models`` (best effort).
|
||||
#: Empty when the endpoint doesn't expose the list with an API key.
|
||||
models: list[str] = field(default_factory=list)
|
||||
|
||||
#: Lowercased host from the base URL (used for display messages).
|
||||
hostname: str = ""
|
||||
|
||||
#: Human-readable reason the detector chose ``api_mode``. Useful
|
||||
#: for explaining auto-detection to the user in the wizard.
|
||||
reason: str = ""
|
||||
|
||||
#: ``True`` when ``/models`` returned a valid OpenAI-shaped payload.
|
||||
models_probe_ok: bool = False
|
||||
|
||||
#: ``True`` when the URL was determined to be an Anthropic-style
|
||||
#: endpoint (from path suffix or live probe).
|
||||
is_anthropic: bool = False
|
||||
|
||||
|
||||
def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]:
|
||||
"""GET a URL with ``api-key`` + ``Authorization`` headers. Return
|
||||
``(status_code, parsed_json_or_None)``. Never raises."""
|
||||
req = urllib_request.Request(url, method="GET")
|
||||
# Azure OpenAI uses ``api-key``. Some Azure deployments (and
|
||||
# Anthropic-style routes) use ``Authorization: Bearer``. Send both
|
||||
# so we probe once per URL rather than twice.
|
||||
req.add_header("api-key", api_key)
|
||||
req.add_header("Authorization", f"Bearer {api_key}")
|
||||
req.add_header("User-Agent", "hermes-agent/azure-detect")
|
||||
try:
|
||||
with urllib_request.urlopen(req, timeout=timeout) as resp:
|
||||
body = resp.read()
|
||||
try:
|
||||
return resp.status, json.loads(body.decode("utf-8", errors="replace"))
|
||||
except Exception:
|
||||
return resp.status, None
|
||||
except HTTPError as exc:
|
||||
return exc.code, None
|
||||
except (URLError, TimeoutError, OSError) as exc:
|
||||
logger.debug("azure_detect: GET %s failed: %s", url, exc)
|
||||
return 0, None
|
||||
except Exception as exc: # pragma: no cover — defensive
|
||||
logger.debug("azure_detect: GET %s unexpected error: %s", url, exc)
|
||||
return 0, None
|
||||
|
||||
|
||||
def _strip_trailing_v1(url: str) -> str:
|
||||
"""Strip trailing ``/v1`` or ``/v1/`` so we can construct sub-paths."""
|
||||
return re.sub(r"/v1/?$", "", url.rstrip("/"))
|
||||
|
||||
|
||||
def _looks_like_anthropic_path(url: str) -> bool:
|
||||
"""Return True when the URL's path ends in ``/anthropic`` or
|
||||
contains a ``/anthropic/`` segment. Used by Azure Foundry
|
||||
resources that route Claude traffic through a dedicated path."""
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
path = (parsed.path or "").lower().rstrip("/")
|
||||
return path.endswith("/anthropic") or "/anthropic/" in path + "/"
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _extract_model_ids(payload: dict) -> list[str]:
|
||||
"""Extract a list of model IDs from an OpenAI-shaped ``/models``
|
||||
response. Returns ``[]`` on any shape mismatch."""
|
||||
data = payload.get("data") if isinstance(payload, dict) else None
|
||||
if not isinstance(data, list):
|
||||
return []
|
||||
ids: list[str] = []
|
||||
for item in data:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
# OpenAI shape: {"id": "gpt-5.4", "object": "model", ...}
|
||||
mid = item.get("id") or item.get("model") or item.get("name")
|
||||
if isinstance(mid, str) and mid:
|
||||
ids.append(mid)
|
||||
return ids
|
||||
|
||||
|
||||
def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
|
||||
"""Probe ``<base>/models`` for an OpenAI-shaped response.
|
||||
|
||||
Returns ``(ok, models)``. ``ok`` is True iff the endpoint accepted
|
||||
us as an OpenAI-style caller (200 OK + OpenAI-shaped JSON body).
|
||||
"""
|
||||
base_url = base_url.rstrip("/")
|
||||
|
||||
# Azure OpenAI v1: {resource}.openai.azure.com/openai/v1 — no
|
||||
# api-version required for GA paths, so probe without first.
|
||||
candidates = [f"{base_url}/models"]
|
||||
# Fallback: explicit api-version for pre-v1 resources
|
||||
for v in _AZURE_OPENAI_PROBE_API_VERSIONS:
|
||||
candidates.append(f"{base_url}/models?api-version={v}")
|
||||
|
||||
for url in candidates:
|
||||
status, body = _http_get_json(url, api_key)
|
||||
if status == 200 and body is not None:
|
||||
ids = _extract_model_ids(body)
|
||||
if ids:
|
||||
logger.info(
|
||||
"azure_detect: /models probe OK at %s (%d models)",
|
||||
url, len(ids),
|
||||
)
|
||||
return True, ids
|
||||
# 200 + empty list still counts as "OpenAI shape, no models
|
||||
# listed" — let the user proceed with manual entry.
|
||||
if isinstance(body, dict) and "data" in body:
|
||||
return True, []
|
||||
return False, []
|
||||
|
||||
|
||||
def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
|
||||
"""Send a zero-token request to ``<base>/v1/messages`` and check
|
||||
whether the endpoint at least *recognises* the Anthropic Messages
|
||||
shape (any 4xx that mentions ``messages`` or ``model``, or a 400
|
||||
``invalid_request`` with an Anthropic error shape). Never completes
|
||||
a real chat.
|
||||
"""
|
||||
base = _strip_trailing_v1(base_url)
|
||||
url = f"{base}/v1/messages?api-version={_AZURE_ANTHROPIC_API_VERSION}"
|
||||
payload = json.dumps({
|
||||
"model": "probe",
|
||||
"max_tokens": 1,
|
||||
"messages": [{"role": "user", "content": "ping"}],
|
||||
}).encode("utf-8")
|
||||
req = urllib_request.Request(url, method="POST", data=payload)
|
||||
req.add_header("api-key", api_key)
|
||||
req.add_header("Authorization", f"Bearer {api_key}")
|
||||
req.add_header("anthropic-version", "2023-06-01")
|
||||
req.add_header("content-type", "application/json")
|
||||
req.add_header("User-Agent", "hermes-agent/azure-detect")
|
||||
try:
|
||||
with urllib_request.urlopen(req, timeout=6.0) as resp:
|
||||
# Should never 200 — "probe" isn't a real deployment. But
|
||||
# if it does, the endpoint definitely speaks Anthropic.
|
||||
return resp.status < 500
|
||||
except HTTPError as exc:
|
||||
# 4xx with an Anthropic-shaped error body = Anthropic endpoint.
|
||||
try:
|
||||
body = exc.read().decode("utf-8", errors="replace")
|
||||
lowered = body.lower()
|
||||
if "anthropic" in lowered or '"type"' in lowered and '"error"' in lowered:
|
||||
return True
|
||||
# Pre-Azure-v1 Azure Foundry returns a plain 404 for
|
||||
# Anthropic-style calls on non-Anthropic deployments. A
|
||||
# 400 "model not found" IS Anthropic though.
|
||||
if exc.code == 400 and ("messages" in lowered or "model" in lowered):
|
||||
return True
|
||||
return False
|
||||
except Exception:
|
||||
return False
|
||||
except (URLError, TimeoutError, OSError):
|
||||
return False
|
||||
except Exception: # pragma: no cover
|
||||
return False
|
||||
|
||||
|
||||
def detect(base_url: str, api_key: str) -> DetectionResult:
|
||||
"""Inspect an Azure endpoint and describe its transport + models.
|
||||
|
||||
Call this from the wizard before asking the user to pick an API
|
||||
mode manually. The caller should treat the returned
|
||||
:class:`DetectionResult` as *advisory* — if ``api_mode`` is None,
|
||||
fall back to asking the user.
|
||||
"""
|
||||
result = DetectionResult()
|
||||
|
||||
try:
|
||||
parsed = urlparse(base_url)
|
||||
result.hostname = (parsed.hostname or "").lower()
|
||||
except Exception:
|
||||
result.hostname = ""
|
||||
|
||||
# 1. Path sniff. Azure Foundry exposes Anthropic-style deployments
|
||||
# under a dedicated ``/anthropic`` path.
|
||||
if _looks_like_anthropic_path(base_url):
|
||||
result.is_anthropic = True
|
||||
result.api_mode = "anthropic_messages"
|
||||
result.reason = "URL path ends in /anthropic → Anthropic Messages API"
|
||||
return result
|
||||
|
||||
# 2. Try the OpenAI-style /models probe. If this works, the
|
||||
# endpoint definitely speaks OpenAI wire.
|
||||
ok, models = _probe_openai_models(base_url, api_key)
|
||||
if ok:
|
||||
result.models_probe_ok = True
|
||||
result.models = models
|
||||
result.api_mode = "chat_completions"
|
||||
result.reason = (
|
||||
f"GET /models returned {len(models)} model(s) — OpenAI-style endpoint"
|
||||
if models
|
||||
else "GET /models returned an OpenAI-shaped empty list — OpenAI-style endpoint"
|
||||
)
|
||||
return result
|
||||
|
||||
# 3. Fallback: probe the Anthropic Messages shape. Slower and more
|
||||
# intrusive than /models, so only run it when the OpenAI probe
|
||||
# failed.
|
||||
if _probe_anthropic_messages(base_url, api_key):
|
||||
result.is_anthropic = True
|
||||
result.api_mode = "anthropic_messages"
|
||||
result.reason = "Endpoint accepts Anthropic Messages shape"
|
||||
return result
|
||||
|
||||
# Nothing matched. Caller falls back to manual selection.
|
||||
result.reason = (
|
||||
"Could not probe endpoint (private network, missing model list, or "
|
||||
"non-standard path) — falling back to manual API-mode selection"
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]:
|
||||
"""Thin wrapper around :func:`agent.model_metadata.get_model_context_length`
|
||||
that returns ``None`` when only the fallback default (128k) would
|
||||
fire, so the wizard can distinguish "we actually know this" from
|
||||
"we guessed."""
|
||||
try:
|
||||
from agent.model_metadata import (
|
||||
DEFAULT_FALLBACK_CONTEXT,
|
||||
get_model_context_length,
|
||||
)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
try:
|
||||
n = get_model_context_length(model, base_url=base_url, api_key=api_key)
|
||||
except Exception as exc:
|
||||
logger.debug("azure_detect: context length lookup failed: %s", exc)
|
||||
return None
|
||||
|
||||
if isinstance(n, int) and n > 0 and n != DEFAULT_FALLBACK_CONTEXT:
|
||||
return n
|
||||
return None
|
||||
|
||||
|
||||
__all__ = ["DetectionResult", "detect", "lookup_context_length"]
|
||||
+1
-54
@@ -238,52 +238,6 @@ def get_git_banner_state(repo_dir: Optional[Path] = None) -> Optional[dict]:
|
||||
return {"upstream": upstream, "local": local, "ahead": max(ahead, 0)}
|
||||
|
||||
|
||||
_RELEASE_URL_BASE = "https://github.com/NousResearch/hermes-agent/releases/tag"
|
||||
_latest_release_cache: Optional[tuple] = None # (tag, url) once resolved
|
||||
|
||||
|
||||
def get_latest_release_tag(repo_dir: Optional[Path] = None) -> Optional[tuple]:
|
||||
"""Return ``(tag, release_url)`` for the latest git tag, or None.
|
||||
|
||||
Local-only — runs ``git describe --tags --abbrev=0`` against the
|
||||
Hermes checkout. Cached per-process. Release URL always points at the
|
||||
canonical NousResearch/hermes-agent repo (forks don't get a link).
|
||||
"""
|
||||
global _latest_release_cache
|
||||
if _latest_release_cache is not None:
|
||||
return _latest_release_cache or None
|
||||
|
||||
repo_dir = repo_dir or _resolve_repo_dir()
|
||||
if repo_dir is None:
|
||||
_latest_release_cache = () # falsy sentinel — skip future lookups
|
||||
return None
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "describe", "--tags", "--abbrev=0"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=3,
|
||||
cwd=str(repo_dir),
|
||||
)
|
||||
except Exception:
|
||||
_latest_release_cache = ()
|
||||
return None
|
||||
|
||||
if result.returncode != 0:
|
||||
_latest_release_cache = ()
|
||||
return None
|
||||
|
||||
tag = (result.stdout or "").strip()
|
||||
if not tag:
|
||||
_latest_release_cache = ()
|
||||
return None
|
||||
|
||||
url = f"{_RELEASE_URL_BASE}/{tag}"
|
||||
_latest_release_cache = (tag, url)
|
||||
return _latest_release_cache
|
||||
|
||||
|
||||
def format_banner_version_label() -> str:
|
||||
"""Return the version label shown in the startup banner title."""
|
||||
base = f"Hermes Agent v{VERSION} ({RELEASE_DATE})"
|
||||
@@ -565,16 +519,9 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
|
||||
agent_name = _skin_branding("agent_name", "Hermes Agent")
|
||||
title_color = _skin_color("banner_title", "#FFD700")
|
||||
border_color = _skin_color("banner_border", "#CD7F32")
|
||||
version_label = format_banner_version_label()
|
||||
release_info = get_latest_release_tag()
|
||||
if release_info:
|
||||
_tag, _url = release_info
|
||||
title_markup = f"[bold {title_color}][link={_url}]{version_label}[/link][/]"
|
||||
else:
|
||||
title_markup = f"[bold {title_color}]{version_label}[/]"
|
||||
outer_panel = Panel(
|
||||
layout_table,
|
||||
title=title_markup,
|
||||
title=f"[bold {title_color}]{format_banner_version_label()}[/]",
|
||||
border_style=border_color,
|
||||
padding=(0, 2),
|
||||
)
|
||||
|
||||
+1
-1
@@ -249,7 +249,7 @@ def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
|
||||
state_path = child / state_name
|
||||
if state_path.exists():
|
||||
kind = "directory" if state_path.is_dir() else "file"
|
||||
rel = state_path.relative_to(source_dir).as_posix()
|
||||
rel = state_path.relative_to(source_dir)
|
||||
findings.append((state_path, f"Workspace {kind}: {rel}"))
|
||||
|
||||
return findings
|
||||
|
||||
@@ -12,7 +12,6 @@ import os
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_CODEX_MODELS: List[str] = [
|
||||
"gpt-5.5",
|
||||
"gpt-5.4-mini",
|
||||
"gpt-5.4",
|
||||
"gpt-5.3-codex",
|
||||
@@ -22,7 +21,6 @@ DEFAULT_CODEX_MODELS: List[str] = [
|
||||
]
|
||||
|
||||
_FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [
|
||||
("gpt-5.5", ("gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex")),
|
||||
("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")),
|
||||
("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")),
|
||||
("gpt-5.3-codex", ("gpt-5.2-codex",)),
|
||||
|
||||
+19
-107
@@ -77,14 +77,16 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("rollback", "List or restore filesystem checkpoints", "Session",
|
||||
args_hint="[number]"),
|
||||
CommandDef("snapshot", "Create or restore state snapshots of Hermes config/state", "Session",
|
||||
cli_only=True, aliases=("snap",), args_hint="[create|restore <id>|prune]"),
|
||||
aliases=("snap",), args_hint="[create|restore <id>|prune]"),
|
||||
CommandDef("stop", "Kill all running background processes", "Session"),
|
||||
CommandDef("approve", "Approve a pending dangerous command", "Session",
|
||||
gateway_only=True, args_hint="[session|always]"),
|
||||
CommandDef("deny", "Deny a pending dangerous command", "Session",
|
||||
gateway_only=True),
|
||||
CommandDef("background", "Run a prompt in the background", "Session",
|
||||
aliases=("bg", "btw"), args_hint="<prompt>"),
|
||||
aliases=("bg",), args_hint="<prompt>"),
|
||||
CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session",
|
||||
args_hint="<question>"),
|
||||
CommandDef("agents", "Show active agents and running tasks", "Session",
|
||||
aliases=("tasks",)),
|
||||
CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
|
||||
@@ -101,10 +103,10 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
# Configuration
|
||||
CommandDef("config", "Show current configuration", "Configuration",
|
||||
cli_only=True),
|
||||
CommandDef("model", "Switch model for this session", "Configuration",
|
||||
aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
|
||||
CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
|
||||
cli_only=True),
|
||||
CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"),
|
||||
CommandDef("provider", "Show available providers and current provider",
|
||||
"Configuration"),
|
||||
CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info"),
|
||||
|
||||
CommandDef("personality", "Set a predefined personality", "Configuration",
|
||||
args_hint="[name]"),
|
||||
@@ -122,12 +124,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
args_hint="[normal|fast|status]",
|
||||
subcommands=("normal", "fast", "status", "on", "off")),
|
||||
CommandDef("skin", "Show or change the display skin/theme", "Configuration",
|
||||
cli_only=True, args_hint="[name]"),
|
||||
args_hint="[name]"),
|
||||
CommandDef("voice", "Toggle voice mode", "Configuration",
|
||||
args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
|
||||
CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
|
||||
cli_only=True, args_hint="[queue|interrupt|status]",
|
||||
subcommands=("queue", "interrupt", "status")),
|
||||
|
||||
# Tools & Skills
|
||||
CommandDef("tools", "Manage tools: /tools [list|disable|enable] [name...]", "Tools & Skills",
|
||||
@@ -140,13 +139,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
|
||||
cli_only=True, args_hint="[subcommand]",
|
||||
subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
|
||||
CommandDef("kanban", "Multi-profile collaboration board (tasks, links, comments)",
|
||||
"Tools & Skills", args_hint="[subcommand]",
|
||||
subcommands=("list", "ls", "show", "create", "assign", "link", "unlink",
|
||||
"claim", "comment", "complete", "block", "unblock", "archive",
|
||||
"tail", "dispatch", "context", "init", "gc")),
|
||||
CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
|
||||
cli_only=True),
|
||||
CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills"),
|
||||
CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
|
||||
aliases=("reload_mcp",)),
|
||||
CommandDef("browser", "Connect browser tools to your live Chrome via CDP", "Tools & Skills",
|
||||
@@ -267,26 +260,6 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset(
|
||||
)
|
||||
|
||||
|
||||
def is_gateway_known_command(name: str | None) -> bool:
|
||||
"""Return True if ``name`` resolves to a gateway-dispatchable slash command.
|
||||
|
||||
This covers both built-in commands (``GATEWAY_KNOWN_COMMANDS`` derived
|
||||
from ``COMMAND_REGISTRY``) and plugin-registered commands, which are
|
||||
looked up lazily so importing this module never forces plugin
|
||||
discovery. Gateway code uses this to decide whether to emit
|
||||
``command:<name>`` hooks — plugin commands get the same lifecycle
|
||||
events as built-ins.
|
||||
"""
|
||||
if not name:
|
||||
return False
|
||||
if name in GATEWAY_KNOWN_COMMANDS:
|
||||
return True
|
||||
for plugin_name, _description, _args_hint in _iter_plugin_command_entries():
|
||||
if plugin_name == name:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
# Commands with explicit Level-2 running-agent handlers in gateway/run.py.
|
||||
# Listed here for introspection / tests; semantically a subset of
|
||||
# "all resolvable commands" — which is the real bypass set (see
|
||||
@@ -324,7 +297,7 @@ def should_bypass_active_session(command_name: str | None) -> bool:
|
||||
safety net in gateway.run discards any command text that reaches
|
||||
the pending queue — which meant a mid-run /model (or /reasoning,
|
||||
/voice, /insights, /title, /resume, /retry, /undo, /compress,
|
||||
/usage, /reload-mcp, /sethome, /reset) would silently
|
||||
/usage, /provider, /reload-mcp, /sethome, /reset) would silently
|
||||
interrupt the agent AND get discarded, producing a zero-char
|
||||
response. See issue #5057 / PRs #6252, #10370, #4665.
|
||||
|
||||
@@ -398,47 +371,12 @@ def gateway_help_lines() -> list[str]:
|
||||
return lines
|
||||
|
||||
|
||||
def _iter_plugin_command_entries() -> list[tuple[str, str, str]]:
|
||||
"""Yield (name, description, args_hint) tuples for all plugin slash commands.
|
||||
|
||||
Plugin commands are registered via
|
||||
:func:`hermes_cli.plugins.PluginContext.register_command`. They behave
|
||||
like ``CommandDef`` entries for gateway surfacing: they appear in the
|
||||
Telegram command menu, in Slack's ``/hermes`` subcommand mapping, and
|
||||
(via :func:`gateway.platforms.discord._register_slash_commands`) in
|
||||
Discord's native slash command picker.
|
||||
|
||||
Lookup is lazy so importing this module never forces plugin discovery
|
||||
(which can trigger filesystem scans and environment-dependent
|
||||
behavior).
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.plugins import get_plugin_commands
|
||||
except Exception:
|
||||
return []
|
||||
try:
|
||||
commands = get_plugin_commands() or {}
|
||||
except Exception:
|
||||
return []
|
||||
entries: list[tuple[str, str, str]] = []
|
||||
for name, meta in commands.items():
|
||||
if not isinstance(name, str) or not isinstance(meta, dict):
|
||||
continue
|
||||
description = str(meta.get("description") or f"Run /{name}")
|
||||
args_hint = str(meta.get("args_hint") or "").strip()
|
||||
entries.append((name, description, args_hint))
|
||||
return entries
|
||||
|
||||
|
||||
def telegram_bot_commands() -> list[tuple[str, str]]:
|
||||
"""Return (command_name, description) pairs for Telegram setMyCommands.
|
||||
|
||||
Telegram command names cannot contain hyphens, so they are replaced with
|
||||
underscores. Aliases are skipped -- Telegram shows one menu entry per
|
||||
canonical command.
|
||||
|
||||
Plugin-registered slash commands are included so plugins get native
|
||||
autocomplete in Telegram without touching core code.
|
||||
"""
|
||||
overrides = _resolve_config_gates()
|
||||
result: list[tuple[str, str]] = []
|
||||
@@ -448,10 +386,6 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
|
||||
tg_name = _sanitize_telegram_name(cmd.name)
|
||||
if tg_name:
|
||||
result.append((tg_name, cmd.description))
|
||||
for name, description, _args_hint in _iter_plugin_command_entries():
|
||||
tg_name = _sanitize_telegram_name(name)
|
||||
if tg_name:
|
||||
result.append((tg_name, description))
|
||||
return result
|
||||
|
||||
|
||||
@@ -816,9 +750,6 @@ def slack_subcommand_map() -> dict[str, str]:
|
||||
|
||||
Maps both canonical names and aliases so /hermes bg do stuff works
|
||||
the same as /hermes background do stuff.
|
||||
|
||||
Plugin-registered slash commands are included so ``/hermes <plugin-cmd>``
|
||||
routes through the plugin handler.
|
||||
"""
|
||||
overrides = _resolve_config_gates()
|
||||
mapping: dict[str, str] = {}
|
||||
@@ -828,9 +759,6 @@ def slack_subcommand_map() -> dict[str, str]:
|
||||
mapping[cmd.name] = f"/{cmd.name}"
|
||||
for alias in cmd.aliases:
|
||||
mapping[alias] = f"/{alias}"
|
||||
for name, _description, _args_hint in _iter_plugin_command_entries():
|
||||
if name not in mapping:
|
||||
mapping[name] = f"/{name}"
|
||||
return mapping
|
||||
|
||||
|
||||
@@ -996,22 +924,12 @@ class SlashCommandCompleter(Completer):
|
||||
display_meta=meta,
|
||||
)
|
||||
|
||||
# If the user typed @file: / @folder: (or just @file / @folder with
|
||||
# no colon yet), delegate to path completions. Accepting the bare
|
||||
# form lets the picker surface directories as soon as the user has
|
||||
# typed `@folder`, without requiring them to first accept the static
|
||||
# `@folder:` hint and re-trigger completion.
|
||||
# If the user typed @file: or @folder:, delegate to path completions
|
||||
for prefix in ("@file:", "@folder:"):
|
||||
bare = prefix[:-1]
|
||||
|
||||
if word == bare or word.startswith(prefix):
|
||||
want_dir = prefix == "@folder:"
|
||||
path_part = '' if word == bare else word[len(prefix):]
|
||||
if word.startswith(prefix):
|
||||
path_part = word[len(prefix):] or "."
|
||||
expanded = os.path.expanduser(path_part)
|
||||
|
||||
if not expanded or expanded == ".":
|
||||
search_dir, match_prefix = ".", ""
|
||||
elif expanded.endswith("/"):
|
||||
if expanded.endswith("/"):
|
||||
search_dir, match_prefix = expanded, ""
|
||||
else:
|
||||
search_dir = os.path.dirname(expanded) or "."
|
||||
@@ -1027,21 +945,15 @@ class SlashCommandCompleter(Completer):
|
||||
for entry in sorted(entries):
|
||||
if match_prefix and not entry.lower().startswith(prefix_lower):
|
||||
continue
|
||||
full_path = os.path.join(search_dir, entry)
|
||||
is_dir = os.path.isdir(full_path)
|
||||
# `@folder:` must only surface directories; `@file:` only
|
||||
# regular files. Without this filter `@folder:` listed
|
||||
# every .env / .gitignore in the cwd, defeating the
|
||||
# explicit prefix and confusing users expecting a
|
||||
# directory picker.
|
||||
if want_dir != is_dir:
|
||||
continue
|
||||
if count >= limit:
|
||||
break
|
||||
full_path = os.path.join(search_dir, entry)
|
||||
is_dir = os.path.isdir(full_path)
|
||||
display_path = os.path.relpath(full_path)
|
||||
suffix = "/" if is_dir else ""
|
||||
kind = "folder" if is_dir else "file"
|
||||
meta = "dir" if is_dir else _file_size_label(full_path)
|
||||
completion = f"{prefix}{display_path}{suffix}"
|
||||
completion = f"@{kind}:{display_path}{suffix}"
|
||||
yield Completion(
|
||||
completion,
|
||||
start_position=-len(word),
|
||||
|
||||
+20
-265
@@ -361,15 +361,6 @@ DEFAULT_CONFIG = {
|
||||
# to finish, then interrupts any remaining runs after the timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
"restart_drain_timeout": 60,
|
||||
# Max app-level retry attempts for API errors (connection drops,
|
||||
# provider timeouts, 5xx, etc.) before the agent surfaces the
|
||||
# failure. The OpenAI SDK already does its own low-level retries
|
||||
# (max_retries=2 default) for transient network errors; this is
|
||||
# the Hermes-level retry loop that wraps the whole call. Lower
|
||||
# this to 1 if you use fallback providers and want fast failover
|
||||
# on flaky primaries; raise it if you prefer to tolerate longer
|
||||
# provider hiccups on a single provider.
|
||||
"api_max_retries": 3,
|
||||
"service_tier": "",
|
||||
# Tool-use enforcement: injects system prompt guidance that tells the
|
||||
# model to actually call tools instead of describing intended actions.
|
||||
@@ -384,11 +375,7 @@ DEFAULT_CONFIG = {
|
||||
# Periodic "still working" notification interval (seconds).
|
||||
# Sends a status message every N seconds so the user knows the
|
||||
# agent hasn't died during long tasks. 0 = disable notifications.
|
||||
# Lower values mean faster feedback on slow tasks but more chat
|
||||
# noise; 180s is a compromise that catches spinning weak-model runs
|
||||
# (60+ tool iterations with tiny output) before users assume the
|
||||
# bot is dead and /restart.
|
||||
"gateway_notify_interval": 180,
|
||||
"gateway_notify_interval": 600,
|
||||
},
|
||||
|
||||
"terminal": {
|
||||
@@ -407,23 +394,17 @@ DEFAULT_CONFIG = {
|
||||
# (bash doesn't source bashrc in non-interactive login mode) or
|
||||
# zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``.
|
||||
# Paths support ``~`` / ``${VAR}``. Missing files are silently
|
||||
# skipped. When empty, Hermes auto-sources ``~/.profile``,
|
||||
# ``~/.bash_profile``, and ``~/.bashrc`` (in that order) if the
|
||||
# skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the
|
||||
# snapshot shell is bash (this is the ``auto_source_bashrc``
|
||||
# behaviour — disable with that key if you want strict login-only
|
||||
# semantics).
|
||||
"shell_init_files": [],
|
||||
# When true (default), Hermes sources the user's shell rc files
|
||||
# (``~/.profile``, ``~/.bash_profile``, ``~/.bashrc``) in the
|
||||
# login shell used to build the environment snapshot. This
|
||||
# captures PATH additions, shell functions, and aliases — which a
|
||||
# plain ``bash -l -c`` would otherwise miss because bash skips
|
||||
# bashrc in non-interactive login mode, and because a default
|
||||
# Debian/Ubuntu ``~/.bashrc`` short-circuits on non-interactive
|
||||
# sources. ``~/.profile`` and ``~/.bash_profile`` are tried first
|
||||
# because ``n`` / ``nvm`` / ``asdf`` installers typically write
|
||||
# their PATH exports there without an interactivity guard. Turn
|
||||
# this off if your rc files misbehave when sourced
|
||||
# When true (default), Hermes sources ``~/.bashrc`` in the login
|
||||
# shell used to build the environment snapshot. This captures
|
||||
# PATH additions, shell functions, and aliases defined in the
|
||||
# user's bashrc — which a plain ``bash -l -c`` would otherwise
|
||||
# miss because bash skips bashrc in non-interactive login mode.
|
||||
# Turn this off if you have a bashrc that misbehaves when sourced
|
||||
# non-interactively (e.g. one that hard-exits on TTY checks).
|
||||
"auto_source_bashrc": True,
|
||||
"docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
|
||||
@@ -466,12 +447,6 @@ DEFAULT_CONFIG = {
|
||||
"record_sessions": False, # Auto-record browser sessions as WebM videos
|
||||
"allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
|
||||
"cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
|
||||
# CDP supervisor — dialog + frame detection via a persistent WebSocket.
|
||||
# Active only when a CDP-capable backend is attached (Browserbase or
|
||||
# local Chrome via /browser connect). See
|
||||
# website/docs/developer-guide/browser-supervisor.md.
|
||||
"dialog_policy": "must_respond", # must_respond | auto_dismiss | auto_accept
|
||||
"dialog_timeout_s": 300, # Safety auto-dismiss after N seconds under must_respond
|
||||
"camofox": {
|
||||
# When true, Hermes sends a stable profile-scoped userId to Camofox
|
||||
# so the server maps it to a persistent Firefox profile automatically.
|
||||
@@ -492,27 +467,7 @@ DEFAULT_CONFIG = {
|
||||
# exceed this are rejected with guidance to use offset+limit.
|
||||
# 100K chars ≈ 25–35K tokens across typical tokenisers.
|
||||
"file_read_max_chars": 100_000,
|
||||
|
||||
# Tool-output truncation thresholds. When terminal output or a
|
||||
# single read_file page exceeds these limits, Hermes truncates the
|
||||
# payload sent to the model (keeping head + tail for terminal,
|
||||
# enforcing pagination for read_file). Tuning these trades context
|
||||
# footprint against how much raw output the model can see in one
|
||||
# shot. Ported from anomalyco/opencode PR #23770.
|
||||
#
|
||||
# - max_bytes: terminal_tool output cap, in chars
|
||||
# (default 50_000 ≈ 12-15K tokens).
|
||||
# - max_lines: read_file pagination cap — the maximum `limit`
|
||||
# a single read_file call can request before
|
||||
# being clamped (default 2000).
|
||||
# - max_line_length: per-line cap applied when read_file emits a
|
||||
# line-numbered view (default 2000 chars).
|
||||
"tool_output": {
|
||||
"max_bytes": 50_000,
|
||||
"max_lines": 2000,
|
||||
"max_line_length": 2000,
|
||||
},
|
||||
|
||||
|
||||
"compression": {
|
||||
"enabled": True,
|
||||
"threshold": 0.50, # compress when context usage exceeds this ratio
|
||||
@@ -521,12 +476,6 @@ DEFAULT_CONFIG = {
|
||||
|
||||
},
|
||||
|
||||
# Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
|
||||
# cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored.
|
||||
"prompt_caching": {
|
||||
"cache_ttl": "5m",
|
||||
},
|
||||
|
||||
# AWS Bedrock provider configuration.
|
||||
# Only used when model.provider is "bedrock".
|
||||
"bedrock": {
|
||||
@@ -612,6 +561,14 @@ DEFAULT_CONFIG = {
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
"flush_memories": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
"title_generation": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
@@ -656,10 +613,6 @@ DEFAULT_CONFIG = {
|
||||
},
|
||||
|
||||
# Text-to-speech configuration
|
||||
# Each provider supports an optional `max_text_length:` override for the
|
||||
# per-request input-character cap. Omit it to use the provider's documented
|
||||
# limit (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k model-aware,
|
||||
# Gemini 5000, Edge 5000, Mistral 4000, NeuTTS/KittenTTS 2000).
|
||||
"tts": {
|
||||
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
|
||||
"edge": {
|
||||
@@ -755,35 +708,10 @@ DEFAULT_CONFIG = {
|
||||
"provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials)
|
||||
"base_url": "", # direct OpenAI-compatible endpoint for subagents
|
||||
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
|
||||
# When delegate_task narrows child toolsets explicitly, preserve any
|
||||
# MCP toolsets the parent already has enabled. On by default so
|
||||
# narrowing (e.g. toolsets=["web","browser"]) expresses "I want these
|
||||
# extras" without silently stripping MCP tools the parent already has.
|
||||
# Set to false for strict intersection.
|
||||
"inherit_mcp_toolsets": True,
|
||||
"max_iterations": 50, # per-subagent iteration cap (each subagent gets its own budget,
|
||||
# independent of the parent's max_iterations)
|
||||
"child_timeout_seconds": 600, # wall-clock timeout for each child agent (floor 30s,
|
||||
# no ceiling). High-reasoning models on large tasks
|
||||
# (e.g. gpt-5.5 xhigh, opus-4.6) need generous budgets;
|
||||
# raise if children time out before producing output.
|
||||
"reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium",
|
||||
# "low", "minimal", "none" (empty = inherit parent's level)
|
||||
"max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling
|
||||
# Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth
|
||||
# and _get_orchestrator_enabled). Values are clamped to [1, 3] with a
|
||||
# warning log if out of range.
|
||||
"max_spawn_depth": 1, # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
|
||||
"orchestrator_enabled": True, # kill switch for role="orchestrator"
|
||||
# When a subagent hits a dangerous-command approval prompt, the parent's
|
||||
# prompt_toolkit TUI owns stdin — a thread-local input() call from the
|
||||
# subagent worker would deadlock the parent UI. To avoid the deadlock,
|
||||
# subagent threads ALWAYS resolve approvals non-interactively:
|
||||
# false (default) → auto-deny with a logger.warning audit line (safe)
|
||||
# true → auto-approve "once" with a logger.warning audit line
|
||||
# Flip to true only if you trust delegated work to run dangerous cmds
|
||||
# without human review (cron pipelines, batch automation, etc.).
|
||||
"subagent_auto_approve": False,
|
||||
},
|
||||
|
||||
# Ephemeral prefill messages file — JSON list of {role, content} dicts
|
||||
@@ -810,17 +738,6 @@ DEFAULT_CONFIG = {
|
||||
"inline_shell": False,
|
||||
# Timeout (seconds) for each !`cmd` snippet when inline_shell is on.
|
||||
"inline_shell_timeout": 10,
|
||||
# Run the keyword/pattern security scanner on skills the agent
|
||||
# writes via skill_manage (create/edit/patch). Off by default
|
||||
# because the agent can already execute the same code paths via
|
||||
# terminal() with no gate, so the scan adds friction (blocks
|
||||
# skills that mention risky keywords in prose) without meaningful
|
||||
# security. Turn on if you want the belt-and-suspenders — a
|
||||
# dangerous verdict will then surface as a tool error to the
|
||||
# agent, which can retry with the flagged content removed.
|
||||
# External hub installs (trusted/community sources) are always
|
||||
# scanned regardless of this setting.
|
||||
"guard_agent_created": False,
|
||||
},
|
||||
|
||||
# Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
|
||||
@@ -840,7 +757,7 @@ DEFAULT_CONFIG = {
|
||||
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
|
||||
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing
|
||||
"channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads)
|
||||
# discord / discord_admin tools: restrict which actions the agent may call.
|
||||
# discord_server tool: restrict which actions the agent may call.
|
||||
# Default (empty) = all actions allowed (subject to bot privileged intents).
|
||||
# Accepts comma-separated string ("list_guilds,list_channels,fetch_messages")
|
||||
# or YAML list. Unknown names are dropped with a warning at load time.
|
||||
@@ -913,7 +830,6 @@ DEFAULT_CONFIG = {
|
||||
|
||||
# Pre-exec security scanning via tirith
|
||||
"security": {
|
||||
"allow_private_urls": False, # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs)
|
||||
"redact_secrets": True,
|
||||
"tirith_enabled": True,
|
||||
"tirith_path": "tirith",
|
||||
@@ -959,27 +875,6 @@ DEFAULT_CONFIG = {
|
||||
"backup_count": 3, # Number of rotated backup files to keep
|
||||
},
|
||||
|
||||
# Remotely-hosted model catalog manifest. When enabled, the CLI fetches
|
||||
# curated model lists for OpenRouter and Nous Portal from this URL,
|
||||
# falling back to the in-repo snapshot on network failure. Lets us
|
||||
# update model picker lists without shipping a hermes-agent release.
|
||||
# The default URL is served by the docs site GitHub Pages deploy.
|
||||
"model_catalog": {
|
||||
"enabled": True,
|
||||
"url": "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json",
|
||||
# Disk cache TTL in hours. Beyond this, the CLI refetches on the
|
||||
# next /model or `hermes model` invocation; network failures
|
||||
# silently fall back to the stale cache.
|
||||
"ttl_hours": 24,
|
||||
# Optional per-provider override URLs for third parties that want
|
||||
# to self-host their own curation list using the same schema.
|
||||
# Example:
|
||||
# providers:
|
||||
# openrouter:
|
||||
# url: https://example.com/my-curation.json
|
||||
"providers": {},
|
||||
},
|
||||
|
||||
# Network settings — workarounds for connectivity issues.
|
||||
"network": {
|
||||
# Force IPv4 connections. On servers with broken or unreachable IPv6,
|
||||
@@ -988,41 +883,6 @@ DEFAULT_CONFIG = {
|
||||
"force_ipv4": False,
|
||||
},
|
||||
|
||||
# Session storage — controls automatic cleanup of ~/.hermes/state.db.
|
||||
# state.db accumulates every session, message, tool call, and FTS5 index
|
||||
# entry forever. Without auto-pruning, a heavy user (gateway + cron)
|
||||
# reports 384MB+ databases with 68K+ messages, which slows down FTS5
|
||||
# inserts, /resume listing, and insights queries.
|
||||
"sessions": {
|
||||
# When true, prune ended sessions older than retention_days once
|
||||
# per (roughly) min_interval_hours at CLI/gateway/cron startup.
|
||||
# Only touches ended sessions — active sessions are always preserved.
|
||||
# Default false: session history is valuable for search recall, and
|
||||
# silently deleting it could surprise users. Opt in explicitly.
|
||||
"auto_prune": False,
|
||||
# How many days of ended-session history to keep. Matches the
|
||||
# default of ``hermes sessions prune``.
|
||||
"retention_days": 90,
|
||||
# VACUUM after a prune that actually deleted rows. SQLite does not
|
||||
# reclaim disk space on DELETE — freed pages are just reused on
|
||||
# subsequent INSERTs — so without VACUUM the file stays bloated
|
||||
# even after pruning. VACUUM blocks writes for a few seconds per
|
||||
# 100MB, so it only runs at startup, and only when prune deleted
|
||||
# ≥1 session.
|
||||
"vacuum_after_prune": True,
|
||||
# Minimum hours between auto-maintenance runs (avoids repeating
|
||||
# the sweep on every CLI invocation). Tracked via state_meta in
|
||||
# state.db itself, so it's shared across all processes.
|
||||
"min_interval_hours": 24,
|
||||
},
|
||||
|
||||
# Contextual first-touch onboarding hints (see agent/onboarding.py).
|
||||
# Each hint is shown once per install and then latched here so it
|
||||
# never fires again. Users can wipe the section to re-see all hints.
|
||||
"onboarding": {
|
||||
"seen": {},
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 22,
|
||||
}
|
||||
@@ -1180,22 +1040,6 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"STEPFUN_API_KEY": {
|
||||
"description": "StepFun Step Plan API key",
|
||||
"prompt": "StepFun Step Plan API key",
|
||||
"url": "https://platform.stepfun.com/",
|
||||
"password": True,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"STEPFUN_BASE_URL": {
|
||||
"description": "StepFun Step Plan base URL override",
|
||||
"prompt": "StepFun Step Plan base URL (leave empty for default)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"ARCEEAI_API_KEY": {
|
||||
"description": "Arcee AI API key",
|
||||
"prompt": "Arcee AI API key",
|
||||
@@ -1369,7 +1213,7 @@ OPTIONAL_ENV_VARS = {
|
||||
"advanced": True,
|
||||
},
|
||||
"XIAOMI_API_KEY": {
|
||||
"description": "Xiaomi MiMo API key for MiMo models (mimo-v2.5-pro, mimo-v2.5, mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
|
||||
"description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
|
||||
"prompt": "Xiaomi MiMo API Key",
|
||||
"url": "https://platform.xiaomimimo.com",
|
||||
"password": True,
|
||||
@@ -1399,21 +1243,6 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"AZURE_FOUNDRY_API_KEY": {
|
||||
"description": "Azure Foundry API key for custom Azure endpoints",
|
||||
"prompt": "Azure Foundry API Key",
|
||||
"url": "https://ai.azure.com/",
|
||||
"password": True,
|
||||
"category": "provider",
|
||||
},
|
||||
"AZURE_FOUNDRY_BASE_URL": {
|
||||
"description": "Azure Foundry base URL (set via 'hermes model' for endpoint-specific config)",
|
||||
"prompt": "Azure Foundry base URL",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
|
||||
# ── Tool API keys ──
|
||||
"EXA_API_KEY": {
|
||||
@@ -2165,14 +1994,6 @@ def _normalize_custom_provider_entry(
|
||||
models = entry.get("models")
|
||||
if isinstance(models, dict) and models:
|
||||
normalized["models"] = models
|
||||
elif isinstance(models, list) and models:
|
||||
# Hand-edited configs (and older Hermes versions) write ``models`` as
|
||||
# a plain list of model ids. Preserve them by converting to the dict
|
||||
# shape downstream code expects; otherwise normalize silently drops
|
||||
# the list and /model shows the provider with (0) models.
|
||||
normalized["models"] = {
|
||||
str(m): {} for m in models if isinstance(m, str) and m.strip()
|
||||
}
|
||||
|
||||
context_length = entry.get("context_length")
|
||||
if isinstance(context_length, int) and context_length > 0:
|
||||
@@ -2249,71 +2070,6 @@ def get_compatible_custom_providers(
|
||||
return compatible
|
||||
|
||||
|
||||
def get_custom_provider_context_length(
|
||||
model: str,
|
||||
base_url: str,
|
||||
custom_providers: Optional[List[Dict[str, Any]]] = None,
|
||||
config: Optional[Dict[str, Any]] = None,
|
||||
) -> Optional[int]:
|
||||
"""Look up a per-model ``context_length`` override from ``custom_providers``.
|
||||
|
||||
Matches any entry whose ``base_url`` equals ``base_url`` (trailing-slash
|
||||
insensitive) and returns ``custom_providers[i].models.<model>.context_length``
|
||||
if present and valid. Returns ``None`` when no override applies.
|
||||
|
||||
This is the single source of truth for custom-provider context overrides,
|
||||
used by:
|
||||
* ``AIAgent.__init__`` (startup resolution)
|
||||
* ``AIAgent.switch_model`` (mid-session ``/model`` switch)
|
||||
* ``hermes_cli.model_switch.resolve_display_context_length`` (``/model`` confirmation display)
|
||||
* ``gateway.run._format_session_info`` (``/info`` display)
|
||||
* ``agent.model_metadata.get_model_context_length`` (when custom_providers is threaded through)
|
||||
|
||||
Before this helper existed, the lookup was duplicated in ``run_agent.py``'s
|
||||
startup path only; every other path (notably ``/model`` switch) fell back
|
||||
to the 128K default. See #15779.
|
||||
"""
|
||||
if not model or not base_url:
|
||||
return None
|
||||
if custom_providers is None:
|
||||
try:
|
||||
custom_providers = get_compatible_custom_providers(config)
|
||||
except Exception:
|
||||
if config is None:
|
||||
return None
|
||||
raw = config.get("custom_providers")
|
||||
custom_providers = raw if isinstance(raw, list) else []
|
||||
if not isinstance(custom_providers, list):
|
||||
return None
|
||||
|
||||
target_url = (base_url or "").rstrip("/")
|
||||
if not target_url:
|
||||
return None
|
||||
|
||||
for entry in custom_providers:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
entry_url = (entry.get("base_url") or "").rstrip("/")
|
||||
if not entry_url or entry_url != target_url:
|
||||
continue
|
||||
models = entry.get("models")
|
||||
if not isinstance(models, dict):
|
||||
continue
|
||||
model_cfg = models.get(model)
|
||||
if not isinstance(model_cfg, dict):
|
||||
continue
|
||||
raw_ctx = model_cfg.get("context_length")
|
||||
if raw_ctx is None:
|
||||
continue
|
||||
try:
|
||||
ctx = int(raw_ctx)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if ctx > 0:
|
||||
return ctx
|
||||
return None
|
||||
|
||||
|
||||
def check_config_version() -> Tuple[int, int]:
|
||||
"""
|
||||
Check config version.
|
||||
@@ -2336,7 +2092,6 @@ _KNOWN_ROOT_KEYS = {
|
||||
"fallback_providers", "credential_pool_strategies", "toolsets",
|
||||
"agent", "terminal", "display", "compression", "delegation",
|
||||
"auxiliary", "custom_providers", "context", "memory", "gateway",
|
||||
"sessions",
|
||||
}
|
||||
|
||||
# Valid fields inside a custom_providers list entry
|
||||
@@ -3352,7 +3107,7 @@ def save_config(config: Dict[str, Any]):
|
||||
if not sec or sec.get("redact_secrets") is None:
|
||||
parts.append(_SECURITY_COMMENT)
|
||||
fb = normalized.get("fallback_model", {})
|
||||
if not fb or not isinstance(fb, dict) or not (fb.get("provider") and fb.get("model")):
|
||||
if not fb or not (fb.get("provider") and fb.get("model")):
|
||||
parts.append(_FALLBACK_COMMENT)
|
||||
|
||||
atomic_yaml_write(
|
||||
|
||||
@@ -275,99 +275,6 @@ def copilot_device_code_login(
|
||||
return None
|
||||
|
||||
|
||||
# ─── Copilot Token Exchange ────────────────────────────────────────────────
|
||||
|
||||
# Module-level cache for exchanged Copilot API tokens.
|
||||
# Maps raw_token_fingerprint -> (api_token, expires_at_epoch).
|
||||
_jwt_cache: dict[str, tuple[str, float]] = {}
|
||||
_JWT_REFRESH_MARGIN_SECONDS = 120 # refresh 2 min before expiry
|
||||
|
||||
# Token exchange endpoint and headers (matching VS Code / Copilot CLI)
|
||||
_TOKEN_EXCHANGE_URL = "https://api.github.com/copilot_internal/v2/token"
|
||||
_EDITOR_VERSION = "vscode/1.104.1"
|
||||
_EXCHANGE_USER_AGENT = "GitHubCopilotChat/0.26.7"
|
||||
|
||||
|
||||
def _token_fingerprint(raw_token: str) -> str:
|
||||
"""Short fingerprint of a raw token for cache keying (avoids storing full token)."""
|
||||
import hashlib
|
||||
return hashlib.sha256(raw_token.encode()).hexdigest()[:16]
|
||||
|
||||
|
||||
def exchange_copilot_token(raw_token: str, *, timeout: float = 10.0) -> tuple[str, float]:
|
||||
"""Exchange a raw GitHub token for a short-lived Copilot API token.
|
||||
|
||||
Calls ``GET https://api.github.com/copilot_internal/v2/token`` with
|
||||
the raw GitHub token and returns ``(api_token, expires_at)``.
|
||||
|
||||
The returned token is a semicolon-separated string (not a standard JWT)
|
||||
used as ``Authorization: Bearer <token>`` for Copilot API requests.
|
||||
|
||||
Results are cached in-process and reused until close to expiry.
|
||||
Raises ``ValueError`` on failure.
|
||||
"""
|
||||
import urllib.request
|
||||
|
||||
fp = _token_fingerprint(raw_token)
|
||||
|
||||
# Check cache first
|
||||
cached = _jwt_cache.get(fp)
|
||||
if cached:
|
||||
api_token, expires_at = cached
|
||||
if time.time() < expires_at - _JWT_REFRESH_MARGIN_SECONDS:
|
||||
return api_token, expires_at
|
||||
|
||||
req = urllib.request.Request(
|
||||
_TOKEN_EXCHANGE_URL,
|
||||
method="GET",
|
||||
headers={
|
||||
"Authorization": f"token {raw_token}",
|
||||
"User-Agent": _EXCHANGE_USER_AGENT,
|
||||
"Accept": "application/json",
|
||||
"Editor-Version": _EDITOR_VERSION,
|
||||
},
|
||||
)
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
data = json.loads(resp.read().decode())
|
||||
except Exception as exc:
|
||||
raise ValueError(f"Copilot token exchange failed: {exc}") from exc
|
||||
|
||||
api_token = data.get("token", "")
|
||||
expires_at = data.get("expires_at", 0)
|
||||
if not api_token:
|
||||
raise ValueError("Copilot token exchange returned empty token")
|
||||
|
||||
# Convert expires_at to float if needed
|
||||
expires_at = float(expires_at) if expires_at else time.time() + 1800
|
||||
|
||||
_jwt_cache[fp] = (api_token, expires_at)
|
||||
logger.debug(
|
||||
"Copilot token exchanged, expires_at=%s",
|
||||
expires_at,
|
||||
)
|
||||
return api_token, expires_at
|
||||
|
||||
|
||||
def get_copilot_api_token(raw_token: str) -> str:
|
||||
"""Exchange a raw GitHub token for a Copilot API token, with fallback.
|
||||
|
||||
Convenience wrapper: returns the exchanged token on success, or the
|
||||
raw token unchanged if the exchange fails (e.g. network error, unsupported
|
||||
account type). This preserves existing behaviour for accounts that don't
|
||||
need exchange while enabling access to internal-only models for those that do.
|
||||
"""
|
||||
if not raw_token:
|
||||
return raw_token
|
||||
try:
|
||||
api_token, _ = exchange_copilot_token(raw_token)
|
||||
return api_token
|
||||
except Exception as exc:
|
||||
logger.debug("Copilot token exchange failed, using raw token: %s", exc)
|
||||
return raw_token
|
||||
|
||||
|
||||
# ─── Copilot API Headers ───────────────────────────────────────────────────
|
||||
|
||||
def copilot_request_headers(
|
||||
|
||||
@@ -93,9 +93,6 @@ def cron_list(show_all: bool = False):
|
||||
script = job.get("script")
|
||||
if script:
|
||||
print(f" Script: {script}")
|
||||
workdir = job.get("workdir")
|
||||
if workdir:
|
||||
print(f" Workdir: {workdir}")
|
||||
|
||||
# Execution history
|
||||
last_status = job.get("last_status")
|
||||
@@ -171,7 +168,6 @@ def cron_create(args):
|
||||
skill=getattr(args, "skill", None),
|
||||
skills=_normalize_skills(getattr(args, "skill", None), getattr(args, "skills", None)),
|
||||
script=getattr(args, "script", None),
|
||||
workdir=getattr(args, "workdir", None),
|
||||
)
|
||||
if not result.get("success"):
|
||||
print(color(f"Failed to create job: {result.get('error', 'unknown error')}", Colors.RED))
|
||||
@@ -184,8 +180,6 @@ def cron_create(args):
|
||||
job_data = result.get("job", {})
|
||||
if job_data.get("script"):
|
||||
print(f" Script: {job_data['script']}")
|
||||
if job_data.get("workdir"):
|
||||
print(f" Workdir: {job_data['workdir']}")
|
||||
print(f" Next run: {result['next_run_at']}")
|
||||
return 0
|
||||
|
||||
@@ -224,7 +218,6 @@ def cron_edit(args):
|
||||
repeat=getattr(args, "repeat", None),
|
||||
skills=final_skills,
|
||||
script=getattr(args, "script", None),
|
||||
workdir=getattr(args, "workdir", None),
|
||||
)
|
||||
if not result.get("success"):
|
||||
print(color(f"Failed to update job: {result.get('error', 'unknown error')}", Colors.RED))
|
||||
@@ -240,8 +233,6 @@ def cron_edit(args):
|
||||
print(" Skills: none")
|
||||
if updated.get("script"):
|
||||
print(f" Script: {updated['script']}")
|
||||
if updated.get("workdir"):
|
||||
print(f" Workdir: {updated['workdir']}")
|
||||
return 0
|
||||
|
||||
|
||||
|
||||
+48
-128
@@ -13,7 +13,6 @@ import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
@@ -148,14 +147,6 @@ def _sweep_expired_pastes(now: Optional[float] = None) -> tuple[int, int]:
|
||||
return (deleted, len(remaining))
|
||||
|
||||
|
||||
def _best_effort_sweep_expired_pastes() -> None:
|
||||
"""Attempt pending-paste cleanup without letting /debug fail offline."""
|
||||
try:
|
||||
_sweep_expired_pastes()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Privacy / delete helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -323,128 +314,72 @@ def upload_to_pastebin(content: str, expiry_days: int = 7) -> str:
|
||||
# Log file reading
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
|
||||
class LogSnapshot:
|
||||
"""Single-read snapshot of a log file used by debug-share."""
|
||||
|
||||
path: Optional[Path]
|
||||
tail_text: str
|
||||
full_text: Optional[str]
|
||||
|
||||
|
||||
def _primary_log_path(log_name: str) -> Optional[Path]:
|
||||
"""Where *log_name* would live if present. Doesn't check existence."""
|
||||
from hermes_cli.logs import LOG_FILES
|
||||
|
||||
filename = LOG_FILES.get(log_name)
|
||||
return (get_hermes_home() / "logs" / filename) if filename else None
|
||||
|
||||
|
||||
def _resolve_log_path(log_name: str) -> Optional[Path]:
|
||||
"""Find the log file for *log_name*, falling back to the .1 rotation.
|
||||
|
||||
Returns the first non-empty candidate (primary, then .1), or None.
|
||||
Callers distinguish 'empty primary' from 'truly missing' via
|
||||
:func:`_primary_log_path`.
|
||||
Returns the path if found, or None.
|
||||
"""
|
||||
primary = _primary_log_path(log_name)
|
||||
if primary is None:
|
||||
from hermes_cli.logs import LOG_FILES
|
||||
|
||||
filename = LOG_FILES.get(log_name)
|
||||
if not filename:
|
||||
return None
|
||||
|
||||
log_dir = get_hermes_home() / "logs"
|
||||
primary = log_dir / filename
|
||||
if primary.exists() and primary.stat().st_size > 0:
|
||||
return primary
|
||||
|
||||
rotated = primary.parent / f"{primary.name}.1"
|
||||
# Fall back to the most recent rotated file (.1).
|
||||
rotated = log_dir / f"{filename}.1"
|
||||
if rotated.exists() and rotated.stat().st_size > 0:
|
||||
return rotated
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _capture_log_snapshot(
|
||||
log_name: str,
|
||||
*,
|
||||
tail_lines: int,
|
||||
max_bytes: int = _MAX_LOG_BYTES,
|
||||
) -> LogSnapshot:
|
||||
"""Capture a log once and derive summary/full-log views from it.
|
||||
def _read_log_tail(log_name: str, num_lines: int) -> str:
|
||||
"""Read the last *num_lines* from a log file, or return a placeholder."""
|
||||
from hermes_cli.logs import _read_last_n_lines
|
||||
|
||||
The report tail and standalone log upload must come from the same file
|
||||
snapshot. Otherwise a rotation/truncate between reads can make the report
|
||||
look newer than the uploaded ``agent.log`` paste.
|
||||
log_path = _resolve_log_path(log_name)
|
||||
if log_path is None:
|
||||
return "(file not found)"
|
||||
|
||||
try:
|
||||
lines = _read_last_n_lines(log_path, num_lines)
|
||||
return "".join(lines).rstrip("\n")
|
||||
except Exception as exc:
|
||||
return f"(error reading: {exc})"
|
||||
|
||||
|
||||
def _read_full_log(log_name: str, max_bytes: int = _MAX_LOG_BYTES) -> Optional[str]:
|
||||
"""Read a log file for standalone upload.
|
||||
|
||||
Returns the file content (last *max_bytes* if truncated), or None if the
|
||||
file doesn't exist or is empty.
|
||||
"""
|
||||
log_path = _resolve_log_path(log_name)
|
||||
if log_path is None:
|
||||
primary = _primary_log_path(log_name)
|
||||
tail = "(file empty)" if primary and primary.exists() else "(file not found)"
|
||||
return LogSnapshot(path=None, tail_text=tail, full_text=None)
|
||||
return None
|
||||
|
||||
try:
|
||||
size = log_path.stat().st_size
|
||||
if size == 0:
|
||||
# race: file was truncated between _resolve_log_path and stat
|
||||
return LogSnapshot(path=log_path, tail_text="(file empty)", full_text=None)
|
||||
return None
|
||||
|
||||
if size <= max_bytes:
|
||||
return log_path.read_text(encoding="utf-8", errors="replace")
|
||||
|
||||
# File is larger than max_bytes — read the tail.
|
||||
with open(log_path, "rb") as f:
|
||||
if size <= max_bytes:
|
||||
raw = f.read()
|
||||
truncated = False
|
||||
else:
|
||||
# Read from the end until we have enough bytes for the
|
||||
# standalone upload and enough newline context to render the
|
||||
# summary tail from the same snapshot.
|
||||
chunk_size = 8192
|
||||
pos = size
|
||||
chunks: list[bytes] = []
|
||||
total = 0
|
||||
newline_count = 0
|
||||
|
||||
while pos > 0 and (total < max_bytes or newline_count <= tail_lines + 1) and total < max_bytes * 2:
|
||||
read_size = min(chunk_size, pos)
|
||||
pos -= read_size
|
||||
f.seek(pos)
|
||||
chunk = f.read(read_size)
|
||||
chunks.insert(0, chunk)
|
||||
total += len(chunk)
|
||||
newline_count += chunk.count(b"\n")
|
||||
chunk_size = min(chunk_size * 2, 65536)
|
||||
|
||||
raw = b"".join(chunks)
|
||||
truncated = pos > 0
|
||||
|
||||
full_raw = raw
|
||||
if truncated and len(full_raw) > max_bytes:
|
||||
cut = len(full_raw) - max_bytes
|
||||
# Check whether the cut lands exactly on a line boundary. If the
|
||||
# byte just before the cut position is a newline the first retained
|
||||
# byte starts a complete line and we should keep it. Only drop a
|
||||
# partial first line when we're genuinely mid-line.
|
||||
on_boundary = cut > 0 and full_raw[cut - 1 : cut] == b"\n"
|
||||
full_raw = full_raw[cut:]
|
||||
if not on_boundary and b"\n" in full_raw:
|
||||
full_raw = full_raw.split(b"\n", 1)[1]
|
||||
|
||||
all_text = raw.decode("utf-8", errors="replace")
|
||||
tail_text = "".join(all_text.splitlines(keepends=True)[-tail_lines:]).rstrip("\n")
|
||||
|
||||
full_text = full_raw.decode("utf-8", errors="replace")
|
||||
if truncated:
|
||||
full_text = f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{full_text}"
|
||||
|
||||
return LogSnapshot(path=log_path, tail_text=tail_text, full_text=full_text)
|
||||
except Exception as exc:
|
||||
return LogSnapshot(path=log_path, tail_text=f"(error reading: {exc})", full_text=None)
|
||||
|
||||
|
||||
def _capture_default_log_snapshots(log_lines: int) -> dict[str, LogSnapshot]:
|
||||
"""Capture all logs used by debug-share exactly once."""
|
||||
errors_lines = min(log_lines, 100)
|
||||
return {
|
||||
"agent": _capture_log_snapshot("agent", tail_lines=log_lines),
|
||||
"errors": _capture_log_snapshot("errors", tail_lines=errors_lines),
|
||||
"gateway": _capture_log_snapshot("gateway", tail_lines=errors_lines),
|
||||
}
|
||||
f.seek(size - max_bytes)
|
||||
# Skip partial line at the seek point.
|
||||
f.readline()
|
||||
content = f.read().decode("utf-8", errors="replace")
|
||||
return f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{content}"
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -470,12 +405,7 @@ def _capture_dump() -> str:
|
||||
return capture.getvalue()
|
||||
|
||||
|
||||
def collect_debug_report(
|
||||
*,
|
||||
log_lines: int = 200,
|
||||
dump_text: str = "",
|
||||
log_snapshots: Optional[dict[str, LogSnapshot]] = None,
|
||||
) -> str:
|
||||
def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
|
||||
"""Build the summary debug report: system dump + log tails.
|
||||
|
||||
Parameters
|
||||
@@ -494,22 +424,19 @@ def collect_debug_report(
|
||||
dump_text = _capture_dump()
|
||||
buf.write(dump_text)
|
||||
|
||||
if log_snapshots is None:
|
||||
log_snapshots = _capture_default_log_snapshots(log_lines)
|
||||
|
||||
# ── Recent log tails (summary only) ──────────────────────────────────
|
||||
buf.write("\n\n")
|
||||
buf.write(f"--- agent.log (last {log_lines} lines) ---\n")
|
||||
buf.write(log_snapshots["agent"].tail_text)
|
||||
buf.write(_read_log_tail("agent", log_lines))
|
||||
buf.write("\n\n")
|
||||
|
||||
errors_lines = min(log_lines, 100)
|
||||
buf.write(f"--- errors.log (last {errors_lines} lines) ---\n")
|
||||
buf.write(log_snapshots["errors"].tail_text)
|
||||
buf.write(_read_log_tail("errors", errors_lines))
|
||||
buf.write("\n\n")
|
||||
|
||||
buf.write(f"--- gateway.log (last {errors_lines} lines) ---\n")
|
||||
buf.write(log_snapshots["gateway"].tail_text)
|
||||
buf.write(_read_log_tail("gateway", errors_lines))
|
||||
buf.write("\n")
|
||||
|
||||
return buf.getvalue()
|
||||
@@ -521,8 +448,6 @@ def collect_debug_report(
|
||||
|
||||
def run_debug_share(args):
|
||||
"""Collect debug report + full logs, upload each, print URLs."""
|
||||
_best_effort_sweep_expired_pastes()
|
||||
|
||||
log_lines = getattr(args, "lines", 200)
|
||||
expiry = getattr(args, "expire", 7)
|
||||
local_only = getattr(args, "local", False)
|
||||
@@ -534,15 +459,10 @@ def run_debug_share(args):
|
||||
|
||||
# Capture dump once — prepended to every paste for context.
|
||||
dump_text = _capture_dump()
|
||||
log_snapshots = _capture_default_log_snapshots(log_lines)
|
||||
|
||||
report = collect_debug_report(
|
||||
log_lines=log_lines,
|
||||
dump_text=dump_text,
|
||||
log_snapshots=log_snapshots,
|
||||
)
|
||||
agent_log = log_snapshots["agent"].full_text
|
||||
gateway_log = log_snapshots["gateway"].full_text
|
||||
report = collect_debug_report(log_lines=log_lines, dump_text=dump_text)
|
||||
agent_log = _read_full_log("agent")
|
||||
gateway_log = _read_full_log("gateway")
|
||||
|
||||
# Prepend dump header to each full log so every paste is self-contained.
|
||||
if agent_log:
|
||||
|
||||
+12
-42
@@ -29,7 +29,6 @@ if _env_path.exists():
|
||||
load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")
|
||||
|
||||
from hermes_cli.colors import Colors, color
|
||||
from hermes_cli.models import _HERMES_USER_AGENT
|
||||
from hermes_constants import OPENROUTER_MODELS_URL
|
||||
from utils import base_url_host_matches
|
||||
|
||||
@@ -296,37 +295,16 @@ def run_doctor(args):
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
from hermes_cli.config import get_compatible_custom_providers as _compatible_custom_providers
|
||||
from hermes_cli.providers import resolve_provider_full as _resolve_provider_full
|
||||
from hermes_cli.auth import resolve_provider as _resolve_provider
|
||||
except Exception:
|
||||
_compatible_custom_providers = None
|
||||
_resolve_provider_full = None
|
||||
|
||||
custom_providers = []
|
||||
if _compatible_custom_providers is not None:
|
||||
try:
|
||||
custom_providers = _compatible_custom_providers(cfg)
|
||||
except Exception:
|
||||
custom_providers = []
|
||||
|
||||
user_providers = cfg.get("providers")
|
||||
if isinstance(user_providers, dict):
|
||||
known_providers.update(str(name).strip().lower() for name in user_providers if str(name).strip())
|
||||
for entry in custom_providers:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
name = str(entry.get("name") or "").strip()
|
||||
if name:
|
||||
known_providers.add("custom:" + name.lower().replace(" ", "-"))
|
||||
_resolve_provider = None
|
||||
|
||||
canonical_provider = provider
|
||||
if (
|
||||
provider
|
||||
and _resolve_provider_full is not None
|
||||
and provider not in ("auto", "custom")
|
||||
):
|
||||
provider_def = _resolve_provider_full(provider, user_providers, custom_providers)
|
||||
canonical_provider = provider_def.id if provider_def is not None else None
|
||||
if provider and _resolve_provider is not None and provider != "auto":
|
||||
try:
|
||||
canonical_provider = _resolve_provider(provider)
|
||||
except Exception:
|
||||
canonical_provider = None
|
||||
|
||||
if provider and provider != "auto":
|
||||
if canonical_provider is None or (known_providers and canonical_provider not in known_providers):
|
||||
@@ -934,7 +912,6 @@ def run_doctor(args):
|
||||
_apikey_providers = [
|
||||
("Z.AI / GLM", ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
|
||||
("Kimi / Moonshot", ("KIMI_API_KEY",), "https://api.moonshot.ai/v1/models", "KIMI_BASE_URL", True),
|
||||
("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
|
||||
("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
|
||||
("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
|
||||
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
|
||||
@@ -966,25 +943,18 @@ def run_doctor(args):
|
||||
try:
|
||||
import httpx
|
||||
_base = os.getenv(_base_env, "") if _base_env else ""
|
||||
# Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1
|
||||
# (OpenAI-compat surface, which exposes /models for health check).
|
||||
# Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com
|
||||
if not _base and _key.startswith("sk-kimi-"):
|
||||
_base = "https://api.kimi.com/coding/v1"
|
||||
# Anthropic-compat endpoints (/anthropic, api.kimi.com/coding
|
||||
# with no /v1) don't support /models. Rewrite to the OpenAI-compat
|
||||
# /v1 surface for health checks.
|
||||
# Anthropic-compat endpoints (/anthropic) don't support /models.
|
||||
# Rewrite to the OpenAI-compat /v1 surface for health checks.
|
||||
if _base and _base.rstrip("/").endswith("/anthropic"):
|
||||
from agent.auxiliary_client import _to_openai_base_url
|
||||
_base = _to_openai_base_url(_base)
|
||||
if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"):
|
||||
_base = _base.rstrip("/") + "/v1"
|
||||
_url = (_base.rstrip("/") + "/models") if _base else _default_url
|
||||
_headers = {
|
||||
"Authorization": f"Bearer {_key}",
|
||||
"User-Agent": _HERMES_USER_AGENT,
|
||||
}
|
||||
_headers = {"Authorization": f"Bearer {_key}"}
|
||||
if base_url_host_matches(_base, "api.kimi.com"):
|
||||
_headers["User-Agent"] = "claude-code/0.1.0"
|
||||
_headers["User-Agent"] = "KimiCLI/1.30.0"
|
||||
_resp = httpx.get(
|
||||
_url,
|
||||
headers=_headers,
|
||||
|
||||
@@ -267,8 +267,6 @@ def run_dump(args):
|
||||
("ANTHROPIC_API_KEY", "anthropic"),
|
||||
("ANTHROPIC_TOKEN", "anthropic_token"),
|
||||
("NOUS_API_KEY", "nous"),
|
||||
("GOOGLE_API_KEY", "google/gemini"),
|
||||
("GEMINI_API_KEY", "gemini"),
|
||||
("GLM_API_KEY", "glm/zai"),
|
||||
("ZAI_API_KEY", "zai"),
|
||||
("KIMI_API_KEY", "kimi"),
|
||||
|
||||
@@ -160,8 +160,6 @@ def load_hermes_dotenv(
|
||||
# Fix corrupted .env files before python-dotenv parses them (#8908).
|
||||
if user_env.exists():
|
||||
_sanitize_env_file_if_needed(user_env)
|
||||
if project_env_path and project_env_path.exists():
|
||||
_sanitize_env_file_if_needed(project_env_path)
|
||||
|
||||
if user_env.exists():
|
||||
_load_dotenv_with_fallback(user_env, override=True)
|
||||
|
||||
@@ -1,361 +0,0 @@
|
||||
"""
|
||||
hermes fallback — manage the fallback provider chain.
|
||||
|
||||
Fallback providers are tried in order when the primary model fails with
|
||||
rate-limit, overload, or connection errors. See:
|
||||
https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers
|
||||
|
||||
Subcommands:
|
||||
hermes fallback [list] Show the current fallback chain (default when no subcommand)
|
||||
hermes fallback add Pick provider + model via the same picker as `hermes model`,
|
||||
then append the selection to the chain
|
||||
hermes fallback remove Pick an entry to delete from the chain
|
||||
hermes fallback clear Remove all fallback entries
|
||||
|
||||
Storage: ``fallback_providers`` in ``~/.hermes/config.yaml`` (top-level, list of
|
||||
``{provider, model, base_url?, api_mode?}`` dicts). The legacy single-dict
|
||||
``fallback_model`` format is migrated to the new list format on first add.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _read_chain(config: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""Return the normalized fallback chain as a list of dicts.
|
||||
|
||||
Accepts both the new list format (``fallback_providers``) and the legacy
|
||||
single-dict format (``fallback_model``). The returned list is always a
|
||||
fresh copy — callers can mutate without touching the config dict.
|
||||
"""
|
||||
chain = config.get("fallback_providers") or []
|
||||
if isinstance(chain, list):
|
||||
result = [dict(e) for e in chain if isinstance(e, dict) and e.get("provider") and e.get("model")]
|
||||
if result:
|
||||
return result
|
||||
legacy = config.get("fallback_model")
|
||||
if isinstance(legacy, dict) and legacy.get("provider") and legacy.get("model"):
|
||||
return [dict(legacy)]
|
||||
if isinstance(legacy, list):
|
||||
return [dict(e) for e in legacy if isinstance(e, dict) and e.get("provider") and e.get("model")]
|
||||
return []
|
||||
|
||||
|
||||
def _write_chain(config: Dict[str, Any], chain: List[Dict[str, Any]]) -> None:
|
||||
"""Persist the chain to ``fallback_providers`` and clear legacy key."""
|
||||
config["fallback_providers"] = chain
|
||||
# Drop the legacy single-dict key on write so there's only one source of truth.
|
||||
if "fallback_model" in config:
|
||||
config.pop("fallback_model", None)
|
||||
|
||||
|
||||
def _format_entry(entry: Dict[str, Any]) -> str:
|
||||
"""One-line human-readable rendering of a fallback entry."""
|
||||
provider = entry.get("provider", "?")
|
||||
model = entry.get("model", "?")
|
||||
base = entry.get("base_url")
|
||||
suffix = f" [{base}]" if base else ""
|
||||
return f"{model} (via {provider}){suffix}"
|
||||
|
||||
|
||||
def _extract_fallback_from_model_cfg(model_cfg: Any) -> Optional[Dict[str, Any]]:
|
||||
"""Pull the ``{provider, model, base_url?, api_mode?}`` dict from a ``config["model"]`` snapshot."""
|
||||
if not isinstance(model_cfg, dict):
|
||||
return None
|
||||
provider = (model_cfg.get("provider") or "").strip()
|
||||
# The picker writes the selected model to ``model.default``.
|
||||
model = (model_cfg.get("default") or model_cfg.get("model") or "").strip()
|
||||
if not provider or not model:
|
||||
return None
|
||||
entry: Dict[str, Any] = {"provider": provider, "model": model}
|
||||
base_url = (model_cfg.get("base_url") or "").strip()
|
||||
if base_url:
|
||||
entry["base_url"] = base_url
|
||||
api_mode = (model_cfg.get("api_mode") or "").strip()
|
||||
if api_mode:
|
||||
entry["api_mode"] = api_mode
|
||||
return entry
|
||||
|
||||
|
||||
def _snapshot_auth_active_provider() -> Any:
|
||||
"""Return the current ``active_provider`` in auth.json, or a sentinel if unavailable."""
|
||||
try:
|
||||
from hermes_cli.auth import _load_auth_store
|
||||
store = _load_auth_store()
|
||||
return store.get("active_provider")
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _restore_auth_active_provider(value: Any) -> None:
|
||||
"""Write back a previously snapshotted ``active_provider`` value."""
|
||||
try:
|
||||
from hermes_cli.auth import _auth_store_lock, _load_auth_store, _save_auth_store
|
||||
with _auth_store_lock():
|
||||
store = _load_auth_store()
|
||||
store["active_provider"] = value
|
||||
_save_auth_store(store)
|
||||
except Exception:
|
||||
# Best-effort — if auth.json can't be restored, the user's primary
|
||||
# provider may have been deactivated by the picker. They can re-run
|
||||
# `hermes model` to fix it. Don't fail the fallback add.
|
||||
pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Subcommand handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def cmd_fallback_list(args) -> None: # noqa: ARG001
|
||||
"""Print the current fallback chain."""
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
config = load_config()
|
||||
chain = _read_chain(config)
|
||||
|
||||
print()
|
||||
if not chain:
|
||||
print(" No fallback providers configured.")
|
||||
print()
|
||||
print(" Add one with: hermes fallback add")
|
||||
print()
|
||||
return
|
||||
|
||||
primary = _describe_primary(config)
|
||||
if primary:
|
||||
print(f" Primary: {primary}")
|
||||
print()
|
||||
print(f" Fallback chain ({len(chain)} {'entry' if len(chain) == 1 else 'entries'}):")
|
||||
for i, entry in enumerate(chain, 1):
|
||||
print(f" {i}. {_format_entry(entry)}")
|
||||
print()
|
||||
print(" Tried in order when the primary fails (rate-limit, 5xx, connection errors).")
|
||||
print(" Docs: https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers")
|
||||
print()
|
||||
|
||||
|
||||
def _describe_primary(config: Dict[str, Any]) -> Optional[str]:
|
||||
"""One-line description of the primary model for display purposes."""
|
||||
model_cfg = config.get("model")
|
||||
if isinstance(model_cfg, dict):
|
||||
provider = (model_cfg.get("provider") or "?").strip() or "?"
|
||||
model = (model_cfg.get("default") or model_cfg.get("model") or "?").strip() or "?"
|
||||
return f"{model} (via {provider})"
|
||||
if isinstance(model_cfg, str) and model_cfg.strip():
|
||||
return model_cfg.strip()
|
||||
return None
|
||||
|
||||
|
||||
def cmd_fallback_add(args) -> None:
|
||||
"""Launch the same picker as `hermes model`, then append the selection to the chain."""
|
||||
from hermes_cli.main import _require_tty, select_provider_and_model
|
||||
from hermes_cli.config import load_config, save_config
|
||||
|
||||
_require_tty("fallback add")
|
||||
|
||||
# Snapshot BEFORE the picker runs so we can distinguish "user actually
|
||||
# picked something" from "user cancelled" by comparing before/after.
|
||||
before_cfg = load_config()
|
||||
model_before = copy.deepcopy(before_cfg.get("model"))
|
||||
active_provider_before = _snapshot_auth_active_provider()
|
||||
|
||||
print()
|
||||
print(" Adding a fallback provider. The picker below is the same one used by")
|
||||
print(" `hermes model` — select the provider + model you want as a fallback.")
|
||||
print()
|
||||
|
||||
try:
|
||||
select_provider_and_model(args=args)
|
||||
except SystemExit:
|
||||
# Some provider flows exit on auth failure — restore state and re-raise.
|
||||
_restore_model_cfg(model_before)
|
||||
_restore_auth_active_provider(active_provider_before)
|
||||
raise
|
||||
|
||||
# Read the post-picker state to see what the user selected.
|
||||
after_cfg = load_config()
|
||||
model_after = after_cfg.get("model")
|
||||
|
||||
new_entry = _extract_fallback_from_model_cfg(model_after)
|
||||
if not new_entry:
|
||||
# Picker didn't complete (user cancelled or flow bailed). Nothing to do.
|
||||
_restore_model_cfg(model_before)
|
||||
_restore_auth_active_provider(active_provider_before)
|
||||
print()
|
||||
print(" No fallback added.")
|
||||
return
|
||||
|
||||
# Picker picked the same thing that's already the primary → nothing changed,
|
||||
# and there's nothing useful to add as a fallback to itself.
|
||||
primary_entry = _extract_fallback_from_model_cfg(model_before)
|
||||
if primary_entry and primary_entry["provider"] == new_entry["provider"] \
|
||||
and primary_entry["model"] == new_entry["model"]:
|
||||
_restore_model_cfg(model_before)
|
||||
_restore_auth_active_provider(active_provider_before)
|
||||
print()
|
||||
print(f" Selected model matches the current primary ({_format_entry(new_entry)}).")
|
||||
print(" A provider cannot be a fallback for itself — no change.")
|
||||
return
|
||||
|
||||
# Reload the config with the primary restored, then append the new entry
|
||||
# to ``fallback_providers``. We deliberately re-load (rather than mutating
|
||||
# ``after_cfg``) because the picker may have touched other top-level keys
|
||||
# (custom_providers, providers credentials) that we want to keep.
|
||||
_restore_model_cfg(model_before)
|
||||
_restore_auth_active_provider(active_provider_before)
|
||||
|
||||
final_cfg = load_config()
|
||||
chain = _read_chain(final_cfg)
|
||||
|
||||
# Reject exact-duplicate fallback entries.
|
||||
for existing in chain:
|
||||
if existing.get("provider") == new_entry["provider"] \
|
||||
and existing.get("model") == new_entry["model"]:
|
||||
print()
|
||||
print(f" {_format_entry(new_entry)} is already in the fallback chain — skipped.")
|
||||
return
|
||||
|
||||
chain.append(new_entry)
|
||||
_write_chain(final_cfg, chain)
|
||||
save_config(final_cfg)
|
||||
|
||||
print()
|
||||
print(f" Added fallback: {_format_entry(new_entry)}")
|
||||
print(f" Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.")
|
||||
print()
|
||||
print(" Run `hermes fallback list` to view, or `hermes fallback remove` to delete.")
|
||||
|
||||
|
||||
def _restore_model_cfg(model_before: Any) -> None:
|
||||
"""Restore ``config["model"]`` to a previously-captured snapshot."""
|
||||
from hermes_cli.config import load_config, save_config
|
||||
|
||||
cfg = load_config()
|
||||
if model_before is None:
|
||||
cfg.pop("model", None)
|
||||
else:
|
||||
cfg["model"] = copy.deepcopy(model_before)
|
||||
save_config(cfg)
|
||||
|
||||
|
||||
def cmd_fallback_remove(args) -> None: # noqa: ARG001
|
||||
"""Pick an entry from the chain and remove it."""
|
||||
from hermes_cli.config import load_config, save_config
|
||||
|
||||
config = load_config()
|
||||
chain = _read_chain(config)
|
||||
|
||||
if not chain:
|
||||
print()
|
||||
print(" No fallback providers configured — nothing to remove.")
|
||||
print()
|
||||
return
|
||||
|
||||
choices = [_format_entry(e) for e in chain]
|
||||
choices.append("Cancel")
|
||||
|
||||
try:
|
||||
from hermes_cli.setup import _curses_prompt_choice
|
||||
idx = _curses_prompt_choice("Select a fallback to remove:", choices, 0)
|
||||
except Exception:
|
||||
idx = _numbered_pick("Select a fallback to remove:", choices)
|
||||
|
||||
if idx is None or idx < 0 or idx >= len(chain):
|
||||
print()
|
||||
print(" Cancelled — no change.")
|
||||
return
|
||||
|
||||
removed = chain.pop(idx)
|
||||
_write_chain(config, chain)
|
||||
save_config(config)
|
||||
|
||||
print()
|
||||
print(f" Removed fallback: {_format_entry(removed)}")
|
||||
if chain:
|
||||
print(f" Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.")
|
||||
else:
|
||||
print(" Fallback chain is now empty.")
|
||||
print()
|
||||
|
||||
|
||||
def cmd_fallback_clear(args) -> None: # noqa: ARG001
|
||||
"""Remove all fallback entries (with confirmation)."""
|
||||
from hermes_cli.config import load_config, save_config
|
||||
|
||||
config = load_config()
|
||||
chain = _read_chain(config)
|
||||
|
||||
if not chain:
|
||||
print()
|
||||
print(" No fallback providers configured — nothing to clear.")
|
||||
print()
|
||||
return
|
||||
|
||||
print()
|
||||
print(f" Current fallback chain ({len(chain)} {'entry' if len(chain) == 1 else 'entries'}):")
|
||||
for i, entry in enumerate(chain, 1):
|
||||
print(f" {i}. {_format_entry(entry)}")
|
||||
print()
|
||||
try:
|
||||
resp = input(" Clear all entries? [y/N]: ").strip().lower()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
print()
|
||||
print(" Cancelled.")
|
||||
return
|
||||
if resp not in ("y", "yes"):
|
||||
print(" Cancelled — no change.")
|
||||
return
|
||||
|
||||
_write_chain(config, [])
|
||||
save_config(config)
|
||||
print()
|
||||
print(" Fallback chain cleared.")
|
||||
print()
|
||||
|
||||
|
||||
def _numbered_pick(question: str, choices: List[str]) -> Optional[int]:
|
||||
"""Fallback numbered-list picker when curses is unavailable."""
|
||||
print(question)
|
||||
for i, c in enumerate(choices, 1):
|
||||
print(f" {i}. {c}")
|
||||
print()
|
||||
while True:
|
||||
try:
|
||||
val = input(f"Choice [1-{len(choices)}]: ").strip()
|
||||
if not val:
|
||||
return None
|
||||
idx = int(val) - 1
|
||||
if 0 <= idx < len(choices):
|
||||
return idx
|
||||
print(f"Please enter 1-{len(choices)}")
|
||||
except ValueError:
|
||||
print("Please enter a number")
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
print()
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def cmd_fallback(args) -> None:
|
||||
"""Top-level dispatcher for ``hermes fallback [subcommand]``."""
|
||||
sub = getattr(args, "fallback_command", None)
|
||||
if sub in (None, "", "list", "ls"):
|
||||
cmd_fallback_list(args)
|
||||
elif sub == "add":
|
||||
cmd_fallback_add(args)
|
||||
elif sub in ("remove", "rm"):
|
||||
cmd_fallback_remove(args)
|
||||
elif sub == "clear":
|
||||
cmd_fallback_clear(args)
|
||||
else:
|
||||
print(f"Unknown fallback subcommand: {sub}")
|
||||
print("Use one of: list, add, remove, clear")
|
||||
raise SystemExit(2)
|
||||
+149
-545
@@ -175,60 +175,6 @@ def _request_gateway_self_restart(pid: int) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool:
|
||||
"""Send SIGUSR1 to a gateway PID and wait for it to exit gracefully.
|
||||
|
||||
SIGUSR1 is wired in gateway/run.py to ``request_restart(via_service=True)``
|
||||
which drains in-flight agent runs (up to ``agent.restart_drain_timeout``
|
||||
seconds), then exits with code 75. Both systemd (``Restart=on-failure``
|
||||
+ ``RestartForceExitStatus=75``) and launchd (``KeepAlive.SuccessfulExit
|
||||
= false``) relaunch the process after the graceful exit.
|
||||
|
||||
This is the drain-aware alternative to ``systemctl restart`` / ``SIGTERM``,
|
||||
which SIGKILL in-flight agents after a short timeout.
|
||||
|
||||
Args:
|
||||
pid: Gateway process PID (systemd MainPID, launchd PID, or bare
|
||||
process PID).
|
||||
drain_timeout: Seconds to wait for the process to exit after sending
|
||||
SIGUSR1. Should be slightly larger than the gateway's
|
||||
``agent.restart_drain_timeout`` to allow the drain loop to
|
||||
finish cleanly.
|
||||
|
||||
Returns:
|
||||
True if the PID was signalled and exited within the timeout.
|
||||
False if SIGUSR1 couldn't be sent or the process didn't exit in
|
||||
time (caller should fall back to a harder restart path).
|
||||
"""
|
||||
if not hasattr(signal, "SIGUSR1"):
|
||||
return False
|
||||
if pid <= 0:
|
||||
return False
|
||||
try:
|
||||
os.kill(pid, signal.SIGUSR1)
|
||||
except ProcessLookupError:
|
||||
# Already gone — nothing to drain.
|
||||
return True
|
||||
except (PermissionError, OSError):
|
||||
return False
|
||||
|
||||
import time as _time
|
||||
|
||||
deadline = _time.monotonic() + max(drain_timeout, 1.0)
|
||||
while _time.monotonic() < deadline:
|
||||
try:
|
||||
os.kill(pid, 0) # signal 0 — probe liveness
|
||||
except ProcessLookupError:
|
||||
return True
|
||||
except PermissionError:
|
||||
# Process still exists but we can't signal it. Treat as alive
|
||||
# so the caller falls back.
|
||||
pass
|
||||
_time.sleep(0.5)
|
||||
# Drain didn't finish in time.
|
||||
return False
|
||||
|
||||
|
||||
def _append_unique_pid(pids: list[int], pid: int | None, exclude_pids: set[int]) -> None:
|
||||
if pid is None or pid <= 0:
|
||||
return
|
||||
@@ -387,147 +333,6 @@ def _probe_systemd_service_running(system: bool = False) -> tuple[bool, bool]:
|
||||
return selected_system, result.stdout.strip() == "active"
|
||||
|
||||
|
||||
def _read_systemd_unit_properties(
|
||||
system: bool = False,
|
||||
properties: tuple[str, ...] = (
|
||||
"ActiveState",
|
||||
"SubState",
|
||||
"Result",
|
||||
"ExecMainStatus",
|
||||
),
|
||||
) -> dict[str, str]:
|
||||
"""Return selected ``systemctl show`` properties for the gateway unit."""
|
||||
selected_system = _select_systemd_scope(system)
|
||||
try:
|
||||
result = _run_systemctl(
|
||||
[
|
||||
"show",
|
||||
get_service_name(),
|
||||
"--no-pager",
|
||||
"--property",
|
||||
",".join(properties),
|
||||
],
|
||||
system=selected_system,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10,
|
||||
)
|
||||
except (RuntimeError, subprocess.TimeoutExpired, OSError):
|
||||
return {}
|
||||
|
||||
if result.returncode != 0:
|
||||
return {}
|
||||
|
||||
parsed: dict[str, str] = {}
|
||||
for line in result.stdout.splitlines():
|
||||
if "=" not in line:
|
||||
continue
|
||||
key, value = line.split("=", 1)
|
||||
parsed[key] = value.strip()
|
||||
return parsed
|
||||
|
||||
|
||||
def _wait_for_systemd_service_restart(
|
||||
*,
|
||||
system: bool = False,
|
||||
previous_pid: int | None = None,
|
||||
timeout: float = 60.0,
|
||||
) -> bool:
|
||||
"""Wait for the gateway service to become active after a restart handoff."""
|
||||
import time
|
||||
|
||||
svc = get_service_name()
|
||||
scope_label = _service_scope_label(system).capitalize()
|
||||
deadline = time.time() + timeout
|
||||
|
||||
while time.time() < deadline:
|
||||
props = _read_systemd_unit_properties(system=system)
|
||||
active_state = props.get("ActiveState", "")
|
||||
sub_state = props.get("SubState", "")
|
||||
new_pid = None
|
||||
try:
|
||||
from gateway.status import get_running_pid
|
||||
|
||||
new_pid = get_running_pid()
|
||||
except Exception:
|
||||
new_pid = None
|
||||
|
||||
if active_state == "active":
|
||||
if new_pid and (previous_pid is None or new_pid != previous_pid):
|
||||
print(f"✓ {scope_label} service restarted (PID {new_pid})")
|
||||
return True
|
||||
if previous_pid is None:
|
||||
print(f"✓ {scope_label} service restarted")
|
||||
return True
|
||||
|
||||
if active_state == "activating" and sub_state == "auto-restart":
|
||||
time.sleep(1)
|
||||
continue
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
print(
|
||||
f"⚠ {scope_label} service did not become active within {int(timeout)}s.\n"
|
||||
f" Check status: {'sudo ' if system else ''}hermes gateway status\n"
|
||||
f" Check logs: journalctl {'--user ' if not system else ''}-u {svc} -l --since '2 min ago'"
|
||||
)
|
||||
return False
|
||||
|
||||
|
||||
def _recover_pending_systemd_restart(system: bool = False, previous_pid: int | None = None) -> bool:
|
||||
"""Recover a planned service restart that is stuck in systemd state."""
|
||||
props = _read_systemd_unit_properties(system=system)
|
||||
if not props:
|
||||
return False
|
||||
|
||||
try:
|
||||
from gateway.status import read_runtime_status
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
runtime_state = read_runtime_status() or {}
|
||||
if not runtime_state.get("restart_requested"):
|
||||
return False
|
||||
|
||||
active_state = props.get("ActiveState", "")
|
||||
sub_state = props.get("SubState", "")
|
||||
exec_main_status = props.get("ExecMainStatus", "")
|
||||
result = props.get("Result", "")
|
||||
|
||||
if active_state == "activating" and sub_state == "auto-restart":
|
||||
print("⏳ Service restart already pending — waiting for systemd relaunch...")
|
||||
return _wait_for_systemd_service_restart(
|
||||
system=system,
|
||||
previous_pid=previous_pid,
|
||||
)
|
||||
|
||||
if active_state == "failed" and (
|
||||
exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE)
|
||||
or result == "exit-code"
|
||||
):
|
||||
svc = get_service_name()
|
||||
scope_label = _service_scope_label(system).capitalize()
|
||||
print(f"↻ Clearing failed state for pending {scope_label.lower()} service restart...")
|
||||
_run_systemctl(
|
||||
["reset-failed", svc],
|
||||
system=system,
|
||||
check=False,
|
||||
timeout=30,
|
||||
)
|
||||
_run_systemctl(
|
||||
["start", svc],
|
||||
system=system,
|
||||
check=False,
|
||||
timeout=90,
|
||||
)
|
||||
return _wait_for_systemd_service_restart(
|
||||
system=system,
|
||||
previous_pid=previous_pid,
|
||||
)
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _probe_launchd_service_running() -> bool:
|
||||
if not get_launchd_plist_path().exists():
|
||||
return False
|
||||
@@ -665,8 +470,7 @@ def stop_profile_gateway() -> bool:
|
||||
except (ProcessLookupError, PermissionError):
|
||||
break
|
||||
|
||||
if get_running_pid() is None:
|
||||
remove_pid_file()
|
||||
remove_pid_file()
|
||||
return True
|
||||
|
||||
|
||||
@@ -815,21 +619,6 @@ def get_systemd_unit_path(system: bool = False) -> Path:
|
||||
return Path.home() / ".config" / "systemd" / "user" / f"{name}.service"
|
||||
|
||||
|
||||
class UserSystemdUnavailableError(RuntimeError):
|
||||
"""Raised when ``systemctl --user`` cannot reach the user D-Bus session.
|
||||
|
||||
Typically hit on fresh RHEL/Debian SSH sessions where linger is disabled
|
||||
and no user@.service is running, so ``/run/user/$UID/bus`` never exists.
|
||||
Carries a user-facing remediation message in ``args[0]``.
|
||||
"""
|
||||
|
||||
|
||||
def _user_dbus_socket_path() -> Path:
|
||||
"""Return the expected per-user D-Bus socket path (regardless of existence)."""
|
||||
xdg = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}"
|
||||
return Path(xdg) / "bus"
|
||||
|
||||
|
||||
def _ensure_user_systemd_env() -> None:
|
||||
"""Ensure DBUS_SESSION_BUS_ADDRESS and XDG_RUNTIME_DIR are set for systemctl --user.
|
||||
|
||||
@@ -852,126 +641,6 @@ def _ensure_user_systemd_env() -> None:
|
||||
os.environ["DBUS_SESSION_BUS_ADDRESS"] = f"unix:path={bus_path}"
|
||||
|
||||
|
||||
def _wait_for_user_dbus_socket(timeout: float = 3.0) -> bool:
|
||||
"""Poll for the user D-Bus socket to appear, up to ``timeout`` seconds.
|
||||
|
||||
Linger-enabled user@.service can take a second or two to spawn the socket
|
||||
after ``loginctl enable-linger`` runs. Returns True once the socket exists.
|
||||
"""
|
||||
import time
|
||||
|
||||
deadline = time.monotonic() + timeout
|
||||
while time.monotonic() < deadline:
|
||||
if _user_dbus_socket_path().exists():
|
||||
_ensure_user_systemd_env()
|
||||
return True
|
||||
time.sleep(0.2)
|
||||
return _user_dbus_socket_path().exists()
|
||||
|
||||
|
||||
def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
|
||||
"""Ensure ``systemctl --user`` will reach the user D-Bus session bus.
|
||||
|
||||
No-op when the bus socket is already there (the common case on desktops
|
||||
and linger-enabled servers). On fresh SSH sessions where the socket is
|
||||
missing:
|
||||
|
||||
* If linger is already enabled, wait briefly for user@.service to spawn
|
||||
the socket.
|
||||
* If linger is disabled and ``auto_enable_linger`` is True, try
|
||||
``loginctl enable-linger $USER`` (works as non-root when polkit permits
|
||||
it, otherwise needs sudo).
|
||||
* If the socket is still missing afterwards, raise
|
||||
:class:`UserSystemdUnavailableError` with a precise remediation message.
|
||||
|
||||
Callers should treat the exception as a terminal condition for user-scope
|
||||
systemd operations and surface the message to the user.
|
||||
"""
|
||||
_ensure_user_systemd_env()
|
||||
bus_path = _user_dbus_socket_path()
|
||||
if bus_path.exists():
|
||||
return
|
||||
|
||||
import getpass
|
||||
|
||||
username = getpass.getuser()
|
||||
linger_enabled, linger_detail = get_systemd_linger_status()
|
||||
|
||||
if linger_enabled is True:
|
||||
if _wait_for_user_dbus_socket(timeout=3.0):
|
||||
return
|
||||
# Linger is on but socket still missing — unusual; fall through to error.
|
||||
_raise_user_systemd_unavailable(
|
||||
username,
|
||||
reason="User D-Bus socket is missing even though linger is enabled.",
|
||||
fix_hint=(
|
||||
f" systemctl start user@{os.getuid()}.service\n"
|
||||
" (may require sudo; try again after the command succeeds)"
|
||||
),
|
||||
)
|
||||
|
||||
if auto_enable_linger and shutil.which("loginctl"):
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["loginctl", "enable-linger", username],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
timeout=30,
|
||||
)
|
||||
except Exception as exc:
|
||||
_raise_user_systemd_unavailable(
|
||||
username,
|
||||
reason=f"loginctl enable-linger failed ({exc}).",
|
||||
fix_hint=f" sudo loginctl enable-linger {username}",
|
||||
)
|
||||
else:
|
||||
if result.returncode == 0:
|
||||
if _wait_for_user_dbus_socket(timeout=5.0):
|
||||
print(f"✓ Enabled linger for {username} — user D-Bus now available")
|
||||
return
|
||||
# enable-linger succeeded but the socket never appeared.
|
||||
_raise_user_systemd_unavailable(
|
||||
username,
|
||||
reason="Linger was enabled, but the user D-Bus socket did not appear.",
|
||||
fix_hint=(
|
||||
" Log out and log back in, then re-run the command.\n"
|
||||
f" Or reboot and run: systemctl --user start {get_service_name()}"
|
||||
),
|
||||
)
|
||||
detail = (result.stderr or result.stdout or f"exit {result.returncode}").strip()
|
||||
_raise_user_systemd_unavailable(
|
||||
username,
|
||||
reason=f"loginctl enable-linger was denied: {detail}",
|
||||
fix_hint=f" sudo loginctl enable-linger {username}",
|
||||
)
|
||||
|
||||
_raise_user_systemd_unavailable(
|
||||
username,
|
||||
reason=(
|
||||
"User D-Bus session is not available "
|
||||
f"({linger_detail or 'linger disabled'})."
|
||||
),
|
||||
fix_hint=f" sudo loginctl enable-linger {username}",
|
||||
)
|
||||
|
||||
|
||||
def _raise_user_systemd_unavailable(username: str, *, reason: str, fix_hint: str) -> None:
|
||||
"""Build a user-facing error message and raise UserSystemdUnavailableError."""
|
||||
msg = (
|
||||
f"{reason}\n"
|
||||
" systemctl --user cannot reach the user D-Bus session in this shell.\n"
|
||||
"\n"
|
||||
" To fix:\n"
|
||||
f"{fix_hint}\n"
|
||||
"\n"
|
||||
" Alternative: run the gateway in the foreground (stays up until\n"
|
||||
" you exit / close the terminal):\n"
|
||||
" hermes gateway run"
|
||||
)
|
||||
raise UserSystemdUnavailableError(msg)
|
||||
|
||||
|
||||
def _systemctl_cmd(system: bool = False) -> list[str]:
|
||||
if not system:
|
||||
_ensure_user_systemd_env()
|
||||
@@ -1523,14 +1192,7 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
|
||||
path_entries.append(resolved_node_dir)
|
||||
|
||||
common_bin_paths = ["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"]
|
||||
# systemd's TimeoutStopSec must exceed the gateway's drain_timeout so
|
||||
# there's budget left for post-interrupt cleanup (tool subprocess kill,
|
||||
# adapter disconnect, session DB close) before systemd escalates to
|
||||
# SIGKILL on the cgroup — otherwise bash/sleep tool-call children left
|
||||
# by a force-interrupted agent get reaped by systemd instead of us
|
||||
# (#8202). 30s of headroom covers the worst case we've observed.
|
||||
_drain_timeout = int(_get_restart_drain_timeout() or 0)
|
||||
restart_timeout = max(60, _drain_timeout) + 30
|
||||
restart_timeout = max(60, int(_get_restart_drain_timeout() or 0))
|
||||
|
||||
if system:
|
||||
username, group_name, home_dir = _system_service_identity(run_as_user)
|
||||
@@ -1819,11 +1481,6 @@ def systemd_start(system: bool = False):
|
||||
system = _select_systemd_scope(system)
|
||||
if system:
|
||||
_require_root_for_system_service("start")
|
||||
else:
|
||||
# Fail fast with actionable guidance if the user D-Bus session is not
|
||||
# reachable (common on fresh RHEL/Debian SSH sessions without linger).
|
||||
# Raises UserSystemdUnavailableError with a remediation message.
|
||||
_preflight_user_systemd()
|
||||
refresh_systemd_unit_if_needed(system=system)
|
||||
_run_systemctl(["start", get_service_name()], system=system, check=True, timeout=30)
|
||||
print(f"✓ {_service_scope_label(system).capitalize()} service started")
|
||||
@@ -1843,16 +1500,19 @@ def systemd_restart(system: bool = False):
|
||||
system = _select_systemd_scope(system)
|
||||
if system:
|
||||
_require_root_for_system_service("restart")
|
||||
else:
|
||||
_preflight_user_systemd()
|
||||
refresh_systemd_unit_if_needed(system=system)
|
||||
from gateway.status import get_running_pid
|
||||
|
||||
pid = get_running_pid()
|
||||
if pid is not None and _request_gateway_self_restart(pid):
|
||||
# SIGUSR1 sent — the gateway will drain active agents, exit with
|
||||
# code 75, and systemd will restart it after RestartSec (30s).
|
||||
# Wait for the old process to die and the new one to become active
|
||||
# so the CLI doesn't return while the service is still restarting.
|
||||
import time
|
||||
scope_label = _service_scope_label(system).capitalize()
|
||||
svc = get_service_name()
|
||||
scope_cmd = _systemctl_cmd(system)
|
||||
|
||||
# Phase 1: wait for old process to exit (drain + shutdown)
|
||||
print(f"⏳ {scope_label} service draining active work...")
|
||||
@@ -1866,41 +1526,48 @@ def systemd_restart(system: bool = False):
|
||||
else:
|
||||
print(f"⚠ Old process (PID {pid}) still alive after 90s")
|
||||
|
||||
# The gateway exits with code 75 for a planned service restart.
|
||||
# systemd can sit in the RestartSec window or even wedge itself into a
|
||||
# failed/rate-limited state if the operator asks for another restart in
|
||||
# the middle of that handoff. Clear any stale failed state and kick the
|
||||
# unit immediately so `hermes gateway restart` behaves idempotently.
|
||||
_run_systemctl(
|
||||
["reset-failed", svc],
|
||||
system=system,
|
||||
check=False,
|
||||
timeout=30,
|
||||
)
|
||||
_run_systemctl(
|
||||
["start", svc],
|
||||
system=system,
|
||||
check=False,
|
||||
timeout=90,
|
||||
)
|
||||
_wait_for_systemd_service_restart(system=system, previous_pid=pid)
|
||||
return
|
||||
# Phase 2: wait for systemd to start the new process
|
||||
print(f"⏳ Waiting for {svc} to restart...")
|
||||
deadline = time.time() + 60
|
||||
while time.time() < deadline:
|
||||
try:
|
||||
result = subprocess.run(
|
||||
scope_cmd + ["is-active", svc],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
if result.stdout.strip() == "active":
|
||||
# Verify it's a NEW process, not the old one somehow
|
||||
new_pid = get_running_pid()
|
||||
if new_pid and new_pid != pid:
|
||||
print(f"✓ {scope_label} service restarted (PID {new_pid})")
|
||||
return
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError):
|
||||
pass
|
||||
time.sleep(2)
|
||||
|
||||
if _recover_pending_systemd_restart(system=system, previous_pid=pid):
|
||||
# Timed out — check final state
|
||||
try:
|
||||
result = subprocess.run(
|
||||
scope_cmd + ["is-active", svc],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
if result.stdout.strip() == "active":
|
||||
print(f"✓ {scope_label} service restarted")
|
||||
return
|
||||
except Exception:
|
||||
pass
|
||||
print(
|
||||
f"⚠ {scope_label} service did not become active within 60s.\n"
|
||||
f" Check status: {'sudo ' if system else ''}hermes gateway status\n"
|
||||
f" Check logs: journalctl {'--user ' if not system else ''}-u {svc} --since '2 min ago'"
|
||||
)
|
||||
return
|
||||
|
||||
_run_systemctl(
|
||||
["reset-failed", get_service_name()],
|
||||
system=system,
|
||||
check=False,
|
||||
timeout=30,
|
||||
)
|
||||
_run_systemctl(["reload-or-restart", get_service_name()], system=system, check=True, timeout=90)
|
||||
print(f"✓ {_service_scope_label(system).capitalize()} service restarted")
|
||||
|
||||
|
||||
|
||||
def systemd_status(deep: bool = False, system: bool = False, full: bool = False):
|
||||
def systemd_status(deep: bool = False, system: bool = False):
|
||||
system = _select_systemd_scope(system)
|
||||
unit_path = get_systemd_unit_path(system=system)
|
||||
scope_flag = " --system" if system else ""
|
||||
@@ -1923,12 +1590,8 @@ def systemd_status(deep: bool = False, system: bool = False, full: bool = False)
|
||||
print(f" Run: {'sudo ' if system else ''}hermes gateway restart{scope_flag} # auto-refreshes the unit")
|
||||
print()
|
||||
|
||||
status_cmd = ["status", get_service_name(), "--no-pager"]
|
||||
if full:
|
||||
status_cmd.append("-l")
|
||||
|
||||
_run_systemctl(
|
||||
status_cmd,
|
||||
["status", get_service_name(), "--no-pager"],
|
||||
system=system,
|
||||
capture_output=False,
|
||||
timeout=10,
|
||||
@@ -1961,19 +1624,6 @@ def systemd_status(deep: bool = False, system: bool = False, full: bool = False)
|
||||
for line in runtime_lines:
|
||||
print(f" {line}")
|
||||
|
||||
unit_props = _read_systemd_unit_properties(system=system)
|
||||
active_state = unit_props.get("ActiveState", "")
|
||||
sub_state = unit_props.get("SubState", "")
|
||||
exec_main_status = unit_props.get("ExecMainStatus", "")
|
||||
result_code = unit_props.get("Result", "")
|
||||
if active_state == "activating" and sub_state == "auto-restart":
|
||||
print(" ⏳ Restart pending: systemd is waiting to relaunch the gateway")
|
||||
elif active_state == "failed" and exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE):
|
||||
print(" ⚠ Planned restart is stuck in systemd failed state (exit 75)")
|
||||
print(f" Run: systemctl {'--user ' if not system else ''}reset-failed {get_service_name()} && {'sudo ' if system else ''}hermes gateway start{scope_flag}")
|
||||
elif active_state == "failed" and result_code:
|
||||
print(f" ⚠ Systemd unit result: {result_code}")
|
||||
|
||||
if system:
|
||||
print("✓ System service starts at boot without requiring systemd linger")
|
||||
elif deep:
|
||||
@@ -1989,10 +1639,7 @@ def systemd_status(deep: bool = False, system: bool = False, full: bool = False)
|
||||
if deep:
|
||||
print()
|
||||
print("Recent logs:")
|
||||
log_cmd = _journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"]
|
||||
if full:
|
||||
log_cmd.append("-l")
|
||||
subprocess.run(log_cmd, timeout=10)
|
||||
subprocess.run(_journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"], timeout=10)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
@@ -2992,120 +2639,9 @@ def _setup_dingtalk():
|
||||
|
||||
|
||||
def _setup_wecom():
|
||||
"""Interactive setup for WeCom — scan QR code or manual credential input."""
|
||||
print()
|
||||
print(color(" ─── 💬 WeCom (Enterprise WeChat) Setup ───", Colors.CYAN))
|
||||
|
||||
existing_bot_id = get_env_value("WECOM_BOT_ID")
|
||||
existing_secret = get_env_value("WECOM_SECRET")
|
||||
if existing_bot_id and existing_secret:
|
||||
print()
|
||||
print_success("WeCom is already configured.")
|
||||
if not prompt_yes_no(" Reconfigure WeCom?", False):
|
||||
return
|
||||
|
||||
# ── Choose setup method ──
|
||||
print()
|
||||
method_choices = [
|
||||
"Scan QR code to obtain Bot ID and Secret automatically (recommended)",
|
||||
"Enter existing Bot ID and Secret manually",
|
||||
]
|
||||
method_idx = prompt_choice(" How would you like to set up WeCom?", method_choices, 0)
|
||||
|
||||
bot_id = None
|
||||
secret = None
|
||||
|
||||
if method_idx == 0:
|
||||
# ── QR scan flow ──
|
||||
try:
|
||||
from gateway.platforms.wecom import qr_scan_for_bot_info
|
||||
except Exception as exc:
|
||||
print_error(f" WeCom QR scan import failed: {exc}")
|
||||
qr_scan_for_bot_info = None
|
||||
|
||||
if qr_scan_for_bot_info is not None:
|
||||
try:
|
||||
credentials = qr_scan_for_bot_info()
|
||||
except KeyboardInterrupt:
|
||||
print()
|
||||
print_warning(" WeCom setup cancelled.")
|
||||
return
|
||||
except Exception as exc:
|
||||
print_warning(f" QR scan failed: {exc}")
|
||||
credentials = None
|
||||
if credentials:
|
||||
bot_id = credentials.get("bot_id", "")
|
||||
secret = credentials.get("secret", "")
|
||||
print_success(" ✔ QR scan successful! Bot ID and Secret obtained.")
|
||||
|
||||
if not bot_id or not secret:
|
||||
print_info(" QR scan did not complete. Continuing with manual input.")
|
||||
bot_id = None
|
||||
secret = None
|
||||
|
||||
# ── Manual credential input ──
|
||||
if not bot_id or not secret:
|
||||
print()
|
||||
print_info(" 1. Go to WeCom Application → Workspace → Smart Robot -> Create smart robots")
|
||||
print_info(" 2. Select API Mode")
|
||||
print_info(" 3. Copy the Bot ID and Secret from the bot's credentials info")
|
||||
print_info(" 4. The bot connects via WebSocket — no public endpoint needed")
|
||||
print()
|
||||
bot_id = prompt(" Bot ID", password=False)
|
||||
if not bot_id:
|
||||
print_warning(" Skipped — WeCom won't work without a Bot ID.")
|
||||
return
|
||||
secret = prompt(" Secret", password=True)
|
||||
if not secret:
|
||||
print_warning(" Skipped — WeCom won't work without a Secret.")
|
||||
return
|
||||
|
||||
# ── Save core credentials ──
|
||||
save_env_value("WECOM_BOT_ID", bot_id)
|
||||
save_env_value("WECOM_SECRET", secret)
|
||||
|
||||
# ── Allowed users (deny-by-default security) ──
|
||||
print()
|
||||
print_info(" The gateway DENIES all users by default for security.")
|
||||
print_info(" Enter user IDs to create an allowlist, or leave empty.")
|
||||
allowed = prompt(" Allowed user IDs (comma-separated, or empty)", password=False)
|
||||
if allowed:
|
||||
cleaned = allowed.replace(" ", "")
|
||||
save_env_value("WECOM_ALLOWED_USERS", cleaned)
|
||||
print_success(" Saved — only these users can interact with the bot.")
|
||||
else:
|
||||
print()
|
||||
access_choices = [
|
||||
"Enable open access (anyone can message the bot)",
|
||||
"Use DM pairing (unknown users request access, you approve with 'hermes pairing approve')",
|
||||
"Disable direct messages",
|
||||
"Skip for now (bot will deny all users until configured)",
|
||||
]
|
||||
access_idx = prompt_choice(" How should unauthorized users be handled?", access_choices, 1)
|
||||
if access_idx == 0:
|
||||
save_env_value("WECOM_DM_POLICY", "open")
|
||||
save_env_value("GATEWAY_ALLOW_ALL_USERS", "true")
|
||||
print_warning(" Open access enabled — anyone can use your bot!")
|
||||
elif access_idx == 1:
|
||||
save_env_value("WECOM_DM_POLICY", "pairing")
|
||||
print_success(" DM pairing mode — users will receive a code to request access.")
|
||||
print_info(" Approve with: hermes pairing approve <platform> <code>")
|
||||
elif access_idx == 2:
|
||||
save_env_value("WECOM_DM_POLICY", "disabled")
|
||||
print_warning(" Direct messages disabled.")
|
||||
else:
|
||||
print_info(" Skipped — configure later with 'hermes gateway setup'")
|
||||
|
||||
# ── Home channel (optional) ──
|
||||
print()
|
||||
print_info(" Chat ID for scheduled results and notifications.")
|
||||
home = prompt(" Home chat ID (optional, for cron/notifications)", password=False)
|
||||
if home:
|
||||
save_env_value("WECOM_HOME_CHANNEL", home)
|
||||
print_success(f" Home channel set to {home}")
|
||||
|
||||
print()
|
||||
print_success("💬 WeCom configured!")
|
||||
"""Configure WeCom (Enterprise WeChat) via the standard platform setup."""
|
||||
wecom_platform = next(p for p in _PLATFORMS if p["key"] == "wecom")
|
||||
_setup_standard_platform(wecom_platform)
|
||||
|
||||
|
||||
def _is_service_installed() -> bool:
|
||||
@@ -3485,8 +3021,7 @@ def _setup_qqbot():
|
||||
if method_idx == 0:
|
||||
# ── QR scan-to-configure ──
|
||||
try:
|
||||
from gateway.platforms.qqbot import qr_register
|
||||
credentials = qr_register()
|
||||
credentials = _qqbot_qr_flow()
|
||||
except KeyboardInterrupt:
|
||||
print()
|
||||
print_warning(" QQ Bot setup cancelled.")
|
||||
@@ -3568,6 +3103,106 @@ def _setup_qqbot():
|
||||
print_info(f" App ID: {credentials['app_id']}")
|
||||
|
||||
|
||||
def _qqbot_render_qr(url: str) -> bool:
|
||||
"""Try to render a QR code in the terminal. Returns True if successful."""
|
||||
try:
|
||||
import qrcode as _qr
|
||||
qr = _qr.QRCode(border=1,error_correction=_qr.constants.ERROR_CORRECT_L)
|
||||
qr.add_data(url)
|
||||
qr.make(fit=True)
|
||||
qr.print_ascii(invert=True)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _qqbot_qr_flow():
|
||||
"""Run the QR-code scan-to-configure flow.
|
||||
|
||||
Returns a dict with app_id, client_secret, user_openid on success,
|
||||
or None on failure/cancel.
|
||||
"""
|
||||
try:
|
||||
from gateway.platforms.qqbot import (
|
||||
create_bind_task, poll_bind_result, build_connect_url,
|
||||
decrypt_secret, BindStatus,
|
||||
)
|
||||
from gateway.platforms.qqbot.constants import ONBOARD_POLL_INTERVAL
|
||||
except Exception as exc:
|
||||
print_error(f" QQBot onboard import failed: {exc}")
|
||||
return None
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
MAX_REFRESHES = 3
|
||||
refresh_count = 0
|
||||
|
||||
while refresh_count <= MAX_REFRESHES:
|
||||
loop = asyncio.new_event_loop()
|
||||
|
||||
# ── Create bind task ──
|
||||
try:
|
||||
task_id, aes_key = loop.run_until_complete(create_bind_task())
|
||||
except Exception as e:
|
||||
print_warning(f" Failed to create bind task: {e}")
|
||||
loop.close()
|
||||
return None
|
||||
|
||||
url = build_connect_url(task_id)
|
||||
|
||||
# ── Display QR code + URL ──
|
||||
print()
|
||||
if _qqbot_render_qr(url):
|
||||
print(f" Scan the QR code above, or open this URL directly:\n {url}")
|
||||
else:
|
||||
print(f" Open this URL in QQ on your phone:\n {url}")
|
||||
print_info(" Tip: pip install qrcode to show a scannable QR code here")
|
||||
|
||||
# ── Poll loop (silent — keep QR visible at bottom) ──
|
||||
try:
|
||||
while True:
|
||||
try:
|
||||
status, app_id, encrypted_secret, user_openid = loop.run_until_complete(
|
||||
poll_bind_result(task_id)
|
||||
)
|
||||
except Exception:
|
||||
time.sleep(ONBOARD_POLL_INTERVAL)
|
||||
continue
|
||||
|
||||
if status == BindStatus.COMPLETED:
|
||||
client_secret = decrypt_secret(encrypted_secret, aes_key)
|
||||
print()
|
||||
print_success(f" QR scan complete! (App ID: {app_id})")
|
||||
if user_openid:
|
||||
print_info(f" Scanner's OpenID: {user_openid}")
|
||||
return {
|
||||
"app_id": app_id,
|
||||
"client_secret": client_secret,
|
||||
"user_openid": user_openid,
|
||||
}
|
||||
|
||||
if status == BindStatus.EXPIRED:
|
||||
refresh_count += 1
|
||||
if refresh_count > MAX_REFRESHES:
|
||||
print()
|
||||
print_warning(f" QR code expired {MAX_REFRESHES} times — giving up.")
|
||||
return None
|
||||
print()
|
||||
print_warning(f" QR code expired, refreshing... ({refresh_count}/{MAX_REFRESHES})")
|
||||
loop.close()
|
||||
break # outer while creates a new task
|
||||
|
||||
time.sleep(ONBOARD_POLL_INTERVAL)
|
||||
except KeyboardInterrupt:
|
||||
loop.close()
|
||||
raise
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _setup_signal():
|
||||
"""Interactive setup for Signal messenger."""
|
||||
import shutil
|
||||
@@ -3719,10 +3354,6 @@ def gateway_setup():
|
||||
systemd_start()
|
||||
elif is_macos():
|
||||
launchd_start()
|
||||
except UserSystemdUnavailableError as e:
|
||||
print_error(" Failed to start — user systemd not reachable:")
|
||||
for line in str(e).splitlines():
|
||||
print(f" {line}")
|
||||
except subprocess.CalledProcessError as e:
|
||||
print_error(f" Failed to start: {e}")
|
||||
else:
|
||||
@@ -3759,8 +3390,6 @@ def gateway_setup():
|
||||
_setup_feishu()
|
||||
elif platform["key"] == "qqbot":
|
||||
_setup_qqbot()
|
||||
elif platform["key"] == "wecom":
|
||||
_setup_wecom()
|
||||
else:
|
||||
_setup_standard_platform(platform)
|
||||
|
||||
@@ -3787,10 +3416,6 @@ def gateway_setup():
|
||||
else:
|
||||
stop_profile_gateway()
|
||||
print_info("Start manually: hermes gateway")
|
||||
except UserSystemdUnavailableError as e:
|
||||
print_error(" Restart failed — user systemd not reachable:")
|
||||
for line in str(e).splitlines():
|
||||
print(f" {line}")
|
||||
except subprocess.CalledProcessError as e:
|
||||
print_error(f" Restart failed: {e}")
|
||||
elif service_installed:
|
||||
@@ -3800,10 +3425,6 @@ def gateway_setup():
|
||||
systemd_start()
|
||||
elif is_macos():
|
||||
launchd_start()
|
||||
except UserSystemdUnavailableError as e:
|
||||
print_error(" Start failed — user systemd not reachable:")
|
||||
for line in str(e).splitlines():
|
||||
print(f" {line}")
|
||||
except subprocess.CalledProcessError as e:
|
||||
print_error(f" Start failed: {e}")
|
||||
else:
|
||||
@@ -3827,10 +3448,6 @@ def gateway_setup():
|
||||
systemd_start(system=installed_scope == "system")
|
||||
else:
|
||||
launchd_start()
|
||||
except UserSystemdUnavailableError as e:
|
||||
print_error(" Start failed — user systemd not reachable:")
|
||||
for line in str(e).splitlines():
|
||||
print(f" {line}")
|
||||
except subprocess.CalledProcessError as e:
|
||||
print_error(f" Start failed: {e}")
|
||||
except subprocess.CalledProcessError as e:
|
||||
@@ -3868,18 +3485,6 @@ def gateway_setup():
|
||||
|
||||
def gateway_command(args):
|
||||
"""Handle gateway subcommands."""
|
||||
try:
|
||||
return _gateway_command_inner(args)
|
||||
except UserSystemdUnavailableError as e:
|
||||
# Clean, actionable message instead of a traceback when the user D-Bus
|
||||
# session is unreachable (fresh SSH shell, no linger, container, etc.).
|
||||
print_error("User systemd not reachable:")
|
||||
for line in str(e).splitlines():
|
||||
print(f" {line}")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _gateway_command_inner(args):
|
||||
subcmd = getattr(args, 'gateway_command', None)
|
||||
|
||||
# Default to run if no subcommand
|
||||
@@ -4143,13 +3748,12 @@ def _gateway_command_inner(args):
|
||||
|
||||
elif subcmd == "status":
|
||||
deep = getattr(args, 'deep', False)
|
||||
full = getattr(args, 'full', False)
|
||||
system = getattr(args, 'system', False)
|
||||
snapshot = get_gateway_runtime_snapshot(system=system)
|
||||
|
||||
# Check for service first
|
||||
if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
|
||||
systemd_status(deep, system=system, full=full)
|
||||
systemd_status(deep, system=system)
|
||||
_print_gateway_process_mismatch(snapshot)
|
||||
elif is_macos() and get_launchd_plist_path().exists():
|
||||
launchd_status(deep)
|
||||
|
||||
@@ -125,7 +125,6 @@ _DEFAULT_PAYLOADS = {
|
||||
"task_id": "test-task",
|
||||
"tool_call_id": "test-call",
|
||||
"result": '{"output": "hello"}',
|
||||
"duration_ms": 42,
|
||||
},
|
||||
"pre_llm_call": {
|
||||
"session_id": "test-session",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
+79
-1083
File diff suppressed because it is too large
Load Diff
@@ -1,329 +0,0 @@
|
||||
"""Remote model catalog fetcher.
|
||||
|
||||
The Hermes docs site hosts a JSON manifest of curated models for providers
|
||||
we want to update without shipping a release (currently OpenRouter and
|
||||
Nous Portal). This module fetches, validates, and caches that manifest,
|
||||
falling back to the in-repo hardcoded lists when the network is unavailable.
|
||||
|
||||
Pipeline
|
||||
--------
|
||||
1. ``get_catalog()`` — returns a parsed manifest dict.
|
||||
- Checks in-process cache (invalidated by TTL).
|
||||
- Reads disk cache at ``~/.hermes/cache/model_catalog.json``.
|
||||
- Fetches the master URL if disk cache is stale or missing.
|
||||
- On any fetch failure, keeps using the stale cache (or empty dict).
|
||||
|
||||
2. ``get_curated_openrouter_models()`` / ``get_curated_nous_models()`` —
|
||||
thin accessors returning the shapes existing callers expect. Each
|
||||
falls back to the in-repo hardcoded list on any lookup failure.
|
||||
|
||||
Schema (version 1)
|
||||
------------------
|
||||
::
|
||||
|
||||
{
|
||||
"version": 1,
|
||||
"updated_at": "2026-04-25T22:00:00Z",
|
||||
"metadata": {...}, # free-form
|
||||
"providers": {
|
||||
"openrouter": {
|
||||
"metadata": {...}, # free-form
|
||||
"models": [
|
||||
{"id": "vendor/model", "description": "recommended",
|
||||
"metadata": {...}} # free-form, model-level
|
||||
]
|
||||
},
|
||||
"nous": {...}
|
||||
}
|
||||
}
|
||||
|
||||
Unknown fields are ignored — extra metadata can be added at either level
|
||||
without bumping ``version``. ``version`` bumps are reserved for
|
||||
breaking changes (renaming ``providers``, changing ``models`` shape).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from hermes_cli import __version__ as _HERMES_VERSION
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
DEFAULT_CATALOG_URL = (
|
||||
"https://hermes-agent.nousresearch.com/docs/api/model-catalog.json"
|
||||
)
|
||||
DEFAULT_TTL_HOURS = 24
|
||||
DEFAULT_FETCH_TIMEOUT = 8.0
|
||||
SUPPORTED_SCHEMA_VERSION = 1
|
||||
|
||||
_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"
|
||||
|
||||
# In-process cache to avoid repeated disk + parse work across multiple
|
||||
# calls within the same session. Invalidated by TTL against the disk file's
|
||||
# mtime, so calling code never has to think about this.
|
||||
_catalog_cache: dict[str, Any] | None = None
|
||||
_catalog_cache_source_mtime: float = 0.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _load_catalog_config() -> dict[str, Any]:
|
||||
"""Load the ``model_catalog`` config block with defaults filled in."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config() or {}
|
||||
except Exception:
|
||||
cfg = {}
|
||||
|
||||
raw = cfg.get("model_catalog")
|
||||
if not isinstance(raw, dict):
|
||||
raw = {}
|
||||
|
||||
return {
|
||||
"enabled": bool(raw.get("enabled", True)),
|
||||
"url": str(raw.get("url") or DEFAULT_CATALOG_URL),
|
||||
"ttl_hours": float(raw.get("ttl_hours") or DEFAULT_TTL_HOURS),
|
||||
"providers": raw.get("providers") if isinstance(raw.get("providers"), dict) else {},
|
||||
}
|
||||
|
||||
|
||||
def _cache_path() -> Path:
|
||||
"""Return the disk cache path. Import lazily so tests can monkeypatch home."""
|
||||
from hermes_constants import get_hermes_home
|
||||
return get_hermes_home() / "cache" / "model_catalog.json"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fetch + validate + cache
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _fetch_manifest(url: str, timeout: float) -> dict[str, Any] | None:
|
||||
"""HTTP GET the manifest URL and return a parsed dict, or None on failure."""
|
||||
try:
|
||||
req = urllib.request.Request(
|
||||
url,
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"User-Agent": _HERMES_USER_AGENT,
|
||||
},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
data = json.loads(resp.read().decode())
|
||||
except (urllib.error.URLError, TimeoutError, json.JSONDecodeError, OSError) as exc:
|
||||
logger.info("model catalog fetch failed (%s): %s", url, exc)
|
||||
return None
|
||||
except Exception as exc: # pragma: no cover — defensive
|
||||
logger.info("model catalog fetch errored (%s): %s", url, exc)
|
||||
return None
|
||||
|
||||
if not _validate_manifest(data):
|
||||
logger.info("model catalog at %s failed schema validation", url)
|
||||
return None
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def _validate_manifest(data: Any) -> bool:
|
||||
"""Return True when ``data`` matches the minimum manifest shape."""
|
||||
if not isinstance(data, dict):
|
||||
return False
|
||||
version = data.get("version")
|
||||
if not isinstance(version, int) or version > SUPPORTED_SCHEMA_VERSION:
|
||||
# Future schema version we don't understand — refuse rather than
|
||||
# guess. Older schemas (version < 1) aren't supported either.
|
||||
return False
|
||||
providers = data.get("providers")
|
||||
if not isinstance(providers, dict):
|
||||
return False
|
||||
for pname, pblock in providers.items():
|
||||
if not isinstance(pname, str) or not isinstance(pblock, dict):
|
||||
return False
|
||||
models = pblock.get("models")
|
||||
if not isinstance(models, list):
|
||||
return False
|
||||
for m in models:
|
||||
if not isinstance(m, dict):
|
||||
return False
|
||||
if not isinstance(m.get("id"), str) or not m["id"].strip():
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _read_disk_cache() -> tuple[dict[str, Any] | None, float]:
|
||||
"""Return ``(data_or_none, mtime)``. mtime is 0 if file is missing."""
|
||||
path = _cache_path()
|
||||
try:
|
||||
mtime = path.stat().st_mtime
|
||||
except (OSError, FileNotFoundError):
|
||||
return (None, 0.0)
|
||||
try:
|
||||
with open(path) as fh:
|
||||
data = json.load(fh)
|
||||
except (OSError, json.JSONDecodeError):
|
||||
return (None, 0.0)
|
||||
if not _validate_manifest(data):
|
||||
return (None, 0.0)
|
||||
return (data, mtime)
|
||||
|
||||
|
||||
def _write_disk_cache(data: dict[str, Any]) -> None:
|
||||
path = _cache_path()
|
||||
try:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp = path.with_suffix(path.suffix + ".tmp")
|
||||
with open(tmp, "w") as fh:
|
||||
json.dump(data, fh, indent=2)
|
||||
fh.write("\n")
|
||||
os.replace(tmp, path)
|
||||
except OSError as exc:
|
||||
logger.info("model catalog cache write failed: %s", exc)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def get_catalog(*, force_refresh: bool = False) -> dict[str, Any]:
|
||||
"""Return the parsed model catalog manifest, or an empty dict on failure.
|
||||
|
||||
Callers should treat a missing provider/model as "use the in-repo fallback"
|
||||
— never raise from this function so the CLI keeps working offline.
|
||||
"""
|
||||
global _catalog_cache, _catalog_cache_source_mtime
|
||||
|
||||
cfg = _load_catalog_config()
|
||||
if not cfg["enabled"]:
|
||||
return {}
|
||||
|
||||
ttl_seconds = max(0.0, cfg["ttl_hours"] * 3600.0)
|
||||
|
||||
disk_data, disk_mtime = _read_disk_cache()
|
||||
now = time.time()
|
||||
disk_fresh = disk_data is not None and (now - disk_mtime) < ttl_seconds
|
||||
|
||||
# In-process cache hit: disk hasn't changed since we loaded it and still fresh.
|
||||
if (
|
||||
not force_refresh
|
||||
and _catalog_cache is not None
|
||||
and disk_data is not None
|
||||
and disk_mtime == _catalog_cache_source_mtime
|
||||
and disk_fresh
|
||||
):
|
||||
return _catalog_cache
|
||||
|
||||
# Disk is fresh enough — use it without a network hit.
|
||||
if not force_refresh and disk_fresh and disk_data is not None:
|
||||
_catalog_cache = disk_data
|
||||
_catalog_cache_source_mtime = disk_mtime
|
||||
return disk_data
|
||||
|
||||
# Need to (re)fetch. If it fails, fall back to any stale disk copy.
|
||||
fetched = _fetch_manifest(cfg["url"], DEFAULT_FETCH_TIMEOUT)
|
||||
if fetched is not None:
|
||||
_write_disk_cache(fetched)
|
||||
new_disk_data, new_mtime = _read_disk_cache()
|
||||
if new_disk_data is not None:
|
||||
_catalog_cache = new_disk_data
|
||||
_catalog_cache_source_mtime = new_mtime
|
||||
return new_disk_data
|
||||
_catalog_cache = fetched
|
||||
_catalog_cache_source_mtime = now
|
||||
return fetched
|
||||
|
||||
if disk_data is not None:
|
||||
_catalog_cache = disk_data
|
||||
_catalog_cache_source_mtime = disk_mtime
|
||||
return disk_data
|
||||
|
||||
return {}
|
||||
|
||||
|
||||
def _fetch_provider_override(provider: str) -> dict[str, Any] | None:
|
||||
"""If ``model_catalog.providers.<name>.url`` is set, fetch that instead."""
|
||||
cfg = _load_catalog_config()
|
||||
if not cfg["enabled"]:
|
||||
return None
|
||||
provider_cfg = cfg["providers"].get(provider)
|
||||
if not isinstance(provider_cfg, dict):
|
||||
return None
|
||||
override_url = provider_cfg.get("url")
|
||||
if not isinstance(override_url, str) or not override_url.strip():
|
||||
return None
|
||||
# Override fetches skip the disk cache because they're usually
|
||||
# third-party self-hosted. Re-request on every call but with a short
|
||||
# timeout so they don't block the picker.
|
||||
return _fetch_manifest(override_url.strip(), DEFAULT_FETCH_TIMEOUT)
|
||||
|
||||
|
||||
def _get_provider_block(provider: str) -> dict[str, Any] | None:
|
||||
"""Return the provider's manifest block, respecting per-provider overrides."""
|
||||
override = _fetch_provider_override(provider)
|
||||
if override is not None:
|
||||
block = override.get("providers", {}).get(provider)
|
||||
if isinstance(block, dict):
|
||||
return block
|
||||
|
||||
catalog = get_catalog()
|
||||
if not catalog:
|
||||
return None
|
||||
block = catalog.get("providers", {}).get(provider)
|
||||
return block if isinstance(block, dict) else None
|
||||
|
||||
|
||||
def get_curated_openrouter_models() -> list[tuple[str, str]] | None:
|
||||
"""Return OpenRouter's curated ``[(id, description), ...]`` from the manifest.
|
||||
|
||||
Returns ``None`` when the manifest is unavailable, so callers can fall
|
||||
back to their hardcoded list.
|
||||
"""
|
||||
block = _get_provider_block("openrouter")
|
||||
if not block:
|
||||
return None
|
||||
out: list[tuple[str, str]] = []
|
||||
for m in block.get("models", []):
|
||||
mid = str(m.get("id") or "").strip()
|
||||
if not mid:
|
||||
continue
|
||||
desc = str(m.get("description") or "")
|
||||
out.append((mid, desc))
|
||||
return out or None
|
||||
|
||||
|
||||
def get_curated_nous_models() -> list[str] | None:
|
||||
"""Return Nous Portal's curated list of model ids from the manifest.
|
||||
|
||||
Returns ``None`` when the manifest is unavailable.
|
||||
"""
|
||||
block = _get_provider_block("nous")
|
||||
if not block:
|
||||
return None
|
||||
out: list[str] = []
|
||||
for m in block.get("models", []):
|
||||
mid = str(m.get("id") or "").strip()
|
||||
if mid:
|
||||
out.append(mid)
|
||||
return out or None
|
||||
|
||||
|
||||
def reset_cache() -> None:
|
||||
"""Clear the in-process cache. Used by tests and ``hermes model --refresh``."""
|
||||
global _catalog_cache, _catalog_cache_source_mtime
|
||||
_catalog_cache = None
|
||||
_catalog_cache_source_mtime = 0.0
|
||||
@@ -12,12 +12,8 @@ Different LLM providers expect model identifiers in different formats:
|
||||
model IDs, but Claude still uses hyphenated native names like
|
||||
``claude-sonnet-4-6``.
|
||||
- **OpenCode Go** preserves dots in model names: ``minimax-m2.7``.
|
||||
- **DeepSeek** accepts ``deepseek-chat`` (V3), ``deepseek-reasoner``
|
||||
(R1-family), and the first-class V-series IDs (``deepseek-v4-pro``,
|
||||
``deepseek-v4-flash``, and any future ``deepseek-v<N>-*``). Older
|
||||
Hermes revisions folded every non-reasoner input into
|
||||
``deepseek-chat``, which on aggregators routes to V3 — so a user
|
||||
picking V4 Pro was silently downgraded.
|
||||
- **DeepSeek** only accepts two model identifiers:
|
||||
``deepseek-chat`` and ``deepseek-reasoner``.
|
||||
- **Custom** and remaining providers pass the name through as-is.
|
||||
|
||||
This module centralises that translation so callers can simply write::
|
||||
@@ -29,7 +25,6 @@ Inspired by Clawdbot's ``normalizeAnthropicModelId`` pattern.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -105,15 +100,6 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
|
||||
"custom",
|
||||
})
|
||||
|
||||
# Providers whose APIs require lowercase model IDs. Xiaomi's
|
||||
# ``api.xiaomimimo.com`` rejects mixed-case names like ``MiMo-V2.5-Pro``
|
||||
# that users might copy from marketing docs — it only accepts
|
||||
# ``mimo-v2.5-pro``. After stripping a matching provider prefix, these
|
||||
# providers also get ``.lower()`` applied.
|
||||
_LOWERCASE_MODEL_PROVIDERS: frozenset[str] = frozenset({
|
||||
"xiaomi",
|
||||
})
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DeepSeek special handling
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -129,30 +115,17 @@ _DEEPSEEK_REASONER_KEYWORDS: frozenset[str] = frozenset({
|
||||
})
|
||||
|
||||
_DEEPSEEK_CANONICAL_MODELS: frozenset[str] = frozenset({
|
||||
"deepseek-chat", # V3 on DeepSeek direct and most aggregators
|
||||
"deepseek-reasoner", # R1-family reasoning model
|
||||
"deepseek-v4-pro", # V4 Pro — first-class model ID
|
||||
"deepseek-v4-flash", # V4 Flash — first-class model ID
|
||||
"deepseek-chat",
|
||||
"deepseek-reasoner",
|
||||
})
|
||||
|
||||
# First-class V-series IDs (``deepseek-v4-pro``, ``deepseek-v4-flash``,
|
||||
# future ``deepseek-v5-*``, dated variants like ``deepseek-v4-flash-20260423``).
|
||||
# Verified empirically 2026-04-24: DeepSeek's Chat Completions API returns
|
||||
# ``provider: DeepSeek`` / ``model: deepseek-v4-flash-20260423`` when called
|
||||
# with ``model=deepseek/deepseek-v4-flash``, so these names are not aliases
|
||||
# of ``deepseek-chat`` and must not be folded into it.
|
||||
_DEEPSEEK_V_SERIES_RE = re.compile(r"^deepseek-v\d+([-.].+)?$")
|
||||
|
||||
|
||||
def _normalize_for_deepseek(model_name: str) -> str:
|
||||
"""Map a model input to a DeepSeek-accepted identifier.
|
||||
"""Map any model input to one of DeepSeek's two accepted identifiers.
|
||||
|
||||
Rules:
|
||||
- Already a known canonical (``deepseek-chat``/``deepseek-reasoner``/
|
||||
``deepseek-v4-pro``/``deepseek-v4-flash``) -> pass through.
|
||||
- Matches the V-series pattern ``deepseek-v<digit>...`` -> pass through
|
||||
(covers future ``deepseek-v5-*`` and dated variants without a release).
|
||||
- Contains a reasoner keyword (r1, think, reasoning, cot, reasoner)
|
||||
- Already ``deepseek-chat`` or ``deepseek-reasoner`` -> pass through.
|
||||
- Contains any reasoner keyword (r1, think, reasoning, cot, reasoner)
|
||||
-> ``deepseek-reasoner``.
|
||||
- Everything else -> ``deepseek-chat``.
|
||||
|
||||
@@ -160,17 +133,13 @@ def _normalize_for_deepseek(model_name: str) -> str:
|
||||
model_name: The bare model name (vendor prefix already stripped).
|
||||
|
||||
Returns:
|
||||
A DeepSeek-accepted model identifier.
|
||||
One of ``"deepseek-chat"`` or ``"deepseek-reasoner"``.
|
||||
"""
|
||||
bare = _strip_vendor_prefix(model_name).lower()
|
||||
|
||||
if bare in _DEEPSEEK_CANONICAL_MODELS:
|
||||
return bare
|
||||
|
||||
# V-series first-class IDs (v4-pro, v4-flash, future v5-*, dated variants)
|
||||
if _DEEPSEEK_V_SERIES_RE.match(bare):
|
||||
return bare
|
||||
|
||||
# Check for reasoner-like keywords anywhere in the name
|
||||
for keyword in _DEEPSEEK_REASONER_KEYWORDS:
|
||||
if keyword in bare:
|
||||
@@ -378,9 +347,6 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
|
||||
|
||||
>>> normalize_model_for_provider("claude-sonnet-4.6", "zai")
|
||||
'claude-sonnet-4.6'
|
||||
|
||||
>>> normalize_model_for_provider("MiMo-V2.5-Pro", "xiaomi")
|
||||
'mimo-v2.5-pro'
|
||||
"""
|
||||
name = (model_input or "").strip()
|
||||
if not name:
|
||||
@@ -444,12 +410,7 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
|
||||
|
||||
# --- Direct providers: repair matching provider prefixes only ---
|
||||
if provider in _MATCHING_PREFIX_STRIP_PROVIDERS:
|
||||
result = _strip_matching_provider_prefix(name, provider)
|
||||
# Some providers require lowercase model IDs (e.g. Xiaomi's API
|
||||
# rejects "MiMo-V2.5-Pro" but accepts "mimo-v2.5-pro").
|
||||
if provider in _LOWERCASE_MODEL_PROVIDERS:
|
||||
result = result.lower()
|
||||
return result
|
||||
return _strip_matching_provider_prefix(name, provider)
|
||||
|
||||
# --- Authoritative native providers: preserve user-facing slugs as-is ---
|
||||
if provider in _AUTHORITATIVE_NATIVE_PROVIDERS:
|
||||
|
||||
+65
-343
@@ -143,7 +143,7 @@ MODEL_ALIASES: dict[str, ModelIdentity] = {
|
||||
# Z.AI / GLM
|
||||
"glm": ModelIdentity("z-ai", "glm"),
|
||||
|
||||
# Step Plan (StepFun)
|
||||
# StepFun
|
||||
"step": ModelIdentity("stepfun", "step"),
|
||||
|
||||
# Xiaomi
|
||||
@@ -304,113 +304,6 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
|
||||
# Alias resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _model_sort_key(model_id: str, prefix: str) -> tuple:
|
||||
"""Sort key for model version preference.
|
||||
|
||||
Extracts version numbers after the family prefix and returns a sort key
|
||||
that prefers higher versions. Suffix tokens (``pro``, ``omni``, etc.)
|
||||
are used as tiebreakers, with common quality indicators ranked.
|
||||
|
||||
Examples (with prefix ``"mimo"``)::
|
||||
|
||||
mimo-v2.5-pro → (-2.5, 0, 'pro') # highest version wins
|
||||
mimo-v2.5 → (-2.5, 1, '') # no suffix = lower than pro
|
||||
mimo-v2-pro → (-2.0, 0, 'pro')
|
||||
mimo-v2-omni → (-2.0, 1, 'omni')
|
||||
mimo-v2-flash → (-2.0, 1, 'flash')
|
||||
"""
|
||||
# Strip the prefix (and optional "/" separator for aggregator slugs)
|
||||
rest = model_id[len(prefix):]
|
||||
if rest.startswith("/"):
|
||||
rest = rest[1:]
|
||||
rest = rest.lstrip("-").strip()
|
||||
|
||||
# Parse version and suffix from the remainder.
|
||||
# "v2.5-pro" → version [2.5], suffix "pro"
|
||||
# "-omni" → version [], suffix "omni"
|
||||
# State machine: start → in_version → between → in_suffix
|
||||
nums: list[float] = []
|
||||
suffix_buf = ""
|
||||
state = "start"
|
||||
num_buf = ""
|
||||
|
||||
for ch in rest:
|
||||
if state == "start":
|
||||
if ch in "vV":
|
||||
state = "in_version"
|
||||
elif ch.isdigit():
|
||||
state = "in_version"
|
||||
num_buf += ch
|
||||
elif ch in "-_.":
|
||||
pass # skip separators before any content
|
||||
else:
|
||||
state = "in_suffix"
|
||||
suffix_buf += ch
|
||||
elif state == "in_version":
|
||||
if ch.isdigit():
|
||||
num_buf += ch
|
||||
elif ch == ".":
|
||||
if "." in num_buf:
|
||||
# Second dot — flush current number, start new component
|
||||
try:
|
||||
nums.append(float(num_buf.rstrip(".")))
|
||||
except ValueError:
|
||||
pass
|
||||
num_buf = ""
|
||||
else:
|
||||
num_buf += ch
|
||||
elif ch in "-_.":
|
||||
if num_buf:
|
||||
try:
|
||||
nums.append(float(num_buf.rstrip(".")))
|
||||
except ValueError:
|
||||
pass
|
||||
num_buf = ""
|
||||
state = "between"
|
||||
else:
|
||||
if num_buf:
|
||||
try:
|
||||
nums.append(float(num_buf.rstrip(".")))
|
||||
except ValueError:
|
||||
pass
|
||||
num_buf = ""
|
||||
state = "in_suffix"
|
||||
suffix_buf += ch
|
||||
elif state == "between":
|
||||
if ch.isdigit():
|
||||
state = "in_version"
|
||||
num_buf = ch
|
||||
elif ch in "vV":
|
||||
state = "in_version"
|
||||
elif ch in "-_.":
|
||||
pass
|
||||
else:
|
||||
state = "in_suffix"
|
||||
suffix_buf += ch
|
||||
elif state == "in_suffix":
|
||||
suffix_buf += ch
|
||||
|
||||
# Flush remaining buffer (strip trailing dots — "5.4." → "5.4")
|
||||
if num_buf and state == "in_version":
|
||||
try:
|
||||
nums.append(float(num_buf.rstrip(".")))
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
suffix = suffix_buf.lower().strip("-_.")
|
||||
suffix = suffix.strip()
|
||||
|
||||
# Negate versions so higher → sorts first
|
||||
version_key = tuple(-n for n in nums)
|
||||
|
||||
# Suffix quality ranking: pro/max > (no suffix) > omni/flash/mini/lite
|
||||
# Lower number = preferred
|
||||
_SUFFIX_RANK = {"pro": 0, "max": 0, "plus": 0, "turbo": 0}
|
||||
suffix_rank = _SUFFIX_RANK.get(suffix, 1)
|
||||
|
||||
return version_key + (suffix_rank, suffix)
|
||||
|
||||
|
||||
def resolve_alias(
|
||||
raw_input: str,
|
||||
current_provider: str,
|
||||
@@ -418,9 +311,9 @@ def resolve_alias(
|
||||
"""Resolve a short alias against the current provider's catalog.
|
||||
|
||||
Looks up *raw_input* in :data:`MODEL_ALIASES`, then searches the
|
||||
current provider's models.dev catalog for the model whose ID starts
|
||||
with ``vendor/family`` (or just ``family`` for non-aggregator
|
||||
providers) and has the **highest version**.
|
||||
current provider's models.dev catalog for the first model whose ID
|
||||
starts with ``vendor/family`` (or just ``family`` for non-aggregator
|
||||
providers).
|
||||
|
||||
Returns:
|
||||
``(provider, resolved_model_id, alias_name)`` if a match is
|
||||
@@ -448,44 +341,28 @@ def resolve_alias(
|
||||
|
||||
vendor, family = identity
|
||||
|
||||
# Build catalog from models.dev, then merge in static _PROVIDER_MODELS
|
||||
# entries that models.dev may be missing (e.g. newly added models not
|
||||
# yet synced to the registry).
|
||||
# Search the provider's catalog from models.dev
|
||||
catalog = list_provider_models(current_provider)
|
||||
try:
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
static = _PROVIDER_MODELS.get(current_provider, [])
|
||||
if static:
|
||||
seen = {m.lower() for m in catalog}
|
||||
for m in static:
|
||||
if m.lower() not in seen:
|
||||
catalog.append(m)
|
||||
except Exception:
|
||||
pass
|
||||
if not catalog:
|
||||
return None
|
||||
|
||||
# For aggregators, models are vendor/model-name format
|
||||
aggregator = is_aggregator(current_provider)
|
||||
|
||||
if aggregator:
|
||||
prefix = f"{vendor}/{family}".lower()
|
||||
matches = [
|
||||
mid for mid in catalog
|
||||
if mid.lower().startswith(prefix)
|
||||
]
|
||||
else:
|
||||
family_lower = family.lower()
|
||||
matches = [
|
||||
mid for mid in catalog
|
||||
if mid.lower().startswith(family_lower)
|
||||
]
|
||||
for model_id in catalog:
|
||||
mid_lower = model_id.lower()
|
||||
if aggregator:
|
||||
# Match vendor/family prefix -- e.g. "anthropic/claude-sonnet"
|
||||
prefix = f"{vendor}/{family}".lower()
|
||||
if mid_lower.startswith(prefix):
|
||||
return (current_provider, model_id, key)
|
||||
else:
|
||||
# Non-aggregator: bare names -- e.g. "claude-sonnet-4-6"
|
||||
family_lower = family.lower()
|
||||
if mid_lower.startswith(family_lower):
|
||||
return (current_provider, model_id, key)
|
||||
|
||||
if not matches:
|
||||
return None
|
||||
|
||||
# Sort by version descending — prefer the latest/highest version
|
||||
prefix_for_sort = f"{vendor}/{family}" if aggregator else family
|
||||
matches.sort(key=lambda m: _model_sort_key(m, prefix_for_sort))
|
||||
return (current_provider, matches[0], key)
|
||||
return None
|
||||
|
||||
|
||||
def get_authenticated_provider_slugs(
|
||||
@@ -527,49 +404,6 @@ def _resolve_alias_fallback(
|
||||
return None
|
||||
|
||||
|
||||
def resolve_display_context_length(
|
||||
model: str,
|
||||
provider: str,
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
model_info: Optional[ModelInfo] = None,
|
||||
custom_providers: list | None = None,
|
||||
) -> Optional[int]:
|
||||
"""Resolve the context length to show in /model output.
|
||||
|
||||
models.dev reports per-vendor context (e.g. gpt-5.5 = 1.05M on openai)
|
||||
but provider-enforced limits can be lower (e.g. Codex OAuth caps the
|
||||
same slug at 272k). The authoritative source is
|
||||
``agent.model_metadata.get_model_context_length`` which already knows
|
||||
about Codex OAuth, Copilot, Nous, and falls back to models.dev for the
|
||||
rest.
|
||||
|
||||
When ``custom_providers`` is provided, per-model ``context_length``
|
||||
overrides from ``custom_providers[].models.<id>.context_length`` are
|
||||
honored — this closes #15779 where ``/model`` switch ignored user-set
|
||||
overrides.
|
||||
|
||||
Prefer the provider-aware value; fall back to ``model_info.context_window``
|
||||
only if the resolver returns nothing.
|
||||
"""
|
||||
try:
|
||||
from agent.model_metadata import get_model_context_length
|
||||
ctx = get_model_context_length(
|
||||
model,
|
||||
base_url=base_url or "",
|
||||
api_key=api_key or "",
|
||||
provider=provider or None,
|
||||
custom_providers=custom_providers,
|
||||
)
|
||||
if ctx:
|
||||
return int(ctx)
|
||||
except Exception:
|
||||
pass
|
||||
if model_info is not None and model_info.context_window:
|
||||
return int(model_info.context_window)
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core model-switching pipeline
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -814,10 +648,7 @@ def switch_model(
|
||||
|
||||
if provider_changed or explicit_provider:
|
||||
try:
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=target_provider,
|
||||
target_model=new_model,
|
||||
)
|
||||
runtime = resolve_runtime_provider(requested=target_provider)
|
||||
api_key = runtime.get("api_key", "")
|
||||
base_url = runtime.get("base_url", "")
|
||||
api_mode = runtime.get("api_mode", "")
|
||||
@@ -834,18 +665,10 @@ def switch_model(
|
||||
)
|
||||
else:
|
||||
try:
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=current_provider,
|
||||
target_model=new_model,
|
||||
)
|
||||
# If resolution fell through to "custom" (e.g. named custom provider like
|
||||
# "ollama-launch" that resolve_runtime_provider doesn't know), keep existing
|
||||
# credentials. Otherwise use the resolved values (picks up credential rotation,
|
||||
# base_url adjustments for OpenCode, etc.).
|
||||
if runtime.get("provider") != "custom":
|
||||
api_key = runtime.get("api_key", "")
|
||||
base_url = runtime.get("base_url", "")
|
||||
api_mode = runtime.get("api_mode", "")
|
||||
runtime = resolve_runtime_provider(requested=current_provider)
|
||||
api_key = runtime.get("api_key", "")
|
||||
base_url = runtime.get("base_url", "")
|
||||
api_mode = runtime.get("api_mode", "")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -855,7 +678,6 @@ def switch_model(
|
||||
_da = DIRECT_ALIASES.get(resolved_alias)
|
||||
if _da is not None and _da.base_url:
|
||||
base_url = _da.base_url
|
||||
api_mode = "" # clear so determine_api_mode re-detects from URL
|
||||
if not api_key:
|
||||
api_key = "no-key-required"
|
||||
|
||||
@@ -869,7 +691,6 @@ def switch_model(
|
||||
target_provider,
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
api_mode=api_mode or None,
|
||||
)
|
||||
except Exception as e:
|
||||
validation = {
|
||||
@@ -879,31 +700,16 @@ def switch_model(
|
||||
"message": f"Could not validate `{new_model}`: {e}",
|
||||
}
|
||||
|
||||
# Override rejection if model is in the user's saved provider config.
|
||||
# API /v1/models may not list cloud/aliased models even though the server supports them.
|
||||
if not validation.get("accepted"):
|
||||
override = False
|
||||
if user_providers:
|
||||
for up in user_providers:
|
||||
if isinstance(up, dict) and up.get("provider") == target_provider:
|
||||
cfg_models = up.get("models", [])
|
||||
if new_model in cfg_models or any(
|
||||
m.get("name") == new_model for m in cfg_models if isinstance(m, dict)
|
||||
):
|
||||
override = True
|
||||
break
|
||||
if override:
|
||||
validation = {"accepted": True, "persist": True, "recognized": False, "message": validation.get("message", "")}
|
||||
else:
|
||||
msg = validation.get("message", "Invalid model")
|
||||
return ModelSwitchResult(
|
||||
success=False,
|
||||
new_model=new_model,
|
||||
target_provider=target_provider,
|
||||
provider_label=provider_label,
|
||||
is_global=is_global,
|
||||
error_message=msg,
|
||||
)
|
||||
msg = validation.get("message", "Invalid model")
|
||||
return ModelSwitchResult(
|
||||
success=False,
|
||||
new_model=new_model,
|
||||
target_provider=target_provider,
|
||||
provider_label=provider_label,
|
||||
is_global=is_global,
|
||||
error_message=msg,
|
||||
)
|
||||
|
||||
# Apply auto-correction if validation found a closer match
|
||||
if validation.get("corrected_model"):
|
||||
@@ -975,7 +781,6 @@ def switch_model(
|
||||
|
||||
def list_authenticated_providers(
|
||||
current_provider: str = "",
|
||||
current_base_url: str = "",
|
||||
user_providers: dict = None,
|
||||
custom_providers: list | None = None,
|
||||
max_models: int = 8,
|
||||
@@ -1004,10 +809,7 @@ def list_authenticated_providers(
|
||||
get_provider_info as _mdev_pinfo,
|
||||
)
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
from hermes_cli.models import (
|
||||
OPENROUTER_MODELS, _PROVIDER_MODELS,
|
||||
_MODELS_DEV_PREFERRED, _merge_with_models_dev, provider_model_ids,
|
||||
)
|
||||
from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS
|
||||
|
||||
results: List[dict] = []
|
||||
seen_slugs: set = set() # lowercase-normalized to catch case variants (#9545)
|
||||
@@ -1041,10 +843,6 @@ def list_authenticated_providers(
|
||||
# source of truth. models.dev can have wrong mappings (e.g.
|
||||
# minimax-cn → MINIMAX_API_KEY instead of MINIMAX_CN_API_KEY).
|
||||
pconfig = PROVIDER_REGISTRY.get(hermes_id)
|
||||
# Skip non-API-key auth providers here — they are handled in
|
||||
# section 2 (HERMES_OVERLAYS) with proper auth store checking.
|
||||
if pconfig and pconfig.auth_type != "api_key":
|
||||
continue
|
||||
if pconfig and pconfig.api_key_env_vars:
|
||||
env_vars = list(pconfig.api_key_env_vars)
|
||||
else:
|
||||
@@ -1054,24 +852,11 @@ def list_authenticated_providers(
|
||||
|
||||
# Check if any env var is set
|
||||
has_creds = any(os.environ.get(ev) for ev in env_vars)
|
||||
if not has_creds:
|
||||
try:
|
||||
from hermes_cli.auth import _load_auth_store
|
||||
store = _load_auth_store()
|
||||
if store and hermes_id in store.get("credential_pool", {}):
|
||||
has_creds = True
|
||||
except Exception:
|
||||
pass
|
||||
if not has_creds:
|
||||
continue
|
||||
|
||||
# Use curated list, falling back to models.dev if no curated list.
|
||||
# For preferred providers, merge models.dev entries into the curated
|
||||
# catalog so newly released models (e.g. mimo-v2.5-pro on opencode-go)
|
||||
# show up in the picker without requiring a Hermes release.
|
||||
# Use curated list, falling back to models.dev if no curated list
|
||||
model_ids = curated.get(hermes_id, [])
|
||||
if hermes_id in _MODELS_DEV_PREFERRED:
|
||||
model_ids = _merge_with_models_dev(hermes_id, model_ids)
|
||||
total = len(model_ids)
|
||||
top = model_ids[:max_models]
|
||||
|
||||
@@ -1173,14 +958,8 @@ def list_authenticated_providers(
|
||||
if not has_creds:
|
||||
continue
|
||||
|
||||
if hermes_slug in {"copilot", "copilot-acp"}:
|
||||
model_ids = provider_model_ids(hermes_slug)
|
||||
else:
|
||||
# Use curated list — look up by Hermes slug, fall back to overlay key
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
# Merge with models.dev for preferred providers (same rationale as above).
|
||||
if hermes_slug in _MODELS_DEV_PREFERRED:
|
||||
model_ids = _merge_with_models_dev(hermes_slug, model_ids)
|
||||
# Use curated list — look up by Hermes slug, fall back to overlay key
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
total = len(model_ids)
|
||||
top = model_ids[:max_models]
|
||||
|
||||
@@ -1303,15 +1082,6 @@ def list_authenticated_providers(
|
||||
if m and m not in models_list:
|
||||
models_list.append(m)
|
||||
|
||||
# Official OpenAI API rows in providers: often have base_url but no
|
||||
# explicit models: dict — avoid a misleading zero count in /model.
|
||||
if not models_list:
|
||||
url_lower = str(api_url).strip().lower()
|
||||
if "api.openai.com" in url_lower:
|
||||
fb = curated.get("openai") or []
|
||||
if fb:
|
||||
models_list = list(fb)
|
||||
|
||||
# Try to probe /v1/models if URL is set (but don't block on it)
|
||||
# For now just show what we know from config
|
||||
results.append({
|
||||
@@ -1335,113 +1105,66 @@ def list_authenticated_providers(
|
||||
|
||||
# --- 4. Saved custom providers from config ---
|
||||
# Each ``custom_providers`` entry represents one model under a named
|
||||
# provider. Entries sharing the same endpoint (``base_url`` + ``api_key``)
|
||||
# are grouped into a single picker row, so e.g. four Ollama entries
|
||||
# pointing at ``http://localhost:11434/v1`` with per-model display names
|
||||
# ("Ollama — GLM 5.1", "Ollama — Qwen3-coder", ...) appear as one
|
||||
# "Ollama" row with four models inside instead of four near-duplicates
|
||||
# that differ only by suffix. Entries with distinct endpoints still
|
||||
# produce separate rows.
|
||||
#
|
||||
# When the grouped endpoint matches ``current_base_url`` the group's
|
||||
# slug becomes ``current_provider`` so that selecting a model from the
|
||||
# picker flows back through the runtime provider that already holds
|
||||
# valid credentials — no re-resolution needed.
|
||||
# provider. Entries sharing the same provider name are grouped into a
|
||||
# single picker row so that e.g. four Ollama Cloud entries
|
||||
# (qwen3-coder, glm-5.1, kimi-k2, minimax-m2.7) appear as one
|
||||
# "Ollama Cloud" row with four models inside instead of four
|
||||
# duplicate "Ollama Cloud" rows. Entries with distinct provider names
|
||||
# still produce separate rows (e.g. Ollama Cloud vs Moonshot).
|
||||
if custom_providers and isinstance(custom_providers, list):
|
||||
from collections import OrderedDict
|
||||
|
||||
# Key by (base_url, api_key) instead of slug: names frequently
|
||||
# differ per model ("Ollama — X") while the endpoint stays the
|
||||
# same. Slug-based grouping left them as separate rows.
|
||||
groups: "OrderedDict[tuple, dict]" = OrderedDict()
|
||||
groups: "OrderedDict[str, dict]" = OrderedDict()
|
||||
for entry in custom_providers:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
|
||||
raw_name = (entry.get("name") or "").strip()
|
||||
display_name = (entry.get("name") or "").strip()
|
||||
api_url = (
|
||||
entry.get("base_url", "")
|
||||
or entry.get("url", "")
|
||||
or entry.get("api", "")
|
||||
or ""
|
||||
).strip().rstrip("/")
|
||||
if not raw_name or not api_url:
|
||||
).strip()
|
||||
if not display_name or not api_url:
|
||||
continue
|
||||
api_key = (entry.get("api_key") or "").strip()
|
||||
|
||||
group_key = (api_url, api_key)
|
||||
if group_key not in groups:
|
||||
# Strip per-model suffix so "Ollama — GLM 5.1" becomes
|
||||
# "Ollama" for the grouped row. Em dash is the convention
|
||||
# Hermes's own writer uses; a hyphen variant is accepted
|
||||
# for hand-edited configs.
|
||||
display_name = raw_name
|
||||
for sep in ("—", " - "):
|
||||
if sep in display_name:
|
||||
display_name = display_name.split(sep)[0].strip()
|
||||
break
|
||||
if not display_name:
|
||||
display_name = raw_name
|
||||
# If this endpoint matches the currently active one, use
|
||||
# ``current_provider`` as the slug so picker-driven switches
|
||||
# route through the live credential pipeline.
|
||||
if (
|
||||
current_base_url
|
||||
and api_url == current_base_url.strip().rstrip("/")
|
||||
):
|
||||
slug = current_provider or custom_provider_slug(display_name)
|
||||
else:
|
||||
slug = custom_provider_slug(display_name)
|
||||
groups[group_key] = {
|
||||
"slug": slug,
|
||||
slug = custom_provider_slug(display_name)
|
||||
if slug not in groups:
|
||||
groups[slug] = {
|
||||
"name": display_name,
|
||||
"api_url": api_url,
|
||||
"models": [],
|
||||
}
|
||||
|
||||
# The singular ``model:`` field only holds the currently
|
||||
# active model. Hermes's own writer (main.py::_save_custom_provider)
|
||||
# stores every configured model as a dict under ``models:``;
|
||||
# downstream readers (agent/models_dev.py, gateway/run.py,
|
||||
# run_agent.py, hermes_cli/config.py) already consume that dict.
|
||||
# The /model picker previously ignored it, so multi-model
|
||||
# custom providers appeared to have only the active model.
|
||||
default_model = (entry.get("model") or "").strip()
|
||||
if default_model and default_model not in groups[group_key]["models"]:
|
||||
groups[group_key]["models"].append(default_model)
|
||||
if default_model and default_model not in groups[slug]["models"]:
|
||||
groups[slug]["models"].append(default_model)
|
||||
|
||||
cfg_models = entry.get("models", {})
|
||||
if isinstance(cfg_models, dict):
|
||||
for m in cfg_models:
|
||||
if m and m not in groups[group_key]["models"]:
|
||||
groups[group_key]["models"].append(m)
|
||||
if m and m not in groups[slug]["models"]:
|
||||
groups[slug]["models"].append(m)
|
||||
elif isinstance(cfg_models, list):
|
||||
for m in cfg_models:
|
||||
if m and m not in groups[group_key]["models"]:
|
||||
groups[group_key]["models"].append(m)
|
||||
if m and m not in groups[slug]["models"]:
|
||||
groups[slug]["models"].append(m)
|
||||
|
||||
_section4_emitted_slugs: set = set()
|
||||
for grp in groups.values():
|
||||
slug = grp["slug"]
|
||||
# If the slug is already claimed by a built-in / overlay /
|
||||
# user-provider row (sections 1-3), skip this custom group
|
||||
# to avoid shadowing a real provider.
|
||||
if slug.lower() in seen_slugs and slug.lower() not in _section4_emitted_slugs:
|
||||
for slug, grp in groups.items():
|
||||
if slug.lower() in seen_slugs:
|
||||
continue
|
||||
# If a prior section-4 group already used this slug (two custom
|
||||
# endpoints with the same cleaned name — e.g. two OpenAI-
|
||||
# compatible gateways named identically with different keys),
|
||||
# append a counter so both rows stay visible in the picker.
|
||||
if slug.lower() in _section4_emitted_slugs:
|
||||
base_slug = slug
|
||||
n = 2
|
||||
while f"{base_slug}-{n}".lower() in seen_slugs:
|
||||
n += 1
|
||||
slug = f"{base_slug}-{n}"
|
||||
grp["slug"] = slug
|
||||
# Skip if section 3 already emitted this endpoint under its
|
||||
# ``providers:`` dict key — matches on (display_name, base_url).
|
||||
# Prevents two picker rows labelled identically when callers
|
||||
# pass both ``user_providers`` and a compatibility-merged
|
||||
# ``custom_providers`` list.
|
||||
# ``providers:`` dict key — matches on (display_name, base_url),
|
||||
# the tuple section 4 groups by. Prevents two picker rows
|
||||
# labelled identically when callers pass both ``user_providers``
|
||||
# and a compatibility-merged ``custom_providers`` list.
|
||||
_pair_key = (
|
||||
str(grp["name"]).strip().lower(),
|
||||
str(grp["api_url"]).strip().rstrip("/").lower(),
|
||||
@@ -1459,7 +1182,6 @@ def list_authenticated_providers(
|
||||
"api_url": grp["api_url"],
|
||||
})
|
||||
seen_slugs.add(slug.lower())
|
||||
_section4_emitted_slugs.add(slug.lower())
|
||||
|
||||
# Sort: current provider first, then by model count descending
|
||||
results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))
|
||||
|
||||
+143
-737
File diff suppressed because it is too large
Load Diff
@@ -1,202 +0,0 @@
|
||||
"""Oneshot (-z) mode: send a prompt, get the final content block, exit.
|
||||
|
||||
Bypasses cli.py entirely. No banner, no spinner, no session_id line,
|
||||
no stderr chatter. Just the agent's final text to stdout.
|
||||
|
||||
Toolsets = whatever the user has configured for "cli" in `hermes tools`.
|
||||
Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn.
|
||||
Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call).
|
||||
Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual).
|
||||
|
||||
Model / provider selection mirrors `hermes chat`:
|
||||
- Both optional. If omitted, use the user's configured default.
|
||||
- If both given, pair them exactly as given.
|
||||
- If only --model given, auto-detect the provider that serves it.
|
||||
- If only --provider given, error out (ambiguous — caller must pick a model).
|
||||
|
||||
Env var fallbacks (used when the corresponding arg is not passed):
|
||||
- HERMES_INFERENCE_MODEL
|
||||
- HERMES_INFERENCE_PROVIDER (already read by resolve_runtime_provider)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from contextlib import redirect_stderr, redirect_stdout
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def run_oneshot(
|
||||
prompt: str,
|
||||
model: Optional[str] = None,
|
||||
provider: Optional[str] = None,
|
||||
) -> int:
|
||||
"""Execute a single prompt and print only the final content block.
|
||||
|
||||
Args:
|
||||
prompt: The user message to send.
|
||||
model: Optional model override. Falls back to HERMES_INFERENCE_MODEL
|
||||
env var, then config.yaml's model.default / model.model.
|
||||
provider: Optional provider override. Falls back to
|
||||
HERMES_INFERENCE_PROVIDER env var, then config.yaml's model.provider,
|
||||
then "auto".
|
||||
|
||||
Returns the exit code. Caller should sys.exit() with the return.
|
||||
"""
|
||||
# Silence every stdlib logger for the duration. AIAgent, tools, and
|
||||
# provider adapters all log to stderr through the root logger; file
|
||||
# handlers added by setup_logging() keep working (they're attached to
|
||||
# the root logger's handler list, not affected by level), but no
|
||||
# bytes reach the terminal.
|
||||
logging.disable(logging.CRITICAL)
|
||||
|
||||
# --provider without --model is ambiguous: carrying the user's configured
|
||||
# model across to a different provider is usually wrong (that provider may
|
||||
# not host it), and silently picking the provider's catalog default hides
|
||||
# the mismatch. Require the caller to be explicit. Validate BEFORE the
|
||||
# stderr redirect so the message actually reaches the terminal.
|
||||
env_model_early = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
|
||||
if provider and not ((model or "").strip() or env_model_early):
|
||||
sys.stderr.write(
|
||||
"hermes -z: --provider requires --model (or HERMES_INFERENCE_MODEL). "
|
||||
"Pass both explicitly, or neither to use your configured defaults.\n"
|
||||
)
|
||||
return 2
|
||||
|
||||
# Auto-approve any shell / tool approvals. Non-interactive by
|
||||
# definition — a prompt would hang forever.
|
||||
os.environ["HERMES_YOLO_MODE"] = "1"
|
||||
os.environ["HERMES_ACCEPT_HOOKS"] = "1"
|
||||
|
||||
# Redirect stderr AND stdout to devnull for the entire call tree.
|
||||
# We'll print the final response to the real stdout at the end.
|
||||
real_stdout = sys.stdout
|
||||
devnull = open(os.devnull, "w")
|
||||
|
||||
try:
|
||||
with redirect_stdout(devnull), redirect_stderr(devnull):
|
||||
response = _run_agent(prompt, model=model, provider=provider)
|
||||
finally:
|
||||
try:
|
||||
devnull.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if response:
|
||||
real_stdout.write(response)
|
||||
if not response.endswith("\n"):
|
||||
real_stdout.write("\n")
|
||||
real_stdout.flush()
|
||||
return 0
|
||||
|
||||
|
||||
def _run_agent(
|
||||
prompt: str,
|
||||
model: Optional[str] = None,
|
||||
provider: Optional[str] = None,
|
||||
) -> str:
|
||||
"""Build an AIAgent exactly like a normal CLI chat turn would, then
|
||||
run a single conversation. Returns the final response string."""
|
||||
# Imports are local so they don't run when hermes is invoked for
|
||||
# other commands (keeps top-level CLI startup cheap).
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.models import detect_provider_for_model
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
from hermes_cli.tools_config import _get_platform_tools
|
||||
from run_agent import AIAgent
|
||||
|
||||
cfg = load_config()
|
||||
|
||||
# Resolve effective model: explicit arg → env var → config.
|
||||
model_cfg = cfg.get("model") or {}
|
||||
if isinstance(model_cfg, str):
|
||||
cfg_model = model_cfg
|
||||
else:
|
||||
cfg_model = model_cfg.get("default") or model_cfg.get("model") or ""
|
||||
|
||||
env_model = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
|
||||
effective_model = (model or "").strip() or env_model or cfg_model
|
||||
|
||||
# Resolve effective provider: explicit arg → (auto-detect from model if
|
||||
# model was explicit) → env / config (handled inside resolve_runtime_provider).
|
||||
#
|
||||
# When --model is given without --provider, auto-detect the provider that
|
||||
# serves that model — same semantic as `/model <name>` in an interactive
|
||||
# session. Without this, resolve_runtime_provider() would fall back to
|
||||
# the user's configured default provider, which may not host the model
|
||||
# the caller just asked for.
|
||||
effective_provider = (provider or "").strip() or None
|
||||
if effective_provider is None and (model or env_model):
|
||||
# Only auto-detect when the model was explicitly requested via arg or
|
||||
# env var (not when it came from config — that's the "use my defaults"
|
||||
# path and the configured provider is already correct).
|
||||
explicit_model = (model or "").strip() or env_model
|
||||
if explicit_model:
|
||||
cfg_provider = ""
|
||||
if isinstance(model_cfg, dict):
|
||||
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
current_provider = (
|
||||
cfg_provider
|
||||
or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
|
||||
or "auto"
|
||||
)
|
||||
detected = detect_provider_for_model(explicit_model, current_provider)
|
||||
if detected:
|
||||
effective_provider, effective_model = detected
|
||||
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=effective_provider,
|
||||
target_model=effective_model or None,
|
||||
)
|
||||
|
||||
# Pull in whatever toolsets the user has enabled for "cli".
|
||||
# sorted() gives stable ordering; set→list for AIAgent's signature.
|
||||
toolsets_list = sorted(_get_platform_tools(cfg, "cli"))
|
||||
|
||||
agent = AIAgent(
|
||||
api_key=runtime.get("api_key"),
|
||||
base_url=runtime.get("base_url"),
|
||||
provider=runtime.get("provider"),
|
||||
api_mode=runtime.get("api_mode"),
|
||||
model=effective_model,
|
||||
enabled_toolsets=toolsets_list,
|
||||
quiet_mode=True,
|
||||
platform="cli",
|
||||
credential_pool=runtime.get("credential_pool"),
|
||||
# Interactive callbacks are intentionally NOT wired beyond this
|
||||
# one. In oneshot mode there's no user sitting at a terminal:
|
||||
# - clarify → returns a synthetic "pick a default" instruction
|
||||
# so the agent continues instead of stalling on
|
||||
# the tool's built-in "not available" error
|
||||
# - sudo password prompt → terminal_tool gates on
|
||||
# HERMES_INTERACTIVE which we never set
|
||||
# - shell-hook approval → auto-approved via HERMES_ACCEPT_HOOKS=1
|
||||
# (set above); also falls back to deny on non-tty
|
||||
# - dangerous-command approval → bypassed via HERMES_YOLO_MODE=1
|
||||
# - skill secret capture → returns gracefully when no callback set
|
||||
clarify_callback=_oneshot_clarify_callback,
|
||||
)
|
||||
|
||||
# Belt-and-braces: make sure AIAgent doesn't invoke any streaming
|
||||
# display callbacks that would bypass our stdout capture.
|
||||
agent.suppress_status_output = True
|
||||
agent.stream_delta_callback = None
|
||||
agent.tool_gen_callback = None
|
||||
|
||||
return agent.chat(prompt) or ""
|
||||
|
||||
|
||||
def _oneshot_clarify_callback(question: str, choices=None) -> str:
|
||||
"""Clarify is disabled in oneshot mode — tell the agent to pick a
|
||||
default and proceed instead of stalling or erroring."""
|
||||
if choices:
|
||||
return (
|
||||
f"[oneshot mode: no user available. Pick the best option from "
|
||||
f"{choices} using your own judgment and continue.]"
|
||||
)
|
||||
return (
|
||||
"[oneshot mode: no user available. Make the most reasonable "
|
||||
"assumption you can and continue.]"
|
||||
)
|
||||
@@ -44,7 +44,7 @@ def _cmd_list(store):
|
||||
for p in pending:
|
||||
print(
|
||||
f" {p['platform']:<12} {p['code']:<10} {p['user_id']:<20} "
|
||||
f"{(p.get('user_name') or ''):<20} {p['age_minutes']}m ago"
|
||||
f"{p.get('user_name', ''):<20} {p['age_minutes']}m ago"
|
||||
)
|
||||
else:
|
||||
print("\n No pending pairing requests.")
|
||||
@@ -54,7 +54,7 @@ def _cmd_list(store):
|
||||
print(f" {'Platform':<12} {'User ID':<20} {'Name':<20}")
|
||||
print(f" {'--------':<12} {'-------':<20} {'----':<20}")
|
||||
for a in approved:
|
||||
print(f" {a['platform']:<12} {a['user_id']:<20} {(a.get('user_name') or ''):<20}")
|
||||
print(f" {a['platform']:<12} {a['user_id']:<20} {a.get('user_name', ''):<20}")
|
||||
else:
|
||||
print("\n No approved users.")
|
||||
|
||||
@@ -69,7 +69,7 @@ def _cmd_approve(store, platform: str, code: str):
|
||||
result = store.approve_code(platform, code)
|
||||
if result:
|
||||
uid = result["user_id"]
|
||||
name = result.get("user_name") or ""
|
||||
name = result.get("user_name", "")
|
||||
display = f"{name} ({uid})" if name else uid
|
||||
print(f"\n Approved! User {display} on {platform} can now use the bot~")
|
||||
print(" They'll be recognized automatically on their next message.\n")
|
||||
|
||||
@@ -38,7 +38,6 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
|
||||
("qqbot", PlatformInfo(label="💬 QQBot", default_toolset="hermes-qqbot")),
|
||||
("webhook", PlatformInfo(label="🔗 Webhook", default_toolset="hermes-webhook")),
|
||||
("api_server", PlatformInfo(label="🌐 API Server", default_toolset="hermes-api-server")),
|
||||
("cron", PlatformInfo(label="⏰ Cron", default_toolset="hermes-cron")),
|
||||
])
|
||||
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user