Compare commits

..

1 Commits

Author SHA1 Message Date
kshitijk4poor b277962dcc refactor: extract codex_responses logic into dedicated adapter
Move 10 Responses API format-conversion and normalization functions from
run_agent.py into agent/codex_responses_adapter.py. All functions are now
stateless module-level functions with zero self references.

The AIAgent methods remain as thin one-line wrappers that delegate to the
adapter, so all callers (tests, gateway, CLI) are unchanged.

Functions extracted:
- _deterministic_call_id: deterministic tool call ID generation
- _split_responses_tool_id: composite ID splitting
- _derive_responses_function_call_id: call_ to fc_ prefix conversion
- _responses_tools: chat completions tool schema → Responses format
- _chat_messages_to_responses_input: message format conversion
- _preflight_codex_input_items: input item normalization
- _preflight_codex_api_kwargs: API kwargs validation/cleaning
- _extract_responses_message_text: text extraction from response items
- _extract_responses_reasoning_text: reasoning extraction
- _normalize_codex_response: full response normalization

This brings codex_responses in line with anthropic_adapter.py and
bedrock_adapter.py which already have their own adapter files.

run_agent.py: 12410 → 11845 lines (-565 net)
2026-04-20 16:32:43 +05:30
559 changed files with 9153 additions and 61261 deletions
-3
View File
@@ -14,6 +14,3 @@ node_modules
.env
*.md
# Runtime data (bind-mounted at /opt/data; must not leak into build context)
data/
-8
View File
@@ -1,8 +0,0 @@
name: 'Setup Nix'
description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
runs:
using: composite
steps:
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
-68
View File
@@ -1,68 +0,0 @@
name: Nix Lockfile Check
on:
pull_request:
workflow_dispatch:
permissions:
contents: read
pull-requests: write
concurrency:
group: nix-lockfile-check-${{ github.ref }}
cancel-in-progress: true
jobs:
check:
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: ./.github/actions/nix-setup
- name: Resolve head SHA
id: sha
shell: bash
run: |
FULL="${{ github.event.pull_request.head.sha || github.sha }}"
echo "full=$FULL" >> "$GITHUB_OUTPUT"
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
- name: Check lockfile hashes
id: check
continue-on-error: true
env:
LINK_SHA: ${{ steps.sha.outputs.full }}
run: nix run .#fix-lockfiles -- --check
- name: Post sticky PR comment (stale)
if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
message: |
### ⚠️ npm lockfile hash out of date
Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
${{ steps.check.outputs.report }}
#### Apply the fix
- [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
- Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
- name: Clear sticky PR comment (resolved)
if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
delete: true
- name: Fail if stale
if: steps.check.outputs.stale == 'true'
run: exit 1
-149
View File
@@ -1,149 +0,0 @@
name: Nix Lockfile Fix
on:
workflow_dispatch:
inputs:
pr_number:
description: 'PR number to fix (leave empty to run on the selected branch)'
required: false
type: string
issue_comment:
types: [edited]
permissions:
contents: write
pull-requests: write
concurrency:
group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
cancel-in-progress: false
jobs:
fix:
# Run on manual dispatch OR when a task-list checkbox in the sticky
# lockfile-check comment flips from `[ ]` to `[x]`.
if: |
github.event_name == 'workflow_dispatch' ||
(github.event_name == 'issue_comment'
&& github.event.issue.pull_request != null
&& contains(github.event.comment.body, '[x] **Apply lockfile fix**')
&& !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
runs-on: ubuntu-latest
timeout-minutes: 25
steps:
- name: Authorize & resolve PR
id: resolve
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
with:
script: |
// 1. Verify the actor has write access — applies to both checkbox
// clicks and manual dispatch.
const { data: perm } =
await github.rest.repos.getCollaboratorPermissionLevel({
owner: context.repo.owner,
repo: context.repo.repo,
username: context.actor,
});
if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
core.setFailed(
`${context.actor} lacks write access (has: ${perm.permission})`
);
return;
}
// 2. Resolve which ref to check out.
let prNumber = '';
if (context.eventName === 'issue_comment') {
prNumber = String(context.payload.issue.number);
} else if (context.eventName === 'workflow_dispatch') {
prNumber = context.payload.inputs.pr_number || '';
}
if (!prNumber) {
core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
core.setOutput('repo', context.repo.repo);
core.setOutput('owner', context.repo.owner);
core.setOutput('pr', '');
return;
}
const { data: pr } = await github.rest.pulls.get({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: Number(prNumber),
});
core.setOutput('ref', pr.head.ref);
core.setOutput('repo', pr.head.repo.name);
core.setOutput('owner', pr.head.repo.owner.login);
core.setOutput('pr', String(pr.number));
# Wipe the sticky lockfile-check comment to a "running" state as soon
# as the job is authorized, so the user sees their click was picked up
# before the ~minute of nix build work.
- name: Mark sticky as running
if: steps.resolve.outputs.pr != ''
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
number: ${{ steps.resolve.outputs.pr }}
message: |
### 🔄 Applying lockfile fix…
Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
ref: ${{ steps.resolve.outputs.ref }}
token: ${{ secrets.GITHUB_TOKEN }}
fetch-depth: 0
- uses: ./.github/actions/nix-setup
- name: Apply lockfile hashes
id: apply
run: nix run .#fix-lockfiles -- --apply
- name: Commit & push
if: steps.apply.outputs.changed == 'true'
shell: bash
run: |
set -euo pipefail
git config user.name 'github-actions[bot]'
git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
git add nix/tui.nix nix/web.nix
git commit -m "fix(nix): refresh npm lockfile hashes"
git push
- name: Update sticky (applied)
if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
number: ${{ steps.resolve.outputs.pr }}
message: |
### ✅ Lockfile fix applied
Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
- name: Update sticky (already current)
if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
number: ${{ steps.resolve.outputs.pr }}
message: |
### ✅ Lockfile hashes already current
Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
- name: Update sticky (failed)
if: failure() && steps.resolve.outputs.pr != ''
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
number: ${{ steps.resolve.outputs.pr }}
message: |
### ❌ Lockfile fix failed
See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
+12 -2
View File
@@ -4,6 +4,15 @@ on:
push:
branches: [main]
pull_request:
paths:
- 'flake.nix'
- 'flake.lock'
- 'nix/**'
- 'pyproject.toml'
- 'uv.lock'
- 'hermes_cli/**'
- 'run_agent.py'
- 'acp_adapter/**'
permissions:
contents: read
@@ -20,8 +29,9 @@ jobs:
runs-on: ${{ matrix.os }}
timeout-minutes: 30
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: ./.github/actions/nix-setup
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
- name: Check flake
if: runner.os == 'Linux'
run: nix flake check --print-build-logs
-1
View File
@@ -1,4 +1,3 @@
.DS_Store
/venv/
/_pycache/
*.pyc*
+77 -260
View File
@@ -5,61 +5,78 @@ Instructions for AI coding assistants and developers working on the hermes-agent
## Development Environment
```bash
# Prefer .venv; fall back to venv if that's what your checkout has.
source .venv/bin/activate # or: source venv/bin/activate
source venv/bin/activate # ALWAYS activate before running Python
```
`scripts/run_tests.sh` probes `.venv` first, then `venv`, then
`$HOME/.hermes/hermes-agent/venv` (for worktrees that share a venv with the
main checkout).
## Project Structure
File counts shift constantly — don't treat the tree below as exhaustive.
The canonical source is the filesystem. The notes call out the load-bearing
entry points you'll actually edit.
```
hermes-agent/
├── run_agent.py # AIAgent class — core conversation loop (~12k LOC)
├── run_agent.py # AIAgent class — core conversation loop
├── model_tools.py # Tool orchestration, discover_builtin_tools(), handle_function_call()
├── toolsets.py # Toolset definitions, _HERMES_CORE_TOOLS list
├── cli.py # HermesCLI class — interactive CLI orchestrator (~11k LOC)
├── cli.py # HermesCLI class — interactive CLI orchestrator
├── hermes_state.py # SessionDB — SQLite session store (FTS5 search)
├── hermes_constants.py # get_hermes_home(), display_hermes_home() — profile-aware paths
├── hermes_logging.py # setup_logging() — agent.log / errors.log / gateway.log (profile-aware)
├── batch_runner.py # Parallel batch processing
├── agent/ # Agent internals (provider adapters, memory, caching, compression, etc.)
├── hermes_cli/ # CLI subcommands, setup wizard, plugins loader, skin engine
├── tools/ # Tool implementations — auto-discovered via tools/registry.py
├── agent/ # Agent internals
│ ├── prompt_builder.py # System prompt assembly
│ ├── context_compressor.py # Auto context compression
│ ├── prompt_caching.py # Anthropic prompt caching
│ ├── auxiliary_client.py # Auxiliary LLM client (vision, summarization)
│ ├── model_metadata.py # Model context lengths, token estimation
│ ├── models_dev.py # models.dev registry integration (provider-aware context)
│ ├── display.py # KawaiiSpinner, tool preview formatting
│ ├── skill_commands.py # Skill slash commands (shared CLI/gateway)
│ └── trajectory.py # Trajectory saving helpers
├── hermes_cli/ # CLI subcommands and setup
│ ├── main.py # Entry point — all `hermes` subcommands
│ ├── config.py # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
│ ├── commands.py # Slash command definitions + SlashCommandCompleter
│ ├── callbacks.py # Terminal callbacks (clarify, sudo, approval)
│ ├── setup.py # Interactive setup wizard
│ ├── skin_engine.py # Skin/theme engine — CLI visual customization
│ ├── skills_config.py # `hermes skills` — enable/disable skills per platform
│ ├── tools_config.py # `hermes tools` — enable/disable tools per platform
│ ├── skills_hub.py # `/skills` slash command (search, browse, install)
│ ├── models.py # Model catalog, provider model lists
│ ├── model_switch.py # Shared /model switch pipeline (CLI + gateway)
│ └── auth.py # Provider credential resolution
├── tools/ # Tool implementations (one file per tool)
│ ├── registry.py # Central tool registry (schemas, handlers, dispatch)
│ ├── approval.py # Dangerous command detection
│ ├── terminal_tool.py # Terminal orchestration
│ ├── process_registry.py # Background process management
│ ├── file_tools.py # File read/write/search/patch
│ ├── web_tools.py # Web search/extract (Parallel + Firecrawl)
│ ├── browser_tool.py # Browserbase browser automation
│ ├── code_execution_tool.py # execute_code sandbox
│ ├── delegate_tool.py # Subagent delegation
│ ├── mcp_tool.py # MCP client (~1050 lines)
│ └── environments/ # Terminal backends (local, docker, ssh, modal, daytona, singularity)
├── gateway/ # Messaging gateway — run.py + session.py + platforms/
│ ├── platforms/ # Adapter per platform (telegram, discord, slack, whatsapp,
│ # homeassistant, signal, matrix, mattermost, email, sms,
│ # dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
│ │ # webhook, api_server, ...). See ADDING_A_PLATFORM.md.
│ └── builtin_hooks/ # Always-registered gateway hooks (boot-md, ...)
├── plugins/ # Plugin system (see "Plugins" section below)
│ ├── memory/ # Memory-provider plugins (honcho, mem0, supermemory, ...)
│ ├── context_engine/ # Context-engine plugins
│ └── <others>/ # Dashboard, image-gen, disk-cleanup, examples, ...
├── optional-skills/ # Heavier/niche skills shipped but NOT active by default
├── skills/ # Built-in skills bundled with the repo
├── gateway/ # Messaging platform gateway
│ ├── run.py # Main loop, slash commands, message dispatch
├── session.py # SessionStore — conversation persistence
└── platforms/ # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
├── ui-tui/ # Ink (React) terminal UI — `hermes --tui`
── src/ # entry.tsx, app.tsx, gatewayClient.ts + app/components/hooks/lib
── src/entry.tsx # TTY gate + render()
│ ├── src/app.tsx # Main state machine and UI
│ ├── src/gatewayClient.ts # Child process + JSON-RPC bridge
│ ├── src/app/ # Decomposed app logic (event handler, slash handler, stores, hooks)
│ ├── src/components/ # Ink components (branding, markdown, prompts, pickers, etc.)
│ ├── src/hooks/ # useCompletion, useInputHistory, useQueue, useVirtualHistory
│ └── src/lib/ # Pure helpers (history, osc52, text, rpc, messages)
├── tui_gateway/ # Python JSON-RPC backend for the TUI
│ ├── entry.py # stdio entrypoint
│ ├── server.py # RPC handlers and session logic
│ ├── render.py # Optional rich/ANSI bridge
│ └── slash_worker.py # Persistent HermesCLI subprocess for slash commands
├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration)
├── cron/ # Scheduler jobs.py, scheduler.py
├── cron/ # Scheduler (jobs.py, scheduler.py)
├── environments/ # RL training environments (Atropos)
├── scripts/ # run_tests.sh, release.py, auxiliary scripts
── website/ # Docusaurus docs site
└── tests/ # Pytest suite (~15k tests across ~700 files as of Apr 2026)
├── tests/ # Pytest suite (~3000 tests)
── batch_runner.py # Parallel batch processing
```
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
**Logs:** `~/.hermes/logs/``agent.log` (INFO+), `errors.log` (WARNING+),
`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
Browse with `hermes logs [--follow] [--level ...] [--session ...]`.
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)
## File Dependency Chain
@@ -77,30 +94,20 @@ run_agent.py, cli.py, batch_runner.py, environments/
## AIAgent Class (run_agent.py)
The real `AIAgent.__init__` takes ~60 parameters (credentials, routing, callbacks,
session context, budget, credential pool, etc.). The signature below is the
minimum subset you'll usually touch — read `run_agent.py` for the full list.
```python
class AIAgent:
def __init__(self,
base_url: str = None,
api_key: str = None,
provider: str = None,
api_mode: str = None, # "chat_completions" | "codex_responses" | ...
model: str = "", # empty → resolved from config/provider later
max_iterations: int = 90, # tool-calling iterations (shared with subagents)
model: str = "anthropic/claude-opus-4.6",
max_iterations: int = 90,
enabled_toolsets: list = None,
disabled_toolsets: list = None,
quiet_mode: bool = False,
save_trajectories: bool = False,
platform: str = None, # "cli", "telegram", etc.
platform: str = None, # "cli", "telegram", etc.
session_id: str = None,
skip_context_files: bool = False,
skip_memory: bool = False,
credential_pool=None,
# ... plus callbacks, thread/user/chat IDs, iteration_budget, fallback_model,
# checkpoints config, prefill_messages, service_tier, reasoning_config, etc.
# ... plus provider, api_mode, callbacks, routing params
): ...
def chat(self, message: str) -> str:
@@ -113,13 +120,10 @@ class AIAgent:
### Agent Loop
The core loop is inside `run_conversation()` — entirely synchronous, with
interrupt checks, budget tracking, and a one-turn grace call:
The core loop is inside `run_conversation()` — entirely synchronous:
```python
while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) \
or self._budget_grace_call:
if self._interrupt_requested: break
while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas)
if response.tool_calls:
for tool_call in response.tool_calls:
@@ -130,8 +134,7 @@ while (api_call_count < self.max_iterations and self.iteration_budget.remaining
return response.content
```
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`.
Reasoning content is stored in `assistant_msg["reasoning"]`.
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`.
---
@@ -277,7 +280,7 @@ The registry handles schema collection, dispatch, availability checking, and err
**State files**: If a tool stores persistent state (caches, logs, checkpoints), use `get_hermes_home()` for the base directory — never `Path.home() / ".hermes"`. This ensures each profile gets its own state.
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `tools/todo_tool.py` for the pattern.
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern.
---
@@ -285,13 +288,9 @@ The registry handles schema collection, dispatch, availability checking, and err
### config.yaml options:
1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
2. Bump `_config_version` (check the current value at the top of `DEFAULT_CONFIG`)
ONLY if you need to actively migrate/transform existing user config
(renaming keys, changing structure). Adding a new key to an existing
section is handled automatically by the deep-merge and does NOT require
a version bump.
2. Bump `_config_version` (currently 5) to trigger migration for existing users
### .env variables (SECRETS ONLY — API keys, tokens, passwords):
### .env variables:
1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
```python
"NEW_API_KEY": {
@@ -303,29 +302,13 @@ The registry handles schema collection, dispatch, availability checking, and err
},
```
Non-secret settings (timeouts, thresholds, feature flags, paths, display
preferences) belong in `config.yaml`, not `.env`. If internal code needs an
env var mirror for backward compatibility, bridge it from `config.yaml` to
the env var in code (see `gateway_timeout`, `terminal.cwd``TERMINAL_CWD`).
### Config loaders (three paths — know which one you're in):
### Config loaders (two separate systems):
| Loader | Used by | Location |
|--------|---------|----------|
| `load_cli_config()` | CLI mode | `cli.py` — merges CLI-specific defaults + user YAML |
| `load_config()` | `hermes tools`, `hermes setup`, most CLI subcommands | `hermes_cli/config.py` — merges `DEFAULT_CONFIG` + user YAML |
| Direct YAML load | Gateway runtime | `gateway/run.py` + `gateway/config.py` — reads user YAML raw |
If you add a new key and the CLI sees it but the gateway doesn't (or vice
versa), you're on the wrong loader. Check `DEFAULT_CONFIG` coverage.
### Working directory:
- **CLI** — uses the process's current directory (`os.getcwd()`).
- **Messaging** — uses `terminal.cwd` from `config.yaml`. The gateway bridges this
to the `TERMINAL_CWD` env var for child tools. **`MESSAGING_CWD` has been
removed** — the config loader prints a deprecation warning if it's set in
`.env`. Same for `TERMINAL_CWD` in `.env`; the canonical setting is
`terminal.cwd` in `config.yaml`.
| `load_cli_config()` | CLI mode | `cli.py` |
| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` |
| Direct YAML load | Gateway | `gateway/run.py` |
---
@@ -418,95 +401,7 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml.
---
## Plugins
Hermes has two plugin surfaces. Both live under `plugins/` in the repo so
repo-shipped plugins can be discovered alongside user-installed ones in
`~/.hermes/plugins/` and pip-installed entry points.
### General plugins (`hermes_cli/plugins.py` + `plugins/<name>/`)
`PluginManager` discovers plugins from `~/.hermes/plugins/`, `./.hermes/plugins/`,
and pip entry points. Each plugin exposes a `register(ctx)` function that
can:
- Register Python-callback lifecycle hooks:
`pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`,
`on_session_start`, `on_session_end`
- Register new tools via `ctx.register_tool(...)`
- Register CLI subcommands via `ctx.register_cli_command(...)` — the
plugin's argparse tree is wired into `hermes` at startup so
`hermes <pluginname> <subcmd>` works with no change to `main.py`
Hooks are invoked from `model_tools.py` (pre/post tool) and `run_agent.py`
(lifecycle). **Discovery timing pitfall:** `discover_plugins()` only runs
as a side effect of importing `model_tools.py`. Code paths that read plugin
state without importing `model_tools.py` first must call `discover_plugins()`
explicitly (it's idempotent).
### Memory-provider plugins (`plugins/memory/<name>/`)
Separate discovery system for pluggable memory backends. Current built-in
providers include **honcho, mem0, supermemory, byterover, hindsight,
holographic, openviking, retaindb**.
Each provider implements the `MemoryProvider` ABC (see `agent/memory_provider.py`)
and is orchestrated by `agent/memory_manager.py`. Lifecycle hooks include
`sync_turn(turn_messages)`, `prefetch(query)`, `shutdown()`, and optional
`post_setup(hermes_home, config)` for setup-wizard integration.
**CLI commands via `plugins/memory/<name>/cli.py`:** if a memory plugin
defines `register_cli(subparser)`, `discover_plugin_cli_commands()` finds
it at argparse setup time and wires it into `hermes <plugin>`. The
framework only exposes CLI commands for the **currently active** memory
provider (read from `memory.provider` in config.yaml), so disabled
providers don't clutter `hermes --help`.
**Rule (Teknium, May 2026):** plugins MUST NOT modify core files
(`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.).
If a plugin needs a capability the framework doesn't expose, expand the
generic plugin surface (new hook, new ctx method) — never hardcode
plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
honcho argparse from `main.py` for exactly this reason.
### Dashboard / context-engine / image-gen plugin directories
`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
etc. follow the same pattern (ABC + orchestrator + per-plugin directory).
Context engines plug into `agent/context_engine.py`; image-gen providers
into `agent/image_gen_provider.py`.
---
## Skills
Two parallel surfaces:
- **`skills/`** — built-in skills shipped and loadable by default.
Organized by category directories (e.g. `skills/github/`, `skills/mlops/`).
- **`optional-skills/`** — heavier or niche skills shipped with the repo but
NOT active by default. Installed explicitly via
`hermes skills install official/<category>/<skill>`. Adapter lives in
`tools/skills_hub.py` (`OptionalSkillSource`). Categories include
`autonomous-ai-agents`, `blockchain`, `communication`, `creative`,
`devops`, `email`, `health`, `mcp`, `migration`, `mlops`, `productivity`,
`research`, `security`, `web-development`.
When reviewing skill PRs, check which directory they target — heavy-dep or
niche skills belong in `optional-skills/`.
### SKILL.md frontmatter
Standard fields: `name`, `description`, `version`, `platforms`
(OS-gating list: `[macos]`, `[linux, macos]`, ...),
`metadata.hermes.tags`, `metadata.hermes.category`,
`metadata.hermes.config` (config.yaml settings the skill needs — stored
under `skills.config.<key>`, prompted during setup, injected at load time).
---
## Important Policies
### Prompt Caching Must Not Break
Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:**
@@ -516,10 +411,9 @@ Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT i
Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression.
Slash commands that mutate system-prompt state (skills, tools, memory, etc.)
must be **cache-aware**: default to deferred invalidation (change takes
effect next session), with an opt-in `--now` flag for immediate
invalidation. See `/skills install --now` for the canonical pattern.
### Working Directory Behavior
- **CLI**: Uses current directory (`.``os.getcwd()`)
- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory)
### Background Process Notifications (Gateway)
@@ -541,7 +435,7 @@ Hermes supports **profiles** — multiple fully isolated instances, each with it
`HERMES_HOME` directory (config, API keys, memory, sessions, skills, gateway, etc.).
The core mechanism: `_apply_profile_override()` in `hermes_cli/main.py` sets
`HERMES_HOME` before any module imports. All `get_hermes_home()` references
`HERMES_HOME` before any module imports. All 119+ references to `get_hermes_home()`
automatically scope to the active profile.
### Rules for profile-safe code
@@ -598,12 +492,8 @@ Use `get_hermes_home()` from `hermes_constants` for code paths. Use `display_her
for user-facing print/log messages. Hardcoding `~/.hermes` breaks profiles — each profile
has its own `HERMES_HOME` directory. This was the source of 5 bugs fixed in PR #3575.
### DO NOT introduce new `simple_term_menu` usage
Existing call sites in `hermes_cli/main.py` remain for legacy fallback only;
the preferred UI is curses (stdlib) because `simple_term_menu` has
ghost-duplication rendering bugs in tmux/iTerm2 with arrow keys. New
interactive menus must use `hermes_cli/curses_ui.py` — see
`hermes_cli/tools_config.py` for the canonical pattern.
### DO NOT use `simple_term_menu` for interactive menus
Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern.
### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`.
@@ -614,30 +504,6 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p
### DO NOT hardcode cross-tool references in schema descriptions
Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.
### The gateway has TWO message guards — both must bypass approval/control commands
When an agent is running, messages pass through two sequential guards:
(1) **base adapter** (`gateway/platforms/base.py`) queues messages in
`_pending_messages` when `session_key in self._active_sessions`, and
(2) **gateway runner** (`gateway/run.py`) intercepts `/stop`, `/new`,
`/queue`, `/status`, `/approve`, `/deny` before they reach
`running_agent.interrupt()`. Any new command that must reach the runner
while the agent is blocked (e.g. approval prompts) MUST bypass BOTH
guards and be dispatched inline, not via `_process_message_background()`
(which races session lifecycle).
### Squash merges from stale branches silently revert recent fixes
Before squash-merging a PR, ensure the branch is up to date with `main`
(`git fetch origin main && git reset --hard origin/main` in the worktree,
then re-apply the PR's commits). A stale branch's version of an unrelated
file will silently overwrite recent fixes on main when squashed. Verify
with `git diff HEAD~1..HEAD` after merging — unexpected deletions are a
red flag.
### Don't wire in dead code without E2E validation
Unused code that was never shipped was dead for a reason. Before wiring an
unused module into a live code path, E2E test the real resolution chain
with actual imports (not mocks) against a temp `HERMES_HOME`.
### Tests must not write to `~/.hermes/`
The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.
@@ -693,59 +559,10 @@ If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
pytest directly), at minimum activate the venv and pass `-n 4`:
```bash
source .venv/bin/activate # or: source venv/bin/activate
source venv/bin/activate
python -m pytest tests/ -q -n 4
```
Worker count above 4 will surface test-ordering flakes that CI never sees.
Always run the full suite before pushing changes.
### Don't write change-detector tests
A test is a **change-detector** if it fails whenever data that is **expected
to change** gets updated — model catalogs, config version numbers,
enumeration counts, hardcoded lists of provider models. These tests add no
behavioral coverage; they just guarantee that routine source updates break
CI and cost engineering time to "fix."
**Do not write:**
```python
# catalog snapshot — breaks every model release
assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"]
assert "MiniMax-M2.7" in models
# config version literal — breaks every schema bump
assert DEFAULT_CONFIG["_config_version"] == 21
# enumeration count — breaks every time a skill/provider is added
assert len(_PROVIDER_MODELS["huggingface"]) == 8
```
**Do write:**
```python
# behavior: does the catalog plumbing work at all?
assert "gemini" in _PROVIDER_MODELS
assert len(_PROVIDER_MODELS["gemini"]) >= 1
# behavior: does migration bump the user's version to current latest?
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
# invariant: no plan-only model leaks into the legacy list
assert not (set(moonshot_models) & coding_plan_only_models)
# invariant: every model in the catalog has a context-length entry
for m in _PROVIDER_MODELS["huggingface"]:
assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER
```
The rule: if the test reads like a snapshot of current data, delete it. If
it reads like a contract about how two pieces of data must relate, keep it.
When a PR adds a new provider/model and you want a test, make the test
assert the relationship (e.g. "catalog entries all have context lengths"),
not the specific names.
Reviewers should reject new change-detector tests; authors should convert
them into invariants before re-requesting review.
+6 -6
View File
@@ -9,7 +9,7 @@ Thank you for contributing to Hermes Agent! This guide covers everything you nee
We value contributions in this order:
1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
2. **Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. We want Hermes to work everywhere.
2. **Cross-platform compatibility** Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere.
3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
4. **Performance and robustness** — retry logic, error handling, graceful degradation.
5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
@@ -55,10 +55,10 @@ If your skill is specialized, community-contributed, or niche, it's better suite
| Requirement | Notes |
|-------------|-------|
| **Git** | With `--recurse-submodules` support, and the `git-lfs` extension installed |
| **Git** | With `--recurse-submodules` support |
| **Python 3.11+** | uv will install it if missing |
| **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
| **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |
| **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge |
### Clone and install
@@ -88,7 +88,7 @@ cp cli-config.yaml.example ~/.hermes/config.yaml
touch ~/.hermes/.env
# Add at minimum an LLM provider key:
echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
```
### Run
@@ -515,7 +515,7 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl
## Cross-Platform Compatibility
Hermes runs on Linux, macOS, and WSL2 on Windows. When writing code that touches the OS:
Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS:
### Critical rules
@@ -597,7 +597,7 @@ refactor/description # Code restructuring
1. **Run tests**: `pytest tests/ -v`
2. **Test manually**: Run `hermes` and exercise the code path you changed
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS
4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.
### PR description
+3 -2
View File
@@ -12,7 +12,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
# Install system dependencies in one layer, clear APT cache
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli && \
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git && \
rm -rf /var/lib/apt/lists/*
# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -27,10 +27,12 @@ WORKDIR /opt/hermes
# Copy only package manifests first so npm install + Playwright are cached
# unless the lockfiles themselves change.
COPY package.json package-lock.json ./
COPY scripts/whatsapp-bridge/package.json scripts/whatsapp-bridge/package-lock.json scripts/whatsapp-bridge/
COPY web/package.json web/package-lock.json web/
RUN npm install --prefer-offline --no-audit && \
npx playwright install --with-deps chromium --only-shell && \
(cd scripts/whatsapp-bridge && npm install --prefer-offline --no-audit) && \
(cd web && npm install --prefer-offline --no-audit) && \
npm cache clean --force
@@ -50,6 +52,5 @@ RUN uv venv && \
# ---------- Runtime ----------
ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
ENV HERMES_HOME=/opt/data
ENV PATH="/opt/data/.local/bin:${PATH}"
VOLUME [ "/opt/data" ]
ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
+8 -3
View File
@@ -76,7 +76,7 @@ Hermes has two entry points: start the terminal UI with `hermes`, or run the gat
| Set a personality | `/personality [name]` | `/personality [name]` |
| Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` |
| Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` |
| Browse skills | `/skills` or `/<skill-name>` | `/<skill-name>` |
| Browse skills | `/skills` or `/<skill-name>` | `/skills` or `/<skill-name>` |
| Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message |
| Platform-specific status | `/platforms` | `/status`, `/sethome` |
@@ -157,10 +157,14 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
uv venv venv --python 3.11
source venv/bin/activate
uv pip install -e ".[all,dev]"
scripts/run_tests.sh
python -m pytest tests/ -q
```
> **RL Training (optional):** The RL/Atropos integration (`environments/`) ships via the `atroposlib` and `tinker` dependencies pulled in by `.[all,dev]` — no submodule setup required.
> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
> ```bash
> git submodule update --init tinker-atropos
> uv pip install -e "./tinker-atropos"
> ```
---
@@ -169,6 +173,7 @@ scripts/run_tests.sh
- 💬 [Discord](https://discord.gg/NousResearch)
- 📚 [Skills Hub](https://agentskills.io)
- 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
- 💡 [Discussions](https://github.com/NousResearch/hermes-agent/discussions)
- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.
---
-453
View File
@@ -1,453 +0,0 @@
# Hermes Agent v0.11.0 (v2026.4.23)
**Release Date:** April 23, 2026
**Since v0.9.0:** 1,556 commits · 761 merged PRs · 1,314 files changed · 224,174 insertions · 29 community contributors (290 including co-authors)
> The Interface release — a full React/Ink rewrite of the interactive CLI, a pluggable transport architecture underneath every provider, native AWS Bedrock support, five new inference paths, a 17th messaging platform (QQBot), a dramatically expanded plugin surface, and GPT-5.5 via Codex OAuth.
This release also folds in all the highlights deferred from v0.10.0 (which shipped only the Nous Tool Gateway) — so it covers roughly two weeks of work across the whole stack.
---
## ✨ Highlights
- **New Ink-based TUI** — `hermes --tui` is now a full React/Ink rewrite of the interactive CLI, with a Python JSON-RPC backend (`tui_gateway`). Sticky composer, live streaming with OSC-52 clipboard support, stable picker keys, status bar with per-turn stopwatch and git branch, `/clear` confirm, light-theme preset, and a subagent spawn observability overlay. ~310 commits to `ui-tui/` + `tui_gateway/`. (@OutThisLife + Teknium)
- **Transport ABC + Native AWS Bedrock** — Format conversion and HTTP transport were extracted from `run_agent.py` into a pluggable `agent/transports/` layer. `AnthropicTransport`, `ChatCompletionsTransport`, `ResponsesApiTransport`, and `BedrockTransport` each own their own format conversion and API shape. Native AWS Bedrock support via the Converse API ships on top of the new abstraction. ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549), [#13347](https://github.com/NousResearch/hermes-agent/pull/13347), [#13366](https://github.com/NousResearch/hermes-agent/pull/13366), [#13430](https://github.com/NousResearch/hermes-agent/pull/13430), [#13805](https://github.com/NousResearch/hermes-agent/pull/13805), [#13814](https://github.com/NousResearch/hermes-agent/pull/13814) — @kshitijk4poor + Teknium)
- **Five new inference paths** — Native NVIDIA NIM ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774)), Arcee AI ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276)), Step Plan ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893)), Google Gemini CLI OAuth ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270)), and Vercel ai-gateway with pricing + dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223) — @jerilynzheng). Plus Gemini routed through the native AI Studio API for better performance ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674)).
- **GPT-5.5 over Codex OAuth** — OpenAI's new GPT-5.5 reasoning model is now available through your ChatGPT Codex OAuth, with live model discovery wired into the model picker so new OpenAI releases show up without catalog updates. ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
- **QQBot — 17th supported platform** — Native QQBot adapter via QQ Official API v2, with QR scan-to-configure setup wizard, streaming cursor, emoji reactions, and DM/group policy gating that matches WeCom/Weixin parity. ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
- **Plugin surface expanded** — Plugins can now register slash commands (`register_command`), dispatch tools directly (`dispatch_tool`), block tool execution from hooks (`pre_tool_call` can veto), rewrite tool results (`transform_tool_result`), transform terminal output (`transform_terminal_output`), ship image_gen backends, and add custom dashboard tabs. The bundled disk-cleanup plugin is opt-in by default as a reference implementation. ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377), [#10626](https://github.com/NousResearch/hermes-agent/pull/10626), [#10763](https://github.com/NousResearch/hermes-agent/pull/10763), [#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#12929](https://github.com/NousResearch/hermes-agent/pull/12929), [#12944](https://github.com/NousResearch/hermes-agent/pull/12944), [#12972](https://github.com/NousResearch/hermes-agent/pull/12972), [#13799](https://github.com/NousResearch/hermes-agent/pull/13799), [#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
- **`/steer` — mid-run agent nudges** — `/steer <prompt>` injects a note that the running agent sees after its next tool call, without interrupting the turn or breaking prompt cache. For when you want to course-correct an agent in-flight. ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
- **Shell hooks** — Wire any shell script as a Hermes lifecycle hook (pre_tool_call, post_tool_call, on_session_start, etc.) without writing a Python plugin. ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
- **Webhook direct-delivery mode** — Webhook subscriptions can now forward payloads straight to a platform chat without going through the agent — zero-LLM push notifications for alerting, uptime checks, and event streams. ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
- **Smarter delegation** — Subagents now have an explicit `orchestrator` role that can spawn their own workers, with configurable `max_spawn_depth` (default flat). Concurrent sibling subagents share filesystem state through a file-coordination layer so they don't clobber each other's edits. ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691), [#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
- **Auxiliary models — configurable UI + main-model-first** — `hermes model` has a dedicated "Configure auxiliary models" screen for per-task overrides (compression, vision, session_search, title_generation). `auto` routing now defaults to the main model for side tasks across all users (previously aggregator users were silently routed to a cheap provider-side default). ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891), [#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
- **Dashboard plugin system + live theme switching** — The web dashboard is now extensible. Third-party plugins can add custom tabs, widgets, and views without forking. Paired with a live-switching theme system — themes now control colors, fonts, layout, and density — so users can hot-swap the dashboard look without a reload. Same theming discipline the CLI has, now on the web. ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#10687](https://github.com/NousResearch/hermes-agent/pull/10687), [#14725](https://github.com/NousResearch/hermes-agent/pull/14725))
- **Dashboard polish** — i18n (English + Chinese), react-router sidebar layout, mobile-responsive, Vercel deployment, real per-session API call tracking, and one-click update + gateway restart buttons. ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), [#9370](https://github.com/NousResearch/hermes-agent/pull/9370), [#9453](https://github.com/NousResearch/hermes-agent/pull/9453), [#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#13526](https://github.com/NousResearch/hermes-agent/pull/13526), [#14004](https://github.com/NousResearch/hermes-agent/pull/14004) — @austinpickett + @DeployFaith + Teknium)
---
## 🏗️ Core Agent & Architecture
### Transport Layer (NEW)
- **Transport ABC** abstracts format conversion and HTTP transport from `run_agent.py` into `agent/transports/` ([#13347](https://github.com/NousResearch/hermes-agent/pull/13347))
- **AnthropicTransport** — Anthropic Messages API path ([#13366](https://github.com/NousResearch/hermes-agent/pull/13366), @kshitijk4poor)
- **ChatCompletionsTransport** — default path for OpenAI-compatible providers ([#13805](https://github.com/NousResearch/hermes-agent/pull/13805))
- **ResponsesApiTransport** — OpenAI Responses API + Codex build_kwargs wiring ([#13430](https://github.com/NousResearch/hermes-agent/pull/13430), @kshitijk4poor)
- **BedrockTransport** — AWS Bedrock Converse API transport ([#13814](https://github.com/NousResearch/hermes-agent/pull/13814))
### Provider & Model Support
- **Native AWS Bedrock provider** via Converse API ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549))
- **NVIDIA NIM native provider** (salvage of #11703) ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774))
- **Arcee AI direct provider** ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276))
- **Step Plan provider** (salvage #6005) ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893), @kshitijk4poor)
- **Google Gemini CLI OAuth** inference provider ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270))
- **Vercel ai-gateway** with pricing, attribution, and dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223), @jerilynzheng)
- **GPT-5.5 over Codex OAuth** with live model discovery in the picker ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
- **Gemini routed through native AI Studio API** ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674))
- **xAI Grok upgraded to Responses API** ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
- **Ollama improvements** — Cloud provider support, GLM continuation, `think=false` control, surrogate sanitization, `/v1` hint ([#10782](https://github.com/NousResearch/hermes-agent/pull/10782))
- **Kimi K2.6** across OpenRouter, Nous Portal, native Kimi, and HuggingFace ([#13148](https://github.com/NousResearch/hermes-agent/pull/13148), [#13152](https://github.com/NousResearch/hermes-agent/pull/13152), [#13169](https://github.com/NousResearch/hermes-agent/pull/13169))
- **Kimi K2.5** promoted to first position in all model suggestion lists ([#11745](https://github.com/NousResearch/hermes-agent/pull/11745), @kshitijk4poor)
- **Xiaomi MiMo v2.5-pro + v2.5** on OpenRouter, Nous Portal, and native ([#14184](https://github.com/NousResearch/hermes-agent/pull/14184), [#14635](https://github.com/NousResearch/hermes-agent/pull/14635), @kshitijk4poor)
- **GLM-5V-Turbo** for coding plan ([#9907](https://github.com/NousResearch/hermes-agent/pull/9907))
- **Claude Opus 4.7** in Nous Portal catalog ([#11398](https://github.com/NousResearch/hermes-agent/pull/11398))
- **OpenRouter elephant-alpha** in curated lists ([#9378](https://github.com/NousResearch/hermes-agent/pull/9378))
- **OpenCode-Go** — Kimi K2.6 and Qwen3.5/3.6 Plus in curated catalog ([#13429](https://github.com/NousResearch/hermes-agent/pull/13429))
- **minimax/minimax-m2.5:free** in OpenRouter catalog ([#13836](https://github.com/NousResearch/hermes-agent/pull/13836))
- **`/model` merges models.dev entries** for lesser-loved providers ([#14221](https://github.com/NousResearch/hermes-agent/pull/14221))
- **Per-provider + per-model `request_timeout_seconds`** config ([#12652](https://github.com/NousResearch/hermes-agent/pull/12652))
- **Configurable API retry count** via `agent.api_max_retries` ([#14730](https://github.com/NousResearch/hermes-agent/pull/14730))
- **ctx_size context length key** for Lemonade server (salvage #8536) ([#14215](https://github.com/NousResearch/hermes-agent/pull/14215))
- **Custom provider display name prompt** ([#9420](https://github.com/NousResearch/hermes-agent/pull/9420))
- **Recommendation badges** on tool provider selection ([#9929](https://github.com/NousResearch/hermes-agent/pull/9929))
- Fix: correct GPT-5 family context lengths in fallback defaults ([#9309](https://github.com/NousResearch/hermes-agent/pull/9309))
- Fix: clamp `minimal` reasoning effort to `low` on Responses API ([#9429](https://github.com/NousResearch/hermes-agent/pull/9429))
- Fix: strip reasoning item IDs from Responses API input when `store=False` ([#10217](https://github.com/NousResearch/hermes-agent/pull/10217))
- Fix: OpenViking correct account default + commit session on `/new` and compress ([#10463](https://github.com/NousResearch/hermes-agent/pull/10463))
- Fix: Kimi `/coding` thinking block survival + empty reasoning_content + block ordering (multiple PRs)
- Fix: don't send Anthropic thinking to api.kimi.com/coding ([#13826](https://github.com/NousResearch/hermes-agent/pull/13826))
- Fix: send `max_tokens`, `reasoning_effort`, and `thinking` for Kimi/Moonshot
- Fix: stream reasoning content through OpenAI-compatible providers that emit it
### Agent Loop & Conversation
- **`/steer <prompt>`** — mid-run agent nudges after next tool call ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
- **Orchestrator role + configurable spawn depth** for `delegate_task` (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
- **Cross-agent file state coordination** for concurrent subagents ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
- **Compressor smart collapse, dedup, anti-thrashing**, template upgrade, hardening ([#10088](https://github.com/NousResearch/hermes-agent/pull/10088))
- **Compression summaries respect the conversation's language** ([#12556](https://github.com/NousResearch/hermes-agent/pull/12556))
- **Compression model falls back to main model** on permanent 503/404 ([#10093](https://github.com/NousResearch/hermes-agent/pull/10093))
- **Auto-continue interrupted agent work** after gateway restart ([#9934](https://github.com/NousResearch/hermes-agent/pull/9934))
- **Activity heartbeats** prevent false gateway inactivity timeouts ([#10501](https://github.com/NousResearch/hermes-agent/pull/10501))
- **Auxiliary models UI** — dedicated screen for per-task overrides ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891))
- **Auxiliary auto routing defaults to main model** for all users ([#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
- **PLATFORM_HINTS for Matrix, Mattermost, Feishu** ([#14428](https://github.com/NousResearch/hermes-agent/pull/14428), @alt-glitch)
- Fix: reset retry counters after compression; stop poisoning conversation history ([#10055](https://github.com/NousResearch/hermes-agent/pull/10055))
- Fix: break compression-exhaustion infinite loop and auto-reset session ([#10063](https://github.com/NousResearch/hermes-agent/pull/10063))
- Fix: stale agent timeout, uv venv detection, empty response after tools ([#10065](https://github.com/NousResearch/hermes-agent/pull/10065))
- Fix: prevent premature loop exit when weak models return empty after substantive tool calls ([#10472](https://github.com/NousResearch/hermes-agent/pull/10472))
- Fix: preserve pre-start terminal interrupts ([#10504](https://github.com/NousResearch/hermes-agent/pull/10504))
- Fix: improve interrupt responsiveness during concurrent tool execution ([#10935](https://github.com/NousResearch/hermes-agent/pull/10935))
- Fix: word-wrap spinner, interruptable agent join, and delegate_task interrupt ([#10940](https://github.com/NousResearch/hermes-agent/pull/10940))
- Fix: `/stop` no longer resets the session ([#9224](https://github.com/NousResearch/hermes-agent/pull/9224))
- Fix: honor interrupts during MCP tool waits ([#9382](https://github.com/NousResearch/hermes-agent/pull/9382), @helix4u)
- Fix: break stuck session resume loops after repeated restarts ([#9941](https://github.com/NousResearch/hermes-agent/pull/9941))
- Fix: empty response nudge crash + placeholder leak to cron targets ([#11021](https://github.com/NousResearch/hermes-agent/pull/11021))
- Fix: streaming cursor sanitization to prevent message truncation (multiple PRs)
- Fix: resolve `context_length` for plugin context engines ([#9238](https://github.com/NousResearch/hermes-agent/pull/9238))
### Session & Memory
- **Auto-prune old sessions + VACUUM state.db** at startup ([#13861](https://github.com/NousResearch/hermes-agent/pull/13861))
- **Honcho overhaul** — context injection, 5-tool surface, cost safety, session isolation ([#10619](https://github.com/NousResearch/hermes-agent/pull/10619))
- **Hindsight richer session-scoped retain metadata** (salvage of #6290) ([#13987](https://github.com/NousResearch/hermes-agent/pull/13987))
- Fix: deduplicate memory provider tools to prevent 400 on strict providers ([#10511](https://github.com/NousResearch/hermes-agent/pull/10511))
- Fix: discover user-installed memory providers from `$HERMES_HOME/plugins/` ([#10529](https://github.com/NousResearch/hermes-agent/pull/10529))
- Fix: add `on_memory_write` bridge to sequential tool execution path ([#10507](https://github.com/NousResearch/hermes-agent/pull/10507))
- Fix: preserve `session_id` across `previous_response_id` chains in `/v1/responses` ([#10059](https://github.com/NousResearch/hermes-agent/pull/10059))
---
## 🖥️ New Ink-based TUI
A full React/Ink rewrite of the interactive CLI — invoked via `hermes --tui` or `HERMES_TUI=1`. Shipped across ~310 commits to `ui-tui/` and `tui_gateway/`.
### TUI Foundations
- New TUI based on Ink + Python JSON-RPC backend
- Prettier + ESLint + vitest tooling for `ui-tui/`
- Entry split between `src/entry.tsx` (TTY gate) and `src/app.tsx` (state machine)
- Persistent `_SlashWorker` subprocess for slash command dispatch
### UX & Features
- **Stable picker keys, /clear confirm, light-theme preset** ([#12312](https://github.com/NousResearch/hermes-agent/pull/12312), @OutThisLife)
- **Git branch in status bar** cwd label ([#12305](https://github.com/NousResearch/hermes-agent/pull/12305), @OutThisLife)
- **Per-turn elapsed stopwatch in FaceTicker + done-in sys line** ([#13105](https://github.com/NousResearch/hermes-agent/pull/13105), @OutThisLife)
- **Subagent spawn observability overlay** ([#14045](https://github.com/NousResearch/hermes-agent/pull/14045), @OutThisLife)
- **Per-prompt elapsed stopwatch in status bar** ([#12948](https://github.com/NousResearch/hermes-agent/pull/12948))
- Sticky composer that freezes during scroll
- OSC-52 clipboard support for copy across SSH sessions
- Virtualized history rendering for performance
- Slash command autocomplete via `complete.slash` RPC
- Path autocomplete via `complete.path` RPC
- Dozens of resize/ghosting/sticky-prompt fixes landed through the week
### Structural Refactors
- Decomposed `app.tsx` into `app/event-handler`, `app/slash-handler`, `app/stores`, `app/hooks` ([#14640](https://github.com/NousResearch/hermes-agent/pull/14640) and surrounding)
- Component split: `branding.tsx`, `markdown.tsx`, `prompts.tsx`, `sessionPicker.tsx`, `messageLine.tsx`, `thinking.tsx`, `maskedPrompt.tsx`
- Hook split: `useCompletion`, `useInputHistory`, `useQueue`, `useVirtualHistory`
---
## 📱 Messaging Platforms (Gateway)
### New Platforms
- **QQBot (17th platform)** — QQ Official API v2 adapter with QR setup, streaming, package split ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
### Telegram
- **Dedicated `TELEGRAM_PROXY` env var + config.yaml proxy support** (closes #9414, #6530, #9074, #7786) ([#10681](https://github.com/NousResearch/hermes-agent/pull/10681))
- **`ignored_threads` config** for Telegram groups ([#9530](https://github.com/NousResearch/hermes-agent/pull/9530))
- **Config option to disable link previews** (closes #8728) ([#10610](https://github.com/NousResearch/hermes-agent/pull/10610))
- **Auto-wrap markdown tables** in code blocks ([#11794](https://github.com/NousResearch/hermes-agent/pull/11794))
- Fix: prevent duplicate replies when stream task is cancelled ([#9319](https://github.com/NousResearch/hermes-agent/pull/9319))
- Fix: prevent streaming cursor (▉) from appearing as standalone messages ([#9538](https://github.com/NousResearch/hermes-agent/pull/9538))
- Fix: retry transient tool sends + cold-boot budget ([#10947](https://github.com/NousResearch/hermes-agent/pull/10947))
- Fix: Markdown special char escaping in `send_exec_approval`
- Fix: parentheses in URLs during MarkdownV2 link conversion
- Fix: Unicode dash normalization in model switch (closes iOS smart-punctuation issue)
- Many platform hint / streaming / session-key fixes
### Discord
- **Forum channel support** (salvage of #10145 + media + polish) ([#11920](https://github.com/NousResearch/hermes-agent/pull/11920))
- **`DISCORD_ALLOWED_ROLES`** for role-based access control ([#11608](https://github.com/NousResearch/hermes-agent/pull/11608))
- **Config option to disable slash commands** (salvage #13130) ([#14315](https://github.com/NousResearch/hermes-agent/pull/14315))
- **Native `send_animation`** for inline GIF playback ([#10283](https://github.com/NousResearch/hermes-agent/pull/10283))
- **`send_message` Discord media attachments** ([#10246](https://github.com/NousResearch/hermes-agent/pull/10246))
- **`/skill` command group** with category subcommands ([#9909](https://github.com/NousResearch/hermes-agent/pull/9909))
- **Extract reply text from message references** ([#9781](https://github.com/NousResearch/hermes-agent/pull/9781))
### Feishu
- **Intelligent reply on document comments** with 3-tier access control ([#11898](https://github.com/NousResearch/hermes-agent/pull/11898))
- **Show processing state via reactions** on user messages ([#12927](https://github.com/NousResearch/hermes-agent/pull/12927))
- **Preserve @mention context for agent consumption** (salvage #13874) ([#14167](https://github.com/NousResearch/hermes-agent/pull/14167))
### DingTalk
- **`require_mention` + `allowed_users` gating** (parity with Slack/Telegram/Discord) ([#11564](https://github.com/NousResearch/hermes-agent/pull/11564))
- **QR-code device-flow authorization** for setup wizard ([#11574](https://github.com/NousResearch/hermes-agent/pull/11574))
- **AI Cards streaming, emoji reactions, and media handling** (salvage of #10985) ([#11910](https://github.com/NousResearch/hermes-agent/pull/11910))
### WhatsApp
- **`send_voice`** — native audio message delivery ([#13002](https://github.com/NousResearch/hermes-agent/pull/13002))
- **`dm_policy` and `group_policy`** parity with WeCom/Weixin/QQ adapters ([#13151](https://github.com/NousResearch/hermes-agent/pull/13151))
### WeCom / Weixin
- **WeCom QR-scan bot creation + interactive setup wizard** (salvage #13923) ([#13961](https://github.com/NousResearch/hermes-agent/pull/13961))
### Signal
- **Media delivery support** via `send_message` ([#13178](https://github.com/NousResearch/hermes-agent/pull/13178))
### Slack
- **Per-thread sessions for DMs by default** ([#10987](https://github.com/NousResearch/hermes-agent/pull/10987))
### BlueBubbles (iMessage)
- Group chat session separation, webhook registration & auth fixes ([#9806](https://github.com/NousResearch/hermes-agent/pull/9806))
### Gateway Core
- **Gateway proxy mode** — forward messages to a remote API server ([#9787](https://github.com/NousResearch/hermes-agent/pull/9787))
- **Per-channel ephemeral prompts** (Discord, Telegram, Slack, Mattermost) ([#10564](https://github.com/NousResearch/hermes-agent/pull/10564))
- **Surface plugin slash commands** natively on all platforms + decision-capable command hook ([#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
- **Support document/archive extensions in MEDIA: tag extraction** (salvage #8255) ([#14307](https://github.com/NousResearch/hermes-agent/pull/14307))
- **Recognize `.pdf` in MEDIA: tag extraction** ([#13683](https://github.com/NousResearch/hermes-agent/pull/13683))
- **`--all` flag for `gateway start` and `restart`** ([#10043](https://github.com/NousResearch/hermes-agent/pull/10043))
- **Notify active sessions on gateway shutdown** + update health check ([#9850](https://github.com/NousResearch/hermes-agent/pull/9850))
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
- Fix: suppress duplicate replies on interrupt and streaming flood control ([#10235](https://github.com/NousResearch/hermes-agent/pull/10235))
- Fix: close temporary agents after one-off tasks ([#11028](https://github.com/NousResearch/hermes-agent/pull/11028), @kshitijk4poor)
- Fix: busy-session ack when user messages during active agent run ([#10068](https://github.com/NousResearch/hermes-agent/pull/10068))
- Fix: route watch-pattern notifications to the originating session ([#10460](https://github.com/NousResearch/hermes-agent/pull/10460))
- Fix: preserve notify context in executor threads ([#10921](https://github.com/NousResearch/hermes-agent/pull/10921), @kshitijk4poor)
- Fix: avoid duplicate replies after interrupted long tasks ([#11018](https://github.com/NousResearch/hermes-agent/pull/11018))
- Fix: unlink stale PID + lock files on cleanup
- Fix: force-unlink stale PID file after `--replace` takeover
---
## 🔧 Tool System
### Plugin Surface (major expansion)
- **`register_command()`** — plugins can now add slash commands ([#10626](https://github.com/NousResearch/hermes-agent/pull/10626))
- **`dispatch_tool()`** — plugins can invoke tools from their code ([#10763](https://github.com/NousResearch/hermes-agent/pull/10763))
- **`pre_tool_call` blocking** — plugins can veto tool execution ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377))
- **`transform_tool_result`** — plugins rewrite tool results generically ([#12972](https://github.com/NousResearch/hermes-agent/pull/12972))
- **`transform_terminal_output`** — plugins rewrite terminal tool output ([#12929](https://github.com/NousResearch/hermes-agent/pull/12929))
- **Namespaced skill registration** for plugin skill bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
- **Opt-in-by-default + bundled disk-cleanup plugin** (salvage #12212) ([#12944](https://github.com/NousResearch/hermes-agent/pull/12944))
- **Pluggable `image_gen` backends + OpenAI provider** ([#13799](https://github.com/NousResearch/hermes-agent/pull/13799))
- **`openai-codex` image_gen plugin** (gpt-image-2 via Codex OAuth) ([#14317](https://github.com/NousResearch/hermes-agent/pull/14317))
- **Shell hooks** — wire shell scripts as hook callbacks ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
### Browser
- **`browser_cdp` raw DevTools Protocol passthrough** ([#12369](https://github.com/NousResearch/hermes-agent/pull/12369))
- Camofox hardening + connection stability across the window
### Execute Code
- **Project/strict execution modes** (default: project) ([#11971](https://github.com/NousResearch/hermes-agent/pull/11971))
### Image Generation
- **Multi-model FAL support** with picker in `hermes tools` ([#11265](https://github.com/NousResearch/hermes-agent/pull/11265))
- **Recraft V3 → V4 Pro, Nano Banana → Pro upgrades** ([#11406](https://github.com/NousResearch/hermes-agent/pull/11406))
- **GPT Image 2** in FAL catalog ([#13677](https://github.com/NousResearch/hermes-agent/pull/13677))
- **xAI image generation provider** (grok-imagine-image) ([#14765](https://github.com/NousResearch/hermes-agent/pull/14765))
### TTS / STT / Voice
- **Google Gemini TTS provider** ([#11229](https://github.com/NousResearch/hermes-agent/pull/11229))
- **xAI Grok STT provider** ([#14473](https://github.com/NousResearch/hermes-agent/pull/14473))
- **xAI TTS** (shipped with Responses API upgrade) ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
- **KittenTTS local provider** (salvage of #2109) ([#13395](https://github.com/NousResearch/hermes-agent/pull/13395))
- **CLI record beep toggle** ([#13247](https://github.com/NousResearch/hermes-agent/pull/13247), @helix4u)
### Webhook / Cron
- **Webhook direct-delivery mode** — zero-LLM push notifications ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
- **Cron `wakeAgent` gate** — scripts can skip the agent entirely ([#12373](https://github.com/NousResearch/hermes-agent/pull/12373))
- **Cron per-job `enabled_toolsets`** — cap token overhead + cost per job ([#14767](https://github.com/NousResearch/hermes-agent/pull/14767))
### Delegate
- **Orchestrator role** + configurable spawn depth (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
- **Cross-agent file state coordination** ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
### File / Patch
- **`patch` — "did you mean?" feedback** when patch fails to match ([#13435](https://github.com/NousResearch/hermes-agent/pull/13435))
### API Server
- **Stream `/v1/responses` SSE tool events** (salvage #9779) ([#10049](https://github.com/NousResearch/hermes-agent/pull/10049))
- **Inline image inputs** on `/v1/chat/completions` and `/v1/responses` ([#12969](https://github.com/NousResearch/hermes-agent/pull/12969))
### Docker / Podman
- **Entry-level Podman support** — `find_docker()` + rootless entrypoint ([#10066](https://github.com/NousResearch/hermes-agent/pull/10066))
- **Add docker-cli to Docker image** (salvage #10096) ([#14232](https://github.com/NousResearch/hermes-agent/pull/14232))
- **File-sync back to host on teardown** (salvage of #8189 + hardening) ([#11291](https://github.com/NousResearch/hermes-agent/pull/11291))
### MCP
- 12 MCP improvements across the window (status, timeout handling, tool-call forwarding, etc.)
---
## 🧩 Skills Ecosystem
### Skill System
- **Namespaced skill registration** for plugin bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
- **`hermes skills reset`** to un-stick bundled skills ([#11468](https://github.com/NousResearch/hermes-agent/pull/11468))
- **Skills guard opt-in** — `config.skills.guard_agent_created` (default off) ([#14557](https://github.com/NousResearch/hermes-agent/pull/14557))
- **Bundled skill scripts runnable out of the box** ([#13384](https://github.com/NousResearch/hermes-agent/pull/13384))
- **`xitter` replaced with `xurl`** — the official X API CLI ([#12303](https://github.com/NousResearch/hermes-agent/pull/12303))
- **MiniMax-AI/cli as default skill tap** (salvage #7501) ([#14493](https://github.com/NousResearch/hermes-agent/pull/14493))
- **Fuzzy `@` file completions + mtime sorting** ([#9467](https://github.com/NousResearch/hermes-agent/pull/9467))
### New Skills
- **concept-diagrams** (salvage of #11045, @v1k22) ([#11363](https://github.com/NousResearch/hermes-agent/pull/11363))
- **architecture-diagram** (Cocoon AI port) ([#9906](https://github.com/NousResearch/hermes-agent/pull/9906))
- **pixel-art** with hardware palettes and video animation ([#12663](https://github.com/NousResearch/hermes-agent/pull/12663), [#12725](https://github.com/NousResearch/hermes-agent/pull/12725))
- **baoyu-comic** ([#13257](https://github.com/NousResearch/hermes-agent/pull/13257), @JimLiu)
- **baoyu-infographic** — 21 layouts × 21 styles (salvage #9901) ([#12254](https://github.com/NousResearch/hermes-agent/pull/12254))
- **page-agent** — embed Alibaba's in-page GUI agent in your webapp ([#13976](https://github.com/NousResearch/hermes-agent/pull/13976))
- **fitness-nutrition** optional skill + optional env var support ([#9355](https://github.com/NousResearch/hermes-agent/pull/9355))
- **drug-discovery** — ChEMBL, PubChem, OpenFDA, ADMET ([#9443](https://github.com/NousResearch/hermes-agent/pull/9443))
- **touchdesigner-mcp** (salvage of #10081) ([#12298](https://github.com/NousResearch/hermes-agent/pull/12298))
- **adversarial-ux-test** optional skill (salvage of #2494, @omnissiah-comelse) ([#13425](https://github.com/NousResearch/hermes-agent/pull/13425))
- **maps** — added `guest_house`, `camp_site`, and dual-key bakery lookup ([#13398](https://github.com/NousResearch/hermes-agent/pull/13398))
- **llm-wiki** — port provenance markers, source hashing, and quality signals ([#13700](https://github.com/NousResearch/hermes-agent/pull/13700))
---
## 📊 Web Dashboard
- **i18n (English + Chinese) language switcher** ([#9453](https://github.com/NousResearch/hermes-agent/pull/9453))
- **Live-switching theme system** ([#10687](https://github.com/NousResearch/hermes-agent/pull/10687))
- **Dashboard plugin system** — extend the web UI with custom tabs ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951))
- **react-router, sidebar layout, sticky header, dropdown component** ([#9370](https://github.com/NousResearch/hermes-agent/pull/9370), @austinpickett)
- **Responsive for mobile** ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), @DeployFaith)
- **Vercel deployment** ([#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#11061](https://github.com/NousResearch/hermes-agent/pull/11061), @austinpickett)
- **Context window config support** ([#9357](https://github.com/NousResearch/hermes-agent/pull/9357))
- **HTTP health probe for cross-container gateway detection** ([#9894](https://github.com/NousResearch/hermes-agent/pull/9894))
- **Update + restart gateway buttons** ([#13526](https://github.com/NousResearch/hermes-agent/pull/13526), @austinpickett)
- **Real API call count per session** (salvages #10140) ([#14004](https://github.com/NousResearch/hermes-agent/pull/14004))
---
## 🖱️ CLI & User Experience
- **Dynamic shell completion for bash, zsh, and fish** ([#9785](https://github.com/NousResearch/hermes-agent/pull/9785))
- **Light-mode skins + skin-aware completion menus** ([#9461](https://github.com/NousResearch/hermes-agent/pull/9461))
- **Numbered keyboard shortcuts** on approval and clarify prompts ([#13416](https://github.com/NousResearch/hermes-agent/pull/13416))
- **Markdown stripping, compact multiline previews, external editor** ([#12934](https://github.com/NousResearch/hermes-agent/pull/12934))
- **`--ignore-user-config` and `--ignore-rules` flags** (port codex#18646) ([#14277](https://github.com/NousResearch/hermes-agent/pull/14277))
- **Account limits section in `/usage`** ([#13428](https://github.com/NousResearch/hermes-agent/pull/13428))
- **Doctor: Command Installation check** for `hermes` bin symlink ([#10112](https://github.com/NousResearch/hermes-agent/pull/10112))
- **ESC cancels secret/sudo prompts**, clearer skip messaging ([#9902](https://github.com/NousResearch/hermes-agent/pull/9902))
- Fix: agent-facing text uses `display_hermes_home()` instead of hardcoded `~/.hermes` ([#10285](https://github.com/NousResearch/hermes-agent/pull/10285))
- Fix: enforce `config.yaml` as sole CWD source + deprecate `.env` CWD vars + add `hermes memory reset` ([#11029](https://github.com/NousResearch/hermes-agent/pull/11029))
---
## 🔒 Security & Reliability
- **Global toggle to allow private/internal URL resolution** ([#14166](https://github.com/NousResearch/hermes-agent/pull/14166))
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
- **Telegram callback authorization** on update prompts ([#10536](https://github.com/NousResearch/hermes-agent/pull/10536))
- **SECURITY.md** added ([#10532](https://github.com/NousResearch/hermes-agent/pull/10532), @I3eg1nner)
- **Warn about legacy hermes.service units** during `hermes update` ([#11918](https://github.com/NousResearch/hermes-agent/pull/11918))
- **Complete ASCII-locale UnicodeEncodeError recovery** for `api_messages`/`reasoning_content` (closes #6843) ([#10537](https://github.com/NousResearch/hermes-agent/pull/10537))
- **Prevent stale `os.environ` leak** after `clear_session_vars` ([#10527](https://github.com/NousResearch/hermes-agent/pull/10527))
- **Prevent agent hang when backgrounding processes** via terminal tool ([#10584](https://github.com/NousResearch/hermes-agent/pull/10584))
- Many smaller session-resume, interrupt, streaming, and memory-race fixes throughout the window
---
## 🐛 Notable Bug Fixes
The `fix:` category in this window covers 482 PRs. Highlights:
- Streaming cursor artifacts filtered from Matrix, Telegram, WhatsApp, Discord (multiple PRs)
- `<think>` and `<thought>` blocks filtered from gateway stream consumers ([#9408](https://github.com/NousResearch/hermes-agent/pull/9408))
- Gateway display.streaming root-config override regression ([#9799](https://github.com/NousResearch/hermes-agent/pull/9799))
- Context `session_search` coerces limit to int (prevents TypeError) ([#10522](https://github.com/NousResearch/hermes-agent/pull/10522))
- Memory tool stays available when `fcntl` is unavailable (Windows) ([#9783](https://github.com/NousResearch/hermes-agent/pull/9783))
- Trajectory compressor credentials load from `HERMES_HOME/.env` ([#9632](https://github.com/NousResearch/hermes-agent/pull/9632), @Dusk1e)
- `@_context_completions` no longer crashes on `@` mention ([#9683](https://github.com/NousResearch/hermes-agent/pull/9683), @kshitijk4poor)
- Group session `user_id` no longer treated as `thread_id` in shutdown notifications ([#10546](https://github.com/NousResearch/hermes-agent/pull/10546))
- Telegram `platform_hint` — markdown is supported (closes #8261) ([#10612](https://github.com/NousResearch/hermes-agent/pull/10612))
- Doctor checks for Kimi China credentials fixed
- Streaming: don't suppress final response when commentary message is sent ([#10540](https://github.com/NousResearch/hermes-agent/pull/10540))
- Rapid Telegram follow-ups no longer get cut off
---
## 🧪 Testing & CI
- **Contributor attribution CI check** on PRs ([#9376](https://github.com/NousResearch/hermes-agent/pull/9376))
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
- Test count stabilized post-Transport refactor; CI matrix held green through the transport rollout
---
## 📚 Documentation
- Atropos + wandb links in user guide
- ACP / VS Code / Zed / JetBrains integration docs refresh
- Webhook subscription docs updated for direct-delivery mode
- Plugin author guide expanded for new hooks (`register_command`, `dispatch_tool`, `transform_tool_result`)
- Transport layer developer guide added
- Website removed Discussions link from README
---
## 👥 Contributors
### Core
- **@teknium1** (Teknium)
### Top Community Contributors (by merged PR count)
- **@kshitijk4poor** — 49 PRs · Transport refactor (AnthropicTransport, ResponsesApiTransport), Step Plan provider, Xiaomi MiMo v2.5 support, numerous gateway fixes, promoted Kimi K2.5, @ mention crash fix
- **@OutThisLife** (Brooklyn) — 31 PRs · TUI polish, git branch in status bar, per-turn stopwatch, stable picker keys, `/clear` confirm, light-theme preset, subagent spawn observability overlay
- **@helix4u** — 11 PRs · Voice CLI record beep, MCP tool interrupt handling, assorted stability fixes
- **@austinpickett** — 8 PRs · Dashboard react-router + sidebar + sticky header + dropdown, Vercel deployment, update + restart buttons
- **@alt-glitch** — 8 PRs · PLATFORM_HINTS for Matrix/Mattermost/Feishu, Matrix fixes
- **@ethernet8023** — 3 PRs
- **@benbarclay** — 3 PRs
- **@Aslaaen** — 2 PRs
### Also contributing
@jerilynzheng (ai-gateway pricing), @JimLiu (baoyu-comic skill), @Dusk1e (trajectory compressor credentials), @DeployFaith (mobile-responsive dashboard), @LeonSGP43, @v1k22 (concept-diagrams), @omnissiah-comelse (adversarial-ux-test), @coekfung (Telegram MarkdownV2 expandable blockquotes), @liftaris (TUI provider resolution), @arihantsethia (skill analytics dashboard), @topcheer + @xing8star (QQBot foundation), @kovyrin, @I3eg1nner (SECURITY.md), @PeterBerthelsen, @lengxii, @priveperfumes, @sjz-ks, @cuyua9, @Disaster-Terminator, @leozeli, @LehaoLin, @trevthefoolish, @loongfay, @MrNiceRicee, @WideLee, @bluefishs, @malaiwah, @bobashopcashier, @dsocolobsky, @iamagenius00, @IAvecilla, @aniruddhaadak80, @Es1la, @asheriif, @walli, @jquesnelle (original Tool Gateway work).
### All Contributors (alphabetical)
@0xyg3n, @10ishq, @A-afflatus, @Abnertheforeman, @admin28980, @adybag14-cyber, @akhater, @alexzhu0,
@AllardQuek, @alt-glitch, @aniruddhaadak80, @anna-oake, @anniesurla, @anthhub, @areu01or00, @arihantsethia,
@arthurbr11, @asheriif, @Aslaaen, @Asunfly, @austinpickett, @AviArora02-commits, @AxDSan, @azhengbot, @Bartok9,
@benbarclay, @bennytimz, @bernylinville, @bingo906, @binhnt92, @bkadish, @bluefishs, @bobashopcashier,
@brantzh6, @BrennerSpear, @brianclemens, @briandevans, @brooklynnicholson, @bugkill3r, @buray, @burtenshaw,
@cdanis, @cgarwood82, @ChimingLiu, @chongweiliu, @christopherwoodall, @coekfung, @cola-runner, @corazzione,
@counterposition, @cresslank, @cuyua9, @cypres0099, @danieldoderlein, @davetist, @davidvv, @DeployFaith,
@Dev-Mriganka, @devorun, @dieutx, @Disaster-Terminator, @dodo-reach, @draix, @DrStrangerUJN, @dsocolobsky,
@Dusk1e, @dyxushuai, @elkimek, @elmatadorgh, @emozilla, @entropidelic, @Erosika, @erosika, @Es1la, @etcircle,
@etherman-os, @ethernet8023, @fancydirty, @farion1231, @fatinghenji, @Fatty911, @fengtianyu88, @Feranmi10,
@flobo3, @francip, @fuleinist, @g-guthrie, @GenKoKo, @gianfrancopiana, @gnanam1990, @GuyCui, @haileymarshall,
@haimu0x, @handsdiff, @hansnow, @hedgeho9X, @helix4u, @hengm3467, @HenkDz, @heykb, @hharry11, @HiddenPuppy,
@honghua, @houko, @houziershi, @hsy5571616, @huangke19, @hxp-plus, @Hypn0sis, @I3eg1nner, @iacker,
@iamagenius00, @IAvecilla, @iborazzi, @Ifkellx, @ifrederico, @imink, @isaachuangGMICLOUD, @ismell0992-afk,
@j0sephz, @Jaaneek, @jackjin1997, @JackTheGit, @jaffarkeikei, @jerilynzheng, @JiaDe-Wu, @Jiawen-lee, @JimLiu,
@jinzheng8115, @jneeee, @jplew, @jquesnelle, @Julientalbot, @Junass1, @jvcl, @kagura-agent, @keifergu,
@kevinskysunny, @keyuyuan, @konsisumer, @kovyrin, @kshitijk4poor, @leeyang1990, @LehaoLin, @lengxii,
@LeonSGP43, @leozeli, @li0near, @liftaris, @Lind3ey, @Linux2010, @liujinkun2025, @LLQWQ, @Llugaes, @lmoncany,
@longsizhuo, @lrawnsley, @Lubrsy706, @lumenradley, @luyao618, @lvnilesh, @LVT382009, @m0n5t3r, @Magaav,
@MagicRay1217, @malaiwah, @manuelschipper, @Marvae, @MassiveMassimo, @mavrickdeveloper, @maxchernin, @memosr,
@meng93, @mengjian-github, @MestreY0d4-Uninter, @Mibayy, @MikeFac, @mikewaters, @milkoor, @minorgod,
@MrNiceRicee, @ms-alan, @mvanhorn, @n-WN, @N0nb0at, @Nan93, @NIDNASSER-Abdelmajid, @nish3451, @niyoh120,
@nocoo, @nosleepcassette, @NousResearch, @ogzerber, @omnissiah-comelse, @Only-Code-A, @opriz, @OwenYWT, @pedh,
@pefontana, @PeterBerthelsen, @phpoh, @pinion05, @plgonzalezrx8, @pradeep7127, @priveperfumes,
@projectadmin-dev, @PStarH, @rnijhara, @Roy-oss1, @roytian1217, @RucchiZ, @Ruzzgar, @RyanLee-Dev, @Salt-555,
@Sanjays2402, @sgaofen, @sharziki, @shenuu, @shin4, @SHL0MS, @shushuzn, @sicnuyudidi, @simon-gtcl,
@simon-marcus, @sirEven, @Sisyphus, @sjz-ks, @snreynolds, @Societus, @Somme4096, @sontianye, @sprmn24,
@StefanIsMe, @stephenschoettler, @Swift42, @taeng0204, @taeuk178, @tannerfokkens-maker, @TaroballzChen,
@ten-ltw, @teyrebaz33, @Tianworld, @topcheer, @Tranquil-Flow, @trevthefoolish, @TroyMitchell911, @UNLINEARITY,
@v1k22, @vivganes, @vominh1919, @vrinek, @VTRiot, @WadydX, @walli, @wenhao7, @WhiteWorld, @WideLee, @wujhsu,
@WuTianyi123, @Wysie, @xandersbell, @xiaoqiang243, @xiayh0107, @xinpengdr, @Xowiek, @ycbai, @yeyitech, @ygd58,
@youngDoo, @yudaiyan, @Yukipukii1, @yule975, @yyq4193, @yzx9, @ZaynJarvis, @zhang9w0v5, @zhanggttry,
@zhangxicen, @zhongyueming1121, @zhouxiaoya12, @zons-zhaozhy
Also: @maelrx, @Marco Rutsch, @MaxsolcuCrypto, @Mind-Dragon, @Paul Bergeron, @say8hi, @whitehatjr1001.
---
**Full Changelog**: [v2026.4.13...v2026.4.23](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.23)
-3
View File
@@ -63,9 +63,6 @@ def make_approval_callback(
logger.warning("Permission request timed out or failed: %s", exc)
return "deny"
if response is None:
return "deny"
outcome = response.outcome
if isinstance(outcome, AllowedOutcome):
option_id = outcome.option_id
+14 -74
View File
@@ -4,7 +4,6 @@ from __future__ import annotations
import asyncio
import logging
import os
from collections import defaultdict, deque
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Deque, Optional
@@ -52,7 +51,7 @@ try:
except ImportError:
from acp.schema import AuthMethod as AuthMethodAgent # type: ignore[attr-defined]
from acp_adapter.auth import detect_provider
from acp_adapter.auth import detect_provider, has_provider
from acp_adapter.events import (
make_message_cb,
make_step_cb,
@@ -72,11 +71,6 @@ except Exception:
# Thread pool for running AIAgent (synchronous) in parallel.
_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent")
# Server-side page size for list_sessions. The ACP ListSessionsRequest schema
# does not expose a client-side limit, so this is a fixed cap that clients
# paginate against using `cursor` / `next_cursor`.
_LIST_SESSIONS_PAGE_SIZE = 50
def _extract_text(
prompt: list[
@@ -357,18 +351,9 @@ class HermesACPAgent(acp.Agent):
)
async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None:
# Only accept authenticate() calls whose method_id matches the
# provider we advertised in initialize(). Without this check,
# authenticate() would acknowledge any method_id as long as the
# server has provider credentials configured — harmless under
# Hermes' threat model (ACP is stdio-only, local-trust), but poor
# API hygiene and confusing if ACP ever grows multi-method auth.
provider = detect_provider()
if not provider:
return None
if not isinstance(method_id, str) or method_id.strip().lower() != provider:
return None
return AuthenticateResponse()
if has_provider():
return AuthenticateResponse()
return None
# ---- Session management -------------------------------------------------
@@ -452,28 +437,7 @@ class HermesACPAgent(acp.Agent):
cwd: str | None = None,
**kwargs: Any,
) -> ListSessionsResponse:
"""List ACP sessions with optional ``cwd`` filtering and cursor pagination.
``cwd`` is passed through to ``SessionManager.list_sessions`` which already
normalizes and filters by working directory. ``cursor`` is a ``session_id``
previously returned as ``next_cursor``; results resume after that entry.
Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more
results remain, ``next_cursor`` is set to the last returned ``session_id``.
"""
infos = self.session_manager.list_sessions(cwd=cwd)
if cursor:
for idx, s in enumerate(infos):
if s["session_id"] == cursor:
infos = infos[idx + 1:]
break
else:
# Unknown cursor -> empty page (do not fall back to full list).
infos = []
has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE
infos = infos[:_LIST_SESSIONS_PAGE_SIZE]
sessions = []
for s in infos:
updated_at = s.get("updated_at")
@@ -487,9 +451,7 @@ class HermesACPAgent(acp.Agent):
updated_at=updated_at,
)
)
next_cursor = sessions[-1].session_id if has_more and sessions else None
return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor)
return ListSessionsResponse(sessions=sessions)
# ---- Prompt (core) ------------------------------------------------------
@@ -555,32 +517,15 @@ class HermesACPAgent(acp.Agent):
agent.step_callback = step_cb
agent.message_callback = message_cb
# Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
# Set it INSIDE _run_agent so the TLS write happens in the executor
# thread — setting it here would write to the event-loop thread's TLS,
# not the executor's. Also set HERMES_INTERACTIVE so approval.py
# takes the CLI-interactive path (which calls the registered
# callback via prompt_dangerous_approval) instead of the
# non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff).
# ACP's conn.request_permission maps cleanly to the interactive
# callback shape — not the gateway-queue HERMES_EXEC_ASK path,
# which requires a notify_cb registered in _gateway_notify_cbs.
previous_approval_cb = None
previous_interactive = None
if approval_cb:
try:
from tools import terminal_tool as _terminal_tool
previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None)
_terminal_tool.set_approval_callback(approval_cb)
except Exception:
logger.debug("Could not set ACP approval callback", exc_info=True)
def _run_agent() -> dict:
nonlocal previous_approval_cb, previous_interactive
if approval_cb:
try:
from tools import terminal_tool as _terminal_tool
previous_approval_cb = _terminal_tool._get_approval_callback()
_terminal_tool.set_approval_callback(approval_cb)
except Exception:
logger.debug("Could not set ACP approval callback", exc_info=True)
# Signal to tools.approval that we have an interactive callback
# and the non-interactive auto-approve path must not fire.
previous_interactive = os.environ.get("HERMES_INTERACTIVE")
os.environ["HERMES_INTERACTIVE"] = "1"
try:
result = agent.run_conversation(
user_message=user_text,
@@ -592,11 +537,6 @@ class HermesACPAgent(acp.Agent):
logger.exception("Agent error in session %s", session_id)
return {"final_response": f"Error: {e}", "messages": state.history}
finally:
# Restore HERMES_INTERACTIVE.
if previous_interactive is None:
os.environ.pop("HERMES_INTERACTIVE", None)
else:
os.environ["HERMES_INTERACTIVE"] = previous_interactive
if approval_cb:
try:
from tools import terminal_tool as _terminal_tool
@@ -673,8 +613,8 @@ class HermesACPAgent(acp.Agent):
await self._conn.session_update(
session_id=session_id,
update=AvailableCommandsUpdate(
session_update="available_commands_update",
available_commands=self._available_commands(),
sessionUpdate="available_commands_update",
availableCommands=self._available_commands(),
),
)
except Exception:
-326
View File
@@ -1,326 +0,0 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Optional
import httpx
from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token
from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials
from hermes_cli.runtime_provider import resolve_runtime_provider
def _utc_now() -> datetime:
return datetime.now(timezone.utc)
@dataclass(frozen=True)
class AccountUsageWindow:
label: str
used_percent: Optional[float] = None
reset_at: Optional[datetime] = None
detail: Optional[str] = None
@dataclass(frozen=True)
class AccountUsageSnapshot:
provider: str
source: str
fetched_at: datetime
title: str = "Account limits"
plan: Optional[str] = None
windows: tuple[AccountUsageWindow, ...] = ()
details: tuple[str, ...] = ()
unavailable_reason: Optional[str] = None
@property
def available(self) -> bool:
return bool(self.windows or self.details) and not self.unavailable_reason
def _title_case_slug(value: Optional[str]) -> Optional[str]:
cleaned = str(value or "").strip()
if not cleaned:
return None
return cleaned.replace("_", " ").replace("-", " ").title()
def _parse_dt(value: Any) -> Optional[datetime]:
if value in (None, ""):
return None
if isinstance(value, (int, float)):
return datetime.fromtimestamp(float(value), tz=timezone.utc)
if isinstance(value, str):
text = value.strip()
if not text:
return None
if text.endswith("Z"):
text = text[:-1] + "+00:00"
try:
dt = datetime.fromisoformat(text)
return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
except ValueError:
return None
return None
def _format_reset(dt: Optional[datetime]) -> str:
if not dt:
return "unknown"
local_dt = dt.astimezone()
delta = dt - _utc_now()
total_seconds = int(delta.total_seconds())
if total_seconds <= 0:
return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
hours, rem = divmod(total_seconds, 3600)
minutes = rem // 60
if hours >= 24:
days, hours = divmod(hours, 24)
rel = f"in {days}d {hours}h"
elif hours > 0:
rel = f"in {hours}h {minutes}m"
else:
rel = f"in {minutes}m"
return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]:
if not snapshot:
return []
header = f"📈 {'**' if markdown else ''}{snapshot.title}{'**' if markdown else ''}"
lines = [header]
if snapshot.plan:
lines.append(f"Provider: {snapshot.provider} ({snapshot.plan})")
else:
lines.append(f"Provider: {snapshot.provider}")
for window in snapshot.windows:
if window.used_percent is None:
base = f"{window.label}: unavailable"
else:
remaining = max(0, round(100 - float(window.used_percent)))
used = max(0, round(float(window.used_percent)))
base = f"{window.label}: {remaining}% remaining ({used}% used)"
if window.reset_at:
base += f" • resets {_format_reset(window.reset_at)}"
elif window.detail:
base += f"{window.detail}"
lines.append(base)
for detail in snapshot.details:
lines.append(detail)
if snapshot.unavailable_reason:
lines.append(f"Unavailable: {snapshot.unavailable_reason}")
return lines
def _resolve_codex_usage_url(base_url: str) -> str:
normalized = (base_url or "").strip().rstrip("/")
if not normalized:
normalized = "https://chatgpt.com/backend-api/codex"
if normalized.endswith("/codex"):
normalized = normalized[: -len("/codex")]
if "/backend-api" in normalized:
return normalized + "/wham/usage"
return normalized + "/api/codex/usage"
def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]:
creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
token_data = _read_codex_tokens()
tokens = token_data.get("tokens") or {}
account_id = str(tokens.get("account_id", "") or "").strip() or None
headers = {
"Authorization": f"Bearer {creds['api_key']}",
"Accept": "application/json",
"User-Agent": "codex-cli",
}
if account_id:
headers["ChatGPT-Account-Id"] = account_id
with httpx.Client(timeout=15.0) as client:
response = client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers)
response.raise_for_status()
payload = response.json() or {}
rate_limit = payload.get("rate_limit") or {}
windows: list[AccountUsageWindow] = []
for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")):
window = rate_limit.get(key) or {}
used = window.get("used_percent")
if used is None:
continue
windows.append(
AccountUsageWindow(
label=label,
used_percent=float(used),
reset_at=_parse_dt(window.get("reset_at")),
)
)
details: list[str] = []
credits = payload.get("credits") or {}
if credits.get("has_credits"):
balance = credits.get("balance")
if isinstance(balance, (int, float)):
details.append(f"Credits balance: ${float(balance):.2f}")
elif credits.get("unlimited"):
details.append("Credits balance: unlimited")
return AccountUsageSnapshot(
provider="openai-codex",
source="usage_api",
fetched_at=_utc_now(),
plan=_title_case_slug(payload.get("plan_type")),
windows=tuple(windows),
details=tuple(details),
)
def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]:
token = (resolve_anthropic_token() or "").strip()
if not token:
return None
if not _is_oauth_token(token):
return AccountUsageSnapshot(
provider="anthropic",
source="oauth_usage_api",
fetched_at=_utc_now(),
unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.",
)
headers = {
"Authorization": f"Bearer {token}",
"Accept": "application/json",
"Content-Type": "application/json",
"anthropic-beta": "oauth-2025-04-20",
"User-Agent": "claude-code/2.1.0",
}
with httpx.Client(timeout=15.0) as client:
response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers)
response.raise_for_status()
payload = response.json() or {}
windows: list[AccountUsageWindow] = []
mapping = (
("five_hour", "Current session"),
("seven_day", "Current week"),
("seven_day_opus", "Opus week"),
("seven_day_sonnet", "Sonnet week"),
)
for key, label in mapping:
window = payload.get(key) or {}
util = window.get("utilization")
if util is None:
continue
used = float(util) * 100 if float(util) <= 1 else float(util)
windows.append(
AccountUsageWindow(
label=label,
used_percent=used,
reset_at=_parse_dt(window.get("resets_at")),
)
)
details: list[str] = []
extra = payload.get("extra_usage") or {}
if extra.get("is_enabled"):
used_credits = extra.get("used_credits")
monthly_limit = extra.get("monthly_limit")
currency = extra.get("currency") or "USD"
if isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float)):
details.append(
f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}"
)
return AccountUsageSnapshot(
provider="anthropic",
source="oauth_usage_api",
fetched_at=_utc_now(),
windows=tuple(windows),
details=tuple(details),
)
def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]:
runtime = resolve_runtime_provider(
requested="openrouter",
explicit_base_url=base_url,
explicit_api_key=api_key,
)
token = str(runtime.get("api_key", "") or "").strip()
if not token:
return None
normalized = str(runtime.get("base_url", "") or "").rstrip("/")
credits_url = f"{normalized}/credits"
key_url = f"{normalized}/key"
headers = {
"Authorization": f"Bearer {token}",
"Accept": "application/json",
}
with httpx.Client(timeout=10.0) as client:
credits_resp = client.get(credits_url, headers=headers)
credits_resp.raise_for_status()
credits = (credits_resp.json() or {}).get("data") or {}
try:
key_resp = client.get(key_url, headers=headers)
key_resp.raise_for_status()
key_data = (key_resp.json() or {}).get("data") or {}
except Exception:
key_data = {}
total_credits = float(credits.get("total_credits") or 0.0)
total_usage = float(credits.get("total_usage") or 0.0)
details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"]
windows: list[AccountUsageWindow] = []
limit = key_data.get("limit")
limit_remaining = key_data.get("limit_remaining")
limit_reset = str(key_data.get("limit_reset") or "").strip()
usage = key_data.get("usage")
if (
isinstance(limit, (int, float))
and float(limit) > 0
and isinstance(limit_remaining, (int, float))
and 0 <= float(limit_remaining) <= float(limit)
):
limit_value = float(limit)
remaining_value = float(limit_remaining)
used_percent = ((limit_value - remaining_value) / limit_value) * 100
detail_parts = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"]
if limit_reset:
detail_parts.append(f"resets {limit_reset}")
windows.append(
AccountUsageWindow(
label="API key quota",
used_percent=used_percent,
detail="".join(detail_parts),
)
)
if isinstance(usage, (int, float)):
usage_parts = [f"API key usage: ${float(usage):.2f} total"]
for value, label in (
(key_data.get("usage_daily"), "today"),
(key_data.get("usage_weekly"), "this week"),
(key_data.get("usage_monthly"), "this month"),
):
if isinstance(value, (int, float)) and float(value) > 0:
usage_parts.append(f"${float(value):.2f} {label}")
details.append("".join(usage_parts))
return AccountUsageSnapshot(
provider="openrouter",
source="credits_api",
fetched_at=_utc_now(),
windows=tuple(windows),
details=tuple(details),
)
def fetch_account_usage(
provider: Optional[str],
*,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
) -> Optional[AccountUsageSnapshot]:
normalized = str(provider or "").strip().lower()
if normalized in {"", "auto", "custom"}:
return None
try:
if normalized == "openai-codex":
return _fetch_codex_account_usage()
if normalized == "anthropic":
return _fetch_anthropic_account_usage()
if normalized == "openrouter":
return _fetch_openrouter_account_usage(base_url, api_key)
except Exception:
return None
return None
+72 -146
View File
@@ -17,8 +17,8 @@ import os
from pathlib import Path
from hermes_constants import get_hermes_home
from types import SimpleNamespace
from typing import Any, Dict, List, Optional, Tuple
from utils import normalize_proxy_env_vars
try:
import anthropic as _anthropic_sdk
@@ -116,63 +116,6 @@ def _get_anthropic_max_output(model: str) -> int:
return best_val
def _resolve_positive_anthropic_max_tokens(value) -> Optional[int]:
"""Return ``value`` floored to a positive int, or ``None`` if it is not a
finite positive number. Ported from openclaw/openclaw#66664.
Anthropic's Messages API rejects ``max_tokens`` values that are 0,
negative, non-integer, or non-finite with HTTP 400. Python's ``or``
idiom (``max_tokens or fallback``) correctly catches ``0`` but lets
negative ints and fractional floats (``-1``, ``0.5``) through to the
API, producing a user-visible failure instead of a local error.
"""
# Booleans are a subclass of int — exclude explicitly so ``True`` doesn't
# silently become 1 and ``False`` doesn't become 0.
if isinstance(value, bool):
return None
if not isinstance(value, (int, float)):
return None
try:
import math
if not math.isfinite(value):
return None
except Exception:
return None
floored = int(value) # truncates toward zero for floats
return floored if floored > 0 else None
def _resolve_anthropic_messages_max_tokens(
requested,
model: str,
context_length: Optional[int] = None,
) -> int:
"""Resolve the ``max_tokens`` budget for an Anthropic Messages call.
Prefers ``requested`` when it is a positive finite number; otherwise
falls back to the model's output ceiling. Raises ``ValueError`` if no
positive budget can be resolved (should not happen with current model
table defaults, but guards against a future regression where
``_get_anthropic_max_output`` could return ``0``).
Separately, callers apply a context-window clamp — this resolver does
not, to keep the positive-value contract independent of endpoint
specifics.
Ported from openclaw/openclaw#66664 (resolveAnthropicMessagesMaxTokens).
"""
resolved = _resolve_positive_anthropic_max_tokens(requested)
if resolved is not None:
return resolved
fallback = _get_anthropic_max_output(model)
if fallback > 0:
return fallback
raise ValueError(
f"Anthropic Messages adapter requires a positive max_tokens value for "
f"model {model!r}; got {requested!r} and no model default resolved."
)
def _supports_adaptive_thinking(model: str) -> bool:
"""Return True for Claude 4.6+ models that support adaptive thinking."""
return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
@@ -322,14 +265,6 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
return True # Any other endpoint is a third-party proxy
def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
"""Return True for Kimi's /coding endpoint that requires claude-code UA."""
normalized = _normalize_base_url_text(base_url)
if not normalized:
return False
return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
def _requires_bearer_auth(base_url: str | None) -> bool:
"""Return True for Anthropic-compatible providers that require Bearer auth.
@@ -373,9 +308,6 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
"The 'anthropic' package is required for the Anthropic provider. "
"Install it with: pip install 'anthropic>=0.39.0'"
)
normalize_proxy_env_vars()
from httpx import Timeout
normalized_base_url = _normalize_base_url_text(base_url)
@@ -387,18 +319,9 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
kwargs["base_url"] = normalized_base_url
common_betas = _common_betas_for_base_url(normalized_base_url)
if _is_kimi_coding_endpoint(base_url):
# Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
# to be recognized as a valid Coding Agent. Without it, returns 403.
# Check this BEFORE _requires_bearer_auth since both match api.kimi.com/coding.
kwargs["api_key"] = api_key
kwargs["default_headers"] = {
"User-Agent": "claude-code/0.1.0",
**( {"anthropic-beta": ",".join(common_betas)} if common_betas else {} )
}
elif _requires_bearer_auth(normalized_base_url):
if _requires_bearer_auth(normalized_base_url):
# Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
# Authorization: Bearer *** for regular API keys. Route those endpoints
# Authorization: Bearer even for regular API keys. Route those endpoints
# through auth_token so the SDK sends Bearer auth instead of x-api-key.
# Check this before OAuth token shape detection because MiniMax secrets do
# not use Anthropic's sk-ant-api prefix and would otherwise be misread as
@@ -1139,31 +1062,6 @@ def convert_messages_to_anthropic(
"name": fn.get("name", ""),
"input": parsed_args,
})
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
# tool-call messages to carry reasoning_content when thinking is
# enabled server-side. Preserve it as a thinking block so Kimi
# can validate the message history. See hermes-agent#13848.
#
# Accept empty string "" — _copy_reasoning_content_for_api()
# injects "" as a tier-3 fallback for Kimi tool-call messages
# that had no reasoning. Kimi requires the field to exist, even
# if empty.
#
# Prepend (not append): Anthropic protocol requires thinking
# blocks before text and tool_use blocks.
#
# Guard: only add when reasoning_details didn't already contribute
# thinking blocks. On native Anthropic, reasoning_details produces
# signed thinking blocks — adding another unsigned one from
# reasoning_content would create a duplicate (same text) that gets
# downgraded to a spurious text block on the last assistant message.
reasoning_content = m.get("reasoning_content")
_already_has_thinking = any(
isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")
for b in blocks
)
if isinstance(reasoning_content, str) and not _already_has_thinking:
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
# Anthropic rejects empty assistant content
effective = blocks or content
if not effective or effective == "":
@@ -1319,7 +1217,6 @@ def convert_messages_to_anthropic(
# cache markers can interfere with signature validation.
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
_is_third_party = _is_third_party_anthropic_endpoint(base_url)
_is_kimi = _is_kimi_coding_endpoint(base_url)
last_assistant_idx = None
for i in range(len(result) - 1, -1, -1):
@@ -1331,25 +1228,7 @@ def convert_messages_to_anthropic(
if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
continue
if _is_kimi:
# Kimi's /coding endpoint enables thinking server-side and
# requires unsigned thinking blocks on replayed assistant
# tool-call messages. Strip signed Anthropic blocks (Kimi
# can't validate signatures) but preserve the unsigned ones
# we synthesised from reasoning_content above.
new_content = []
for b in m["content"]:
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
new_content.append(b)
continue
if b.get("signature") or b.get("data"):
# Anthropic-signed block — Kimi can't validate, strip
continue
# Unsigned thinking (synthesised from reasoning_content) —
# keep it: Kimi needs it for message-history validation.
new_content.append(b)
m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
elif _is_third_party or idx != last_assistant_idx:
if _is_third_party or idx != last_assistant_idx:
# Third-party endpoint: strip ALL thinking blocks from every
# assistant message — signatures are Anthropic-proprietary.
# Direct Anthropic: strip from non-latest assistant messages only.
@@ -1447,12 +1326,7 @@ def build_anthropic_kwargs(
model = normalize_model_name(model, preserve_dots=preserve_dots)
# effective_max_tokens = output cap for this call (≠ total context window)
# Use the resolver helper so non-positive values (negative ints,
# fractional floats, NaN, non-numeric) fail locally with a clear error
# rather than 400-ing at the Anthropic API. See openclaw/openclaw#66664.
effective_max_tokens = _resolve_anthropic_messages_max_tokens(
max_tokens, model, context_length=context_length
)
effective_max_tokens = max_tokens or _get_anthropic_max_output(model)
# Clamp output cap to fit inside the total context window.
# Only matters for small custom endpoints where context_length < native
@@ -1531,25 +1405,11 @@ def build_anthropic_kwargs(
# MiniMax Anthropic-compat endpoints support thinking (manual mode only,
# not adaptive). Haiku does NOT support extended thinking — skip entirely.
#
# Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
# its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
# validates the message history and requires every prior assistant
# tool-call message to carry OpenAI-style ``reasoning_content``. The
# Anthropic path never populates that field, and
# ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
# on third-party endpoints — so the request fails with HTTP 400
# "thinking is enabled but reasoning_content is missing in assistant
# tool call message at index N". Kimi's reasoning is driven server-side
# on the /coding route, so skip Anthropic's thinking parameter entirely
# for that host. (Kimi on chat_completions enables thinking via
# extra_body in the ChatCompletionsTransport — see #13503.)
#
# On 4.7+ the `thinking.display` field defaults to "omitted", which
# silently hides reasoning text that Hermes surfaces in its CLI. We
# request "summarized" so the reasoning blocks stay populated — matching
# 4.6 behavior and preserving the activity-feed UX during long tool runs.
_is_kimi_coding = _is_kimi_coding_endpoint(base_url)
if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
if reasoning_config and isinstance(reasoning_config, dict):
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
effort = str(reasoning_config.get("effort", "medium")).lower()
budget = THINKING_BUDGET.get(effort, 8000)
@@ -1598,4 +1458,70 @@ def build_anthropic_kwargs(
return kwargs
def normalize_anthropic_response(
response,
strip_tool_prefix: bool = False,
) -> Tuple[SimpleNamespace, str]:
"""Normalize Anthropic response to match the shape expected by AIAgent.
Returns (assistant_message, finish_reason) where assistant_message has
.content, .tool_calls, and .reasoning attributes.
When *strip_tool_prefix* is True, removes the ``mcp_`` prefix that was
added to tool names for OAuth Claude Code compatibility.
"""
text_parts = []
reasoning_parts = []
reasoning_details = []
tool_calls = []
for block in response.content:
if block.type == "text":
text_parts.append(block.text)
elif block.type == "thinking":
reasoning_parts.append(block.thinking)
block_dict = _to_plain_data(block)
if isinstance(block_dict, dict):
reasoning_details.append(block_dict)
elif block.type == "tool_use":
name = block.name
if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
name = name[len(_MCP_TOOL_PREFIX):]
tool_calls.append(
SimpleNamespace(
id=block.id,
type="function",
function=SimpleNamespace(
name=name,
arguments=json.dumps(block.input),
),
)
)
# Map Anthropic stop_reason to OpenAI finish_reason.
# Newer stop reasons added in Claude 4.5+ / 4.7:
# - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
# - model_context_window_exceeded: hit context limit (not max_tokens)
# Both need distinct handling upstream — a refusal should surface to the
# user with a clear message, and a context-window overflow should trigger
# compression/truncation rather than be treated as normal end-of-turn.
stop_reason_map = {
"end_turn": "stop",
"tool_use": "tool_calls",
"max_tokens": "length",
"stop_sequence": "stop",
"refusal": "content_filter",
"model_context_window_exceeded": "length",
}
finish_reason = stop_reason_map.get(response.stop_reason, "stop")
return (
SimpleNamespace(
content="\n".join(text_parts) if text_parts else None,
tool_calls=tool_calls or None,
reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
reasoning_content=None,
reasoning_details=reasoning_details or None,
),
finish_reason,
)
+131 -320
View File
@@ -48,7 +48,6 @@ from openai import OpenAI
from agent.credential_pool import load_pool
from hermes_cli.config import get_hermes_home
from hermes_constants import OPENROUTER_BASE_URL
from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars
logger = logging.getLogger(__name__)
@@ -96,37 +95,84 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
return _PROVIDER_ALIASES.get(normalized, normalized)
# Sentinel: when returned by _fixed_temperature_for_model(), callers must
# strip the ``temperature`` key from API kwargs entirely so the provider's
# server-side default applies. Kimi/Moonshot models manage temperature
# internally — sending *any* value (even the "correct" one) can conflict
# with gateway-side mode selection (thinking → 1.0, non-thinking → 0.6).
OMIT_TEMPERATURE: object = object()
_FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
"kimi-for-coding": 0.6,
}
# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents:
# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed
# value 0.6. Any other value will result in an error." The same lock applies
# to the other k2.* models served on that endpoint. Enumerated explicitly so
# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on
# the standard chat API and third parties) are NOT clamped.
# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart
_KIMI_INSTANT_MODELS: frozenset = frozenset({
"kimi-k2.5",
"kimi-k2-turbo-preview",
"kimi-k2-0905-preview",
})
_KIMI_THINKING_MODELS: frozenset = frozenset({
"kimi-k2-thinking",
"kimi-k2-thinking-turbo",
})
def _is_kimi_model(model: Optional[str]) -> bool:
"""True for any Kimi / Moonshot model that manages temperature server-side."""
bare = (model or "").strip().lower().rsplit("/", 1)[-1]
return bare.startswith("kimi-") or bare == "kimi"
# Moonshot's public chat endpoint (api.moonshot.ai/v1) enforces a different
# temperature contract than the Coding Plan endpoint above. Empirically,
# `kimi-k2.5` on the public API rejects 0.6 with HTTP 400
# "invalid temperature: only 1 is allowed for this model" — the Coding Plan
# lock (0.6 for non-thinking) does not apply. `kimi-k2-turbo-preview` and the
# thinking variants already match the Coding Plan contract on the public
# endpoint, so we only override the models that diverge.
# Users hit this endpoint when `KIMI_API_KEY` is a legacy `sk-*` key (the
# `sk-kimi-*` prefix routes to api.kimi.com/coding/v1 instead — see
# hermes_cli/auth.py:_kimi_base_url_for_key).
_KIMI_PUBLIC_API_OVERRIDES: Dict[str, float] = {
"kimi-k2.5": 1.0,
}
def _fixed_temperature_for_model(
model: Optional[str],
base_url: Optional[str] = None,
) -> "Optional[float] | object":
"""Return a temperature directive for models with strict contracts.
) -> Optional[float]:
"""Return a required temperature override for models with strict contracts.
Returns:
``OMIT_TEMPERATURE`` — caller must remove the ``temperature`` key so the
provider chooses its own default. Used for all Kimi / Moonshot
models whose gateway selects temperature server-side.
``float`` — a specific value the caller must use (reserved for future
models with fixed-temperature contracts).
``None`` — no override; caller should use its own default.
Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on
the k2.5 family. Non-thinking variants require exactly 0.6; thinking
variants require 1.0. An optional ``vendor/`` prefix (e.g.
``moonshotai/kimi-k2.5``) is tolerated for aggregator routings.
When ``base_url`` points to Moonshot's public chat endpoint
(``api.moonshot.ai``), the contract changes for ``kimi-k2.5``: the public
API only accepts ``temperature=1``, not 0.6. That override takes precedence
over the Coding Plan defaults above.
Returns ``None`` for every other model, including ``kimi-k2-instruct*``
which is the separate non-coding K2 family with variable temperature.
"""
if _is_kimi_model(model):
logger.debug("Omitting temperature for Kimi model %r (server-managed)", model)
return OMIT_TEMPERATURE
normalized = (model or "").strip().lower()
bare = normalized.rsplit("/", 1)[-1]
# Public Moonshot API has a stricter contract for some models than the
# Coding Plan endpoint — check it first so it wins on conflict.
if base_url and ("api.moonshot.ai" in base_url.lower() or "api.moonshot.cn" in base_url.lower()):
public = _KIMI_PUBLIC_API_OVERRIDES.get(bare)
if public is not None:
logger.debug(
"Forcing temperature=%s for %r on public Moonshot API", public, model
)
return public
fixed = _FIXED_TEMPERATURE_MODELS.get(normalized)
if fixed is not None:
logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model)
return fixed
if bare in _KIMI_THINKING_MODELS:
logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model)
return 1.0
if bare in _KIMI_INSTANT_MODELS:
logger.debug("Forcing temperature=0.6 for kimi instant model %r", model)
return 0.6
return None
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
@@ -134,7 +180,6 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
"gemini": "gemini-3-flash-preview",
"zai": "glm-4.5-flash",
"kimi-coding": "kimi-k2-turbo-preview",
"stepfun": "step-3.5-flash",
"kimi-coding-cn": "kimi-k2-turbo-preview",
"minimax": "MiniMax-M2.7",
"minimax-cn": "MiniMax-M2.7",
@@ -151,7 +196,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
# differs from their main chat model, map it here. The vision auto-detect
# "exotic provider" branch checks this before falling back to the main model.
_PROVIDER_VISION_MODELS: Dict[str, str] = {
"xiaomi": "mimo-v2.5",
"xiaomi": "mimo-v2-omni",
"zai": "glm-5v-turbo",
}
@@ -162,16 +207,6 @@ _OR_HEADERS = {
"X-OpenRouter-Categories": "productivity,cli-agent",
}
# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
# referrerUrl and X-Title maps to appName in the gateway's analytics.
from hermes_cli import __version__ as _HERMES_VERSION
_AI_GATEWAY_HEADERS = {
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
"X-Title": "Hermes Agent",
"User-Agent": f"HermesAgent/{_HERMES_VERSION}",
}
# Nous Portal extra_body for product attribution.
# Callers should pass this as extra_body in chat.completions.create()
# when the auxiliary client is backed by Nous Portal.
@@ -183,6 +218,8 @@ auxiliary_is_nous: bool = False
# Default auxiliary models per provider
_OPENROUTER_MODEL = "google/gemini-3-flash-preview"
_NOUS_MODEL = "google/gemini-3-flash-preview"
_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni"
_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro"
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
_ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
_AUTH_JSON_PATH = get_hermes_home() / "auth.json"
@@ -573,8 +610,7 @@ class _AnthropicCompletionsAdapter:
self._is_oauth = is_oauth
def create(self, **kwargs) -> Any:
from agent.anthropic_adapter import build_anthropic_kwargs
from agent.transports import get_transport
from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response
messages = kwargs.get("messages", [])
model = kwargs.get("model", self._model)
@@ -611,19 +647,7 @@ class _AnthropicCompletionsAdapter:
anthropic_kwargs["temperature"] = temperature
response = self._client.messages.create(**anthropic_kwargs)
_transport = get_transport("anthropic_messages")
_nr = _transport.normalize_response(
response, strip_tool_prefix=self._is_oauth
)
# ToolCall already duck-types as OpenAI shape (.type, .function.name,
# .function.arguments) via properties, so no wrapping needed.
assistant_message = SimpleNamespace(
content=_nr.content,
tool_calls=_nr.tool_calls,
reasoning=_nr.reasoning,
)
finish_reason = _nr.finish_reason
assistant_message, finish_reason = normalize_anthropic_response(response)
usage = None
if hasattr(response, "usage") and response.usage:
@@ -740,33 +764,6 @@ def _nous_base_url() -> str:
return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)
def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[str, str]]:
"""Return fresh Nous runtime credentials when available.
This mirrors the main agent's 401 recovery path and keeps auxiliary
clients aligned with the singleton auth store + mint flow instead of
relying only on whatever raw tokens happen to be sitting in auth.json
or the credential pool.
"""
try:
from hermes_cli.auth import resolve_nous_runtime_credentials
creds = resolve_nous_runtime_credentials(
min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
force_mint=force_refresh,
)
except Exception as exc:
logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc)
return None
api_key = str(creds.get("api_key") or "").strip()
base_url = str(creds.get("base_url") or "").strip().rstrip("/")
if not api_key or not base_url:
return None
return api_key, base_url
def _read_codex_access_token() -> Optional[str]:
"""Read a valid, non-expired Codex OAuth access token from Hermes auth store.
@@ -856,9 +853,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
if is_native_gemini_base_url(base_url):
return GeminiNativeClient(api_key=api_key, base_url=base_url), model
extra = {}
if base_url_host_matches(base_url, "api.kimi.com"):
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
if "api.kimi.com" in base_url.lower():
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
elif "api.githubcopilot.com" in base_url.lower():
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
@@ -882,9 +879,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
if is_native_gemini_base_url(base_url):
return GeminiNativeClient(api_key=api_key, base_url=base_url), model
extra = {}
if base_url_host_matches(base_url, "api.kimi.com"):
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
if "api.kimi.com" in base_url.lower():
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
elif "api.githubcopilot.com" in base_url.lower():
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
@@ -916,19 +913,6 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
def _describe_openrouter_unavailable() -> str:
"""Return a more precise OpenRouter auth failure reason for logs."""
pool_present, entry = _select_pool_entry("openrouter")
if pool_present:
if entry is None:
return "OpenRouter credential pool has no usable entries (credentials may be exhausted)"
if not _pool_runtime_api_key(entry):
return "OpenRouter credential pool entry is missing a runtime API key"
if not str(os.getenv("OPENROUTER_API_KEY") or "").strip():
return "OPENROUTER_API_KEY not set"
return "no usable OpenRouter credentials found"
def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
# Check cross-session rate limit guard before attempting Nous —
# if another session already recorded a 429, skip Nous entirely
@@ -946,50 +930,29 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
pass
nous = _read_nous_auth()
runtime = _resolve_nous_runtime_api(force_refresh=False)
if runtime is None and not nous:
if not nous:
return None, None
global auxiliary_is_nous
auxiliary_is_nous = True
logger.debug("Auxiliary client: Nous Portal")
# Ask the Portal which model it currently recommends for this task type.
# The /api/nous/recommended-models endpoint is the authoritative source:
# it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model
# auto-detects the caller's tier via check_nous_free_tier(). Fall back to
# _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable
# or returns a null recommendation for this task type.
model = _NOUS_MODEL
try:
from hermes_cli.models import get_nous_recommended_aux_model
recommended = get_nous_recommended_aux_model(vision=vision)
if recommended:
model = recommended
logger.debug(
"Auxiliary/%s: using Portal-recommended model %s",
"vision" if vision else "text", model,
)
else:
logger.debug(
"Auxiliary/%s: no Portal recommendation, falling back to %s",
"vision" if vision else "text", model,
)
except Exception as exc:
logger.debug(
"Auxiliary/%s: recommended-models lookup failed (%s); "
"falling back to %s",
"vision" if vision else "text", exc, model,
)
if runtime is not None:
api_key, base_url = runtime
if nous.get("source") == "pool":
model = "gemini-3-flash"
else:
api_key = _nous_api_key(nous or {})
base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
model = _NOUS_MODEL
# Free-tier users can't use paid auxiliary models — use the free
# models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks.
try:
from hermes_cli.models import check_nous_free_tier
if check_nous_free_tier():
model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL
logger.debug("Free-tier Nous account — using %s for auxiliary/%s",
model, "vision" if vision else "text")
except Exception:
pass
return (
OpenAI(
api_key=api_key,
base_url=base_url,
api_key=_nous_api_key(nous),
base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"),
),
model,
)
@@ -1067,7 +1030,7 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[st
return None, None, None
custom_base = custom_base.strip().rstrip("/")
if base_url_host_matches(custom_base, "openrouter.ai"):
if "openrouter.ai" in custom_base.lower():
# requested='custom' falls back to OpenRouter when no custom endpoint is
# configured. Treat that as "no custom endpoint" for auxiliary routing.
return None, None, None
@@ -1101,8 +1064,6 @@ def _validate_proxy_env_urls() -> None:
"""
from urllib.parse import urlparse
normalize_proxy_env_vars()
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
"https_proxy", "http_proxy", "all_proxy"):
value = str(os.environ.get(key) or "").strip()
@@ -1333,15 +1294,6 @@ def _is_connection_error(exc: Exception) -> bool:
return False
def _is_auth_error(exc: Exception) -> bool:
"""Detect auth failures that should trigger provider-specific refresh."""
status = getattr(exc, "status_code", None)
if status == 401:
return True
err_lower = str(exc).lower()
return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
def _try_payment_fallback(
failed_provider: str,
task: str = None,
@@ -1517,15 +1469,15 @@ def _to_async_client(sync_client, model: str):
"api_key": sync_client.api_key,
"base_url": str(sync_client.base_url),
}
sync_base_url = str(sync_client.base_url)
if base_url_host_matches(sync_base_url, "openrouter.ai"):
base_lower = str(sync_client.base_url).lower()
if "openrouter" in base_lower:
async_kwargs["default_headers"] = dict(_OR_HEADERS)
elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
elif "api.githubcopilot.com" in base_lower:
from hermes_cli.models import copilot_default_headers
async_kwargs["default_headers"] = copilot_default_headers()
elif base_url_host_matches(sync_base_url, "api.kimi.com"):
async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
elif "api.kimi.com" in base_lower:
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
return AsyncOpenAI(**async_kwargs), model
@@ -1601,7 +1553,8 @@ def resolve_provider_client(
# Auto-detect: api.openai.com + codex model name pattern
if api_mode and api_mode != "codex_responses":
return False # explicit non-codex mode
if base_url_hostname(base_url_str) == "api.openai.com":
normalized_base = (base_url_str or "").strip().lower()
if "api.openai.com" in normalized_base and "openrouter" not in normalized_base:
model_lower = (model_str or "").lower()
if "codex" in model_lower:
return True
@@ -1640,10 +1593,8 @@ def resolve_provider_client(
if provider == "openrouter":
client, default = _try_openrouter()
if client is None:
logger.warning(
"resolve_provider_client: openrouter requested but %s",
_describe_openrouter_unavailable(),
)
logger.warning("resolve_provider_client: openrouter requested "
"but OPENROUTER_API_KEY not set")
return None, None
final_model = _normalize_resolved_model(model or default, provider)
return (_to_async_client(client, final_model) if async_mode
@@ -1651,13 +1602,7 @@ def resolve_provider_client(
# ── Nous Portal (OAuth) ──────────────────────────────────────────
if provider == "nous":
# Detect vision tasks: either explicit model override from
# _PROVIDER_VISION_MODELS, or caller passed a known vision model.
_is_vision = (
model in _PROVIDER_VISION_MODELS.values()
or (model or "").strip().lower() == "mimo-v2-omni"
)
client, default = _try_nous(vision=_is_vision)
client, default = _try_nous()
if client is None:
logger.warning("resolve_provider_client: nous requested "
"but Nous Portal not configured (run: hermes auth)")
@@ -1713,9 +1658,9 @@ def resolve_provider_client(
provider,
)
extra = {}
if base_url_host_matches(custom_base, "api.kimi.com"):
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
if "api.kimi.com" in custom_base.lower():
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
elif "api.githubcopilot.com" in custom_base.lower():
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
@@ -1820,9 +1765,9 @@ def resolve_provider_client(
# Provider-specific headers
headers = {}
if base_url_host_matches(base_url, "api.kimi.com"):
headers["User-Agent"] = "claude-code/0.1.0"
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
if "api.kimi.com" in base_url.lower():
headers["User-Agent"] = "KimiCLI/1.30.0"
elif "api.githubcopilot.com" in base_url.lower():
from hermes_cli.models import copilot_default_headers
headers.update(copilot_default_headers())
@@ -2053,35 +1998,24 @@ def resolve_vision_provider_client(
# _PROVIDER_VISION_MODELS provides per-provider vision model
# overrides when the provider has a dedicated multimodal model
# that differs from the chat model (e.g. xiaomi → mimo-v2-omni,
# zai → glm-5v-turbo). Nous is the exception: it has a dedicated
# strict vision backend with tier-aware defaults, so it must not
# fall through to the user's text chat model here.
# zai → glm-5v-turbo).
# 2. OpenRouter (vision-capable aggregator fallback)
# 3. Nous Portal (vision-capable aggregator fallback)
# 4. Stop
main_provider = _read_main_provider()
main_model = _read_main_model()
if main_provider and main_provider not in ("auto", ""):
if main_provider == "nous":
sync_client, default_model = _resolve_strict_vision_backend(main_provider)
if sync_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
main_provider, default_model or resolved_model or main_model,
)
return _finalize(main_provider, sync_client, default_model)
else:
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
main_provider, rpc_model or vision_model,
)
return _finalize(
main_provider, rpc_client, rpc_model or vision_model)
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
main_provider, rpc_model or vision_model,
)
return _finalize(
main_provider, rpc_client, rpc_model or vision_model)
# Fall back through aggregators (uses their dedicated vision model,
# not the user's main model) when main provider has no client.
@@ -2128,7 +2062,7 @@ def auxiliary_max_tokens_param(value: int) -> dict:
# Only use max_completion_tokens for direct OpenAI custom endpoints
if (not or_key
and _read_nous_auth() is None
and base_url_hostname(custom_base) == "api.openai.com"):
and "api.openai.com" in custom_base.lower()):
return {"max_completion_tokens": value}
return {"max_tokens": value}
@@ -2156,76 +2090,6 @@ _client_cache_lock = threading.Lock()
_CLIENT_CACHE_MAX_SIZE = 64 # safety belt — evict oldest when exceeded
def _client_cache_key(
provider: str,
*,
async_mode: bool,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
api_mode: Optional[str] = None,
main_runtime: Optional[Dict[str, Any]] = None,
) -> tuple:
runtime = _normalize_main_runtime(main_runtime)
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
with _client_cache_lock:
old_entry = _client_cache.get(cache_key)
if old_entry is not None and old_entry[0] is not client:
_force_close_async_httpx(old_entry[0])
try:
close_fn = getattr(old_entry[0], "close", None)
if callable(close_fn):
close_fn()
except Exception:
pass
_client_cache[cache_key] = (client, default_model, bound_loop)
def _refresh_nous_auxiliary_client(
*,
cache_provider: str,
model: Optional[str],
async_mode: bool,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
api_mode: Optional[str] = None,
main_runtime: Optional[Dict[str, Any]] = None,
) -> Tuple[Optional[Any], Optional[str]]:
"""Refresh Nous runtime creds, rebuild the client, and replace the cache entry."""
runtime = _resolve_nous_runtime_api(force_refresh=True)
if runtime is None:
return None, model
fresh_key, fresh_base_url = runtime
sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url)
final_model = model
current_loop = None
if async_mode:
try:
import asyncio as _aio
current_loop = _aio.get_event_loop()
except RuntimeError:
pass
client, final_model = _to_async_client(sync_client, final_model or "")
else:
client = sync_client
cache_key = _client_cache_key(
cache_provider,
async_mode=async_mode,
base_url=base_url,
api_key=api_key,
api_mode=api_mode,
main_runtime=main_runtime,
)
_store_cached_client(cache_key, client, final_model, bound_loop=current_loop)
return client, final_model
def neuter_async_httpx_del() -> None:
"""Monkey-patch ``AsyncHttpxClientWrapper.__del__`` to be a no-op.
@@ -2327,7 +2191,7 @@ def cleanup_stale_async_clients() -> None:
def _is_openrouter_client(client: Any) -> bool:
for obj in (client, getattr(client, "_client", None), getattr(client, "client", None)):
if obj and base_url_host_matches(str(getattr(obj, "base_url", "") or ""), "openrouter.ai"):
if obj and "openrouter" in str(getattr(obj, "base_url", "") or "").lower():
return True
return False
@@ -2379,14 +2243,8 @@ def _get_cached_client(
except RuntimeError:
pass
runtime = _normalize_main_runtime(main_runtime)
cache_key = _client_cache_key(
provider,
async_mode=async_mode,
base_url=base_url,
api_key=api_key,
api_mode=api_mode,
main_runtime=main_runtime,
)
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
with _client_cache_lock:
if cache_key in _client_cache:
cached_client, cached_default, cached_loop = _client_cache[cache_key]
@@ -2617,9 +2475,7 @@ def _build_call_kwargs(
}
fixed_temperature = _fixed_temperature_for_model(model, base_url)
if fixed_temperature is OMIT_TEMPERATURE:
temperature = None # strip — let server choose
elif fixed_temperature is not None:
if fixed_temperature is not None:
temperature = fixed_temperature
# Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
@@ -2639,7 +2495,7 @@ def _build_call_kwargs(
# Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
if provider == "custom":
custom_base = base_url or _current_custom_base_url()
if base_url_hostname(custom_base) == "api.openai.com":
if "api.openai.com" in custom_base.lower():
kwargs["max_completion_tokens"] = max_tokens
else:
kwargs["max_tokens"] = max_tokens
@@ -2834,29 +2690,6 @@ def call_llm(
raise
first_err = retry_err
# ── Nous auth refresh parity with main agent ──────────────────
client_is_nous = (
resolved_provider == "nous"
or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
)
if _is_auth_error(first_err) and client_is_nous:
refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
cache_provider=resolved_provider or "nous",
model=final_model,
async_mode=False,
base_url=resolved_base_url,
api_key=resolved_api_key,
api_mode=resolved_api_mode,
main_runtime=main_runtime,
)
if refreshed_client is not None:
logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
task or "call")
if refreshed_model and refreshed_model != kwargs.get("model"):
kwargs["model"] = refreshed_model
return _validate_llm_response(
refreshed_client.chat.completions.create(**kwargs), task)
# ── Payment / credit exhaustion fallback ──────────────────────
# When the resolved provider returns 402 or a credit-related error,
# try alternative providers instead of giving up. This handles the
@@ -3055,28 +2888,6 @@ async def async_call_llm(
raise
first_err = retry_err
# ── Nous auth refresh parity with main agent ──────────────────
client_is_nous = (
resolved_provider == "nous"
or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
)
if _is_auth_error(first_err) and client_is_nous:
refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
cache_provider=resolved_provider or "nous",
model=final_model,
async_mode=True,
base_url=resolved_base_url,
api_key=resolved_api_key,
api_mode=resolved_api_mode,
)
if refreshed_client is not None:
logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
task or "call")
if refreshed_model and refreshed_model != kwargs.get("model"):
kwargs["model"] = refreshed_model
return _validate_llm_response(
await refreshed_client.chat.completions.create(**kwargs), task)
# ── Payment / connection fallback (mirrors sync call_llm) ─────
should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
is_auto = resolved_provider in ("auto", "", None)
+8 -171
View File
@@ -22,98 +22,6 @@ from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Multimodal content helpers
# ---------------------------------------------------------------------------
def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
"""Convert chat-style multimodal content to Responses API input parts.
Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
Returns an empty list when ``content`` is not a list or contains no
recognized parts — callers fall back to the string path.
"""
if not isinstance(content, list):
return []
converted: List[Dict[str, Any]] = []
for part in content:
if isinstance(part, str):
if part:
converted.append({"type": "input_text", "text": part})
continue
if not isinstance(part, dict):
continue
ptype = str(part.get("type") or "").strip().lower()
if ptype in {"text", "input_text", "output_text"}:
text = part.get("text")
if isinstance(text, str) and text:
converted.append({"type": "input_text", "text": text})
continue
if ptype in {"image_url", "input_image"}:
image_ref = part.get("image_url")
detail = part.get("detail")
if isinstance(image_ref, dict):
url = image_ref.get("url")
detail = image_ref.get("detail", detail)
else:
url = image_ref
if not isinstance(url, str) or not url:
continue
image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
if isinstance(detail, str) and detail.strip():
image_part["detail"] = detail.strip()
converted.append(image_part)
return converted
def _summarize_user_message_for_log(content: Any) -> str:
"""Return a short text summary of a user message for logging/trajectory.
Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}``
parts from the API server. Logging, spinner previews, and trajectory
files all want a plain string — this helper extracts the first chunk of
text and notes any attached images. Returns an empty string for empty
lists and ``str(content)`` for unexpected scalar types.
"""
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
text_bits: List[str] = []
image_count = 0
for part in content:
if isinstance(part, str):
if part:
text_bits.append(part)
continue
if not isinstance(part, dict):
continue
ptype = str(part.get("type") or "").strip().lower()
if ptype in {"text", "input_text", "output_text"}:
text = part.get("text")
if isinstance(text, str) and text:
text_bits.append(text)
elif ptype in {"image_url", "input_image"}:
image_count += 1
summary = " ".join(text_bits).strip()
if image_count:
note = f"[{image_count} image{'s' if image_count != 1 else ''}]"
summary = f"{note} {summary}" if summary else note
return summary
try:
return str(content)
except Exception:
return ""
# ---------------------------------------------------------------------------
# ID helpers
# ---------------------------------------------------------------------------
def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str:
"""Generate a deterministic call_id from tool call content.
@@ -172,17 +80,14 @@ def _derive_responses_function_call_id(
return f"fc_{digest}"
# ---------------------------------------------------------------------------
# Schema conversion
# ---------------------------------------------------------------------------
def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
"""Convert chat-completions tool schemas to Responses function-tool schemas."""
if not tools:
source_tools = tools
if not source_tools:
return None
converted: List[Dict[str, Any]] = []
for item in tools:
for item in source_tools:
fn = item.get("function", {}) if isinstance(item, dict) else {}
name = fn.get("name")
if not isinstance(name, str) or not name.strip():
@@ -197,10 +102,6 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
return converted or None
# ---------------------------------------------------------------------------
# Message format conversion
# ---------------------------------------------------------------------------
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Convert internal chat-style messages to Responses input items."""
items: List[Dict[str, Any]] = []
@@ -215,14 +116,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
if role in {"user", "assistant"}:
content = msg.get("content", "")
if isinstance(content, list):
content_parts = _chat_content_to_responses_parts(content)
content_text = "".join(
p.get("text", "") for p in content_parts if p.get("type") == "input_text"
)
else:
content_parts = []
content_text = str(content) if content is not None else ""
content_text = str(content) if content is not None else ""
if role == "assistant":
# Replay encrypted reasoning items from previous turns
@@ -245,9 +139,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
seen_item_ids.add(item_id)
has_codex_reasoning = True
if content_parts:
items.append({"role": "assistant", "content": content_parts})
elif content_text.strip():
if content_text.strip():
items.append({"role": "assistant", "content": content_text})
elif has_codex_reasoning:
# The Responses API requires a following item after each
@@ -300,12 +192,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
})
continue
# Non-assistant (user) role: emit multimodal parts when present,
# otherwise fall back to the text payload.
if content_parts:
items.append({"role": role, "content": content_parts})
else:
items.append({"role": role, "content": content_text})
items.append({"role": role, "content": content_text})
continue
if role == "tool":
@@ -325,10 +212,6 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
return items
# ---------------------------------------------------------------------------
# Input preflight / validation
# ---------------------------------------------------------------------------
def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
if not isinstance(raw_items, list):
raise ValueError("Codex Responses input must be a list of input items.")
@@ -410,46 +293,6 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
content = item.get("content", "")
if content is None:
content = ""
if isinstance(content, list):
# Multimodal content from ``_chat_messages_to_responses_input``
# is already in Responses format (``input_text`` / ``input_image``).
# Validate each part and pass through.
validated: List[Dict[str, Any]] = []
for part_idx, part in enumerate(content):
if isinstance(part, str):
if part:
validated.append({"type": "input_text", "text": part})
continue
if not isinstance(part, dict):
raise ValueError(
f"Codex Responses input[{idx}].content[{part_idx}] must be an object or string."
)
ptype = str(part.get("type") or "").strip().lower()
if ptype in {"input_text", "text", "output_text"}:
text = part.get("text", "")
if not isinstance(text, str):
text = str(text or "")
validated.append({"type": "input_text", "text": text})
elif ptype in {"input_image", "image_url"}:
image_ref = part.get("image_url", "")
detail = part.get("detail")
if isinstance(image_ref, dict):
url = image_ref.get("url", "")
detail = image_ref.get("detail", detail)
else:
url = image_ref
if not isinstance(url, str):
url = str(url or "")
image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
if isinstance(detail, str) and detail.strip():
image_part["detail"] = detail.strip()
validated.append(image_part)
else:
raise ValueError(
f"Codex Responses input[{idx}].content[{part_idx}] has unsupported type {part.get('type')!r}."
)
normalized.append({"role": role, "content": validated})
continue
if not isinstance(content, str):
content = str(content)
@@ -606,10 +449,6 @@ def _preflight_codex_api_kwargs(
return normalized
# ---------------------------------------------------------------------------
# Response extraction helpers
# ---------------------------------------------------------------------------
def _extract_responses_message_text(item: Any) -> str:
"""Extract assistant text from a Responses message output item."""
content = getattr(item, "content", None)
@@ -644,10 +483,6 @@ def _extract_responses_reasoning_text(item: Any) -> str:
return ""
# ---------------------------------------------------------------------------
# Full response normalization
# ---------------------------------------------------------------------------
def _normalize_codex_response(response: Any) -> tuple[Any, str]:
"""Normalize a Responses API object to an assistant_message-like object."""
output = getattr(response, "output", None)
@@ -811,3 +646,5 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
else:
finish_reason = "stop"
return assistant_message, finish_reason
+13 -71
View File
@@ -31,7 +31,6 @@ from agent.model_metadata import (
get_model_context_length,
estimate_messages_tokens_rough,
)
from agent.redact import redact_sensitive_text
logger = logging.getLogger(__name__)
@@ -64,47 +63,6 @@ _CHARS_PER_TOKEN = 4
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
def _content_text_for_contains(content: Any) -> str:
"""Return a best-effort text view of message content.
Used only for substring checks when we need to know whether we've already
appended a note to a message. Keeps multimodal lists intact elsewhere.
"""
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
parts: list[str] = []
for item in content:
if isinstance(item, str):
parts.append(item)
elif isinstance(item, dict):
text = item.get("text")
if isinstance(text, str):
parts.append(text)
return "\n".join(part for part in parts if part)
return str(content)
def _append_text_to_content(content: Any, text: str, *, prepend: bool = False) -> Any:
"""Append or prepend plain text to message content safely.
Compression sometimes needs to add a note or merge a summary into an
existing message. Message content may be plain text or a multimodal list of
blocks, so direct string concatenation is not always safe.
"""
if content is None:
return text
if isinstance(content, str):
return text + content if prepend else content + text
if isinstance(content, list):
text_block = {"type": "text", "text": text}
return [text_block, *content] if prepend else [*content, text_block]
rendered = str(content)
return text + rendered if prepend else rendered + text
def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
"""Shrink long string values inside a tool-call arguments JSON blob while
preserving JSON validity.
@@ -592,15 +550,11 @@ class ContextCompressor(ContextEngine):
Includes tool call arguments and result content (up to
``_CONTENT_MAX`` chars per message) so the summarizer can preserve
specific details like file paths, commands, and outputs.
All content is redacted before serialization to prevent secrets
(API keys, tokens, passwords) from leaking into the summary that
gets sent to the auxiliary model and persisted across compactions.
"""
parts = []
for msg in turns:
role = msg.get("role", "unknown")
content = redact_sensitive_text(msg.get("content") or "")
content = msg.get("content") or ""
# Tool results: keep enough content for the summarizer
if role == "tool":
@@ -621,7 +575,7 @@ class ContextCompressor(ContextEngine):
if isinstance(tc, dict):
fn = tc.get("function", {})
name = fn.get("name", "?")
args = redact_sensitive_text(fn.get("arguments", ""))
args = fn.get("arguments", "")
# Truncate long arguments but keep enough for context
if len(args) > self._TOOL_ARGS_MAX:
args = args[:self._TOOL_ARGS_HEAD] + "..."
@@ -681,11 +635,7 @@ class ContextCompressor(ContextEngine):
"only output the structured summary. "
"Do NOT include any preamble, greeting, or prefix. "
"Write the summary in the same language the user was using in the "
"conversation — do not translate or switch to English. "
"NEVER include API keys, tokens, passwords, secrets, credentials, "
"or connection strings in the summary — replace any that appear "
"with [REDACTED]. Note that the user had credentials present, but "
"do not preserve their values."
"conversation — do not translate or switch to English."
)
# Shared structured template (used by both paths).
@@ -742,7 +692,7 @@ Be specific with file paths, commands, line numbers, and results.]
[What remains to be done — framed as context, not instructions]
## Critical Context
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation. NEVER include API keys, tokens, passwords, or credentials — write [REDACTED] instead.]
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.
@@ -782,7 +732,7 @@ Use this exact structure:
prompt += f"""
FOCUS TOPIC: "{focus_topic}"
The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED]."""
The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget."""
try:
call_kwargs = {
@@ -805,9 +755,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Handle cases where content is not a string (e.g., dict from llama.cpp)
if not isinstance(content, str):
content = str(content) if content else ""
# Redact the summary output as well — the summarizer LLM may
# ignore prompt instructions and echo back secrets verbatim.
summary = redact_sensitive_text(content.strip())
summary = content.strip()
# Store for iterative updates on next compaction
self._previous_summary = summary
self._summary_failure_cooldown_until = 0.0
@@ -848,7 +796,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
)
self.summary_model = "" # empty = use main model
self._summary_failure_cooldown_until = 0.0 # no cooldown
return self._generate_summary(turns_to_summarize, focus_topic=focus_topic) # retry immediately
return self._generate_summary(messages, summary_budget) # retry immediately
# Transient errors (timeout, rate limit, network) — shorter cooldown
_transient_cooldown = 60
@@ -1185,13 +1133,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
for i in range(compress_start):
msg = messages[i].copy()
if i == 0 and msg.get("role") == "system":
existing = msg.get("content")
existing = msg.get("content") or ""
_compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
if _compression_note not in _content_text_for_contains(existing):
msg["content"] = _append_text_to_content(
existing,
"\n\n" + _compression_note if isinstance(existing, str) and existing else _compression_note,
)
if _compression_note not in existing:
msg["content"] = existing + "\n\n" + _compression_note
compressed.append(msg)
# If LLM summary failed, insert a static fallback so the model
@@ -1235,15 +1180,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
for i in range(compress_end, n_messages):
msg = messages[i].copy()
if _merge_summary_into_tail and i == compress_end:
merged_prefix = (
original = msg.get("content") or ""
msg["content"] = (
summary
+ "\n\n--- END OF CONTEXT SUMMARY — "
"respond to the message below, not the summary above ---\n\n"
)
msg["content"] = _append_text_to_content(
msg.get("content"),
merged_prefix,
prepend=True,
+ original
)
_merge_summary_into_tail = False
compressed.append(msg)
+9 -27
View File
@@ -21,9 +21,6 @@ from pathlib import Path
from types import SimpleNamespace
from typing import Any
from agent.file_safety import get_read_block_error, is_write_denied
from agent.redact import redact_sensitive_text
ACP_MARKER_BASE_URL = "acp://copilot"
_DEFAULT_TIMEOUT_SECONDS = 900.0
@@ -57,18 +54,6 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
}
def _permission_denied(message_id: Any) -> dict[str, Any]:
return {
"jsonrpc": "2.0",
"id": message_id,
"result": {
"outcome": {
"outcome": "cancelled",
}
},
}
def _format_messages_as_prompt(
messages: list[dict[str, Any]],
model: str | None = None,
@@ -401,8 +386,6 @@ class CopilotACPClient:
stderr_tail: deque[str] = deque(maxlen=40)
def _stdout_reader() -> None:
if proc.stdout is None:
return
for line in proc.stdout:
try:
inbox.put(json.loads(line))
@@ -550,13 +533,18 @@ class CopilotACPClient:
params = msg.get("params") or {}
if method == "session/request_permission":
response = _permission_denied(message_id)
response = {
"jsonrpc": "2.0",
"id": message_id,
"result": {
"outcome": {
"outcome": "allow_once",
}
},
}
elif method == "fs/read_text_file":
try:
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
block_error = get_read_block_error(str(path))
if block_error:
raise PermissionError(block_error)
content = path.read_text() if path.exists() else ""
line = params.get("line")
limit = params.get("limit")
@@ -565,8 +553,6 @@ class CopilotACPClient:
start = line - 1
end = start + limit if isinstance(limit, int) and limit > 0 else None
content = "".join(lines[start:end])
if content:
content = redact_sensitive_text(content)
response = {
"jsonrpc": "2.0",
"id": message_id,
@@ -579,10 +565,6 @@ class CopilotACPClient:
elif method == "fs/write_text_file":
try:
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
if is_write_denied(str(path)):
raise PermissionError(
f"Write denied: '{path}' is a protected system/credential file."
)
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(str(params.get("content") or ""))
response = {
+69 -91
View File
@@ -983,14 +983,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
active_sources: Set[str] = set()
auth_store = _load_auth_store()
# Shared suppression gate — used at every upsert site so
# `hermes auth remove <provider> <N>` is stable across all source types.
try:
from hermes_cli.auth import is_source_suppressed as _is_suppressed
except ImportError:
def _is_suppressed(_p, _s): # type: ignore[misc]
return False
if provider == "anthropic":
# Only auto-discover external credentials (Claude Code, Hermes PKCE)
# when the user has explicitly configured anthropic as their provider.
@@ -1010,8 +1002,13 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
("claude_code", read_claude_code_credentials()),
):
if creds and creds.get("accessToken"):
if _is_suppressed(provider, source_name):
continue
# Check if user explicitly removed this source
try:
from hermes_cli.auth import is_source_suppressed
if is_source_suppressed(provider, source_name):
continue
except ImportError:
pass
active_sources.add(source_name)
changed |= _upsert_entry(
entries,
@@ -1029,7 +1026,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
elif provider == "nous":
state = _load_provider_state(auth_store, "nous")
if state and not _is_suppressed(provider, "device_code"):
if state:
active_sources.add("device_code")
# Prefer a user-supplied label embedded in the singleton state
# (set by persist_nous_credentials(label=...) when the user ran
@@ -1070,21 +1067,20 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
token, source = resolve_copilot_token()
if token:
source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
if not _is_suppressed(provider, source_name):
active_sources.add(source_name)
pconfig = PROVIDER_REGISTRY.get(provider)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": token,
"base_url": pconfig.inference_base_url if pconfig else "",
"label": source,
},
)
active_sources.add(source_name)
pconfig = PROVIDER_REGISTRY.get(provider)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": token,
"base_url": pconfig.inference_base_url if pconfig else "",
"label": source,
},
)
except Exception as exc:
logger.debug("Copilot token seed failed: %s", exc)
@@ -1100,21 +1096,20 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
token = creds.get("api_key", "")
if token:
source_name = creds.get("source", "qwen-cli")
if not _is_suppressed(provider, source_name):
active_sources.add(source_name)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_OAUTH,
"access_token": token,
"expires_at_ms": creds.get("expires_at_ms"),
"base_url": creds.get("base_url", ""),
"label": creds.get("auth_file", source_name),
},
)
active_sources.add(source_name)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_OAUTH,
"access_token": token,
"expires_at_ms": creds.get("expires_at_ms"),
"base_url": creds.get("base_url", ""),
"label": creds.get("auth_file", source_name),
},
)
except Exception as exc:
logger.debug("Qwen OAuth token seed failed: %s", exc)
@@ -1123,7 +1118,13 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
# the device_code source as suppressed so it won't be re-seeded from
# the Hermes auth store. Without this gate the removal is instantly
# undone on the next load_pool() call.
if _is_suppressed(provider, "device_code"):
codex_suppressed = False
try:
from hermes_cli.auth import is_source_suppressed
codex_suppressed = is_source_suppressed(provider, "device_code")
except ImportError:
pass
if codex_suppressed:
return changed, active_sources
state = _load_provider_state(auth_store, "openai-codex")
@@ -1157,22 +1158,10 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
changed = False
active_sources: Set[str] = set()
# Honour user suppression — `hermes auth remove <provider> <N>` for an
# env-seeded credential marks the env:<VAR> source as suppressed so it
# won't be re-seeded from the user's shell environment or ~/.hermes/.env.
# Without this gate the removal is silently undone on the next
# load_pool() call whenever the var is still exported by the shell.
try:
from hermes_cli.auth import is_source_suppressed as _is_source_suppressed
except ImportError:
def _is_source_suppressed(_p, _s): # type: ignore[misc]
return False
if provider == "openrouter":
token = os.getenv("OPENROUTER_API_KEY", "").strip()
if token:
source = "env:OPENROUTER_API_KEY"
if _is_source_suppressed(provider, source):
return changed, active_sources
active_sources.add(source)
changed |= _upsert_entry(
entries,
@@ -1209,8 +1198,6 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
if not token:
continue
source = f"env:{env_var}"
if _is_source_suppressed(provider, source):
continue
active_sources.add(source)
auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
base_url = env_url or pconfig.inference_base_url
@@ -1255,13 +1242,6 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
changed = False
active_sources: Set[str] = set()
# Shared suppression gate — same pattern as _seed_from_env/_seed_from_singletons.
try:
from hermes_cli.auth import is_source_suppressed as _is_suppressed
except ImportError:
def _is_suppressed(_p, _s): # type: ignore[misc]
return False
# Seed from the custom_providers config entry's api_key field
cp_config = _get_custom_provider_config(pool_key)
if cp_config:
@@ -1270,20 +1250,19 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
name = str(cp_config.get("name") or "").strip()
if api_key:
source = f"config:{name}"
if not _is_suppressed(pool_key, source):
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": api_key,
"base_url": base_url,
"label": name or source,
},
)
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": api_key,
"base_url": base_url,
"label": name or source,
},
)
# Seed from model.api_key if model.provider=='custom' and model.base_url matches
try:
@@ -1303,20 +1282,19 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
matched_key = get_custom_provider_pool_key(model_base_url)
if matched_key == pool_key:
source = "model_config"
if not _is_suppressed(pool_key, source):
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": model_api_key,
"base_url": model_base_url,
"label": "model_config",
},
)
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": model_api_key,
"base_url": model_base_url,
"label": "model_config",
},
)
except Exception:
pass
-401
View File
@@ -1,401 +0,0 @@
"""Unified removal contract for every credential source Hermes reads from.
Hermes seeds its credential pool from many places:
env:<VAR> — os.environ / ~/.hermes/.env
claude_code — ~/.claude/.credentials.json
hermes_pkce — ~/.hermes/.anthropic_oauth.json
device_code — auth.json providers.<provider> (nous, openai-codex, ...)
qwen-cli — ~/.qwen/oauth_creds.json
gh_cli — gh auth token
config:<name> — custom_providers config entry
model_config — model.api_key when model.provider == "custom"
manual — user ran `hermes auth add`
Each source has its own reader inside ``agent.credential_pool._seed_from_*``
(which keep their existing shape — we haven't restructured them). What we
unify here is **removal**:
``hermes auth remove <provider> <N>`` must make the pool entry stay gone.
Before this module, every source had an ad-hoc removal branch in
``auth_remove_command``, and several sources had no branch at all — so
``auth remove`` silently reverted on the next ``load_pool()`` call for
qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and
custom-config sources.
Now every source registers a ``RemovalStep`` that does exactly three things
in the same shape:
1. Clean up whatever externally-readable state the source reads from
(.env line, auth.json block, OAuth file, etc.)
2. Suppress the ``(provider, source_id)`` in auth.json so the
corresponding ``_seed_from_*`` branch skips the upsert on re-load
3. Return ``RemovalResult`` describing what was cleaned and any
diagnostic hints the user should see (shell-exported env vars,
external credential files we deliberately don't delete, etc.)
Adding a new credential source is:
- wire up a reader branch in ``_seed_from_*`` (existing pattern)
- gate that reader behind ``is_source_suppressed(provider, source_id)``
- register a ``RemovalStep`` here
No more per-source if/elif chain in ``auth_remove_command``.
"""
from __future__ import annotations
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable, List, Optional
@dataclass
class RemovalResult:
"""Outcome of removing a credential source.
Attributes:
cleaned: Short strings describing external state that was actually
mutated (``"Cleared XAI_API_KEY from .env"``,
``"Cleared openai-codex OAuth tokens from auth store"``).
Printed as plain lines to the user.
hints: Diagnostic lines ABOUT state the user may need to clean up
themselves or is deliberately left intact (shell-exported env
var, Claude Code credential file we don't delete, etc.).
Printed as plain lines to the user. Always non-destructive.
suppress: Whether to call ``suppress_credential_source`` after
cleanup so future ``load_pool`` calls skip this source.
Default True — almost every source needs this to stay sticky.
The only legitimate False is ``manual`` entries, which aren't
seeded from anywhere external.
"""
cleaned: List[str] = field(default_factory=list)
hints: List[str] = field(default_factory=list)
suppress: bool = True
@dataclass
class RemovalStep:
"""How to remove one specific credential source cleanly.
Attributes:
provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...).
Special value ``"*"`` means "matches any provider" — used for
sources like ``manual`` that aren't provider-specific.
source_id: Source identifier as it appears in
``PooledCredential.source``. May be a literal (``"claude_code"``)
or a prefix pattern matched via ``match_fn``.
match_fn: Optional predicate overriding literal ``source_id``
matching. Gets the removed entry's source string. Used for
``env:*`` (any env-seeded key), ``config:*`` (any custom
pool), and ``manual:*`` (any manual-source variant).
remove_fn: ``(provider, removed_entry) -> RemovalResult``. Does the
actual cleanup and returns what happened for the user.
description: One-line human-readable description for docs / tests.
"""
provider: str
source_id: str
remove_fn: Callable[..., RemovalResult]
match_fn: Optional[Callable[[str], bool]] = None
description: str = ""
def matches(self, provider: str, source: str) -> bool:
if self.provider != "*" and self.provider != provider:
return False
if self.match_fn is not None:
return self.match_fn(source)
return source == self.source_id
_REGISTRY: List[RemovalStep] = []
def register(step: RemovalStep) -> RemovalStep:
_REGISTRY.append(step)
return step
def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]:
"""Return the first matching RemovalStep, or None if unregistered.
Unregistered sources fall through to the default remove path in
``auth_remove_command``: the pool entry is already gone (that happens
before dispatch), no external cleanup, no suppression. This is the
correct behaviour for ``manual`` entries — they were only ever stored
in the pool, nothing external to clean up.
"""
for step in _REGISTRY:
if step.matches(provider, source):
return step
return None
# ---------------------------------------------------------------------------
# Individual RemovalStep implementations — one per source.
# ---------------------------------------------------------------------------
# Each remove_fn is intentionally small and single-purpose. Adding a new
# credential source means adding ONE entry here — no other changes to
# auth_remove_command.
def _remove_env_source(provider: str, removed) -> RemovalResult:
"""env:<VAR> — the most common case.
Handles three user situations:
1. Var lives only in ~/.hermes/.env → clear it
2. Var lives only in the user's shell (shell profile, systemd
EnvironmentFile, launchd plist) → hint them where to unset it
3. Var lives in both → clear from .env, hint about shell
"""
from hermes_cli.config import get_env_path, remove_env_value
result = RemovalResult()
env_var = removed.source[len("env:"):]
if not env_var:
return result
# Detect shell vs .env BEFORE remove_env_value pops os.environ.
env_in_process = bool(os.getenv(env_var))
env_in_dotenv = False
try:
env_path = get_env_path()
if env_path.exists():
env_in_dotenv = any(
line.strip().startswith(f"{env_var}=")
for line in env_path.read_text(errors="replace").splitlines()
)
except OSError:
pass
shell_exported = env_in_process and not env_in_dotenv
cleared = remove_env_value(env_var)
if cleared:
result.cleaned.append(f"Cleared {env_var} from .env")
if shell_exported:
result.hints.extend([
f"Note: {env_var} is still set in your shell environment "
f"(not in ~/.hermes/.env).",
" Unset it there (shell profile, systemd EnvironmentFile, "
"launchd plist, etc.) or it will keep being visible to Hermes.",
f" The pool entry is now suppressed — Hermes will ignore "
f"{env_var} until you run `hermes auth add {provider}`.",
])
else:
result.hints.append(
f"Suppressed env:{env_var} — it will not be re-seeded even "
f"if the variable is re-exported later."
)
return result
def _remove_claude_code(provider: str, removed) -> RemovalResult:
"""~/.claude/.credentials.json is owned by Claude Code itself.
We don't delete it — the user's Claude Code install still needs to
work. We just suppress it so Hermes stops reading it.
"""
return RemovalResult(hints=[
"Suppressed claude_code credential — it will not be re-seeded.",
"Note: Claude Code credentials still live in ~/.claude/.credentials.json",
"Run `hermes auth add anthropic` to re-enable if needed.",
])
def _remove_hermes_pkce(provider: str, removed) -> RemovalResult:
"""~/.hermes/.anthropic_oauth.json is ours — delete it outright."""
from hermes_constants import get_hermes_home
result = RemovalResult()
oauth_file = get_hermes_home() / ".anthropic_oauth.json"
if oauth_file.exists():
try:
oauth_file.unlink()
result.cleaned.append("Cleared Hermes Anthropic OAuth credentials")
except OSError as exc:
result.hints.append(f"Could not delete {oauth_file}: {exc}")
return result
def _clear_auth_store_provider(provider: str) -> bool:
"""Delete auth_store.providers[provider]. Returns True if deleted."""
from hermes_cli.auth import (
_auth_store_lock,
_load_auth_store,
_save_auth_store,
)
with _auth_store_lock():
auth_store = _load_auth_store()
providers_dict = auth_store.get("providers")
if isinstance(providers_dict, dict) and provider in providers_dict:
del providers_dict[provider]
_save_auth_store(auth_store)
return True
return False
def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
"""Nous OAuth lives in auth.json providers.nous — clear it and suppress.
We suppress in addition to clearing because nothing else stops the
user's next `hermes login` run from writing providers.nous again
before they decide to. Suppression forces them to go through
`hermes auth add nous` to re-engage, which is the documented re-add
path and clears the suppression atomically.
"""
result = RemovalResult()
if _clear_auth_store_provider(provider):
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
return result
def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
"""Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
refresh_codex_oauth_pure() writes both every time, so clearing only
the Hermes auth store is not enough — _seed_from_singletons() would
re-import from ~/.codex/auth.json on the next load_pool() call and
the removal would be instantly undone. We suppress instead of
deleting Codex CLI's file, so the Codex CLI itself keeps working.
The canonical source name in ``_seed_from_singletons`` is
``"device_code"`` (no prefix). Entries may show up in the pool as
either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added
via ``hermes auth add openai-codex``), but in both cases the re-seed
gate lives at the ``"device_code"`` suppression key. We suppress
that canonical key here; the central dispatcher also suppresses
``removed.source`` which is fine — belt-and-suspenders, idempotent.
"""
from hermes_cli.auth import suppress_credential_source
result = RemovalResult()
if _clear_auth_store_provider(provider):
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
# Suppress the canonical re-seed source, not just whatever source the
# removed entry had. Otherwise `manual:device_code` removals wouldn't
# block the `device_code` re-seed path.
suppress_credential_source(provider, "device_code")
result.hints.extend([
"Suppressed openai-codex device_code source — it will not be re-seeded.",
"Note: Codex CLI credentials still live in ~/.codex/auth.json",
"Run `hermes auth add openai-codex` to re-enable if needed.",
])
return result
def _remove_qwen_cli(provider: str, removed) -> RemovalResult:
"""~/.qwen/oauth_creds.json is owned by the Qwen CLI.
Same pattern as claude_code — suppress, don't delete. The user's
Qwen CLI install still reads from that file.
"""
return RemovalResult(hints=[
"Suppressed qwen-cli credential — it will not be re-seeded.",
"Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json",
"Run `hermes auth add qwen-oauth` to re-enable if needed.",
])
def _remove_copilot_gh(provider: str, removed) -> RemovalResult:
"""Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN.
Copilot is special: the same token can be seeded as multiple source
entries (gh_cli from ``_seed_from_singletons`` plus env:<VAR> from
``_seed_from_env``), so removing one entry without suppressing the
others lets the duplicates resurrect. We suppress ALL known copilot
sources here so removal is stable regardless of which entry the
user clicked.
We don't touch the user's gh CLI or shell state — just suppress so
Hermes stops picking the token up.
"""
# Suppress ALL copilot source variants up-front so no path resurrects
# the pool entry. The central dispatcher in auth_remove_command will
# ALSO suppress removed.source, but it's idempotent so double-calling
# is harmless.
from hermes_cli.auth import suppress_credential_source
suppress_credential_source(provider, "gh_cli")
for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"):
suppress_credential_source(provider, f"env:{env_var}")
return RemovalResult(hints=[
"Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.",
"Note: Your gh CLI / shell environment is unchanged.",
"Run `hermes auth add copilot` to re-enable if needed.",
])
def _remove_custom_config(provider: str, removed) -> RemovalResult:
"""Custom provider pools are seeded from custom_providers config or
model.api_key. Both are in config.yaml — modifying that from here
is more invasive than suppression. We suppress; the user can edit
config.yaml if they want to remove the key from disk entirely.
"""
source_label = removed.source
return RemovalResult(hints=[
f"Suppressed {source_label} — it will not be re-seeded.",
"Note: The underlying value in config.yaml is unchanged. Edit it "
"directly if you want to remove the credential from disk.",
])
def _register_all_sources() -> None:
"""Called once on module import.
ORDER MATTERS — ``find_removal_step`` returns the first match. Put
provider-specific steps before the generic ``env:*`` step so that e.g.
copilot's ``env:GH_TOKEN`` goes through the copilot removal (which
doesn't touch the user's shell), not the generic env-var removal
(which would try to clear .env).
"""
register(RemovalStep(
provider="copilot", source_id="gh_cli",
match_fn=lambda src: src == "gh_cli" or src.startswith("env:"),
remove_fn=_remove_copilot_gh,
description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
))
register(RemovalStep(
provider="*", source_id="env:",
match_fn=lambda src: src.startswith("env:"),
remove_fn=_remove_env_source,
description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
))
register(RemovalStep(
provider="anthropic", source_id="claude_code",
remove_fn=_remove_claude_code,
description="~/.claude/.credentials.json",
))
register(RemovalStep(
provider="anthropic", source_id="hermes_pkce",
remove_fn=_remove_hermes_pkce,
description="~/.hermes/.anthropic_oauth.json",
))
register(RemovalStep(
provider="nous", source_id="device_code",
remove_fn=_remove_nous_device_code,
description="auth.json providers.nous",
))
register(RemovalStep(
provider="openai-codex", source_id="device_code",
match_fn=lambda src: src == "device_code" or src.endswith(":device_code"),
remove_fn=_remove_codex_device_code,
description="auth.json providers.openai-codex + ~/.codex/auth.json",
))
register(RemovalStep(
provider="qwen-oauth", source_id="qwen-cli",
remove_fn=_remove_qwen_cli,
description="~/.qwen/oauth_creds.json",
))
register(RemovalStep(
provider="*", source_id="config:",
match_fn=lambda src: src.startswith("config:") or src == "model_config",
remove_fn=_remove_custom_config,
description="Custom provider config.yaml api_key field",
))
_register_all_sources()
+10 -74
View File
@@ -220,25 +220,12 @@ _TRANSPORT_ERROR_TYPES = frozenset({
"ConnectionAbortedError", "BrokenPipeError",
"TimeoutError", "ReadError",
"ServerDisconnectedError",
# SSL/TLS transport errors — transient mid-stream handshake/record
# failures that should retry rather than surface as a stalled session.
# ssl.SSLError subclasses OSError (caught by isinstance) but we list
# the type names here so provider-wrapped SSL errors (e.g. when the
# SDK re-raises without preserving the exception chain) still classify
# as transport rather than falling through to the unknown bucket.
"SSLError", "SSLZeroReturnError", "SSLWantReadError",
"SSLWantWriteError", "SSLEOFError", "SSLSyscallError",
# OpenAI SDK errors (not subclasses of Python builtins)
"APIConnectionError",
"APITimeoutError",
})
# Server disconnect patterns (no status code, but transport-level).
# These are the "ambiguous" patterns — a plain connection close could be
# transient transport hiccup OR server-side context overflow rejection
# (common when the API gateway disconnects instead of returning an HTTP
# error for oversized requests). A large session + one of these patterns
# triggers the context-overflow-with-compression recovery path.
# Server disconnect patterns (no status code, but transport-level)
_SERVER_DISCONNECT_PATTERNS = [
"server disconnected",
"peer closed connection",
@@ -249,40 +236,6 @@ _SERVER_DISCONNECT_PATTERNS = [
"incomplete chunked read",
]
# SSL/TLS transient failure patterns — intentionally distinct from
# _SERVER_DISCONNECT_PATTERNS above.
#
# An SSL alert mid-stream is almost always a transport-layer hiccup
# (flaky network, mid-session TLS renegotiation failure, load balancer
# dropping the connection) — NOT a server-side context overflow signal.
# So we want the retry path but NOT the compression path; lumping these
# into _SERVER_DISCONNECT_PATTERNS would trigger unnecessary (and
# expensive) context compression on any large-session SSL hiccup.
#
# The OpenSSL library constructs error codes by prepending a format string
# to the uppercased alert reason; OpenSSL 3.x changed the separator
# (e.g. `SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
# which silently stopped matching anything explicit. Matching on the
# stable substrings (`bad record mac`, `ssl alert`, `tls alert`, etc.)
# survives future OpenSSL format churn without code changes.
_SSL_TRANSIENT_PATTERNS = [
# Space-separated (human-readable form, Python ssl module, most SDKs)
"bad record mac",
"ssl alert",
"tls alert",
"ssl handshake failure",
"tlsv1 alert",
"sslv3 alert",
# Underscore-separated (OpenSSL error code tokens, e.g.
# `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC`, `SSLV3_ALERT_BAD_RECORD_MAC`)
"bad_record_mac",
"ssl_alert",
"tls_alert",
"tls_alert_internal_error",
# Python ssl module prefix, e.g. "[SSL: BAD_RECORD_MAC]"
"[ssl:",
]
# ── Classification pipeline ─────────────────────────────────────────────
@@ -302,10 +255,9 @@ def classify_api_error(
2. HTTP status code + message-aware refinement
3. Error code classification (from body)
4. Message pattern matching (billing vs rate_limit vs context vs auth)
5. SSL/TLS transient alert patterns → retry as timeout
5. Transport error heuristics
6. Server disconnect + large session → context overflow
7. Transport error heuristics
8. Fallback: unknown (retryable with backoff)
7. Fallback: unknown (retryable with backoff)
Args:
error: The exception from the API call.
@@ -436,18 +388,7 @@ def classify_api_error(
if classified is not None:
return classified
# ── 5. SSL/TLS transient errors → retry as timeout (not compression) ──
# SSL alerts mid-stream are transport hiccups, not server-side context
# overflow signals. Classify before the disconnect check so a large
# session doesn't incorrectly trigger context compression when the real
# cause is a flaky TLS handshake. Also matches when the error is
# wrapped in a generic exception whose message string carries the SSL
# alert text but the type isn't ssl.SSLError (happens with some SDKs
# that re-raise without chaining).
if any(p in error_msg for p in _SSL_TRANSIENT_PATTERNS):
return _result(FailoverReason.timeout, retryable=True)
# ── 6. Server disconnect + large session → context overflow ─────
# ── 5. Server disconnect + large session → context overflow ─────
# Must come BEFORE generic transport error catch — a disconnect on
# a large session is more likely context overflow than a transient
# transport hiccup. Without this ordering, RemoteProtocolError
@@ -464,12 +405,12 @@ def classify_api_error(
)
return _result(FailoverReason.timeout, retryable=True)
# ── 7. Transport / timeout heuristics ───────────────────────────
# ── 6. Transport / timeout heuristics ───────────────────────────
if error_type in _TRANSPORT_ERROR_TYPES or isinstance(error, (TimeoutError, ConnectionError, OSError)):
return _result(FailoverReason.timeout, retryable=True)
# ── 8. Fallback: unknown ────────────────────────────────────────
# ── 7. Fallback: unknown ────────────────────────────────────────
return _result(FailoverReason.unknown, retryable=True)
@@ -529,16 +470,11 @@ def _classify_by_status(
retryable=False,
should_fallback=True,
)
# Generic 404 with no "model not found" signal — could be a wrong
# endpoint path (common with local llama.cpp / Ollama / vLLM when
# the URL is slightly misconfigured), a proxy routing glitch, or
# a transient backend issue. Classifying these as model_not_found
# silently falls back to a different provider and tells the model
# the model is missing, which is wrong and wastes a turn. Treat
# as unknown so the retry loop surfaces the real error instead.
# Generic 404 — could be model or endpoint
return result_fn(
FailoverReason.unknown,
retryable=True,
FailoverReason.model_not_found,
retryable=False,
should_fallback=True,
)
if status_code == 413:
-111
View File
@@ -1,111 +0,0 @@
"""Shared file safety rules used by both tools and ACP shims."""
from __future__ import annotations
import os
from pathlib import Path
from typing import Optional
def _hermes_home_path() -> Path:
"""Resolve the active HERMES_HOME (profile-aware) without circular imports."""
try:
from hermes_constants import get_hermes_home # local import to avoid cycles
return get_hermes_home()
except Exception:
return Path(os.path.expanduser("~/.hermes"))
def build_write_denied_paths(home: str) -> set[str]:
"""Return exact sensitive paths that must never be written."""
hermes_home = _hermes_home_path()
return {
os.path.realpath(p)
for p in [
os.path.join(home, ".ssh", "authorized_keys"),
os.path.join(home, ".ssh", "id_rsa"),
os.path.join(home, ".ssh", "id_ed25519"),
os.path.join(home, ".ssh", "config"),
str(hermes_home / ".env"),
os.path.join(home, ".bashrc"),
os.path.join(home, ".zshrc"),
os.path.join(home, ".profile"),
os.path.join(home, ".bash_profile"),
os.path.join(home, ".zprofile"),
os.path.join(home, ".netrc"),
os.path.join(home, ".pgpass"),
os.path.join(home, ".npmrc"),
os.path.join(home, ".pypirc"),
"/etc/sudoers",
"/etc/passwd",
"/etc/shadow",
]
}
def build_write_denied_prefixes(home: str) -> list[str]:
"""Return sensitive directory prefixes that must never be written."""
return [
os.path.realpath(p) + os.sep
for p in [
os.path.join(home, ".ssh"),
os.path.join(home, ".aws"),
os.path.join(home, ".gnupg"),
os.path.join(home, ".kube"),
"/etc/sudoers.d",
"/etc/systemd",
os.path.join(home, ".docker"),
os.path.join(home, ".azure"),
os.path.join(home, ".config", "gh"),
]
]
def get_safe_write_root() -> Optional[str]:
"""Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset."""
root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
if not root:
return None
try:
return os.path.realpath(os.path.expanduser(root))
except Exception:
return None
def is_write_denied(path: str) -> bool:
"""Return True if path is blocked by the write denylist or safe root."""
home = os.path.realpath(os.path.expanduser("~"))
resolved = os.path.realpath(os.path.expanduser(str(path)))
if resolved in build_write_denied_paths(home):
return True
for prefix in build_write_denied_prefixes(home):
if resolved.startswith(prefix):
return True
safe_root = get_safe_write_root()
if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
return True
return False
def get_read_block_error(path: str) -> Optional[str]:
"""Return an error message when a read targets internal Hermes cache files."""
resolved = Path(path).expanduser().resolve()
hermes_home = _hermes_home_path().resolve()
blocked_dirs = [
hermes_home / "skills" / ".hub" / "index-cache",
hermes_home / "skills" / ".hub",
]
for blocked in blocked_dirs:
try:
resolved.relative_to(blocked)
except ValueError:
continue
return (
f"Access denied: {path} is an internal Hermes cache file "
"and cannot be read directly to prevent prompt injection. "
"Use the skills_list or skill_view tools instead."
)
return None
+1 -2
View File
@@ -799,8 +799,7 @@ def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
err_obj = {}
err_status = str(err_obj.get("status") or "").strip()
err_message = str(err_obj.get("message") or "").strip()
_raw_details = err_obj.get("details")
err_details_list = _raw_details if isinstance(_raw_details, list) else []
err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []
# Extract google.rpc.ErrorInfo reason + metadata. There may be more
# than one ErrorInfo (rare), so we pick the first one with a reason.
+1 -2
View File
@@ -613,8 +613,7 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
err_obj = {}
err_status = str(err_obj.get("status") or "").strip()
err_message = str(err_obj.get("message") or "").strip()
_raw_details = err_obj.get("details")
details_list = _raw_details if isinstance(_raw_details, list) else []
details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []
reason = ""
retry_after: Optional[float] = None
-242
View File
@@ -1,242 +0,0 @@
"""
Image Generation Provider ABC
=============================
Defines the pluggable-backend interface for image generation. Providers register
instances via ``PluginContext.register_image_gen_provider()``; the active one
(selected via ``image_gen.provider`` in ``config.yaml``) services every
``image_generate`` tool call.
Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
via ``plugins.enabled``).
Response shape
--------------
All providers return a dict that :func:`success_response` / :func:`error_response`
produce. The tool wrapper JSON-serializes it. Keys:
success bool
image str | None URL or absolute file path
model str provider-specific model identifier
prompt str echoed prompt
aspect_ratio str "landscape" | "square" | "portrait"
provider str provider name (for diagnostics)
error str only when success=False
error_type str only when success=False
"""
from __future__ import annotations
import abc
import base64
import datetime
import logging
import uuid
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait")
DEFAULT_ASPECT_RATIO = "landscape"
# ---------------------------------------------------------------------------
# ABC
# ---------------------------------------------------------------------------
class ImageGenProvider(abc.ABC):
"""Abstract base class for an image generation backend.
Subclasses must implement :meth:`generate`. Everything else has sane
defaults override only what your provider needs.
"""
@property
@abc.abstractmethod
def name(self) -> str:
"""Stable short identifier used in ``image_gen.provider`` config.
Lowercase, no spaces. Examples: ``fal``, ``openai``, ``replicate``.
"""
@property
def display_name(self) -> str:
"""Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``."""
return self.name.title()
def is_available(self) -> bool:
"""Return True when this provider can service calls.
Typically checks for a required API key. Default: True
(providers with no external dependencies are always available).
"""
return True
def list_models(self) -> List[Dict[str, Any]]:
"""Return catalog entries for ``hermes tools`` model picker.
Each entry::
{
"id": "gpt-image-1.5", # required
"display": "GPT Image 1.5", # optional; defaults to id
"speed": "~10s", # optional
"strengths": "...", # optional
"price": "$...", # optional
}
Default: empty list (provider has no user-selectable models).
"""
return []
def get_setup_schema(self) -> Dict[str, Any]:
"""Return provider metadata for the ``hermes tools`` picker.
Used by ``tools_config.py`` to inject this provider as a row in
the Image Generation provider list. Shape::
{
"name": "OpenAI", # picker label
"badge": "paid", # optional short tag
"tag": "One-line description...", # optional subtitle
"env_vars": [ # keys to prompt for
{"key": "OPENAI_API_KEY",
"prompt": "OpenAI API key",
"url": "https://platform.openai.com/api-keys"},
],
}
Default: minimal entry derived from ``display_name``. Override to
expose API key prompts and custom badges.
"""
return {
"name": self.display_name,
"badge": "",
"tag": "",
"env_vars": [],
}
def default_model(self) -> Optional[str]:
"""Return the default model id, or None if not applicable."""
models = self.list_models()
if models:
return models[0].get("id")
return None
@abc.abstractmethod
def generate(
self,
prompt: str,
aspect_ratio: str = DEFAULT_ASPECT_RATIO,
**kwargs: Any,
) -> Dict[str, Any]:
"""Generate an image.
Implementations should return the dict from :func:`success_response`
or :func:`error_response`. ``kwargs`` may contain forward-compat
parameters future versions of the schema will expose implementations
should ignore unknown keys.
"""
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def resolve_aspect_ratio(value: Optional[str]) -> str:
"""Clamp an aspect_ratio value to the valid set, defaulting to landscape.
Invalid values are coerced rather than rejected so the tool surface is
forgiving of agent mistakes.
"""
if not isinstance(value, str):
return DEFAULT_ASPECT_RATIO
v = value.strip().lower()
if v in VALID_ASPECT_RATIOS:
return v
return DEFAULT_ASPECT_RATIO
def _images_cache_dir() -> Path:
"""Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
from hermes_constants import get_hermes_home
path = get_hermes_home() / "cache" / "images"
path.mkdir(parents=True, exist_ok=True)
return path
def save_b64_image(
b64_data: str,
*,
prefix: str = "image",
extension: str = "png",
) -> Path:
"""Decode base64 image data and write it under ``$HERMES_HOME/cache/images/``.
Returns the absolute :class:`Path` to the saved file.
Filename format: ``<prefix>_<YYYYMMDD_HHMMSS>_<short-uuid>.<ext>``.
"""
raw = base64.b64decode(b64_data)
ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
short = uuid.uuid4().hex[:8]
path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"
path.write_bytes(raw)
return path
def success_response(
*,
image: str,
model: str,
prompt: str,
aspect_ratio: str,
provider: str,
extra: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
"""Build a uniform success response dict.
``image`` may be an HTTP URL or an absolute filesystem path (for b64
providers like OpenAI). Callers that need to pass through additional
backend-specific fields can supply ``extra``.
"""
payload: Dict[str, Any] = {
"success": True,
"image": image,
"model": model,
"prompt": prompt,
"aspect_ratio": aspect_ratio,
"provider": provider,
}
if extra:
for k, v in extra.items():
payload.setdefault(k, v)
return payload
def error_response(
*,
error: str,
error_type: str = "provider_error",
provider: str = "",
model: str = "",
prompt: str = "",
aspect_ratio: str = DEFAULT_ASPECT_RATIO,
) -> Dict[str, Any]:
"""Build a uniform error response dict."""
return {
"success": False,
"image": None,
"error": error,
"error_type": error_type,
"model": model,
"prompt": prompt,
"aspect_ratio": aspect_ratio,
"provider": provider,
}
-120
View File
@@ -1,120 +0,0 @@
"""
Image Generation Provider Registry
==================================
Central map of registered providers. Populated by plugins at import-time via
``PluginContext.register_image_gen_provider()``; consumed by the
``image_generate`` tool to dispatch each call to the active backend.
Active selection
----------------
The active provider is chosen by ``image_gen.provider`` in ``config.yaml``.
If unset, :func:`get_active_provider` applies fallback logic:
1. If exactly one provider is registered, use it.
2. Otherwise if a provider named ``fal`` is registered, use it (legacy
default matches pre-plugin behavior).
3. Otherwise return ``None`` (the tool surfaces a helpful error pointing
the user at ``hermes tools``).
"""
from __future__ import annotations
import logging
import threading
from typing import Dict, List, Optional
from agent.image_gen_provider import ImageGenProvider
logger = logging.getLogger(__name__)
_providers: Dict[str, ImageGenProvider] = {}
_lock = threading.Lock()
def register_provider(provider: ImageGenProvider) -> None:
"""Register an image generation provider.
Re-registration (same ``name``) overwrites the previous entry and logs
a debug message this makes hot-reload scenarios (tests, dev loops)
behave predictably.
"""
if not isinstance(provider, ImageGenProvider):
raise TypeError(
f"register_provider() expects an ImageGenProvider instance, "
f"got {type(provider).__name__}"
)
name = provider.name
if not isinstance(name, str) or not name.strip():
raise ValueError("Image gen provider .name must be a non-empty string")
with _lock:
existing = _providers.get(name)
_providers[name] = provider
if existing is not None:
logger.debug("Image gen provider '%s' re-registered (was %r)", name, type(existing).__name__)
else:
logger.debug("Registered image gen provider '%s' (%s)", name, type(provider).__name__)
def list_providers() -> List[ImageGenProvider]:
"""Return all registered providers, sorted by name."""
with _lock:
items = list(_providers.values())
return sorted(items, key=lambda p: p.name)
def get_provider(name: str) -> Optional[ImageGenProvider]:
"""Return the provider registered under *name*, or None."""
if not isinstance(name, str):
return None
with _lock:
return _providers.get(name.strip())
def get_active_provider() -> Optional[ImageGenProvider]:
"""Resolve the currently-active provider.
Reads ``image_gen.provider`` from config.yaml; falls back per the
module docstring.
"""
configured: Optional[str] = None
try:
from hermes_cli.config import load_config
cfg = load_config()
section = cfg.get("image_gen") if isinstance(cfg, dict) else None
if isinstance(section, dict):
raw = section.get("provider")
if isinstance(raw, str) and raw.strip():
configured = raw.strip()
except Exception as exc:
logger.debug("Could not read image_gen.provider from config: %s", exc)
with _lock:
snapshot = dict(_providers)
if configured:
provider = snapshot.get(configured)
if provider is not None:
return provider
logger.debug(
"image_gen.provider='%s' configured but not registered; falling back",
configured,
)
# Fallback: single-provider case
if len(snapshot) == 1:
return next(iter(snapshot.values()))
# Fallback: prefer legacy FAL for backward compat
if "fal" in snapshot:
return snapshot["fal"]
return None
def _reset_for_tests() -> None:
"""Clear the registry. **Test-only.**"""
with _lock:
_providers.clear()
-162
View File
@@ -124,7 +124,6 @@ class InsightsEngine:
# Gather raw data
sessions = self._get_sessions(cutoff, source)
tool_usage = self._get_tool_usage(cutoff, source)
skill_usage = self._get_skill_usage(cutoff, source)
message_stats = self._get_message_stats(cutoff, source)
if not sessions:
@@ -136,15 +135,6 @@ class InsightsEngine:
"models": [],
"platforms": [],
"tools": [],
"skills": {
"summary": {
"total_skill_loads": 0,
"total_skill_edits": 0,
"total_skill_actions": 0,
"distinct_skills_used": 0,
},
"top_skills": [],
},
"activity": {},
"top_sessions": [],
}
@@ -154,7 +144,6 @@ class InsightsEngine:
models = self._compute_model_breakdown(sessions)
platforms = self._compute_platform_breakdown(sessions)
tools = self._compute_tool_breakdown(tool_usage)
skills = self._compute_skill_breakdown(skill_usage)
activity = self._compute_activity_patterns(sessions)
top_sessions = self._compute_top_sessions(sessions)
@@ -167,7 +156,6 @@ class InsightsEngine:
"models": models,
"platforms": platforms,
"tools": tools,
"skills": skills,
"activity": activity,
"top_sessions": top_sessions,
}
@@ -296,82 +284,6 @@ class InsightsEngine:
for name, count in tool_counts.most_common()
]
def _get_skill_usage(self, cutoff: float, source: str = None) -> List[Dict]:
"""Extract per-skill usage from assistant tool calls."""
skill_counts: Dict[str, Dict[str, Any]] = {}
if source:
cursor = self._conn.execute(
"""SELECT m.tool_calls, m.timestamp
FROM messages m
JOIN sessions s ON s.id = m.session_id
WHERE s.started_at >= ? AND s.source = ?
AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
(cutoff, source),
)
else:
cursor = self._conn.execute(
"""SELECT m.tool_calls, m.timestamp
FROM messages m
JOIN sessions s ON s.id = m.session_id
WHERE s.started_at >= ?
AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
(cutoff,),
)
for row in cursor.fetchall():
try:
calls = row["tool_calls"]
if isinstance(calls, str):
calls = json.loads(calls)
if not isinstance(calls, list):
continue
except (json.JSONDecodeError, TypeError):
continue
timestamp = row["timestamp"]
for call in calls:
if not isinstance(call, dict):
continue
func = call.get("function", {})
tool_name = func.get("name")
if tool_name not in {"skill_view", "skill_manage"}:
continue
args = func.get("arguments")
if isinstance(args, str):
try:
args = json.loads(args)
except (json.JSONDecodeError, TypeError):
continue
if not isinstance(args, dict):
continue
skill_name = args.get("name")
if not isinstance(skill_name, str) or not skill_name.strip():
continue
entry = skill_counts.setdefault(
skill_name,
{
"skill": skill_name,
"view_count": 0,
"manage_count": 0,
"last_used_at": None,
},
)
if tool_name == "skill_view":
entry["view_count"] += 1
else:
entry["manage_count"] += 1
if timestamp is not None and (
entry["last_used_at"] is None or timestamp > entry["last_used_at"]
):
entry["last_used_at"] = timestamp
return list(skill_counts.values())
def _get_message_stats(self, cutoff: float, source: str = None) -> Dict:
"""Get aggregate message statistics."""
if source:
@@ -563,46 +475,6 @@ class InsightsEngine:
})
return result
def _compute_skill_breakdown(self, skill_usage: List[Dict]) -> Dict[str, Any]:
"""Process per-skill usage into summary + ranked list."""
total_skill_loads = sum(s["view_count"] for s in skill_usage) if skill_usage else 0
total_skill_edits = sum(s["manage_count"] for s in skill_usage) if skill_usage else 0
total_skill_actions = total_skill_loads + total_skill_edits
top_skills = []
for skill in skill_usage:
total_count = skill["view_count"] + skill["manage_count"]
percentage = (total_count / total_skill_actions * 100) if total_skill_actions else 0
top_skills.append({
"skill": skill["skill"],
"view_count": skill["view_count"],
"manage_count": skill["manage_count"],
"total_count": total_count,
"percentage": percentage,
"last_used_at": skill.get("last_used_at"),
})
top_skills.sort(
key=lambda s: (
s["total_count"],
s["view_count"],
s["manage_count"],
s["last_used_at"] or 0,
s["skill"],
),
reverse=True,
)
return {
"summary": {
"total_skill_loads": total_skill_loads,
"total_skill_edits": total_skill_edits,
"total_skill_actions": total_skill_actions,
"distinct_skills_used": len(skill_usage),
},
"top_skills": top_skills,
}
def _compute_activity_patterns(self, sessions: List[Dict]) -> Dict:
"""Analyze activity patterns by day of week and hour."""
day_counts = Counter() # 0=Monday ... 6=Sunday
@@ -798,28 +670,6 @@ class InsightsEngine:
lines.append(f" ... and {len(report['tools']) - 15} more tools")
lines.append("")
# Skill usage
skills = report.get("skills", {})
top_skills = skills.get("top_skills", [])
if top_skills:
lines.append(" 🧠 Top Skills")
lines.append(" " + "" * 56)
lines.append(f" {'Skill':<28} {'Loads':>7} {'Edits':>7} {'Last used':>11}")
for skill in top_skills[:10]:
last_used = ""
if skill.get("last_used_at"):
last_used = datetime.fromtimestamp(skill["last_used_at"]).strftime("%b %d")
lines.append(
f" {skill['skill'][:28]:<28} {skill['view_count']:>7,} {skill['manage_count']:>7,} {last_used:>11}"
)
summary = skills.get("summary", {})
lines.append(
f" Distinct skills: {summary.get('distinct_skills_used', 0)} "
f"Loads: {summary.get('total_skill_loads', 0):,} "
f"Edits: {summary.get('total_skill_edits', 0):,}"
)
lines.append("")
# Activity patterns
act = report.get("activity", {})
if act.get("by_day"):
@@ -903,18 +753,6 @@ class InsightsEngine:
lines.append(f" {t['tool']}{t['count']:,} calls ({t['percentage']:.1f}%)")
lines.append("")
skills = report.get("skills", {})
if skills.get("top_skills"):
lines.append("**🧠 Top Skills:**")
for skill in skills["top_skills"][:5]:
suffix = ""
if skill.get("last_used_at"):
suffix = f", last used {datetime.fromtimestamp(skill['last_used_at']).strftime('%b %d')}"
lines.append(
f" {skill['skill']}{skill['view_count']:,} loads, {skill['manage_count']:,} edits{suffix}"
)
lines.append("")
# Activity summary
act = report.get("activity", {})
if act.get("busiest_day") and act.get("busiest_hour"):
+24 -128
View File
@@ -4,7 +4,6 @@ Pure utility functions with no AIAgent dependency. Used by ContextCompressor
and run_agent.py for pre-flight context checks.
"""
import ipaddress
import logging
import re
import time
@@ -15,8 +14,6 @@ from urllib.parse import urlparse
import requests
import yaml
from utils import base_url_host_matches, base_url_hostname
from hermes_constants import OPENROUTER_MODELS_URL
logger = logging.getLogger(__name__)
@@ -26,7 +23,7 @@ logger = logging.getLogger(__name__)
# are preserved so the full model name reaches cache lookups and server queries.
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
"qwen-oauth",
"xiaomi",
@@ -37,7 +34,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
"github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
"ollama",
"stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
"opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
"mimo", "xiaomi-mimo",
"arcee-ai", "arceeai",
"xai", "x-ai", "x.ai", "grok",
@@ -52,13 +49,6 @@ _OLLAMA_TAG_PATTERN = re.compile(
)
# Tailscale's CGNAT range (RFC 6598). `ipaddress.is_private` excludes this
# block, so without an explicit check Ollama reached over Tailscale (e.g.
# `http://100.77.243.5:11434`) wouldn't be treated as local and its stream
# read / stale timeouts wouldn't get auto-bumped. Built once at import time.
_TAILSCALE_CGNAT = ipaddress.IPv4Network("100.64.0.0/10")
def _strip_provider_prefix(model: str) -> str:
"""Strip a recognised provider prefix from a model string.
@@ -123,12 +113,10 @@ DEFAULT_CONTEXT_LENGTHS = {
"claude": 200000,
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
# Source: https://developers.openai.com/api/docs/models
# GPT-5.5 (launched Apr 23 2026). Verified via live ChatGPT codex/models
# endpoint: bare slug `gpt-5.5`, no -pro/-mini variants. 400k context on Codex.
"gpt-5.5": 400000,
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
"gpt-5.3-codex-spark": 128000, # Spark variant has reduced 128k context
"gpt-5.1-chat": 128000, # Chat variant has 128k context
"gpt-5": 400000, # GPT-5.x base, mini, codex variants (400k)
"gpt-4.1": 1047576,
@@ -136,8 +124,6 @@ DEFAULT_CONTEXT_LENGTHS = {
# Google
"gemini": 1048576,
# Gemma (open models served via AI Studio)
"gemma-4": 256000, # Gemma 4 family
"gemma4": 256000, # Ollama-style naming (e.g. gemma4:31b-cloud)
"gemma-4-31b": 256000,
"gemma-3": 131072,
"gemma": 8192, # fallback for older gemma models
@@ -183,15 +169,12 @@ DEFAULT_CONTEXT_LENGTHS = {
"Qwen/Qwen3.5-35B-A3B": 131072,
"deepseek-ai/DeepSeek-V3.2": 65536,
"moonshotai/Kimi-K2.5": 262144,
"moonshotai/Kimi-K2.6": 262144,
"moonshotai/Kimi-K2-Thinking": 262144,
"MiniMaxAI/MiniMax-M2.5": 204800,
"XiaomiMiMo/MiMo-V2-Flash": 262144,
"mimo-v2-pro": 1048576,
"mimo-v2.5-pro": 1048576,
"mimo-v2.5": 1048576,
"mimo-v2-omni": 262144,
"mimo-v2-flash": 262144,
"XiaomiMiMo/MiMo-V2-Flash": 256000,
"mimo-v2-pro": 1000000,
"mimo-v2-omni": 256000,
"mimo-v2-flash": 256000,
"zai-org/GLM-5": 202752,
}
@@ -206,7 +189,6 @@ _CONTEXT_LENGTH_KEYS = (
"max_seq_len",
"n_ctx_train",
"n_ctx",
"ctx_size",
)
_MAX_COMPLETION_KEYS = (
@@ -229,15 +211,8 @@ def _normalize_base_url(base_url: str) -> str:
return (base_url or "").strip().rstrip("/")
def _auth_headers(api_key: str = "") -> Dict[str, str]:
token = str(api_key or "").strip()
if not token:
return {}
return {"Authorization": f"Bearer {token}"}
def _is_openrouter_base_url(base_url: str) -> bool:
return base_url_host_matches(base_url, "openrouter.ai")
return "openrouter.ai" in _normalize_base_url(base_url).lower()
def _is_custom_endpoint(base_url: str) -> bool:
@@ -250,12 +225,9 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"chatgpt.com": "openai",
"api.anthropic.com": "anthropic",
"api.z.ai": "zai",
"open.bigmodel.cn": "zai",
"api.moonshot.ai": "kimi-coding",
"api.moonshot.cn": "kimi-coding-cn",
"api.kimi.com": "kimi-coding",
"api.stepfun.ai": "stepfun",
"api.stepfun.com": "stepfun",
"api.arcee.ai": "arcee",
"api.minimax": "minimax",
"dashscope.aliyuncs.com": "alibaba",
@@ -300,15 +272,7 @@ def _is_known_provider_base_url(base_url: str) -> bool:
def is_local_endpoint(base_url: str) -> bool:
"""Return True if base_url points to a local machine.
Recognises loopback (``localhost``, ``127.0.0.0/8``, ``::1``),
container-internal DNS names (``host.docker.internal`` et al.),
RFC-1918 private ranges (``10/8``, ``172.16/12``, ``192.168/16``),
link-local, and Tailscale CGNAT (``100.64.0.0/10``). Tailscale CGNAT
is included so remote-but-trusted Ollama boxes reached over a
Tailscale mesh get the same timeout auto-bumps as localhost Ollama.
"""
"""Return True if base_url points to a local machine (localhost / RFC-1918 / WSL)."""
normalized = _normalize_base_url(base_url)
if not normalized:
return False
@@ -323,17 +287,14 @@ def is_local_endpoint(base_url: str) -> bool:
# Docker / Podman / Lima internal DNS names (e.g. host.docker.internal)
if any(host.endswith(suffix) for suffix in _CONTAINER_LOCAL_SUFFIXES):
return True
# RFC-1918 private ranges, link-local, and Tailscale CGNAT
# RFC-1918 private ranges and link-local
import ipaddress
try:
addr = ipaddress.ip_address(host)
if addr.is_private or addr.is_loopback or addr.is_link_local:
return True
if isinstance(addr, ipaddress.IPv4Address) and addr in _TAILSCALE_CGNAT:
return True
return addr.is_private or addr.is_loopback or addr.is_link_local
except ValueError:
pass
# Bare IP that looks like a private range (e.g. 172.26.x.x for WSL)
# or Tailscale CGNAT (100.64.x.x100.127.x.x).
parts = host.split(".")
if len(parts) == 4:
try:
@@ -344,14 +305,12 @@ def is_local_endpoint(base_url: str) -> bool:
return True
if first == 192 and second == 168:
return True
if first == 100 and 64 <= second <= 127:
return True
except ValueError:
pass
return False
def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
def detect_local_server_type(base_url: str) -> Optional[str]:
"""Detect which local server is running at base_url by probing known endpoints.
Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None.
@@ -363,10 +322,8 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
if server_url.endswith("/v1"):
server_url = server_url[:-3]
headers = _auth_headers(api_key)
try:
with httpx.Client(timeout=2.0, headers=headers) as client:
with httpx.Client(timeout=2.0) as client:
# LM Studio exposes /api/v1/models — check first (most specific)
try:
r = client.get(f"{server_url}/api/v1/models")
@@ -553,59 +510,6 @@ def fetch_endpoint_model_metadata(
headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
last_error: Optional[Exception] = None
if is_local_endpoint(normalized):
try:
if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
response = requests.get(
server_url.rstrip("/") + "/api/v1/models",
headers=headers,
timeout=10,
)
response.raise_for_status()
payload = response.json()
cache: Dict[str, Dict[str, Any]] = {}
for model in payload.get("models", []):
if not isinstance(model, dict):
continue
model_id = model.get("key") or model.get("id")
if not model_id:
continue
entry: Dict[str, Any] = {"name": model.get("name", model_id)}
context_length = None
for inst in model.get("loaded_instances", []) or []:
if not isinstance(inst, dict):
continue
cfg = inst.get("config", {})
ctx = cfg.get("context_length") if isinstance(cfg, dict) else None
if isinstance(ctx, int) and ctx > 0:
context_length = ctx
break
if context_length is None:
context_length = _extract_context_length(model)
if context_length is not None:
entry["context_length"] = context_length
max_completion_tokens = _extract_max_completion_tokens(model)
if max_completion_tokens is not None:
entry["max_completion_tokens"] = max_completion_tokens
pricing = _extract_pricing(model)
if pricing:
entry["pricing"] = pricing
_add_model_aliases(cache, model_id, entry)
alt_id = model.get("id")
if isinstance(alt_id, str) and alt_id and alt_id != model_id:
_add_model_aliases(cache, alt_id, entry)
_endpoint_model_metadata_cache[normalized] = cache
_endpoint_model_metadata_cache_time[normalized] = time.time()
return cache
except Exception as exc:
last_error = exc
for candidate in candidates:
url = candidate.rstrip("/") + "/models"
try:
@@ -812,7 +716,7 @@ def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
return False
def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Optional[int]:
def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
"""Query an Ollama server for the model's context length.
Returns the model's maximum context from GGUF metadata via ``/api/show``,
@@ -830,16 +734,14 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
server_url = server_url[:-3]
try:
server_type = detect_local_server_type(base_url, api_key=api_key)
server_type = detect_local_server_type(base_url)
except Exception:
return None
if server_type != "ollama":
return None
headers = _auth_headers(api_key)
try:
with httpx.Client(timeout=3.0, headers=headers) as client:
with httpx.Client(timeout=3.0) as client:
resp = client.post(f"{server_url}/api/show", json={"name": bare_model})
if resp.status_code != 200:
return None
@@ -867,7 +769,7 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
return None
def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]:
def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
"""Query a local server for the model's context length."""
import httpx
@@ -880,15 +782,13 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
if server_url.endswith("/v1"):
server_url = server_url[:-3]
headers = _auth_headers(api_key)
try:
server_type = detect_local_server_type(base_url, api_key=api_key)
server_type = detect_local_server_type(base_url)
except Exception:
server_type = None
try:
with httpx.Client(timeout=3.0, headers=headers) as client:
with httpx.Client(timeout=3.0) as client:
# Ollama: /api/show returns model details with context info
if server_type == "ollama":
resp = client.post(f"{server_url}/api/show", json={"name": model})
@@ -1099,7 +999,7 @@ def get_model_context_length(
if not _is_known_provider_base_url(base_url):
# 3. Try querying local server directly
if is_local_endpoint(base_url):
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
local_ctx = _query_local_context_length(model, base_url)
if local_ctx and local_ctx > 0:
save_context_length(model, base_url, local_ctx)
return local_ctx
@@ -1113,7 +1013,7 @@ def get_model_context_length(
# 4. Anthropic /v1/models API (only for regular API keys, not OAuth)
if provider == "anthropic" or (
base_url and base_url_hostname(base_url) == "api.anthropic.com"
base_url and "api.anthropic.com" in base_url
):
ctx = _query_anthropic_context_length(model, base_url or "https://api.anthropic.com", api_key)
if ctx:
@@ -1122,11 +1022,7 @@ def get_model_context_length(
# 4b. AWS Bedrock — use static context length table.
# Bedrock's ListFoundationModels doesn't expose context window sizes,
# so we maintain a curated table in bedrock_adapter.py.
if provider == "bedrock" or (
base_url
and base_url_hostname(base_url).startswith("bedrock-runtime.")
and base_url_host_matches(base_url, "amazonaws.com")
):
if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url):
try:
from agent.bedrock_adapter import get_bedrock_context_length
return get_bedrock_context_length(model)
@@ -1173,7 +1069,7 @@ def get_model_context_length(
# 9. Query local server as last resort
if base_url and is_local_endpoint(base_url):
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
local_ctx = _query_local_context_length(model, base_url)
if local_ctx and local_ctx > 0:
save_context_length(model, base_url, local_ctx)
return local_ctx
-4
View File
@@ -146,7 +146,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
"openai-codex": "openai",
"zai": "zai",
"kimi-coding": "kimi-for-coding",
"stepfun": "stepfun",
"kimi-coding-cn": "kimi-for-coding",
"minimax": "minimax",
"minimax-cn": "minimax-cn",
@@ -418,9 +417,6 @@ def list_provider_models(provider: str) -> List[str]:
Returns an empty list if the provider is unknown or has no data.
"""
from hermes_cli.models import normalize_provider
provider = normalize_provider(provider) or provider
models = _get_provider_models(provider)
if models is None:
return []
-190
View File
@@ -1,190 +0,0 @@
"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset.
Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI
tool calling. Requests that violate it fail with HTTP 400:
tools.function.parameters is not a valid moonshot flavored json schema,
details: <...>
Known rejection modes documented at
https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102
and MoonshotAI/kimi-cli#1595:
1. Every property schema must carry a ``type``. Standard JSON Schema allows
type to be omitted (the value is then unconstrained); Moonshot refuses.
2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
the parent. Presence of both causes "type should be defined in anyOf
items instead of the parent schema".
The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
applies at MCP registration time for all providers.
"""
from __future__ import annotations
import copy
from typing import Any, Dict, List
# Keys whose values are maps of name → schema (not schemas themselves).
# When we recurse, we walk the values of these maps as schemas, but we do
# NOT apply the missing-type repair to the map itself.
_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"})
# Keys whose values are lists of schemas.
_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"})
# Keys whose values are a single nested schema.
_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"})
def _repair_schema(node: Any, is_schema: bool = True) -> Any:
"""Recursively apply Moonshot repairs to a schema node.
``is_schema=True`` means this dict is a JSON Schema node and gets the
missing-type + anyOf-parent repairs applied. ``is_schema=False`` means
it's a container map (e.g. the value of ``properties``) and we only
recurse into its values.
"""
if isinstance(node, list):
# Lists only show up under schema-list keys (anyOf/oneOf/allOf), so
# every element is itself a schema.
return [_repair_schema(item, is_schema=True) for item in node]
if not isinstance(node, dict):
return node
# Walk the dict, deciding per-key whether recursion is into a schema
# node, a container map, or a scalar.
repaired: Dict[str, Any] = {}
for key, value in node.items():
if key in _SCHEMA_MAP_KEYS and isinstance(value, dict):
# Map of name → schema. Don't treat the map itself as a schema
# (it has no type / properties of its own), but each value is.
repaired[key] = {
sub_key: _repair_schema(sub_val, is_schema=True)
for sub_key, sub_val in value.items()
}
elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
elif key in _SCHEMA_NODE_KEYS:
# items / not / additionalProperties: single nested schema.
# additionalProperties can also be a bool — leave those alone.
if isinstance(value, dict):
repaired[key] = _repair_schema(value, is_schema=True)
else:
repaired[key] = value
else:
# Scalars (description, title, format, enum values, etc.) pass through.
repaired[key] = value
if not is_schema:
return repaired
# Rule 2: when anyOf is present, type belongs only on the children.
if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
repaired.pop("type", None)
return repaired
# Rule 1: property schemas without type need one. $ref nodes are exempt
# — their type comes from the referenced definition.
if "$ref" in repaired:
return repaired
return _fill_missing_type(repaired)
def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
"""Infer a reasonable ``type`` if this schema node has none."""
if "type" in node and node["type"] not in (None, ""):
return node
# Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
# → type of first enum value, else fall back to ``string`` (safest scalar).
if "properties" in node or "required" in node or "additionalProperties" in node:
inferred = "object"
elif "items" in node or "prefixItems" in node:
inferred = "array"
elif "enum" in node and isinstance(node["enum"], list) and node["enum"]:
sample = node["enum"][0]
if isinstance(sample, bool):
inferred = "boolean"
elif isinstance(sample, int):
inferred = "integer"
elif isinstance(sample, float):
inferred = "number"
else:
inferred = "string"
else:
inferred = "string"
return {**node, "type": inferred}
def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]:
"""Normalize tool parameters to a Moonshot-compatible object schema.
Returns a deep-copied schema with the two flavored-JSON-Schema repairs
applied. Input is not mutated.
"""
if not isinstance(parameters, dict):
return {"type": "object", "properties": {}}
repaired = _repair_schema(copy.deepcopy(parameters), is_schema=True)
if not isinstance(repaired, dict):
return {"type": "object", "properties": {}}
# Top-level must be an object schema
if repaired.get("type") != "object":
repaired["type"] = "object"
if "properties" not in repaired:
repaired["properties"] = {}
return repaired
def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters."""
if not tools:
return tools
sanitized: List[Dict[str, Any]] = []
any_change = False
for tool in tools:
if not isinstance(tool, dict):
sanitized.append(tool)
continue
fn = tool.get("function")
if not isinstance(fn, dict):
sanitized.append(tool)
continue
params = fn.get("parameters")
repaired = sanitize_moonshot_tool_parameters(params)
if repaired is not params:
any_change = True
new_fn = {**fn, "parameters": repaired}
sanitized.append({**tool, "function": new_fn})
else:
sanitized.append(tool)
return sanitized if any_change else tools
def is_moonshot_model(model: str | None) -> bool:
"""True for any Kimi / Moonshot model slug, regardless of aggregator prefix.
Matches bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) and aggregator-
prefixed slugs (``nous/moonshotai/kimi-k2.6``, ``openrouter/moonshotai/...``).
Detection by model name covers Nous / OpenRouter / other aggregators that
route to Moonshot's inference, where the base URL is the aggregator's, not
``api.moonshot.ai``.
"""
if not model:
return False
bare = model.strip().lower()
# Last path segment (covers aggregator-prefixed slugs)
tail = bare.rsplit("/", 1)[-1]
if tail.startswith("kimi-") or tail == "kimi":
return True
# Vendor-prefixed forms commonly used on aggregators
if "moonshot" in bare or "/kimi" in bare or bare.startswith("kimi"):
return True
return False
+1 -33
View File
@@ -350,13 +350,7 @@ PLATFORM_HINTS = {
),
"cli": (
"You are a CLI AI Agent. Try not to use markdown but simple text "
"renderable inside a terminal. "
"File delivery: there is no attachment channel — the user reads your "
"response directly in their terminal. Do NOT emit MEDIA:/path tags "
"(those are only intercepted on messaging platforms like Telegram, "
"Discord, Slack, etc.; on the CLI they render as literal text). "
"When referring to a file you created or changed, just state its "
"absolute path in plain text; the user can open it from there."
"renderable inside a terminal."
),
"sms": (
"You are communicating via SMS. Keep responses concise and use plain text "
@@ -370,32 +364,6 @@ PLATFORM_HINTS = {
"MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
".heic) appear as photos and other files arrive as attachments."
),
"mattermost": (
"You are in a Mattermost workspace communicating with your user. "
"Mattermost renders standard Markdown — headings, bold, italic, code "
"blocks, and tables all work. "
"You can send media files natively: include MEDIA:/absolute/path/to/file "
"in your response. Images (.jpg, .png, .webp) are uploaded as photo "
"attachments, audio and video as file attachments. "
"Image URLs in markdown format ![alt](url) are rendered as inline previews automatically."
),
"matrix": (
"You are in a Matrix room communicating with your user. "
"Matrix renders Markdown — bold, italic, code blocks, and links work; "
"the adapter converts your Markdown to HTML for rich display. "
"You can send media files natively: include MEDIA:/absolute/path/to/file "
"in your response. Images (.jpg, .png, .webp) are sent as inline photos, "
"audio (.ogg, .mp3) as voice/audio messages, video (.mp4) inline, "
"and other files as downloadable attachments."
),
"feishu": (
"You are in a Feishu (Lark) workspace communicating with your user. "
"Feishu renders Markdown in messages — bold, italic, code blocks, and "
"links are supported. "
"You can send media files natively: include MEDIA:/absolute/path/to/file "
"in your response. Images (.jpg, .png, .webp) are uploaded and displayed "
"inline, audio files as voice messages, and other files as attachments."
),
"weixin": (
"You are on Weixin/WeChat. Markdown formatting is supported, so you may use it when "
"it improves readability, but keep the message compact and chat-friendly. You can send media files natively: "
-142
View File
@@ -13,48 +13,6 @@ import re
logger = logging.getLogger(__name__)
# Sensitive query-string parameter names (case-insensitive exact match).
# Ported from nearai/ironclaw#2529 — catches tokens whose values don't match
# any known vendor prefix regex (e.g. opaque tokens, short OAuth codes).
_SENSITIVE_QUERY_PARAMS = frozenset({
"access_token",
"refresh_token",
"id_token",
"token",
"api_key",
"apikey",
"client_secret",
"password",
"auth",
"jwt",
"session",
"secret",
"key",
"code", # OAuth authorization codes
"signature", # pre-signed URL signatures
"x-amz-signature",
})
# Sensitive form-urlencoded / JSON body key names (case-insensitive exact match).
# Exact match, NOT substring — "token_count" and "session_id" must NOT match.
# Ported from nearai/ironclaw#2529.
_SENSITIVE_BODY_KEYS = frozenset({
"access_token",
"refresh_token",
"id_token",
"token",
"api_key",
"apikey",
"client_secret",
"password",
"auth",
"jwt",
"secret",
"private_key",
"authorization",
"key",
})
# Snapshot at import time so runtime env mutations (e.g. LLM-generated
# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
@@ -150,30 +108,6 @@ _DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
# Negative lookahead prevents matching hex strings or identifiers
_SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
# URLs containing query strings — matches `scheme://...?...[# or end]`.
# Used to scan text for URLs whose query params may contain secrets.
# Ported from nearai/ironclaw#2529.
_URL_WITH_QUERY_RE = re.compile(
r"(https?|wss?|ftp)://" # scheme
r"([^\s/?#]+)" # authority (may include userinfo)
r"([^\s?#]*)" # path
r"\?([^\s#]+)" # query (required)
r"(#\S*)?", # optional fragment
)
# URLs containing userinfo — `scheme://user:password@host` for ANY scheme
# (not just DB protocols already covered by _DB_CONNSTR_RE above).
# Catches things like `https://user:token@api.example.com/v1/foo`.
_URL_USERINFO_RE = re.compile(
r"(https?|wss?|ftp)://([^/\s:@]+):([^/\s@]+)@",
)
# Form-urlencoded body detection: conservative — only applies when the entire
# text looks like a query string (k=v&k=v pattern with no newlines).
_FORM_BODY_RE = re.compile(
r"^[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*(?:&[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*)+$"
)
# Compile known prefix patterns into one alternation
_PREFIX_RE = re.compile(
r"(?<![A-Za-z0-9_-])(" + "|".join(_PREFIX_PATTERNS) + r")(?![A-Za-z0-9_-])"
@@ -187,72 +121,6 @@ def _mask_token(token: str) -> str:
return f"{token[:6]}...{token[-4:]}"
def _redact_query_string(query: str) -> str:
"""Redact sensitive parameter values in a URL query string.
Handles `k=v&k=v` format. Sensitive keys (case-insensitive) have values
replaced with `***`. Non-sensitive keys pass through unchanged.
Empty or malformed pairs are preserved as-is.
"""
if not query:
return query
parts = []
for pair in query.split("&"):
if "=" not in pair:
parts.append(pair)
continue
key, _, value = pair.partition("=")
if key.lower() in _SENSITIVE_QUERY_PARAMS:
parts.append(f"{key}=***")
else:
parts.append(pair)
return "&".join(parts)
def _redact_url_query_params(text: str) -> str:
"""Scan text for URLs with query strings and redact sensitive params.
Catches opaque tokens that don't match vendor prefix regexes, e.g.
`https://example.com/cb?code=ABC123&state=xyz` `...?code=***&state=xyz`.
"""
def _sub(m: re.Match) -> str:
scheme = m.group(1)
authority = m.group(2)
path = m.group(3)
query = _redact_query_string(m.group(4))
fragment = m.group(5) or ""
return f"{scheme}://{authority}{path}?{query}{fragment}"
return _URL_WITH_QUERY_RE.sub(_sub, text)
def _redact_url_userinfo(text: str) -> str:
"""Strip `user:password@` from HTTP/WS/FTP URLs.
DB protocols (postgres, mysql, mongodb, redis, amqp) are handled
separately by `_DB_CONNSTR_RE`.
"""
return _URL_USERINFO_RE.sub(
lambda m: f"{m.group(1)}://{m.group(2)}:***@",
text,
)
def _redact_form_body(text: str) -> str:
"""Redact sensitive values in a form-urlencoded body.
Only applies when the entire input looks like a pure form body
(k=v&k=v with no newlines, no other text). Single-line non-form
text passes through unchanged. This is a conservative pass the
`_redact_url_query_params` function handles embedded query strings.
"""
if not text or "\n" in text or "&" not in text:
return text
# The body-body form check is strict: only trigger on clean k=v&k=v.
if not _FORM_BODY_RE.match(text.strip()):
return text
return _redact_query_string(text.strip())
def redact_sensitive_text(text: str) -> str:
"""Apply all redaction patterns to a block of text.
@@ -305,16 +173,6 @@ def redact_sensitive_text(text: str) -> str:
# JWT tokens (eyJ... — base64-encoded JSON headers)
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
# URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes.
# DB schemes are handled above by _DB_CONNSTR_RE.
text = _redact_url_userinfo(text)
# URL query params containing opaque tokens (?access_token=…&code=…)
text = _redact_url_query_params(text)
# Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
text = _redact_form_body(text)
# Discord user/role mentions (<@snowflake_id>)
text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)
-831
View File
@@ -1,831 +0,0 @@
"""
Shell-script hooks bridge.
Reads the ``hooks:`` block from ``cli-config.yaml``, prompts the user for
consent on first use of each ``(event, command)`` pair, and registers
callbacks on the existing plugin hook manager so every existing
``invoke_hook()`` site dispatches to the configured shell scripts with
zero changes to call sites.
Design notes
------------
* Python plugins and shell hooks compose naturally: both flow through
:func:`hermes_cli.plugins.invoke_hook` and its aggregators. Python
plugins are registered first (via ``discover_and_load()``) so their
block decisions win ties over shell-hook blocks.
* Subprocess execution uses ``shlex.split(os.path.expanduser(command))``
with ``shell=False`` no shell injection footguns. Users that need
pipes/redirection wrap their logic in a script.
* First-use consent is gated by the allowlist under
``~/.hermes/shell-hooks-allowlist.json``. Non-TTY callers must pass
``accept_hooks=True`` (resolved from ``--accept-hooks``,
``HERMES_ACCEPT_HOOKS``, or ``hooks_auto_accept: true`` in config)
for registration to succeed without a prompt.
* Registration is idempotent safe to invoke from both the CLI entry
point (``hermes_cli/main.py``) and the gateway entry point
(``gateway/run.py``).
Wire protocol
-------------
**stdin** (JSON, piped to the script)::
{
"hook_event_name": "pre_tool_call",
"tool_name": "terminal",
"tool_input": {"command": "rm -rf /"},
"session_id": "sess_abc123",
"cwd": "/home/user/project",
"extra": {...} # event-specific kwargs
}
**stdout** (JSON, optional anything else is ignored)::
# Block a pre_tool_call (either shape accepted; normalised internally):
{"decision": "block", "reason": "Forbidden command"} # Claude-Code-style
{"action": "block", "message": "Forbidden command"} # Hermes-canonical
# Inject context for pre_llm_call:
{"context": "Today is Friday"}
# Silent no-op:
<empty or any non-matching JSON object>
"""
from __future__ import annotations
import difflib
import json
import logging
import os
import re
import shlex
import subprocess
import sys
import tempfile
import threading
import time
from contextlib import contextmanager
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple
try:
import fcntl # POSIX only; Windows falls back to best-effort without flock.
except ImportError: # pragma: no cover
fcntl = None # type: ignore[assignment]
from hermes_constants import get_hermes_home
logger = logging.getLogger(__name__)
DEFAULT_TIMEOUT_SECONDS = 60
MAX_TIMEOUT_SECONDS = 300
ALLOWLIST_FILENAME = "shell-hooks-allowlist.json"
# (event, matcher, command) triples that have been wired to the plugin
# manager in the current process. Matcher is part of the key because
# the same script can legitimately register for different matchers under
# the same event (e.g. one entry per tool the user wants to gate).
# Second registration attempts for the exact same triple become no-ops
# so the CLI and gateway can both call register_from_config() safely.
_registered: Set[Tuple[str, Optional[str], str]] = set()
_registered_lock = threading.Lock()
# Intra-process lock for allowlist read-modify-write on platforms that
# lack ``fcntl`` (non-POSIX). Kept separate from ``_registered_lock``
# because ``register_from_config`` already holds ``_registered_lock`` when
# it triggers ``_record_approval`` — reusing it here would self-deadlock
# (``threading.Lock`` is non-reentrant). POSIX callers use the sibling
# ``.lock`` file via ``fcntl.flock`` and bypass this.
_allowlist_write_lock = threading.Lock()
@dataclass
class ShellHookSpec:
"""Parsed and validated representation of a single ``hooks:`` entry."""
event: str
command: str
matcher: Optional[str] = None
timeout: int = DEFAULT_TIMEOUT_SECONDS
compiled_matcher: Optional[re.Pattern] = field(default=None, repr=False)
def __post_init__(self) -> None:
# Strip whitespace introduced by YAML quirks (e.g. multi-line string
# folding) — a matcher of " terminal" would otherwise silently fail
# to match "terminal" without any diagnostic.
if isinstance(self.matcher, str):
stripped = self.matcher.strip()
self.matcher = stripped if stripped else None
if self.matcher:
try:
self.compiled_matcher = re.compile(self.matcher)
except re.error as exc:
logger.warning(
"shell hook matcher %r is invalid (%s) — treating as "
"literal equality", self.matcher, exc,
)
self.compiled_matcher = None
def matches_tool(self, tool_name: Optional[str]) -> bool:
if not self.matcher:
return True
if tool_name is None:
return False
if self.compiled_matcher is not None:
return self.compiled_matcher.fullmatch(tool_name) is not None
# compiled_matcher is None only when the regex failed to compile,
# in which case we already warned and fall back to literal equality.
return tool_name == self.matcher
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def register_from_config(
cfg: Optional[Dict[str, Any]],
*,
accept_hooks: bool = False,
) -> List[ShellHookSpec]:
"""Register every configured shell hook on the plugin manager.
``cfg`` is the full parsed config dict (``hermes_cli.config.load_config``
output). The ``hooks:`` key is read out of it. Missing, empty, or
non-dict ``hooks`` is treated as zero configured hooks.
``accept_hooks=True`` skips the TTY consent prompt the caller is
promising that the user has opted in via a flag, env var, or config
setting. ``HERMES_ACCEPT_HOOKS=1`` and ``hooks_auto_accept: true`` are
also honored inside this function so either CLI or gateway call sites
pick them up.
Returns the list of :class:`ShellHookSpec` entries that ended up wired
up on the plugin manager. Skipped entries (unknown events, malformed,
not allowlisted, already registered) are logged but not returned.
"""
if not isinstance(cfg, dict):
return []
effective_accept = _resolve_effective_accept(cfg, accept_hooks)
specs = _parse_hooks_block(cfg.get("hooks"))
if not specs:
return []
registered: List[ShellHookSpec] = []
# Import lazily — avoids circular imports at module-load time.
from hermes_cli.plugins import get_plugin_manager
manager = get_plugin_manager()
# Idempotence + allowlist read happen under the lock; the TTY
# prompt runs outside so other threads aren't parked on a blocking
# input(). Mutation re-takes the lock with a defensive idempotence
# re-check in case two callers ever race through the prompt.
for spec in specs:
key = (spec.event, spec.matcher, spec.command)
with _registered_lock:
if key in _registered:
continue
already_allowlisted = _is_allowlisted(spec.event, spec.command)
if not already_allowlisted:
if not _prompt_and_record(
spec.event, spec.command, accept_hooks=effective_accept,
):
logger.warning(
"shell hook for %s (%s) not allowlisted — skipped. "
"Use --accept-hooks / HERMES_ACCEPT_HOOKS=1 / "
"hooks_auto_accept: true, or approve at the TTY "
"prompt next run.",
spec.event, spec.command,
)
continue
with _registered_lock:
if key in _registered:
continue
manager._hooks.setdefault(spec.event, []).append(_make_callback(spec))
_registered.add(key)
registered.append(spec)
logger.info(
"shell hook registered: %s -> %s (matcher=%s, timeout=%ds)",
spec.event, spec.command, spec.matcher, spec.timeout,
)
return registered
def iter_configured_hooks(cfg: Optional[Dict[str, Any]]) -> List[ShellHookSpec]:
"""Return the parsed ``ShellHookSpec`` entries from config without
registering anything. Used by ``hermes hooks list`` and ``doctor``."""
if not isinstance(cfg, dict):
return []
return _parse_hooks_block(cfg.get("hooks"))
def reset_for_tests() -> None:
"""Clear the idempotence set. Test-only helper."""
with _registered_lock:
_registered.clear()
# ---------------------------------------------------------------------------
# Config parsing
# ---------------------------------------------------------------------------
def _parse_hooks_block(hooks_cfg: Any) -> List[ShellHookSpec]:
"""Normalise the ``hooks:`` dict into a flat list of ``ShellHookSpec``.
Malformed entries warn-and-skip we never raise from config parsing
because a broken hook must not crash the agent.
"""
from hermes_cli.plugins import VALID_HOOKS
if not isinstance(hooks_cfg, dict):
return []
specs: List[ShellHookSpec] = []
for event_name, entries in hooks_cfg.items():
if event_name not in VALID_HOOKS:
suggestion = difflib.get_close_matches(
str(event_name), VALID_HOOKS, n=1, cutoff=0.6,
)
if suggestion:
logger.warning(
"unknown hook event %r in hooks: config — did you mean %r?",
event_name, suggestion[0],
)
else:
logger.warning(
"unknown hook event %r in hooks: config (valid: %s)",
event_name, ", ".join(sorted(VALID_HOOKS)),
)
continue
if entries is None:
continue
if not isinstance(entries, list):
logger.warning(
"hooks.%s must be a list of hook definitions; got %s",
event_name, type(entries).__name__,
)
continue
for i, raw in enumerate(entries):
spec = _parse_single_entry(event_name, i, raw)
if spec is not None:
specs.append(spec)
return specs
def _parse_single_entry(
event: str, index: int, raw: Any,
) -> Optional[ShellHookSpec]:
if not isinstance(raw, dict):
logger.warning(
"hooks.%s[%d] must be a mapping with a 'command' key; got %s",
event, index, type(raw).__name__,
)
return None
command = raw.get("command")
if not isinstance(command, str) or not command.strip():
logger.warning(
"hooks.%s[%d] is missing a non-empty 'command' field",
event, index,
)
return None
matcher = raw.get("matcher")
if matcher is not None and not isinstance(matcher, str):
logger.warning(
"hooks.%s[%d].matcher must be a string regex; ignoring",
event, index,
)
matcher = None
if matcher is not None and event not in ("pre_tool_call", "post_tool_call"):
logger.warning(
"hooks.%s[%d].matcher=%r will be ignored at runtime — the "
"matcher field is only honored for pre_tool_call / "
"post_tool_call. The hook will fire on every %s event.",
event, index, matcher, event,
)
matcher = None
timeout_raw = raw.get("timeout", DEFAULT_TIMEOUT_SECONDS)
try:
timeout = int(timeout_raw)
except (TypeError, ValueError):
logger.warning(
"hooks.%s[%d].timeout must be an int (got %r); using default %ds",
event, index, timeout_raw, DEFAULT_TIMEOUT_SECONDS,
)
timeout = DEFAULT_TIMEOUT_SECONDS
if timeout < 1:
logger.warning(
"hooks.%s[%d].timeout must be >=1; using default %ds",
event, index, DEFAULT_TIMEOUT_SECONDS,
)
timeout = DEFAULT_TIMEOUT_SECONDS
if timeout > MAX_TIMEOUT_SECONDS:
logger.warning(
"hooks.%s[%d].timeout=%ds exceeds max %ds; clamping",
event, index, timeout, MAX_TIMEOUT_SECONDS,
)
timeout = MAX_TIMEOUT_SECONDS
return ShellHookSpec(
event=event,
command=command.strip(),
matcher=matcher,
timeout=timeout,
)
# ---------------------------------------------------------------------------
# Subprocess callback
# ---------------------------------------------------------------------------
_TOP_LEVEL_PAYLOAD_KEYS = {"tool_name", "args", "session_id", "parent_session_id"}
def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
"""Run ``spec.command`` as a subprocess with ``stdin_json`` on stdin.
Returns a diagnostic dict with the same keys for every outcome
(``returncode``, ``stdout``, ``stderr``, ``timed_out``,
``elapsed_seconds``, ``error``). This is the single place the
subprocess is actually invoked both the live callback path
(:func:`_make_callback`) and the CLI test helper (:func:`run_once`)
go through it.
"""
result: Dict[str, Any] = {
"returncode": None,
"stdout": "",
"stderr": "",
"timed_out": False,
"elapsed_seconds": 0.0,
"error": None,
}
try:
argv = shlex.split(os.path.expanduser(spec.command))
except ValueError as exc:
result["error"] = f"command {spec.command!r} cannot be parsed: {exc}"
return result
if not argv:
result["error"] = "empty command"
return result
t0 = time.monotonic()
try:
proc = subprocess.run(
argv,
input=stdin_json,
capture_output=True,
timeout=spec.timeout,
text=True,
shell=False,
)
except subprocess.TimeoutExpired:
result["timed_out"] = True
result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
return result
except FileNotFoundError:
result["error"] = "command not found"
return result
except PermissionError:
result["error"] = "command not executable"
return result
except Exception as exc: # pragma: no cover — defensive
result["error"] = str(exc)
return result
result["returncode"] = proc.returncode
result["stdout"] = proc.stdout or ""
result["stderr"] = proc.stderr or ""
result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
return result
def _make_callback(spec: ShellHookSpec) -> Callable[..., Optional[Dict[str, Any]]]:
"""Build the closure that ``invoke_hook()`` will call per firing."""
def _callback(**kwargs: Any) -> Optional[Dict[str, Any]]:
# Matcher gate — only meaningful for tool-scoped events.
if spec.event in ("pre_tool_call", "post_tool_call"):
if not spec.matches_tool(kwargs.get("tool_name")):
return None
r = _spawn(spec, _serialize_payload(spec.event, kwargs))
if r["error"]:
logger.warning(
"shell hook failed (event=%s command=%s): %s",
spec.event, spec.command, r["error"],
)
return None
if r["timed_out"]:
logger.warning(
"shell hook timed out after %.2fs (event=%s command=%s)",
r["elapsed_seconds"], spec.event, spec.command,
)
return None
stderr = r["stderr"].strip()
if stderr:
logger.debug(
"shell hook stderr (event=%s command=%s): %s",
spec.event, spec.command, stderr[:400],
)
# Non-zero exits: log but still parse stdout so scripts that
# signal failure via exit code can also return a block directive.
if r["returncode"] != 0:
logger.warning(
"shell hook exited %d (event=%s command=%s); stderr=%s",
r["returncode"], spec.event, spec.command, stderr[:400],
)
return _parse_response(spec.event, r["stdout"])
_callback.__name__ = f"shell_hook[{spec.event}:{spec.command}]"
_callback.__qualname__ = _callback.__name__
return _callback
def _serialize_payload(event: str, kwargs: Dict[str, Any]) -> str:
"""Render the stdin JSON payload. Unserialisable values are
stringified via ``default=str`` rather than dropped."""
extras = {k: v for k, v in kwargs.items() if k not in _TOP_LEVEL_PAYLOAD_KEYS}
try:
cwd = str(Path.cwd())
except OSError:
cwd = ""
payload = {
"hook_event_name": event,
"tool_name": kwargs.get("tool_name"),
"tool_input": kwargs.get("args") if isinstance(kwargs.get("args"), dict) else None,
"session_id": kwargs.get("session_id") or kwargs.get("parent_session_id") or "",
"cwd": cwd,
"extra": extras,
}
return json.dumps(payload, ensure_ascii=False, default=str)
def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]:
"""Translate stdout JSON into a Hermes wire-shape dict.
For ``pre_tool_call`` the Claude-Code-style ``{"decision": "block",
"reason": "..."}`` payload is translated into the canonical Hermes
``{"action": "block", "message": "..."}`` shape expected by
:func:`hermes_cli.plugins.get_pre_tool_call_block_message`. This is
the single most important correctness invariant in this module
skipping the translation silently breaks every ``pre_tool_call``
block directive.
For ``pre_llm_call``, ``{"context": "..."}`` is passed through
unchanged to match the existing plugin-hook contract.
Anything else returns ``None``.
"""
stdout = (stdout or "").strip()
if not stdout:
return None
try:
data = json.loads(stdout)
except json.JSONDecodeError:
logger.warning(
"shell hook stdout was not valid JSON (event=%s): %s",
event, stdout[:200],
)
return None
if not isinstance(data, dict):
return None
if event == "pre_tool_call":
if data.get("action") == "block":
message = data.get("message") or data.get("reason") or ""
if isinstance(message, str) and message:
return {"action": "block", "message": message}
if data.get("decision") == "block":
message = data.get("reason") or data.get("message") or ""
if isinstance(message, str) and message:
return {"action": "block", "message": message}
return None
context = data.get("context")
if isinstance(context, str) and context.strip():
return {"context": context}
return None
# ---------------------------------------------------------------------------
# Allowlist / consent
# ---------------------------------------------------------------------------
def allowlist_path() -> Path:
"""Path to the per-user shell-hook allowlist file."""
return get_hermes_home() / ALLOWLIST_FILENAME
def load_allowlist() -> Dict[str, Any]:
"""Return the parsed allowlist, or an empty skeleton if absent."""
try:
raw = json.loads(allowlist_path().read_text())
except (FileNotFoundError, json.JSONDecodeError, OSError):
return {"approvals": []}
if not isinstance(raw, dict):
return {"approvals": []}
approvals = raw.get("approvals")
if not isinstance(approvals, list):
raw["approvals"] = []
return raw
def save_allowlist(data: Dict[str, Any]) -> None:
"""Atomically persist the allowlist via per-process ``mkstemp`` +
``os.replace``. Cross-process read-modify-write races are handled
by :func:`_locked_update_approvals` (``fcntl.flock``). On OSError
the failure is logged; the in-process hook still registers but
the approval won't survive across runs."""
p = allowlist_path()
try:
p.parent.mkdir(parents=True, exist_ok=True)
fd, tmp_path = tempfile.mkstemp(
prefix=f"{p.name}.", suffix=".tmp", dir=str(p.parent),
)
try:
with os.fdopen(fd, "w") as fh:
fh.write(json.dumps(data, indent=2, sort_keys=True))
os.replace(tmp_path, p)
except Exception:
try:
os.unlink(tmp_path)
except OSError:
pass
raise
except OSError as exc:
logger.warning(
"Failed to persist shell hook allowlist to %s: %s. "
"The approval is in-memory for this run, but the next "
"startup will re-prompt (or skip registration on non-TTY "
"runs without --accept-hooks / HERMES_ACCEPT_HOOKS).",
p, exc,
)
def _is_allowlisted(event: str, command: str) -> bool:
data = load_allowlist()
return any(
isinstance(e, dict)
and e.get("event") == event
and e.get("command") == command
for e in data.get("approvals", [])
)
@contextmanager
def _locked_update_approvals() -> Iterator[Dict[str, Any]]:
"""Serialise read-modify-write on the allowlist across processes.
Holds an exclusive ``flock`` on a sibling lock file for the duration
of the update so concurrent ``_record_approval``/``revoke`` callers
cannot clobber each other's changes (the race Codex reproduced with
2050 simultaneous writers). Falls back to an in-process lock on
platforms without ``fcntl``.
"""
p = allowlist_path()
p.parent.mkdir(parents=True, exist_ok=True)
lock_path = p.with_suffix(p.suffix + ".lock")
if fcntl is None: # pragma: no cover — non-POSIX fallback
with _allowlist_write_lock:
data = load_allowlist()
yield data
save_allowlist(data)
return
with open(lock_path, "a+") as lock_fh:
fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)
try:
data = load_allowlist()
yield data
save_allowlist(data)
finally:
fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN)
def _prompt_and_record(
event: str, command: str, *, accept_hooks: bool,
) -> bool:
"""Decide whether to approve an unseen ``(event, command)`` pair.
Returns ``True`` iff the approval was granted and recorded.
"""
if accept_hooks:
_record_approval(event, command)
logger.info(
"shell hook auto-approved via --accept-hooks / env / config: "
"%s -> %s", event, command,
)
return True
if not sys.stdin.isatty():
return False
print(
f"\n⚠ Hermes is about to register a shell hook that will run a\n"
f" command on your behalf.\n\n"
f" Event: {event}\n"
f" Command: {command}\n\n"
f" Commands run with your full user credentials. Only approve\n"
f" commands you trust."
)
try:
answer = input("Allow this hook to run? [y/N]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
print() # keep the terminal tidy after ^C
return False
if answer in ("y", "yes"):
_record_approval(event, command)
return True
return False
def _record_approval(event: str, command: str) -> None:
entry = {
"event": event,
"command": command,
"approved_at": _utc_now_iso(),
"script_mtime_at_approval": script_mtime_iso(command),
}
with _locked_update_approvals() as data:
data["approvals"] = [
e for e in data.get("approvals", [])
if not (
isinstance(e, dict)
and e.get("event") == event
and e.get("command") == command
)
] + [entry]
def _utc_now_iso() -> str:
return datetime.now(tz=timezone.utc).isoformat().replace("+00:00", "Z")
def revoke(command: str) -> int:
"""Remove every allowlist entry matching ``command``.
Returns the number of entries removed. Does not unregister any
callbacks that are already live on the plugin manager in the current
process restart the CLI / gateway to drop them.
"""
with _locked_update_approvals() as data:
before = len(data.get("approvals", []))
data["approvals"] = [
e for e in data.get("approvals", [])
if not (isinstance(e, dict) and e.get("command") == command)
]
after = len(data["approvals"])
return before - after
_SCRIPT_EXTENSIONS: Tuple[str, ...] = (
".sh", ".bash", ".zsh", ".fish",
".py", ".pyw",
".rb", ".pl", ".lua",
".js", ".mjs", ".cjs", ".ts",
)
def _command_script_path(command: str) -> str:
"""Return the script path from ``command`` for doctor / drift checks.
Prefers a token ending in a known script extension, then a token
containing ``/`` or leading ``~``, then the first token. Handles
``python3 /path/hook.py``, ``/usr/bin/env bash hook.sh``, and the
common bare-path form.
"""
try:
parts = shlex.split(command)
except ValueError:
return command
if not parts:
return command
for part in parts:
if part.lower().endswith(_SCRIPT_EXTENSIONS):
return part
for part in parts:
if "/" in part or part.startswith("~"):
return part
return parts[0]
# ---------------------------------------------------------------------------
# Helpers for accept-hooks resolution
# ---------------------------------------------------------------------------
def _resolve_effective_accept(
cfg: Dict[str, Any], accept_hooks_arg: bool,
) -> bool:
"""Combine all three opt-in channels into a single boolean.
Precedence (any truthy source flips us on):
1. ``--accept-hooks`` flag (CLI) / explicit argument
2. ``HERMES_ACCEPT_HOOKS`` env var
3. ``hooks_auto_accept: true`` in ``cli-config.yaml``
"""
if accept_hooks_arg:
return True
env = os.environ.get("HERMES_ACCEPT_HOOKS", "").strip().lower()
if env in ("1", "true", "yes", "on"):
return True
cfg_val = cfg.get("hooks_auto_accept", False)
return bool(cfg_val)
# ---------------------------------------------------------------------------
# Introspection (used by `hermes hooks` CLI)
# ---------------------------------------------------------------------------
def allowlist_entry_for(event: str, command: str) -> Optional[Dict[str, Any]]:
"""Return the allowlist record for this pair, if any."""
for e in load_allowlist().get("approvals", []):
if (
isinstance(e, dict)
and e.get("event") == event
and e.get("command") == command
):
return e
return None
def script_mtime_iso(command: str) -> Optional[str]:
"""ISO-8601 mtime of the resolved script path, or ``None`` if the
script is missing."""
path = _command_script_path(command)
if not path:
return None
try:
expanded = os.path.expanduser(path)
return datetime.fromtimestamp(
os.path.getmtime(expanded), tz=timezone.utc,
).isoformat().replace("+00:00", "Z")
except OSError:
return None
def script_is_executable(command: str) -> bool:
"""Return ``True`` iff ``command`` is runnable as configured.
For a bare invocation (``/path/hook.sh``) the script itself must be
executable. For interpreter-prefixed commands (``python3
/path/hook.py``, ``/usr/bin/env bash hook.sh``) the script just has
to be readable the interpreter doesn't care about the ``X_OK``
bit. Mirrors what ``_spawn`` would actually do at runtime."""
path = _command_script_path(command)
if not path:
return False
expanded = os.path.expanduser(path)
if not os.path.isfile(expanded):
return False
try:
argv = shlex.split(command)
except ValueError:
return False
is_bare_invocation = bool(argv) and argv[0] == path
required = os.X_OK if is_bare_invocation else os.R_OK
return os.access(expanded, required)
def run_once(
spec: ShellHookSpec, kwargs: Dict[str, Any],
) -> Dict[str, Any]:
"""Fire a single shell-hook invocation with a synthetic payload.
Used by ``hermes hooks test`` and ``hermes hooks doctor``.
``kwargs`` is the same dict that :func:`hermes_cli.plugins.invoke_hook`
would pass at runtime. It is routed through :func:`_serialize_payload`
so the synthetic stdin exactly matches what a real hook firing would
produce otherwise scripts tested via ``hermes hooks test`` could
diverge silently from production behaviour.
Returns the :func:`_spawn` diagnostic dict plus a ``parsed`` field
holding the canonical Hermes-wire-shape response."""
stdin_json = _serialize_payload(spec.event, kwargs)
result = _spawn(spec, stdin_json)
result["parsed"] = _parse_response(spec.event, result["stdout"])
return result
+5 -136
View File
@@ -8,7 +8,6 @@ can invoke skills via /skill-name commands and prompt-only built-ins like
import json
import logging
import re
import subprocess
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional
@@ -23,110 +22,6 @@ _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
# left as-is so the user can debug them.
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
# Matches inline shell snippets like: !`date +%Y-%m-%d`
# Non-greedy, single-line only — no newlines inside the backticks.
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
# Cap inline-shell output so a runaway command can't blow out the context.
_INLINE_SHELL_MAX_OUTPUT = 4000
def _load_skills_config() -> dict:
"""Load the ``skills`` section of config.yaml (best-effort)."""
try:
from hermes_cli.config import load_config
cfg = load_config() or {}
skills_cfg = cfg.get("skills")
if isinstance(skills_cfg, dict):
return skills_cfg
except Exception:
logger.debug("Could not read skills config", exc_info=True)
return {}
def _substitute_template_vars(
content: str,
skill_dir: Path | None,
session_id: str | None,
) -> str:
"""Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
Only substitutes tokens for which a concrete value is available
unresolved tokens are left in place so the author can spot them.
"""
if not content:
return content
skill_dir_str = str(skill_dir) if skill_dir else None
def _replace(match: re.Match) -> str:
token = match.group(1)
if token == "HERMES_SKILL_DIR" and skill_dir_str:
return skill_dir_str
if token == "HERMES_SESSION_ID" and session_id:
return str(session_id)
return match.group(0)
return _SKILL_TEMPLATE_RE.sub(_replace, content)
def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
"""Execute a single inline-shell snippet and return its stdout (trimmed).
Failures return a short ``[inline-shell error: ...]`` marker instead of
raising, so one bad snippet can't wreck the whole skill message.
"""
try:
completed = subprocess.run(
["bash", "-c", command],
cwd=str(cwd) if cwd else None,
capture_output=True,
text=True,
timeout=max(1, int(timeout)),
check=False,
)
except subprocess.TimeoutExpired:
return f"[inline-shell timeout after {timeout}s: {command}]"
except FileNotFoundError:
return f"[inline-shell error: bash not found]"
except Exception as exc:
return f"[inline-shell error: {exc}]"
output = (completed.stdout or "").rstrip("\n")
if not output and completed.stderr:
output = completed.stderr.rstrip("\n")
if len(output) > _INLINE_SHELL_MAX_OUTPUT:
output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
return output
def _expand_inline_shell(
content: str,
skill_dir: Path | None,
timeout: int,
) -> str:
"""Replace every !`cmd` snippet in ``content`` with its stdout.
Runs each snippet with the skill directory as CWD so relative paths in
the snippet work the way the author expects.
"""
if "!`" not in content:
return content
def _replace(match: re.Match) -> str:
cmd = match.group(1).strip()
if not cmd:
return ""
return _run_inline_shell(cmd, skill_dir, timeout)
return _INLINE_SHELL_RE.sub(_replace, content)
def build_plan_path(
user_instruction: str = "",
@@ -238,36 +133,14 @@ def _build_skill_message(
activation_note: str,
user_instruction: str = "",
runtime_note: str = "",
session_id: str | None = None,
) -> str:
"""Format a loaded skill into a user/system message payload."""
from tools.skills_tool import SKILLS_DIR
content = str(loaded_skill.get("content") or "")
# ── Template substitution and inline-shell expansion ──
# Done before anything else so downstream blocks (setup notes,
# supporting-file hints) see the expanded content.
skills_cfg = _load_skills_config()
if skills_cfg.get("template_vars", True):
content = _substitute_template_vars(content, skill_dir, session_id)
if skills_cfg.get("inline_shell", False):
timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10)
content = _expand_inline_shell(content, skill_dir, timeout)
parts = [activation_note, "", content.strip()]
# ── Inject the absolute skill directory so the agent can reference
# bundled scripts without an extra skill_view() round-trip. ──
if skill_dir:
parts.append("")
parts.append(f"[Skill directory: {skill_dir}]")
parts.append(
"Resolve any relative paths in this skill (e.g. `scripts/foo.js`, "
"`templates/config.yaml`) against that directory, then run them "
"with the terminal tool using the absolute path."
)
# ── Inject resolved skill config values ──
_inject_skill_config(loaded_skill, parts)
@@ -315,13 +188,11 @@ def _build_skill_message(
# Skill is from an external dir — use the skill name instead
skill_view_target = skill_dir.name
parts.append("")
parts.append("[This skill has supporting files:]")
parts.append("[This skill has supporting files you can load with the skill_view tool:]")
for sf in supporting:
parts.append(f"- {sf} -> {skill_dir / sf}")
parts.append(f"- {sf}")
parts.append(
f'\nLoad any of these with skill_view(name="{skill_view_target}", '
f'file_path="<path>"), or run scripts directly by absolute path '
f"(e.g. `node {skill_dir}/scripts/foo.js`)."
f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="<path>")'
)
if user_instruction:
@@ -345,7 +216,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
_skill_commands = {}
try:
from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
from agent.skill_utils import get_external_skills_dirs, iter_skill_index_files
from agent.skill_utils import get_external_skills_dirs
disabled = _get_disabled_skill_names()
seen_names: set = set()
@@ -356,7 +227,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
dirs_to_scan.extend(get_external_skills_dirs())
for scan_dir in dirs_to_scan:
for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
for skill_md in scan_dir.rglob("SKILL.md"):
if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
continue
try:
@@ -461,7 +332,6 @@ def build_skill_invocation_message(
activation_note,
user_instruction=user_instruction,
runtime_note=runtime_note,
session_id=task_id,
)
@@ -500,7 +370,6 @@ def build_preloaded_skills_prompt(
loaded_skill,
skill_dir,
activation_note,
session_id=task_id,
)
)
loaded_names.append(skill_name)
+1 -1
View File
@@ -435,7 +435,7 @@ def iter_skill_index_files(skills_dir: Path, filename: str):
Excludes ``.git``, ``.github``, ``.hub`` directories.
"""
matches = []
for root, dirs, files in os.walk(skills_dir, followlinks=True):
for root, dirs, files in os.walk(skills_dir):
dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS]
if filename in files:
matches.append(Path(root) / filename)
+1 -1
View File
@@ -38,7 +38,7 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =
response = call_llm(
task="title_generation",
messages=messages,
max_tokens=500,
max_tokens=30,
temperature=0.3,
timeout=timeout,
)
-51
View File
@@ -1,51 +0,0 @@
"""Transport layer types and registry for provider response normalization.
Usage:
from agent.transports import get_transport
transport = get_transport("anthropic_messages")
result = transport.normalize_response(raw_response)
"""
from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401
_REGISTRY: dict = {}
def register_transport(api_mode: str, transport_cls: type) -> None:
"""Register a transport class for an api_mode string."""
_REGISTRY[api_mode] = transport_cls
def get_transport(api_mode: str):
"""Get a transport instance for the given api_mode.
Returns None if no transport is registered for this api_mode.
This allows gradual migration call sites can check for None
and fall back to the legacy code path.
"""
if not _REGISTRY:
_discover_transports()
cls = _REGISTRY.get(api_mode)
if cls is None:
return None
return cls()
def _discover_transports() -> None:
"""Import all transport modules to trigger auto-registration."""
try:
import agent.transports.anthropic # noqa: F401
except ImportError:
pass
try:
import agent.transports.codex # noqa: F401
except ImportError:
pass
try:
import agent.transports.chat_completions # noqa: F401
except ImportError:
pass
try:
import agent.transports.bedrock # noqa: F401
except ImportError:
pass
-177
View File
@@ -1,177 +0,0 @@
"""Anthropic Messages API transport.
Delegates to the existing adapter functions in agent/anthropic_adapter.py.
This transport owns format conversion and normalization NOT client lifecycle.
"""
from typing import Any, Dict, List, Optional
from agent.transports.base import ProviderTransport
from agent.transports.types import NormalizedResponse
class AnthropicTransport(ProviderTransport):
"""Transport for api_mode='anthropic_messages'.
Wraps the existing functions in anthropic_adapter.py behind the
ProviderTransport ABC. Each method delegates no logic is duplicated.
"""
@property
def api_mode(self) -> str:
return "anthropic_messages"
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
"""Convert OpenAI messages to Anthropic (system, messages) tuple.
kwargs:
base_url: Optional[str] affects thinking signature handling.
"""
from agent.anthropic_adapter import convert_messages_to_anthropic
base_url = kwargs.get("base_url")
return convert_messages_to_anthropic(messages, base_url=base_url)
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
"""Convert OpenAI tool schemas to Anthropic input_schema format."""
from agent.anthropic_adapter import convert_tools_to_anthropic
return convert_tools_to_anthropic(tools)
def build_kwargs(
self,
model: str,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
**params,
) -> Dict[str, Any]:
"""Build Anthropic messages.create() kwargs.
Calls convert_messages and convert_tools internally.
params (all optional):
max_tokens: int
reasoning_config: dict | None
tool_choice: str | None
is_oauth: bool
preserve_dots: bool
context_length: int | None
base_url: str | None
fast_mode: bool
"""
from agent.anthropic_adapter import build_anthropic_kwargs
return build_anthropic_kwargs(
model=model,
messages=messages,
tools=tools,
max_tokens=params.get("max_tokens", 16384),
reasoning_config=params.get("reasoning_config"),
tool_choice=params.get("tool_choice"),
is_oauth=params.get("is_oauth", False),
preserve_dots=params.get("preserve_dots", False),
context_length=params.get("context_length"),
base_url=params.get("base_url"),
fast_mode=params.get("fast_mode", False),
)
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize Anthropic response to NormalizedResponse.
Parses content blocks (text, thinking, tool_use), maps stop_reason
to OpenAI finish_reason, and collects reasoning_details in provider_data.
"""
import json
from agent.anthropic_adapter import _to_plain_data
from agent.transports.types import ToolCall
strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
_MCP_PREFIX = "mcp_"
text_parts = []
reasoning_parts = []
reasoning_details = []
tool_calls = []
for block in response.content:
if block.type == "text":
text_parts.append(block.text)
elif block.type == "thinking":
reasoning_parts.append(block.thinking)
block_dict = _to_plain_data(block)
if isinstance(block_dict, dict):
reasoning_details.append(block_dict)
elif block.type == "tool_use":
name = block.name
if strip_tool_prefix and name.startswith(_MCP_PREFIX):
name = name[len(_MCP_PREFIX):]
tool_calls.append(
ToolCall(
id=block.id,
name=name,
arguments=json.dumps(block.input),
)
)
finish_reason = self._STOP_REASON_MAP.get(response.stop_reason, "stop")
provider_data = {}
if reasoning_details:
provider_data["reasoning_details"] = reasoning_details
return NormalizedResponse(
content="\n".join(text_parts) if text_parts else None,
tool_calls=tool_calls or None,
finish_reason=finish_reason,
reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
usage=None,
provider_data=provider_data or None,
)
def validate_response(self, response: Any) -> bool:
"""Check Anthropic response structure is valid.
An empty content list is legitimate when ``stop_reason == "end_turn"``
the model's canonical way of signalling "nothing more to add" after
a tool turn that already delivered the user-facing text. Treating it
as invalid falsely retries a completed response.
"""
if response is None:
return False
content_blocks = getattr(response, "content", None)
if not isinstance(content_blocks, list):
return False
if not content_blocks:
return getattr(response, "stop_reason", None) == "end_turn"
return True
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
"""Extract Anthropic cache_read and cache_creation token counts."""
usage = getattr(response, "usage", None)
if usage is None:
return None
cached = getattr(usage, "cache_read_input_tokens", 0) or 0
written = getattr(usage, "cache_creation_input_tokens", 0) or 0
if cached or written:
return {"cached_tokens": cached, "creation_tokens": written}
return None
# Promote the adapter's canonical mapping to module level so it's shared
_STOP_REASON_MAP = {
"end_turn": "stop",
"tool_use": "tool_calls",
"max_tokens": "length",
"stop_sequence": "stop",
"refusal": "content_filter",
"model_context_window_exceeded": "length",
}
def map_finish_reason(self, raw_reason: str) -> str:
"""Map Anthropic stop_reason to OpenAI finish_reason."""
return self._STOP_REASON_MAP.get(raw_reason, "stop")
# Auto-register on import
from agent.transports import register_transport # noqa: E402
register_transport("anthropic_messages", AnthropicTransport)
-89
View File
@@ -1,89 +0,0 @@
"""Abstract base for provider transports.
A transport owns the data path for one api_mode:
convert_messages convert_tools build_kwargs normalize_response
It does NOT own: client construction, streaming, credential refresh,
prompt caching, interrupt handling, or retry logic. Those stay on AIAgent.
"""
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
from agent.transports.types import NormalizedResponse
class ProviderTransport(ABC):
"""Base class for provider-specific format conversion and normalization."""
@property
@abstractmethod
def api_mode(self) -> str:
"""The api_mode string this transport handles (e.g. 'anthropic_messages')."""
...
@abstractmethod
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
"""Convert OpenAI-format messages to provider-native format.
Returns provider-specific structure (e.g. (system, messages) for Anthropic,
or the messages list unchanged for chat_completions).
"""
...
@abstractmethod
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
"""Convert OpenAI-format tool definitions to provider-native format.
Returns provider-specific tool list (e.g. Anthropic input_schema format).
"""
...
@abstractmethod
def build_kwargs(
self,
model: str,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
**params,
) -> Dict[str, Any]:
"""Build the complete API call kwargs dict.
This is the primary entry point it typically calls convert_messages()
and convert_tools() internally, then adds model-specific config.
Returns a dict ready to be passed to the provider's SDK client.
"""
...
@abstractmethod
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize a raw provider response to the shared NormalizedResponse type.
This is the only method that returns a transport-layer type.
"""
...
def validate_response(self, response: Any) -> bool:
"""Optional: check if the raw response is structurally valid.
Returns True if valid, False if the response should be treated as invalid.
Default implementation always returns True.
"""
return True
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
"""Optional: extract provider-specific cache hit/creation stats.
Returns dict with 'cached_tokens' and 'creation_tokens', or None.
Default returns None.
"""
return None
def map_finish_reason(self, raw_reason: str) -> str:
"""Optional: map provider-specific stop reason to OpenAI equivalent.
Default returns the raw reason unchanged. Override for providers
with different stop reason vocabularies.
"""
return raw_reason
-154
View File
@@ -1,154 +0,0 @@
"""AWS Bedrock Converse API transport.
Delegates to the existing adapter functions in agent/bedrock_adapter.py.
Bedrock uses its own boto3 client (not the OpenAI SDK), so the transport
owns format conversion and normalization, while client construction and
boto3 calls stay on AIAgent.
"""
from typing import Any, Dict, List, Optional
from agent.transports.base import ProviderTransport
from agent.transports.types import NormalizedResponse, ToolCall, Usage
class BedrockTransport(ProviderTransport):
"""Transport for api_mode='bedrock_converse'."""
@property
def api_mode(self) -> str:
return "bedrock_converse"
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
"""Convert OpenAI messages to Bedrock Converse format."""
from agent.bedrock_adapter import convert_messages_to_converse
return convert_messages_to_converse(messages)
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
"""Convert OpenAI tool schemas to Bedrock Converse toolConfig."""
from agent.bedrock_adapter import convert_tools_to_converse
return convert_tools_to_converse(tools)
def build_kwargs(
self,
model: str,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
**params,
) -> Dict[str, Any]:
"""Build Bedrock converse() kwargs.
Calls convert_messages and convert_tools internally.
params:
max_tokens: int output token limit (default 4096)
temperature: float | None
guardrail_config: dict | None Bedrock guardrails
region: str AWS region (default 'us-east-1')
"""
from agent.bedrock_adapter import build_converse_kwargs
region = params.get("region", "us-east-1")
guardrail = params.get("guardrail_config")
kwargs = build_converse_kwargs(
model=model,
messages=messages,
tools=tools,
max_tokens=params.get("max_tokens", 4096),
temperature=params.get("temperature"),
guardrail_config=guardrail,
)
# Sentinel keys for dispatch — agent pops these before the boto3 call
kwargs["__bedrock_converse__"] = True
kwargs["__bedrock_region__"] = region
return kwargs
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize Bedrock response to NormalizedResponse.
Handles two shapes:
1. Raw boto3 dict (from direct converse() calls)
2. Already-normalized SimpleNamespace with .choices (from dispatch site)
"""
from agent.bedrock_adapter import normalize_converse_response
# Normalize to OpenAI-compatible SimpleNamespace
if hasattr(response, "choices") and response.choices:
# Already normalized at dispatch site
ns = response
else:
# Raw boto3 dict
ns = normalize_converse_response(response)
choice = ns.choices[0]
msg = choice.message
finish_reason = choice.finish_reason or "stop"
tool_calls = None
if msg.tool_calls:
tool_calls = [
ToolCall(
id=tc.id,
name=tc.function.name,
arguments=tc.function.arguments,
)
for tc in msg.tool_calls
]
usage = None
if hasattr(ns, "usage") and ns.usage:
u = ns.usage
usage = Usage(
prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
completion_tokens=getattr(u, "completion_tokens", 0) or 0,
total_tokens=getattr(u, "total_tokens", 0) or 0,
)
reasoning = getattr(msg, "reasoning", None) or getattr(msg, "reasoning_content", None)
return NormalizedResponse(
content=msg.content,
tool_calls=tool_calls,
finish_reason=finish_reason,
reasoning=reasoning,
usage=usage,
)
def validate_response(self, response: Any) -> bool:
"""Check Bedrock response structure.
After normalize_converse_response, the response has OpenAI-compatible
.choices same check as chat_completions.
"""
if response is None:
return False
# Raw Bedrock dict response — check for 'output' key
if isinstance(response, dict):
return "output" in response
# Already-normalized SimpleNamespace
if hasattr(response, "choices"):
return bool(response.choices)
return False
def map_finish_reason(self, raw_reason: str) -> str:
"""Map Bedrock stop reason to OpenAI finish_reason.
The adapter already does this mapping inside normalize_converse_response,
so this is only used for direct access to raw responses.
"""
_MAP = {
"end_turn": "stop",
"tool_use": "tool_calls",
"max_tokens": "length",
"stop_sequence": "stop",
"guardrail_intervened": "content_filter",
"content_filtered": "content_filter",
}
return _MAP.get(raw_reason, "stop")
# Auto-register on import
from agent.transports import register_transport # noqa: E402
register_transport("bedrock_converse", BedrockTransport)
-393
View File
@@ -1,393 +0,0 @@
"""OpenAI Chat Completions transport.
Handles the default api_mode ('chat_completions') used by ~16 OpenAI-compatible
providers (OpenRouter, Nous, NVIDIA, Qwen, Ollama, DeepSeek, xAI, Kimi, etc.).
Messages and tools are already in OpenAI format convert_messages and
convert_tools are near-identity. The complexity lives in build_kwargs
which has provider-specific conditionals for max_tokens defaults,
reasoning configuration, temperature handling, and extra_body assembly.
"""
import copy
from typing import Any, Dict, List, Optional
from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
from agent.prompt_builder import DEVELOPER_ROLE_MODELS
from agent.transports.base import ProviderTransport
from agent.transports.types import NormalizedResponse, ToolCall, Usage
class ChatCompletionsTransport(ProviderTransport):
"""Transport for api_mode='chat_completions'.
The default path for OpenAI-compatible providers.
"""
@property
def api_mode(self) -> str:
return "chat_completions"
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
"""Messages are already in OpenAI format — sanitize Codex leaks only.
Strips Codex Responses API fields (``codex_reasoning_items`` on the
message, ``call_id``/``response_item_id`` on tool_calls) that strict
chat-completions providers reject with 400/422.
"""
needs_sanitize = False
for msg in messages:
if not isinstance(msg, dict):
continue
if "codex_reasoning_items" in msg:
needs_sanitize = True
break
tool_calls = msg.get("tool_calls")
if isinstance(tool_calls, list):
for tc in tool_calls:
if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
needs_sanitize = True
break
if needs_sanitize:
break
if not needs_sanitize:
return messages
sanitized = copy.deepcopy(messages)
for msg in sanitized:
if not isinstance(msg, dict):
continue
msg.pop("codex_reasoning_items", None)
tool_calls = msg.get("tool_calls")
if isinstance(tool_calls, list):
for tc in tool_calls:
if isinstance(tc, dict):
tc.pop("call_id", None)
tc.pop("response_item_id", None)
return sanitized
def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Tools are already in OpenAI format — identity."""
return tools
def build_kwargs(
self,
model: str,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
**params,
) -> Dict[str, Any]:
"""Build chat.completions.create() kwargs.
This is the most complex transport method it handles ~16 providers
via params rather than subclasses.
params:
timeout: float API call timeout
max_tokens: int | None user-configured max tokens
ephemeral_max_output_tokens: int | None one-shot override (error recovery)
max_tokens_param_fn: callable returns {max_tokens: N} or {max_completion_tokens: N}
reasoning_config: dict | None
request_overrides: dict | None
session_id: str | None
qwen_session_metadata: dict | None {sessionId, promptId} precomputed
model_lower: str lowercase model name for pattern matching
# Provider detection flags (all optional, default False)
is_openrouter: bool
is_nous: bool
is_qwen_portal: bool
is_github_models: bool
is_nvidia_nim: bool
is_kimi: bool
is_custom_provider: bool
ollama_num_ctx: int | None
# Provider routing
provider_preferences: dict | None
# Qwen-specific
qwen_prepare_fn: callable | None runs AFTER codex sanitization
qwen_prepare_inplace_fn: callable | None in-place variant for deepcopied lists
# Temperature
fixed_temperature: Any from _fixed_temperature_for_model()
omit_temperature: bool
# Reasoning
supports_reasoning: bool
github_reasoning_extra: dict | None
# Claude on OpenRouter/Nous max output
anthropic_max_output: int | None
# Extra
extra_body_additions: dict | None pre-built extra_body entries
"""
# Codex sanitization: drop reasoning_items / call_id / response_item_id
sanitized = self.convert_messages(messages)
# Qwen portal prep AFTER codex sanitization. If sanitize already
# deepcopied, reuse that copy via the in-place variant to avoid a
# second deepcopy.
is_qwen = params.get("is_qwen_portal", False)
if is_qwen:
qwen_prep = params.get("qwen_prepare_fn")
qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
if sanitized is messages:
if qwen_prep is not None:
sanitized = qwen_prep(sanitized)
else:
# Already deepcopied — transform in place
if qwen_prep_inplace is not None:
qwen_prep_inplace(sanitized)
elif qwen_prep is not None:
sanitized = qwen_prep(sanitized)
# Developer role swap for GPT-5/Codex models
model_lower = params.get("model_lower", (model or "").lower())
if (
sanitized
and isinstance(sanitized[0], dict)
and sanitized[0].get("role") == "system"
and any(p in model_lower for p in DEVELOPER_ROLE_MODELS)
):
sanitized = list(sanitized)
sanitized[0] = {**sanitized[0], "role": "developer"}
api_kwargs: Dict[str, Any] = {
"model": model,
"messages": sanitized,
}
timeout = params.get("timeout")
if timeout is not None:
api_kwargs["timeout"] = timeout
# Temperature
fixed_temp = params.get("fixed_temperature")
omit_temp = params.get("omit_temperature", False)
if omit_temp:
api_kwargs.pop("temperature", None)
elif fixed_temp is not None:
api_kwargs["temperature"] = fixed_temp
# Qwen metadata (caller precomputes {sessionId, promptId})
qwen_meta = params.get("qwen_session_metadata")
if qwen_meta and is_qwen:
api_kwargs["metadata"] = qwen_meta
# Tools
if tools:
# Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting
# tool parameters here keeps aggregator routes (Nous, OpenRouter,
# etc.) compatible, in addition to direct moonshot.ai endpoints.
if is_moonshot_model(model):
tools = sanitize_moonshot_tools(tools)
api_kwargs["tools"] = tools
# max_tokens resolution — priority: ephemeral > user > provider default
max_tokens_fn = params.get("max_tokens_param_fn")
ephemeral = params.get("ephemeral_max_output_tokens")
max_tokens = params.get("max_tokens")
anthropic_max_out = params.get("anthropic_max_output")
is_nvidia_nim = params.get("is_nvidia_nim", False)
is_kimi = params.get("is_kimi", False)
reasoning_config = params.get("reasoning_config")
if ephemeral is not None and max_tokens_fn:
api_kwargs.update(max_tokens_fn(ephemeral))
elif max_tokens is not None and max_tokens_fn:
api_kwargs.update(max_tokens_fn(max_tokens))
elif is_nvidia_nim and max_tokens_fn:
api_kwargs.update(max_tokens_fn(16384))
elif is_qwen and max_tokens_fn:
api_kwargs.update(max_tokens_fn(65536))
elif is_kimi and max_tokens_fn:
# Kimi/Moonshot: 32000 matches Kimi CLI's default
api_kwargs.update(max_tokens_fn(32000))
elif anthropic_max_out is not None:
api_kwargs["max_tokens"] = anthropic_max_out
# Kimi: top-level reasoning_effort (unless thinking disabled)
if is_kimi:
_kimi_thinking_off = bool(
reasoning_config
and isinstance(reasoning_config, dict)
and reasoning_config.get("enabled") is False
)
if not _kimi_thinking_off:
_kimi_effort = "medium"
if reasoning_config and isinstance(reasoning_config, dict):
_e = (reasoning_config.get("effort") or "").strip().lower()
if _e in ("low", "medium", "high"):
_kimi_effort = _e
api_kwargs["reasoning_effort"] = _kimi_effort
# extra_body assembly
extra_body: Dict[str, Any] = {}
is_openrouter = params.get("is_openrouter", False)
is_nous = params.get("is_nous", False)
is_github_models = params.get("is_github_models", False)
provider_prefs = params.get("provider_preferences")
if provider_prefs and is_openrouter:
extra_body["provider"] = provider_prefs
# Kimi extra_body.thinking
if is_kimi:
_kimi_thinking_enabled = True
if reasoning_config and isinstance(reasoning_config, dict):
if reasoning_config.get("enabled") is False:
_kimi_thinking_enabled = False
extra_body["thinking"] = {
"type": "enabled" if _kimi_thinking_enabled else "disabled",
}
# Reasoning
if params.get("supports_reasoning", False):
if is_github_models:
gh_reasoning = params.get("github_reasoning_extra")
if gh_reasoning is not None:
extra_body["reasoning"] = gh_reasoning
else:
if reasoning_config is not None:
rc = dict(reasoning_config)
if is_nous and rc.get("enabled") is False:
pass # omit for Nous when disabled
else:
extra_body["reasoning"] = rc
else:
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
if is_nous:
extra_body["tags"] = ["product=hermes-agent"]
# Ollama num_ctx
ollama_ctx = params.get("ollama_num_ctx")
if ollama_ctx:
options = extra_body.get("options", {})
options["num_ctx"] = ollama_ctx
extra_body["options"] = options
# Ollama/custom think=false
if params.get("is_custom_provider", False):
if reasoning_config and isinstance(reasoning_config, dict):
_effort = (reasoning_config.get("effort") or "").strip().lower()
_enabled = reasoning_config.get("enabled", True)
if _effort == "none" or _enabled is False:
extra_body["think"] = False
if is_qwen:
extra_body["vl_high_resolution_images"] = True
# Merge any pre-built extra_body additions
additions = params.get("extra_body_additions")
if additions:
extra_body.update(additions)
if extra_body:
api_kwargs["extra_body"] = extra_body
# Request overrides last (service_tier etc.)
overrides = params.get("request_overrides")
if overrides:
api_kwargs.update(overrides)
return api_kwargs
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize OpenAI ChatCompletion to NormalizedResponse.
For chat_completions, this is near-identity the response is already
in OpenAI format. extra_content on tool_calls (Gemini thought_signature)
is preserved via ToolCall.provider_data. reasoning_details (OpenRouter
unified format) and reasoning_content (DeepSeek/Moonshot) are also
preserved for downstream replay.
"""
choice = response.choices[0]
msg = choice.message
finish_reason = choice.finish_reason or "stop"
tool_calls = None
if msg.tool_calls:
tool_calls = []
for tc in msg.tool_calls:
# Preserve provider-specific extras on the tool call.
# Gemini 3 thinking models attach extra_content with
# thought_signature — without replay on the next turn the API
# rejects the request with 400.
tc_provider_data: Dict[str, Any] = {}
extra = getattr(tc, "extra_content", None)
if extra is None and hasattr(tc, "model_extra"):
extra = (tc.model_extra or {}).get("extra_content")
if extra is not None:
if hasattr(extra, "model_dump"):
try:
extra = extra.model_dump()
except Exception:
pass
tc_provider_data["extra_content"] = extra
tool_calls.append(ToolCall(
id=tc.id,
name=tc.function.name,
arguments=tc.function.arguments,
provider_data=tc_provider_data or None,
))
usage = None
if hasattr(response, "usage") and response.usage:
u = response.usage
usage = Usage(
prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
completion_tokens=getattr(u, "completion_tokens", 0) or 0,
total_tokens=getattr(u, "total_tokens", 0) or 0,
)
# Preserve reasoning fields separately. DeepSeek/Moonshot use
# ``reasoning_content``; others use ``reasoning``. Downstream code
# (_extract_reasoning, thinking-prefill retry) reads both distinctly,
# so keep them apart in provider_data rather than merging.
reasoning = getattr(msg, "reasoning", None)
reasoning_content = getattr(msg, "reasoning_content", None)
provider_data: Dict[str, Any] = {}
if reasoning_content:
provider_data["reasoning_content"] = reasoning_content
rd = getattr(msg, "reasoning_details", None)
if rd:
provider_data["reasoning_details"] = rd
return NormalizedResponse(
content=msg.content,
tool_calls=tool_calls,
finish_reason=finish_reason,
reasoning=reasoning,
usage=usage,
provider_data=provider_data or None,
)
def validate_response(self, response: Any) -> bool:
"""Check that response has valid choices."""
if response is None:
return False
if not hasattr(response, "choices") or response.choices is None:
return False
if not response.choices:
return False
return True
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
"""Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
usage = getattr(response, "usage", None)
if usage is None:
return None
details = getattr(usage, "prompt_tokens_details", None)
if details is None:
return None
cached = getattr(details, "cached_tokens", 0) or 0
written = getattr(details, "cache_write_tokens", 0) or 0
if cached or written:
return {"cached_tokens": cached, "creation_tokens": written}
return None
# Auto-register on import
from agent.transports import register_transport # noqa: E402
register_transport("chat_completions", ChatCompletionsTransport)
-217
View File
@@ -1,217 +0,0 @@
"""OpenAI Responses API (Codex) transport.
Delegates to the existing adapter functions in agent/codex_responses_adapter.py.
This transport owns format conversion and normalization NOT client lifecycle,
streaming, or the _run_codex_stream() call path.
"""
from typing import Any, Dict, List, Optional
from agent.transports.base import ProviderTransport
from agent.transports.types import NormalizedResponse, ToolCall, Usage
class ResponsesApiTransport(ProviderTransport):
"""Transport for api_mode='codex_responses'.
Wraps the functions extracted into codex_responses_adapter.py (PR 1).
"""
@property
def api_mode(self) -> str:
return "codex_responses"
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
"""Convert OpenAI chat messages to Responses API input items."""
from agent.codex_responses_adapter import _chat_messages_to_responses_input
return _chat_messages_to_responses_input(messages)
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
"""Convert OpenAI tool schemas to Responses API function definitions."""
from agent.codex_responses_adapter import _responses_tools
return _responses_tools(tools)
def build_kwargs(
self,
model: str,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
**params,
) -> Dict[str, Any]:
"""Build Responses API kwargs.
Calls convert_messages and convert_tools internally.
params:
instructions: str system prompt (extracted from messages[0] if not given)
reasoning_config: dict | None {effort, enabled}
session_id: str | None used for prompt_cache_key + xAI conv header
max_tokens: int | None max_output_tokens
request_overrides: dict | None extra kwargs merged in
provider: str | None provider name for backend-specific logic
base_url: str | None endpoint URL
base_url_hostname: str | None hostname for backend detection
is_github_responses: bool Copilot/GitHub models backend
is_codex_backend: bool chatgpt.com/backend-api/codex
is_xai_responses: bool xAI/Grok backend
github_reasoning_extra: dict | None Copilot reasoning params
"""
from agent.codex_responses_adapter import (
_chat_messages_to_responses_input,
_responses_tools,
)
from run_agent import DEFAULT_AGENT_IDENTITY
instructions = params.get("instructions", "")
payload_messages = messages
if not instructions:
if messages and messages[0].get("role") == "system":
instructions = str(messages[0].get("content") or "").strip()
payload_messages = messages[1:]
if not instructions:
instructions = DEFAULT_AGENT_IDENTITY
is_github_responses = params.get("is_github_responses", False)
is_codex_backend = params.get("is_codex_backend", False)
is_xai_responses = params.get("is_xai_responses", False)
# Resolve reasoning effort
reasoning_effort = "medium"
reasoning_enabled = True
reasoning_config = params.get("reasoning_config")
if reasoning_config and isinstance(reasoning_config, dict):
if reasoning_config.get("enabled") is False:
reasoning_enabled = False
elif reasoning_config.get("effort"):
reasoning_effort = reasoning_config["effort"]
_effort_clamp = {"minimal": "low"}
reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)
kwargs = {
"model": model,
"instructions": instructions,
"input": _chat_messages_to_responses_input(payload_messages),
"tools": _responses_tools(tools),
"tool_choice": "auto",
"parallel_tool_calls": True,
"store": False,
}
session_id = params.get("session_id")
if not is_github_responses and session_id:
kwargs["prompt_cache_key"] = session_id
if reasoning_enabled and is_xai_responses:
kwargs["include"] = ["reasoning.encrypted_content"]
elif reasoning_enabled:
if is_github_responses:
github_reasoning = params.get("github_reasoning_extra")
if github_reasoning is not None:
kwargs["reasoning"] = github_reasoning
else:
kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
kwargs["include"] = ["reasoning.encrypted_content"]
elif not is_github_responses and not is_xai_responses:
kwargs["include"] = []
request_overrides = params.get("request_overrides")
if request_overrides:
kwargs.update(request_overrides)
max_tokens = params.get("max_tokens")
if max_tokens is not None and not is_codex_backend:
kwargs["max_output_tokens"] = max_tokens
if is_xai_responses and session_id:
kwargs["extra_headers"] = {"x-grok-conv-id": session_id}
return kwargs
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize Codex Responses API response to NormalizedResponse."""
from agent.codex_responses_adapter import (
_normalize_codex_response,
_extract_responses_message_text,
_extract_responses_reasoning_text,
)
# _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
msg, finish_reason = _normalize_codex_response(response)
tool_calls = None
if msg and msg.tool_calls:
tool_calls = []
for tc in msg.tool_calls:
provider_data = {}
if hasattr(tc, "call_id") and tc.call_id:
provider_data["call_id"] = tc.call_id
if hasattr(tc, "response_item_id") and tc.response_item_id:
provider_data["response_item_id"] = tc.response_item_id
tool_calls.append(ToolCall(
id=tc.id if hasattr(tc, "id") else (tc.function.name if hasattr(tc, "function") else None),
name=tc.function.name if hasattr(tc, "function") else getattr(tc, "name", ""),
arguments=tc.function.arguments if hasattr(tc, "function") else getattr(tc, "arguments", "{}"),
provider_data=provider_data or None,
))
# Extract reasoning items for provider_data
provider_data = {}
if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
provider_data["reasoning_details"] = msg.reasoning_details
return NormalizedResponse(
content=msg.content if msg else None,
tool_calls=tool_calls,
finish_reason=finish_reason or "stop",
reasoning=msg.reasoning if msg and hasattr(msg, "reasoning") else None,
usage=None, # Codex usage is extracted separately in normalize_usage()
provider_data=provider_data or None,
)
def validate_response(self, response: Any) -> bool:
"""Check Codex Responses API response has valid output structure.
Returns True only if response.output is a non-empty list.
Does NOT check output_text fallback the caller handles that
with diagnostic logging for stream backfill recovery.
"""
if response is None:
return False
output = getattr(response, "output", None)
if not isinstance(output, list) or not output:
return False
return True
def preflight_kwargs(self, api_kwargs: Any, *, allow_stream: bool = False) -> dict:
"""Validate and sanitize Codex API kwargs before the call.
Normalizes input items, strips unsupported fields, validates structure.
"""
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
return _preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream)
def map_finish_reason(self, raw_reason: str) -> str:
"""Map Codex response.status to OpenAI finish_reason.
Codex uses response.status ('completed', 'incomplete') +
response.incomplete_details.reason for granular mapping.
This method handles the simple status string; the caller
should check incomplete_details separately for 'max_output_tokens'.
"""
_MAP = {
"completed": "stop",
"incomplete": "length",
"failed": "stop",
"cancelled": "stop",
}
return _MAP.get(raw_reason, "stop")
# Auto-register on import
from agent.transports import register_transport # noqa: E402
register_transport("codex_responses", ResponsesApiTransport)
-156
View File
@@ -1,156 +0,0 @@
"""Shared types for normalized provider responses.
These dataclasses define the canonical shape that all provider adapters
normalize responses to. The shared surface is intentionally minimal
only fields that every downstream consumer reads are top-level.
Protocol-specific state goes in ``provider_data`` dicts (response-level
and per-tool-call) so that protocol-aware code paths can access it
without polluting the shared type.
"""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
@dataclass
class ToolCall:
"""A normalized tool call from any provider.
``id`` is the protocol's canonical identifier — what gets used in
``tool_call_id`` / ``tool_use_id`` when constructing tool result
messages. May be ``None`` when the provider omits it; the agent
fills it via ``_deterministic_call_id()`` before storing in history.
``provider_data`` carries per-tool-call protocol metadata that only
protocol-aware code reads:
* Codex: ``{"call_id": "call_XXX", "response_item_id": "fc_XXX"}``
* Gemini: ``{"extra_content": {"google": {"thought_signature": "..."}}}``
* Others: ``None``
"""
id: Optional[str]
name: str
arguments: str # JSON string
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
# ── Backward compatibility ──────────────────────────────────
# The agent loop reads tc.function.name / tc.function.arguments
# throughout run_agent.py (45+ sites). These properties let
# NormalizedResponse pass through without the _nr_to_assistant_message
# shim, while keeping ToolCall's canonical fields flat.
@property
def type(self) -> str:
return "function"
@property
def function(self) -> "ToolCall":
"""Return self so tc.function.name / tc.function.arguments work."""
return self
@property
def call_id(self) -> Optional[str]:
"""Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
return (self.provider_data or {}).get("call_id")
@property
def response_item_id(self) -> Optional[str]:
"""Codex response_item_id from provider_data."""
return (self.provider_data or {}).get("response_item_id")
@property
def extra_content(self) -> Optional[Dict[str, Any]]:
"""Gemini extra_content (thought_signature) from provider_data.
Gemini 3 thinking models attach ``extra_content`` with a
``thought_signature`` to each tool call. This signature must be
replayed on subsequent API calls without it the API rejects the
request with HTTP 400. The chat_completions transport stores this
in ``provider_data["extra_content"]``; this property exposes it so
``_build_assistant_message`` can ``getattr(tc, "extra_content")``
uniformly.
"""
return (self.provider_data or {}).get("extra_content")
@dataclass
class Usage:
"""Token usage from an API response."""
prompt_tokens: int = 0
completion_tokens: int = 0
total_tokens: int = 0
cached_tokens: int = 0
@dataclass
class NormalizedResponse:
"""Normalized API response from any provider.
Shared fields are truly cross-provider every caller can rely on
them without branching on api_mode. Protocol-specific state goes in
``provider_data`` so that only protocol-aware code paths read it.
Response-level ``provider_data`` examples:
* Anthropic: ``{"reasoning_details": [...]}``
* Codex: ``{"codex_reasoning_items": [...]}``
* Others: ``None``
"""
content: Optional[str]
tool_calls: Optional[List[ToolCall]]
finish_reason: str # "stop", "tool_calls", "length", "content_filter"
reasoning: Optional[str] = None
usage: Optional[Usage] = None
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
# ── Backward compatibility ──────────────────────────────────
# The shim _nr_to_assistant_message() mapped these from provider_data.
# These properties let NormalizedResponse pass through directly.
@property
def reasoning_content(self) -> Optional[str]:
pd = self.provider_data or {}
return pd.get("reasoning_content")
@property
def reasoning_details(self):
pd = self.provider_data or {}
return pd.get("reasoning_details")
@property
def codex_reasoning_items(self):
pd = self.provider_data or {}
return pd.get("codex_reasoning_items")
# ---------------------------------------------------------------------------
# Factory helpers
# ---------------------------------------------------------------------------
def build_tool_call(
id: Optional[str],
name: str,
arguments: Any,
**provider_fields: Any,
) -> ToolCall:
"""Build a ``ToolCall``, auto-serialising *arguments* if it's a dict.
Any extra keyword arguments are collected into ``provider_data``.
"""
args_str = json.dumps(arguments) if isinstance(arguments, dict) else str(arguments)
pd = dict(provider_fields) if provider_fields else None
return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
"""Translate a provider-specific stop reason to the normalised set.
Falls back to ``"stop"`` for unknown or ``None`` reasons.
"""
if reason is None:
return "stop"
return mapping.get(reason, "stop")
+1 -14
View File
@@ -6,7 +6,6 @@ from decimal import Decimal
from typing import Any, Dict, Literal, Optional
from agent.model_metadata import fetch_endpoint_model_metadata, fetch_model_metadata
from utils import base_url_host_matches
DEFAULT_PRICING = {"input": 0.0, "output": 0.0}
@@ -394,7 +393,7 @@ def resolve_billing_route(
if provider_name == "openai-codex":
return BillingRoute(provider="openai-codex", model=model, base_url=base_url or "", billing_mode="subscription_included")
if provider_name == "openrouter" or base_url_host_matches(base_url or "", "openrouter.ai"):
if provider_name == "openrouter" or "openrouter.ai" in base:
return BillingRoute(provider="openrouter", model=model, base_url=base_url or "", billing_mode="official_models_api")
if provider_name == "anthropic":
return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
@@ -533,22 +532,10 @@ def normalize_usage(
prompt_total = _to_int(getattr(response_usage, "prompt_tokens", 0))
output_tokens = _to_int(getattr(response_usage, "completion_tokens", 0))
details = getattr(response_usage, "prompt_tokens_details", None)
# Primary: OpenAI-style prompt_tokens_details. Fallback: Anthropic-style
# top-level fields that some OpenAI-compatible proxies (OpenRouter, Vercel
# AI Gateway, Cline) expose when routing Claude models — without this
# fallback, cache writes are undercounted as 0 and cache reads can be
# missed when the proxy only surfaces them at the top level.
# Port of cline/cline#10266.
cache_read_tokens = _to_int(getattr(details, "cached_tokens", 0) if details else 0)
if not cache_read_tokens:
cache_read_tokens = _to_int(getattr(response_usage, "cache_read_input_tokens", 0))
cache_write_tokens = _to_int(
getattr(details, "cache_write_tokens", 0) if details else 0
)
if not cache_write_tokens:
cache_write_tokens = _to_int(
getattr(response_usage, "cache_creation_input_tokens", 0)
)
input_tokens = max(0, prompt_total - cache_read_tokens - cache_write_tokens)
reasoning_tokens = 0
+4 -5
View File
@@ -444,7 +444,6 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]:
if not reasoning.get("has_any_reasoning", True):
print(f" 🚫 Prompt {prompt_index} discarded (no reasoning in any turn)")
discarded_no_reasoning += 1
completed_in_batch.append(prompt_index)
continue
# Get and normalize tool stats for consistent schema across all entries
@@ -1190,12 +1189,12 @@ def main(
"""
# Handle list distributions
if list_distributions:
from toolset_distributions import print_distribution_info
from toolset_distributions import list_distributions as get_all_dists, print_distribution_info
print("📊 Available Toolset Distributions")
print("=" * 70)
all_dists = list_distributions()
all_dists = get_all_dists()
for dist_name in sorted(all_dists.keys()):
print_distribution_info(dist_name)
+2 -48
View File
@@ -507,13 +507,6 @@ agent:
# finish, then interrupts anything still running after this timeout.
# 0 = no drain, interrupt immediately.
# restart_drain_timeout: 60
# Max app-level retry attempts for API errors (connection drops, provider
# timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this
# to 1 if you use fallback providers and want fast failover on flaky
# primaries (default 3). The OpenAI SDK does its own low-level retries
# underneath this wrapper — this is the Hermes-level loop.
# api_max_retries: 3
# Enable verbose logging
verbose: false
@@ -777,13 +770,10 @@ code_execution:
# Subagent Delegation
# =============================================================================
# The delegate_task tool spawns child agents with isolated context.
# Supports single tasks and batch mode (default 3 parallel, configurable).
# Supports single tasks and batch mode (up to 3 parallel).
delegation:
max_iterations: 50 # Max tool-calling turns per child (default: 50)
# max_concurrent_children: 3 # Max parallel child agents (default: 3)
# max_spawn_depth: 1 # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
# orchestrator_enabled: true # Kill switch for role="orchestrator" children (default: true).
# inherit_mcp_toolsets: true # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
default_toolsets: ["terminal", "file", "web"] # Default toolsets for subagents
# model: "google/gemini-3-flash-preview" # Override model for subagents (empty = inherit parent)
# provider: "openrouter" # Override provider for subagents (empty = inherit parent)
# # Resolves full credentials (base_url, api_key) automatically.
@@ -927,39 +917,3 @@ display:
# # Names and usernames are NOT affected (user-chosen, publicly visible).
# # Routing/delivery still uses the original values internally.
# redact_pii: false
# =============================================================================
# Shell-script hooks
# =============================================================================
# Register shell scripts as plugin-hook callbacks. Each entry is executed as
# a subprocess (shell=False, shlex.split) with a JSON payload on stdin. On
# stdout the script may return JSON that either blocks the tool call or
# injects context into the next LLM call.
#
# Valid events (mirror hermes_cli.plugins.VALID_HOOKS):
# pre_tool_call, post_tool_call, pre_llm_call, post_llm_call,
# pre_api_request, post_api_request, on_session_start, on_session_end,
# on_session_finalize, on_session_reset, subagent_stop
#
# First-use consent: each (event, command) pair prompts once on a TTY, then
# is persisted to ~/.hermes/shell-hooks-allowlist.json. Non-interactive
# runs (gateway, cron) need --accept-hooks, HERMES_ACCEPT_HOOKS=1, or the
# hooks_auto_accept key below.
#
# See website/docs/user-guide/features/hooks.md for the full JSON wire
# protocol and worked examples.
#
# hooks:
# pre_tool_call:
# - matcher: "terminal"
# command: "~/.hermes/agent-hooks/block-rm-rf.sh"
# timeout: 10
# post_tool_call:
# - matcher: "write_file|patch"
# command: "~/.hermes/agent-hooks/auto-format.sh"
# pre_llm_call:
# - command: "~/.hermes/agent-hooks/inject-cwd-context.sh"
# subagent_stop:
# - command: "~/.hermes/agent-hooks/log-orchestration.sh"
#
# hooks_auto_accept: false
+100 -391
View File
@@ -19,14 +19,12 @@ import shutil
import sys
import json
import re
import concurrent.futures
import base64
import atexit
import tempfile
import time
import uuid
import textwrap
from urllib.parse import unquote, urlparse
from contextlib import contextmanager
from pathlib import Path
from datetime import datetime
@@ -67,7 +65,6 @@ from agent.usage_pricing import (
format_duration_compact,
format_token_count_compact,
)
from agent.account_usage import fetch_account_usage, render_account_usage_lines
from hermes_cli.banner import _format_context_length, format_banner_version_label
_COMMAND_SPINNER_FRAMES = ("", "", "", "", "", "", "", "", "", "")
@@ -77,7 +74,6 @@ _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧
# User-managed env files should override stale shell exports on restart.
from hermes_constants import get_hermes_home, display_hermes_home
from hermes_cli.env_loader import load_hermes_dotenv
from utils import base_url_host_matches
_hermes_home = get_hermes_home()
_project_env = Path(__file__).parent / '.env'
@@ -108,11 +104,6 @@ def _strip_reasoning_tags(text: str) -> str:
``<thought>`` (Gemma 4). Must stay in sync with
``run_agent.py::_strip_think_blocks`` and the stream consumer's
``_OPEN_THINK_TAGS`` / ``_CLOSE_THINK_TAGS`` tuples.
Also strips tool-call XML blocks some open models leak into visible
content (``<tool_call>``, ``<function_calls>``, Gemma-style
``<function name=""></function>``). Ported from
openclaw/openclaw#67318.
"""
cleaned = text
for tag in _REASONING_TAGS:
@@ -137,31 +128,6 @@ def _strip_reasoning_tags(text: str) -> str:
cleaned,
flags=re.IGNORECASE,
)
# Tool-call XML blocks (openclaw/openclaw#67318).
for tc_tag in ("tool_call", "tool_calls", "tool_result",
"function_call", "function_calls"):
cleaned = re.sub(
rf"<{tc_tag}\b[^>]*>.*?</{tc_tag}>\s*",
"",
cleaned,
flags=re.DOTALL | re.IGNORECASE,
)
# <function name="..."> — boundary + attribute gated to avoid prose FPs.
cleaned = re.sub(
r'(?:(?<=^)|(?<=[\n\r.!?:]))[ \t]*'
r'<function\b[^>]*\bname\s*=[^>]*>'
r'(?:(?:(?!</function>).)*)</function>\s*',
'',
cleaned,
flags=re.DOTALL | re.IGNORECASE,
)
# Stray tool-call close tags.
cleaned = re.sub(
r'</(?:tool_call|tool_calls|tool_result|function_call|function_calls|function)>\s*',
'',
cleaned,
flags=re.IGNORECASE,
)
return cleaned.strip()
@@ -305,23 +271,13 @@ def load_cli_config() -> Dict[str, Any]:
Environment variables take precedence over config file values.
Returns default values if no config file exists.
If HERMES_IGNORE_USER_CONFIG=1 is set (via ``hermes chat --ignore-user-config``),
the user config at ``~/.hermes/config.yaml`` is skipped entirely and only the
built-in defaults plus the project-level ``cli-config.yaml`` (if any) are used.
Credentials in ``.env`` are still loaded this flag only suppresses
behavioral/config settings.
"""
# Check user config first ({HERMES_HOME}/config.yaml)
user_config_path = _hermes_home / 'config.yaml'
project_config_path = Path(__file__).parent / 'cli-config.yaml'
# --ignore-user-config: force-skip the user config.yaml (still honor project
# config as a fallback so defaults stay sensible).
ignore_user_config = os.environ.get("HERMES_IGNORE_USER_CONFIG") == "1"
# Use user config if it exists, otherwise project config
if user_config_path.exists() and not ignore_user_config:
if user_config_path.exists():
config_path = user_config_path
else:
config_path = project_config_path
@@ -411,6 +367,7 @@ def load_cli_config() -> Dict[str, Any]:
},
"delegation": {
"max_iterations": 45, # Max tool-calling turns per child agent
"default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents
"model": "", # Subagent model override (empty = inherit parent model)
"provider": "", # Subagent provider override (empty = inherit parent provider)
"base_url": "", # Direct OpenAI-compatible endpoint for subagents
@@ -571,6 +528,7 @@ def load_cli_config() -> Dict[str, Any]:
if _file_has_terminal_config or env_var not in os.environ:
val = terminal_config[config_key]
if isinstance(val, list):
import json
os.environ[env_var] = json.dumps(val)
else:
os.environ[env_var] = str(val)
@@ -954,32 +912,6 @@ def _cleanup_worktree(info: Dict[str, str] = None) -> None:
print(f"\033[32m✓ Worktree cleaned up: {wt_path}\033[0m")
def _run_state_db_auto_maintenance(session_db) -> None:
"""Call ``SessionDB.maybe_auto_prune_and_vacuum`` using current config.
Reads the ``sessions:`` section from config.yaml via
:func:`hermes_cli.config.load_config` (the authoritative loader that
deep-merges DEFAULT_CONFIG, so unmigrated configs still get default
values). Honours ``auto_prune`` / ``retention_days`` /
``vacuum_after_prune`` / ``min_interval_hours``, and delegates to the
DB. Never raises maintenance must never block interactive startup.
"""
if session_db is None:
return
try:
from hermes_cli.config import load_config as _load_full_config
cfg = (_load_full_config().get("sessions") or {})
if not cfg.get("auto_prune", False):
return
session_db.maybe_auto_prune_and_vacuum(
retention_days=int(cfg.get("retention_days", 90)),
min_interval_hours=int(cfg.get("min_interval_hours", 24)),
vacuum=bool(cfg.get("vacuum_after_prune", True)),
)
except Exception as exc:
logger.debug("state.db auto-maintenance skipped: %s", exc)
def _prune_stale_worktrees(repo_root: str, max_age_hours: int = 24) -> None:
"""Remove stale worktrees and orphaned branches on startup.
@@ -1211,6 +1143,8 @@ def _rich_text_from_ansi(text: str) -> _RichText:
def _strip_markdown_syntax(text: str) -> str:
"""Best-effort markdown marker removal for plain-text display."""
import re
plain = _rich_text_from_ansi(text or "").plain
plain = re.sub(r"^\s{0,3}(?:[-*_]\s*){3,}$", "", plain, flags=re.MULTILINE)
plain = re.sub(r"^\s{0,3}#{1,6}\s+", "", plain, flags=re.MULTILINE)
@@ -1220,11 +1154,11 @@ def _strip_markdown_syntax(text: str) -> str:
plain = re.sub(r"!\[([^\]]*)\]\([^\)]*\)", r"\1", plain)
plain = re.sub(r"\[([^\]]+)\]\([^\)]*\)", r"\1", plain)
plain = re.sub(r"\*\*\*([^*]+)\*\*\*", r"\1", plain)
plain = re.sub(r"(?<!\w)___([^_]+)___(?!\w)", r"\1", plain)
plain = re.sub(r"___([^_]+)___", r"\1", plain)
plain = re.sub(r"\*\*([^*]+)\*\*", r"\1", plain)
plain = re.sub(r"(?<!\w)__([^_]+)__(?!\w)", r"\1", plain)
plain = re.sub(r"__([^_]+)__", r"\1", plain)
plain = re.sub(r"\*([^*]+)\*", r"\1", plain)
plain = re.sub(r"(?<!\w)_([^_]+)_(?!\w)", r"\1", plain)
plain = re.sub(r"_([^_]+)_", r"\1", plain)
plain = re.sub(r"~~([^~]+)~~", r"\1", plain)
plain = re.sub(r"\n{3,}", "\n\n", plain)
return plain.strip("\n")
@@ -1337,21 +1271,10 @@ def _resolve_attachment_path(raw_path: str) -> Path | None:
if (token.startswith('"') and token.endswith('"')) or (token.startswith("'") and token.endswith("'")):
token = token[1:-1].strip()
token = token.replace('\\ ', ' ')
if not token:
return None
expanded = token
if token.startswith("file://"):
try:
parsed = urlparse(token)
if parsed.scheme == "file":
expanded = unquote(parsed.path or "")
if parsed.netloc and os.name == "nt":
expanded = f"//{parsed.netloc}{expanded}"
except Exception:
expanded = token
expanded = os.path.expandvars(os.path.expanduser(expanded))
expanded = os.path.expandvars(os.path.expanduser(token))
if os.name != "nt":
normalized = expanded.replace("\\", "/")
if len(normalized) >= 3 and normalized[1] == ":" and normalized[2] == "/" and normalized[0].isalpha():
@@ -1438,7 +1361,6 @@ def _detect_file_drop(user_input: str) -> "dict | None":
or stripped.startswith("~")
or stripped.startswith("./")
or stripped.startswith("../")
or stripped.startswith("file://")
or (len(stripped) >= 3 and stripped[1] == ":" and stripped[2] in ("\\", "/") and stripped[0].isalpha())
or stripped.startswith('"/')
or stripped.startswith('"~')
@@ -1449,25 +1371,8 @@ def _detect_file_drop(user_input: str) -> "dict | None":
if not starts_like_path:
return None
direct_path = _resolve_attachment_path(stripped)
if direct_path is not None:
return {
"path": direct_path,
"is_image": direct_path.suffix.lower() in _IMAGE_EXTENSIONS,
"remainder": "",
}
first_token, remainder = _split_path_input(stripped)
drop_path = _resolve_attachment_path(first_token)
if drop_path is None and " " in stripped and stripped[0] not in {"'", '"'}:
space_positions = [idx for idx, ch in enumerate(stripped) if ch == " "]
for pos in reversed(space_positions):
candidate = stripped[:pos].rstrip()
resolved = _resolve_attachment_path(candidate)
if resolved is not None:
drop_path = resolved
remainder = stripped[pos + 1 :].strip()
break
if drop_path is None:
return None
@@ -1812,7 +1717,6 @@ class HermesCLI:
resume: str = None,
checkpoints: bool = False,
pass_session_id: bool = False,
ignore_rules: bool = False,
):
"""
Initialize the Hermes CLI.
@@ -1932,7 +1836,7 @@ class HermesCLI:
# Match key to resolved base_url: OpenRouter URL → prefer OPENROUTER_API_KEY,
# custom endpoint → prefer OPENAI_API_KEY (issue #560).
# Note: _ensure_runtime_credentials() re-resolves this before first use.
if self.base_url and base_url_host_matches(self.base_url, "openrouter.ai"):
if self.base_url and "openrouter.ai" in self.base_url:
self.api_key = api_key or os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY")
else:
self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY")
@@ -1966,11 +1870,6 @@ class HermesCLI:
self.checkpoints_enabled = checkpoints or cp_cfg.get("enabled", False)
self.checkpoint_max_snapshots = cp_cfg.get("max_snapshots", 50)
self.pass_session_id = pass_session_id
# --ignore-rules: honor either the constructor flag or the env var set
# by `hermes chat --ignore-rules` in hermes_cli/main.py. When true we
# pass skip_context_files=True and skip_memory=True to AIAgent so
# AGENTS.md/SOUL.md/.cursorrules and persistent memory are not loaded.
self.ignore_rules = ignore_rules or os.environ.get("HERMES_IGNORE_RULES") == "1"
# Ephemeral system prompt: env var takes precedence, then config
self.system_prompt = (
@@ -2033,13 +1932,7 @@ class HermesCLI:
self._session_db = SessionDB()
except Exception as e:
logger.warning("Failed to initialize SessionDB — session will NOT be indexed for search: %s", e)
# Opportunistic state.db maintenance — runs at most once per
# min_interval_hours, tracked via state_meta in state.db itself so
# it's shared across all Hermes processes for this HERMES_HOME.
# Never blocks startup on failure.
_run_state_db_auto_maintenance(self._session_db)
# Deferred title: stored in memory until the session is created in the DB
self._pending_title: Optional[str] = None
@@ -2108,7 +2001,8 @@ class HermesCLI:
def _invalidate(self, min_interval: float = 0.25) -> None:
"""Throttled UI repaint — prevents terminal blinking on slow/SSH connections."""
now = time.monotonic()
import time as _time
now = _time.monotonic()
if hasattr(self, "_app") and self._app and (now - self._last_invalidate) >= min_interval:
self._last_invalidate = now
self._app.invalidate()
@@ -2326,7 +2220,8 @@ class HermesCLI:
return ""
t0 = getattr(self, "_tool_start_time", 0) or 0
if t0 > 0:
elapsed = time.monotonic() - t0
import time as _time
elapsed = _time.monotonic() - t0
if elapsed >= 60:
_m, _s = int(elapsed // 60), int(elapsed % 60)
elapsed_str = f"{_m}m {_s}s"
@@ -2581,6 +2476,9 @@ class HermesCLI:
def _emit_reasoning_preview(self, reasoning_text: str) -> None:
"""Render a buffered reasoning preview as a single [thinking] block."""
import re
import textwrap
preview_text = reasoning_text.strip()
if not preview_text:
return
@@ -2699,7 +2597,9 @@ class HermesCLI:
"""Expand [Pasted text #N -> file] placeholders into file contents."""
if not isinstance(text, str) or "[Pasted text #" not in text:
return text or ""
paste_ref_re = re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]')
import re as _re
paste_ref_re = _re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]')
def _expand_ref(match):
path = Path(match.group(1))
@@ -3022,7 +2922,9 @@ class HermesCLI:
def _command_spinner_frame(self) -> str:
"""Return the current spinner frame for slow slash commands."""
frame_idx = int(time.monotonic() * 10) % len(_COMMAND_SPINNER_FRAMES)
import time as _time
frame_idx = int(_time.monotonic() * 10) % len(_COMMAND_SPINNER_FRAMES)
return _COMMAND_SPINNER_FRAMES[frame_idx]
@contextmanager
@@ -3328,8 +3230,6 @@ class HermesCLI:
checkpoints_enabled=self.checkpoints_enabled,
checkpoint_max_snapshots=self.checkpoint_max_snapshots,
pass_session_id=self.pass_session_id,
skip_context_files=self.ignore_rules,
skip_memory=self.ignore_rules,
tool_progress_callback=self._on_tool_progress,
tool_start_callback=self._on_tool_start if self._inline_diffs_enabled else None,
tool_complete_callback=self._on_tool_complete if self._inline_diffs_enabled else None,
@@ -4035,6 +3935,7 @@ class HermesCLI:
image later with ``vision_analyze`` if needed.
"""
import asyncio as _asyncio
import json as _json
from tools.vision_tools import vision_analyze_tool
analysis_prompt = (
@@ -4054,7 +3955,7 @@ class HermesCLI:
result_json = _asyncio.run(
vision_analyze_tool(image_url=str(img_path), user_prompt=analysis_prompt)
)
result = json.loads(result_json)
result = _json.loads(result_json)
if result.get("success"):
description = result.get("analysis", "")
enriched_parts.append(
@@ -4309,37 +4210,8 @@ class HermesCLI:
"""
import shlex
from argparse import Namespace
from contextlib import redirect_stdout
from io import StringIO
from hermes_cli.tools_config import tools_disable_enable_command
def _run_capture(ns: Namespace) -> None:
"""Run tools_disable_enable_command, routing its ANSI-colored
print() output through _cprint when inside the interactive TUI
so escapes aren't mangled by patch_stdout's StdoutProxy into
garbled '?[32m...?[0m' text.
Outside the TUI (standalone mode, tests), call straight through
so real stdout / pytest capture works as expected.
"""
# Standalone/tests, run as usual
if getattr(self, "_app", None) is None:
tools_disable_enable_command(ns)
return
# Buffer reports isatty()=True so color() in hermes_cli/colors.py
# still emits ANSI escapes. StringIO.isatty() is False, which
# would otherwise strip all colors before we re-render them.
class _TTYBuf(StringIO):
def isatty(self) -> bool:
return True
buf = _TTYBuf()
with redirect_stdout(buf):
tools_disable_enable_command(ns)
for line in buf.getvalue().splitlines():
_cprint(line)
try:
parts = shlex.split(cmd)
except ValueError:
@@ -4351,7 +4223,8 @@ class HermesCLI:
return
if subcommand == "list":
_run_capture(Namespace(tools_action="list", platform="cli"))
tools_disable_enable_command(
Namespace(tools_action="list", platform="cli"))
return
names = parts[2:]
@@ -4368,7 +4241,8 @@ class HermesCLI:
label = ", ".join(names)
_cprint(f"{_ACCENT}{verb} {label}...{_RST}")
_run_capture(Namespace(tools_action=subcommand, names=names, platform="cli"))
tools_disable_enable_command(
Namespace(tools_action=subcommand, names=names, platform="cli"))
# Reset session so the new tool config is picked up from a clean state
from hermes_cli.tools_config import _get_platform_tools
@@ -5095,7 +4969,7 @@ class HermesCLI:
pass
cache_enabled = (
(base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower())
or result.api_mode == "anthropic_messages"
)
if cache_enabled:
@@ -5323,7 +5197,7 @@ class HermesCLI:
# Cache notice
cache_enabled = (
(base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower())
or result.api_mode == "anthropic_messages"
)
if cache_enabled:
@@ -5354,30 +5228,6 @@ class HermesCLI:
except Exception:
return False
def _should_handle_steer_command_inline(self, text: str, has_images: bool = False) -> bool:
"""Return True when /steer should be dispatched immediately while the agent is running.
/steer MUST bypass the normal _pending_input process_loop path when
the agent is active, because process_loop is blocked inside
self.chat() for the duration of the run. By the time the queued
command is pulled from _pending_input, _agent_running has already
flipped back to False, and process_command() takes the idle
fallback delivering the steer as a next-turn message instead of
injecting it mid-run. Dispatching inline on the UI thread calls
agent.steer() directly, which is thread-safe (uses _pending_steer_lock).
"""
if not text or has_images or not _looks_like_slash_command(text):
return False
if not getattr(self, "_agent_running", False):
return False
try:
from hermes_cli.commands import resolve_command
base = text.split(None, 1)[0].lower().lstrip('/')
cmd = resolve_command(base)
return bool(cmd and cmd.name == "steer")
except Exception:
return False
def _show_model_and_providers(self):
"""Show current model + provider and list all authenticated providers.
@@ -6380,7 +6230,8 @@ class HermesCLI:
# with the output (fixes #2718).
if self._app:
self._app.invalidate()
time.sleep(0.05) # brief pause for refresh
import time as _tmod
_tmod.sleep(0.05) # brief pause for refresh
print()
ChatConsole().print(f"[{_accent_hex()}]{'' * 40}[/]")
_cprint(f" ✅ Background task #{task_num} complete")
@@ -6420,7 +6271,8 @@ class HermesCLI:
# Same TUI refresh pattern as success path (#2718)
if self._app:
self._app.invalidate()
time.sleep(0.05)
import time as _tmod
_tmod.sleep(0.05)
print()
_cprint(f" ❌ Background task #{task_num} failed: {e}")
finally:
@@ -6640,6 +6492,7 @@ class HermesCLI:
_launched = self._try_launch_chrome_debug(_port, _plat.system())
if _launched:
# Wait for the port to come up
import time as _time
for _wait in range(10):
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -6649,7 +6502,7 @@ class HermesCLI:
_already_open = True
break
except (OSError, socket.timeout):
time.sleep(0.5)
_time.sleep(0.5)
if _already_open:
print(f" ✓ Chrome launched and listening on port {_port}")
else:
@@ -7129,27 +6982,6 @@ class HermesCLI:
if cost_result.status == "unknown":
print(f" Note: Pricing unknown for {agent.model}")
# Account limits -- fetched off-thread with a hard timeout so slow
# provider APIs don't hang the prompt.
provider = getattr(agent, "provider", None) or getattr(self, "provider", None)
base_url = getattr(agent, "base_url", None) or getattr(self, "base_url", None)
api_key = getattr(agent, "api_key", None) or getattr(self, "api_key", None)
account_snapshot = None
if provider:
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as _pool:
try:
account_snapshot = _pool.submit(
fetch_account_usage, provider,
base_url=base_url, api_key=api_key,
).result(timeout=10.0)
except (concurrent.futures.TimeoutError, Exception):
account_snapshot = None
account_lines = [f" {line}" for line in render_account_usage_lines(account_snapshot)]
if account_lines:
print()
for line in account_lines:
print(line)
if self.verbose:
logging.getLogger().setLevel(logging.DEBUG)
for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):
@@ -7200,6 +7032,7 @@ class HermesCLI:
known state. When a change is detected, triggers _reload_mcp() and
informs the user so they know the tool list has been refreshed.
"""
import time
import yaml as _yaml
CONFIG_WATCH_INTERVAL = 5.0 # seconds between config.yaml stat() calls
@@ -7291,6 +7124,7 @@ class HermesCLI:
# Refresh the agent's tool list so the model can call new tools
if self.agent is not None:
from model_tools import get_tool_definitions
self.agent.tools = get_tool_definitions(
enabled_toolsets=self.agent.enabled_toolsets
if hasattr(self.agent, "enabled_toolsets") else None,
@@ -7373,6 +7207,7 @@ class HermesCLI:
full history of tool calls (not just the current one in the spinner).
"""
if event_type == "tool.completed":
import time as _time
self._tool_start_time = 0.0
# Print stacked scrollback line for "all" / "new" modes
if function_name and self.tool_progress_mode in ("all", "new"):
@@ -7401,6 +7236,7 @@ class HermesCLI:
if event_type != "tool.started":
return
if function_name and not function_name.startswith("_"):
import time as _time
from agent.display import get_tool_emoji
emoji = get_tool_emoji(function_name)
label = preview or function_name
@@ -7409,7 +7245,7 @@ class HermesCLI:
if _pl > 0 and len(label) > _pl:
label = label[:_pl - 3] + "..."
self._spinner_text = f"{emoji} {label}"
self._tool_start_time = time.monotonic()
self._tool_start_time = _time.monotonic()
# Store args for stacked scrollback line on completion
self._pending_tool_info.setdefault(function_name, []).append(
function_args if function_args is not None else {}
@@ -7526,12 +7362,11 @@ class HermesCLI:
self._voice_stop_and_transcribe()
# Audio cue: single beep BEFORE starting stream (avoid CoreAudio conflict)
if self._voice_beeps_enabled():
try:
from tools.voice_mode import play_beep
play_beep(frequency=880, count=1)
except Exception:
pass
try:
from tools.voice_mode import play_beep
play_beep(frequency=880, count=1)
except Exception:
pass
try:
self._voice_recorder.start(on_silence_stop=_on_silence)
@@ -7579,12 +7414,11 @@ class HermesCLI:
wav_path = self._voice_recorder.stop()
# Audio cue: double beep after stream stopped (no CoreAudio conflict)
if self._voice_beeps_enabled():
try:
from tools.voice_mode import play_beep
play_beep(frequency=660, count=2)
except Exception:
pass
try:
from tools.voice_mode import play_beep
play_beep(frequency=660, count=2)
except Exception:
pass
if wav_path is None:
_cprint(f"{_DIM}No speech detected.{_RST}")
@@ -7667,6 +7501,7 @@ class HermesCLI:
try:
from tools.tts_tool import text_to_speech_tool
from tools.voice_mode import play_audio_file
import re
# Strip markdown and non-speech content for cleaner TTS
tts_text = text[:4000] if len(text) > 4000 else text
@@ -7734,17 +7569,6 @@ class HermesCLI:
_cprint(f"Unknown voice subcommand: {subcommand}")
_cprint("Usage: /voice [on|off|tts|status]")
def _voice_beeps_enabled(self) -> bool:
"""Return whether CLI voice mode should play record start/stop beeps."""
try:
from hermes_cli.config import load_config
voice_cfg = load_config().get("voice", {})
if isinstance(voice_cfg, dict):
return bool(voice_cfg.get("beep_enabled", True))
except Exception:
pass
return True
def _enable_voice_mode(self):
"""Enable voice mode after checking requirements."""
if self._voice_mode:
@@ -8054,9 +7878,7 @@ class HermesCLI:
return
selected = state.get("selected", 0)
choices = state.get("choices")
if not isinstance(choices, list):
choices = []
choices = state.get("choices") or []
if not (0 <= selected < len(choices)):
return
@@ -8148,18 +7970,8 @@ class HermesCLI:
choice_wrapped: list[tuple[int, str]] = []
for i, choice in enumerate(choices):
label = choice_labels.get(choice, choice)
# Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
if i < 9:
num_prefix = str(i + 1)
elif i == 9:
num_prefix = '0'
else:
num_prefix = ' ' # No number for items beyond 10th
if i == selected:
prefix = f' {num_prefix}. '
else:
prefix = f' {num_prefix}. '
for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "):
prefix = ' ' if i == selected else ' '
for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "):
choice_wrapped.append((i, wrapped))
# Budget vertical space so HSplit never clips the command or choices.
@@ -8450,17 +8262,6 @@ class HermesCLI:
def run_agent():
nonlocal result
# Set callbacks inside the agent thread so thread-local storage
# in terminal_tool is populated for this thread. The main thread
# registration (run() line ~9046) is invisible here because
# _callback_tls is threading.local(). Matches the pattern used
# by acp_adapter/server.py for ACP sessions.
set_sudo_password_callback(self._sudo_password_callback)
set_approval_callback(self._approval_callback)
try:
set_secret_capture_callback(self._secret_capture_callback)
except Exception:
pass
agent_message = _voice_prefix + message if _voice_prefix else message
# Prepend pending model switch note so the model knows about the switch
_msn = getattr(self, '_pending_model_switch_note', None)
@@ -8486,15 +8287,6 @@ class HermesCLI:
"failed": True,
"error": _summary,
}
finally:
# Clear thread-local callbacks so a reused thread doesn't
# hold stale references to a disposed CLI instance.
try:
set_sudo_password_callback(None)
set_approval_callback(None)
set_secret_capture_callback(None)
except Exception:
pass
# Start agent in background thread (daemon so it cannot keep the
# process alive when the user closes the terminal tab — SIGHUP
@@ -8532,7 +8324,8 @@ class HermesCLI:
try:
_dbg = _hermes_home / "interrupt_debug.log"
with open(_dbg, "a") as _f:
_f.write(f"{time.strftime('%H:%M:%S')} interrupt fired: msg={str(interrupt_msg)[:60]!r}, "
import time as _t
_f.write(f"{_t.strftime('%H:%M:%S')} interrupt fired: msg={str(interrupt_msg)[:60]!r}, "
f"children={len(self.agent._active_children)}, "
f"parent._interrupt={self.agent._interrupt_requested}\n")
for _ci, _ch in enumerate(self.agent._active_children):
@@ -8608,8 +8401,9 @@ class HermesCLI:
# buffer so tool/status lines render ABOVE our response box.
# The flush pushes data into the renderer queue; the short
# sleep lets the renderer actually paint it before we draw.
import time as _time
sys.stdout.flush()
time.sleep(0.15)
_time.sleep(0.15)
# Update history with full conversation
self.conversation_history = result.get("messages", self.conversation_history) if result else self.conversation_history
@@ -9246,17 +9040,6 @@ class HermesCLI:
event.app.current_buffer.reset(append_to_history=True)
return
# Handle /steer while the agent is running immediately on the
# UI thread. Queuing through _pending_input would deadlock the
# steer until after the agent loop finishes (process_loop is
# blocked inside self.chat()), which turns /steer into a
# post-run next-turn message — defeating mid-run injection.
# agent.steer() is thread-safe (holds _pending_steer_lock).
if self._should_handle_steer_command_inline(text, has_images=has_images):
self.process_command(text)
event.app.current_buffer.reset(append_to_history=True)
return
# Snapshot and clear attached images
images = list(self._attached_images)
self._attached_images.clear()
@@ -9275,7 +9058,8 @@ class HermesCLI:
try:
_dbg = _hermes_home / "interrupt_debug.log"
with open(_dbg, "a") as _f:
_f.write(f"{time.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, "
import time as _t
_f.write(f"{_t.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, "
f"agent_running={self._agent_running}\n")
except Exception:
pass
@@ -9354,29 +9138,6 @@ class HermesCLI:
self._clarify_state["selected"] = min(max_idx, self._clarify_state["selected"] + 1)
event.app.invalidate()
# Number keys for quick clarify selection (1-9, 0 for 10th item)
def _make_clarify_number_handler(idx):
def handler(event):
if self._clarify_state and not self._clarify_freetext:
choices = self._clarify_state.get("choices") or []
# Map index to choice (treating "Other" as the last option)
if idx < len(choices):
# Select a numbered choice
self._clarify_state["response_queue"].put(choices[idx])
self._clarify_state = None
self._clarify_freetext = False
event.app.invalidate()
elif idx == len(choices):
# Select "Other" option
self._clarify_freetext = True
event.app.invalidate()
return handler
for _num in range(10):
# 1-9 select items 0-8, 0 selects item 9 (10thitem)
_idx = 9 if _num == 0 else _num - 1
kb.add(str(_num), filter=Condition(lambda: bool(self._clarify_state) and not self._clarify_freetext))(_make_clarify_number_handler(_idx))
# --- Dangerous command approval: arrow-key navigation ---
@kb.add('up', filter=Condition(lambda: bool(self._approval_state)))
@@ -9418,20 +9179,6 @@ class HermesCLI:
event.app.current_buffer.reset()
event.app.invalidate()
# Number keys for quick approval selection (1-9, 0 for 10th item)
def _make_approval_number_handler(idx):
def handler(event):
if self._approval_state and idx < len(self._approval_state["choices"]):
self._approval_state["selected"] = idx
self._handle_approval_selection()
event.app.invalidate()
return handler
for _num in range(10):
# 1-9 select items 0-8, 0 selects item 9 (10th item)
_idx = 9 if _num == 0 else _num - 1
kb.add(str(_num), filter=Condition(lambda: bool(self._approval_state)))(_make_approval_number_handler(_idx))
# --- History navigation: up/down browse history in normal input mode ---
# The TextArea is multiline, so by default up/down only move the cursor.
# Buffer.auto_up/auto_down handle both: cursor movement when multi-line,
@@ -9460,7 +9207,8 @@ class HermesCLI:
2. Interrupt the running agent (first press)
3. Force exit (second press within 2s, or when idle)
"""
now = time.time()
import time as _time
now = _time.time()
# Cancel active voice recording.
# Run cancel() in a background thread to prevent blocking the
@@ -9568,11 +9316,12 @@ class HermesCLI:
@kb.add('c-z')
def handle_ctrl_z(event):
"""Handle Ctrl+Z - suspend process to background (Unix only)."""
import sys
if sys.platform == 'win32':
_cprint(f"\n{_DIM}Suspend (Ctrl+Z) is not supported on Windows.{_RST}")
event.app.invalidate()
return
import signal as _sig
import os, signal as _sig
from prompt_toolkit.application import run_in_terminal
from hermes_cli.skin_engine import get_active_skin
agent_name = get_active_skin().get_branding("agent_name", "Hermes Agent")
@@ -9886,29 +9635,31 @@ class HermesCLI:
# extra instructions (sudo countdown, approval navigation, clarify).
# The agent-running interrupt hint is now an inline placeholder above.
def get_hint_text():
import time as _time
if cli_ref._sudo_state:
remaining = max(0, int(cli_ref._sudo_deadline - time.monotonic()))
remaining = max(0, int(cli_ref._sudo_deadline - _time.monotonic()))
return [
('class:hint', ' password hidden · Enter to skip'),
('class:clarify-countdown', f' ({remaining}s)'),
]
if cli_ref._secret_state:
remaining = max(0, int(cli_ref._secret_deadline - time.monotonic()))
remaining = max(0, int(cli_ref._secret_deadline - _time.monotonic()))
return [
('class:hint', ' secret hidden · Enter to skip'),
('class:clarify-countdown', f' ({remaining}s)'),
]
if cli_ref._approval_state:
remaining = max(0, int(cli_ref._approval_deadline - time.monotonic()))
remaining = max(0, int(cli_ref._approval_deadline - _time.monotonic()))
return [
('class:hint', ' ↑/↓ to select, Enter to confirm'),
('class:clarify-countdown', f' ({remaining}s)'),
]
if cli_ref._clarify_state:
remaining = max(0, int(cli_ref._clarify_deadline - time.monotonic()))
remaining = max(0, int(cli_ref._clarify_deadline - _time.monotonic()))
countdown = f' ({remaining}s)' if cli_ref._clarify_deadline else ''
if cli_ref._clarify_freetext:
return [
@@ -10000,32 +9751,14 @@ class HermesCLI:
selected = state.get("selected", 0)
preview_lines = _wrap_panel_text(question, 60)
for i, choice in enumerate(choices):
# Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
if i < 9:
num_prefix = str(i + 1)
elif i == 9:
num_prefix = '0'
else:
num_prefix = ' '
if i == selected and not cli_ref._clarify_freetext:
prefix = f" {num_prefix}. "
else:
prefix = f" {num_prefix}. "
preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" "))
# "Other" option in preview
other_num = len(choices) + 1
if other_num < 10:
other_num_prefix = str(other_num)
elif other_num == 10:
other_num_prefix = '0'
else:
other_num_prefix = ' '
prefix = " " if i == selected and not cli_ref._clarify_freetext else " "
preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" "))
other_label = (
f" {other_num_prefix}. Other (type below)" if cli_ref._clarify_freetext
else f" {other_num_prefix}. Other (type your answer)" if selected == len(choices)
else f" {other_num_prefix}. Other (type your answer)"
" Other (type below)" if cli_ref._clarify_freetext
else " Other (type your answer)" if selected == len(choices)
else " Other (type your answer)"
)
preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" "))
preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" "))
box_width = _panel_box_width("Hermes needs your input", preview_lines)
inner_text_width = max(8, box_width - 2)
@@ -10033,35 +9766,18 @@ class HermesCLI:
choice_wrapped: list[tuple[int, str]] = []
if choices:
for i, choice in enumerate(choices):
# Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
if i < 9:
num_prefix = str(i + 1)
elif i == 9:
num_prefix = '0'
else:
num_prefix = ' '
if i == selected and not cli_ref._clarify_freetext:
prefix = f' {num_prefix}. '
else:
prefix = f' {num_prefix}. '
for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "):
prefix = ' ' if i == selected and not cli_ref._clarify_freetext else ' '
for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "):
choice_wrapped.append((i, wrapped))
# Trailing Other row(s)
other_idx = len(choices)
other_num = other_idx + 1
if other_num < 10:
other_num_prefix = str(other_num)
elif other_num == 10:
other_num_prefix = '0'
else:
other_num_prefix = ' '
if selected == other_idx and not cli_ref._clarify_freetext:
other_label_mand = f' {other_num_prefix}. Other (type your answer)'
other_label_mand = ' Other (type your answer)'
elif cli_ref._clarify_freetext:
other_label_mand = f' {other_num_prefix}. Other (type below)'
other_label_mand = ' Other (type below)'
else:
other_label_mand = f' {other_num_prefix}. Other (type your answer)'
other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ")
other_label_mand = ' Other (type your answer)'
other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ")
elif cli_ref._clarify_freetext:
# Freetext-only mode: the guidance line takes the place of choices.
other_wrapped = _wrap_panel_text(
@@ -10126,15 +9842,6 @@ class HermesCLI:
# "Other" option (trailing row(s), only shown when choices exist)
other_idx = len(choices)
# Calculate number prefix for "Other" option
other_num = other_idx + 1
if other_num < 10:
other_num_prefix = str(other_num)
elif other_num == 10:
other_num_prefix = '0'
else:
other_num_prefix = ' '
if selected == other_idx and not cli_ref._clarify_freetext:
other_style = 'class:clarify-selected'
elif cli_ref._clarify_freetext:
@@ -10242,8 +9949,7 @@ class HermesCLI:
if stage == "provider":
title = "⚙ Model Picker — Select Provider"
choices = []
_providers = state.get("providers")
for p in _providers if isinstance(_providers, list) else []:
for p in state.get("providers") or []:
count = p.get("total_models", len(p.get("models", [])))
label = f"{p['name']} ({count} model{'s' if count != 1 else ''})"
if p.get("is_current"):
@@ -10500,20 +10206,22 @@ class HermesCLI:
app._on_resize = _resize_clear_ghosts
def spinner_loop():
import time as _time
last_idle_refresh = 0.0
while not self._should_exit:
if not self._app:
time.sleep(0.1)
_time.sleep(0.1)
continue
if self._command_running:
self._invalidate(min_interval=0.1)
time.sleep(0.1)
_time.sleep(0.1)
else:
now = time.monotonic()
now = _time.monotonic()
if now - last_idle_refresh >= 1.0:
last_idle_refresh = now
self._invalidate(min_interval=1.0)
time.sleep(0.2)
_time.sleep(0.2)
spinner_thread = threading.Thread(target=spinner_loop, daemon=True)
spinner_thread.start()
@@ -10582,7 +10290,8 @@ class HermesCLI:
continue
# Expand paste references back to full content
_paste_ref_re = re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]')
import re as _re
_paste_ref_re = _re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]')
paste_refs = list(_paste_ref_re.finditer(user_input)) if isinstance(user_input, str) else []
if paste_refs:
user_input = self._expand_paste_references(user_input)
@@ -10674,12 +10383,13 @@ class HermesCLI:
try:
if getattr(self, "agent", None) and getattr(self, "_agent_running", False):
self.agent.interrupt(f"received signal {signum}")
import time as _t
try:
_grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
except (TypeError, ValueError):
_grace = 1.5
if _grace > 0:
time.sleep(_grace)
_t.sleep(_grace)
except Exception:
pass # never block signal handling
raise KeyboardInterrupt()
@@ -10712,7 +10422,8 @@ class HermesCLI:
# uv-managed Python, fd 0 can be invalid or unregisterable with the
# asyncio selector, causing "KeyError: '0 is not registered'" (#6393).
try:
os.fstat(0)
import os as _os
_os.fstat(0)
except OSError:
print(
"Error: stdin (fd 0) is not available.\n"
@@ -10834,8 +10545,6 @@ def main(
w: bool = False,
checkpoints: bool = False,
pass_session_id: bool = False,
ignore_user_config: bool = False,
ignore_rules: bool = False,
):
"""
Hermes Agent CLI - Interactive AI Assistant
@@ -10945,7 +10654,6 @@ def main(
resume=resume,
checkpoints=checkpoints,
pass_session_id=pass_session_id,
ignore_rules=ignore_rules,
)
if parsed_skills:
@@ -11008,12 +10716,13 @@ def main(
_agent = getattr(cli, "agent", None)
if _agent is not None:
_agent.interrupt(f"received signal {signum}")
import time as _t
try:
_grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
except (TypeError, ValueError):
_grace = 1.5
if _grace > 0:
time.sleep(_grace)
_t.sleep(_grace)
except Exception:
pass # never block signal handling
raise KeyboardInterrupt()
+46 -61
View File
@@ -9,7 +9,6 @@ import copy
import json
import logging
import tempfile
import threading
import os
import re
import uuid
@@ -35,11 +34,6 @@ except ImportError:
HERMES_DIR = get_hermes_home().resolve()
CRON_DIR = HERMES_DIR / "cron"
JOBS_FILE = CRON_DIR / "jobs.json"
# In-process lock protecting load_jobs→modify→save_jobs cycles.
# Required when tick() runs jobs in parallel threads — without this,
# concurrent mark_job_run / advance_next_run calls can clobber each other.
_jobs_file_lock = threading.Lock()
OUTPUT_DIR = CRON_DIR / "output"
ONESHOT_GRACE_SECONDS = 120
@@ -384,7 +378,6 @@ def create_job(
provider: Optional[str] = None,
base_url: Optional[str] = None,
script: Optional[str] = None,
enabled_toolsets: Optional[List[str]] = None,
) -> Dict[str, Any]:
"""
Create a new cron job.
@@ -404,9 +397,6 @@ def create_job(
script: Optional path to a Python script whose stdout is injected into the
prompt each run. The script runs before the agent turn, and its output
is prepended as context. Useful for data collection / change detection.
enabled_toolsets: Optional list of toolset names to restrict the agent to.
When set, only tools from these toolsets are loaded, reducing
token overhead. When omitted, all default tools are loaded.
Returns:
The created job dict
@@ -437,8 +427,6 @@ def create_job(
normalized_base_url = normalized_base_url or None
normalized_script = str(script).strip() if isinstance(script, str) else None
normalized_script = normalized_script or None
normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None
normalized_toolsets = normalized_toolsets or None
label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
job = {
@@ -470,7 +458,6 @@ def create_job(
# Delivery configuration
"deliver": deliver,
"origin": origin, # Tracks where job was created for "origin" delivery
"enabled_toolsets": normalized_toolsets,
}
jobs = load_jobs()
@@ -607,44 +594,43 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
``delivery_error`` is tracked separately from the agent error a job
can succeed (agent produced output) but fail delivery (platform down).
"""
with _jobs_file_lock:
jobs = load_jobs()
for i, job in enumerate(jobs):
if job["id"] == job_id:
now = _hermes_now().isoformat()
job["last_run_at"] = now
job["last_status"] = "ok" if success else "error"
job["last_error"] = error if not success else None
# Track delivery failures separately — cleared on successful delivery
job["last_delivery_error"] = delivery_error
jobs = load_jobs()
for i, job in enumerate(jobs):
if job["id"] == job_id:
now = _hermes_now().isoformat()
job["last_run_at"] = now
job["last_status"] = "ok" if success else "error"
job["last_error"] = error if not success else None
# Track delivery failures separately — cleared on successful delivery
job["last_delivery_error"] = delivery_error
# Increment completed count
if job.get("repeat"):
job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
# Increment completed count
if job.get("repeat"):
job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
# Check if we've hit the repeat limit
times = job["repeat"].get("times")
completed = job["repeat"]["completed"]
if times is not None and times > 0 and completed >= times:
# Remove the job (limit reached)
jobs.pop(i)
save_jobs(jobs)
return
# Compute next run
job["next_run_at"] = compute_next_run(job["schedule"], now)
# Check if we've hit the repeat limit
times = job["repeat"].get("times")
completed = job["repeat"]["completed"]
if times is not None and times > 0 and completed >= times:
# Remove the job (limit reached)
jobs.pop(i)
save_jobs(jobs)
return
# Compute next run
job["next_run_at"] = compute_next_run(job["schedule"], now)
# If no next run (one-shot completed), disable
if job["next_run_at"] is None:
job["enabled"] = False
job["state"] = "completed"
elif job.get("state") != "paused":
job["state"] = "scheduled"
# If no next run (one-shot completed), disable
if job["next_run_at"] is None:
job["enabled"] = False
job["state"] = "completed"
elif job.get("state") != "paused":
job["state"] = "scheduled"
save_jobs(jobs)
return
save_jobs(jobs)
return
logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
def advance_next_run(job_id: str) -> bool:
@@ -659,21 +645,20 @@ def advance_next_run(job_id: str) -> bool:
Returns True if next_run_at was advanced, False otherwise.
"""
with _jobs_file_lock:
jobs = load_jobs()
for job in jobs:
if job["id"] == job_id:
kind = job.get("schedule", {}).get("kind")
if kind not in ("cron", "interval"):
return False
now = _hermes_now().isoformat()
new_next = compute_next_run(job["schedule"], now)
if new_next and new_next != job.get("next_run_at"):
job["next_run_at"] = new_next
save_jobs(jobs)
return True
jobs = load_jobs()
for job in jobs:
if job["id"] == job_id:
kind = job.get("schedule", {}).get("kind")
if kind not in ("cron", "interval"):
return False
return False
now = _hermes_now().isoformat()
new_next = compute_next_run(job["schedule"], now)
if new_next and new_next != job.get("next_run_at"):
job["next_run_at"] = new_next
save_jobs(jobs)
return True
return False
return False
def get_due_jobs() -> List[Dict[str, Any]]:
+33 -108
View File
@@ -40,37 +40,6 @@ from hermes_time import now as _hermes_now
logger = logging.getLogger(__name__)
def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None:
"""Resolve the toolset list for a cron job.
Precedence:
1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update).
Keeps the agent's job-scoped toolset override intact — #6130.
2. Per-platform ``hermes tools`` config for the ``cron`` platform.
Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``)
so users can gate cron toolsets globally without recreating every job.
3. ``None`` on any lookup failure AIAgent loads the full default set
(legacy behavior before this change, preserved as the safety net).
_DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by
``_get_platform_tools`` for unconfigured platforms, so fresh installs
get cron WITHOUT ``moa`` by default (issue reported by Norbert
surprise $4.63 run).
"""
per_job = job.get("enabled_toolsets")
if per_job:
return per_job
try:
from hermes_cli.tools_config import _get_platform_tools # lazy: avoid heavy import at cron module load
return sorted(_get_platform_tools(cfg or {}, "cron"))
except Exception as exc:
logger.warning(
"Cron toolset resolution failed, falling back to full default toolset: %s",
exc,
)
return None
# Valid delivery platforms — used to validate user-supplied platform names
# in cron delivery targets, preventing env var enumeration via crafted names.
_KNOWN_DELIVERY_PLATFORMS = frozenset({
@@ -283,11 +252,7 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)
future = asyncio.run_coroutine_threadsafe(coro, loop)
try:
result = future.result(timeout=30)
except TimeoutError:
future.cancel()
raise
result = future.result(timeout=30)
if result and not getattr(result, "success", True):
logger.warning(
"Job '%s': media send failed for %s: %s",
@@ -417,11 +382,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
loop,
)
try:
send_result = future.result(timeout=60)
except TimeoutError:
future.cancel()
raise
send_result = future.result(timeout=60)
if send_result and not getattr(send_result, "success", True):
err = getattr(send_result, "error", "unknown")
logger.warning(
@@ -461,6 +422,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
# prevent "coroutine was never awaited" RuntimeWarning, then retry in a
# fresh thread that has no running loop.
coro.close()
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
result = future.result(timeout=30)
@@ -785,17 +747,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
# scheduler process — every job this process runs is a cron job.
os.environ["HERMES_CRON_SESSION"] = "1"
# Use ContextVars for per-job session/delivery state so parallel jobs
# don't clobber each other's targets (os.environ is process-global).
from gateway.session_context import set_session_vars, clear_session_vars, _VAR_MAP
_ctx_tokens = set_session_vars(
platform=origin["platform"] if origin else "",
chat_id=str(origin["chat_id"]) if origin else "",
chat_name=origin.get("chat_name", "") if origin else "",
)
try:
# Inject origin context so the agent's send_message tool knows the chat.
# Must be INSIDE the try block so the finally cleanup always runs.
if origin:
os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
if origin.get("chat_name"):
os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
# Re-read .env and config.yaml fresh every run so provider/key
# changes take effect without a gateway restart.
from dotenv import load_dotenv
@@ -806,10 +765,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
delivery_target = _resolve_delivery_target(job)
if delivery_target:
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"])
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"]))
os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"]
os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"])
if delivery_target.get("thread_id") is not None:
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"]))
os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])
model = job.get("model") or os.getenv("HERMES_MODEL") or ""
@@ -848,13 +807,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
prefill_messages = None
prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "")
if prefill_file:
import json as _json
pfpath = Path(prefill_file).expanduser()
if not pfpath.is_absolute():
pfpath = _hermes_home / pfpath
if pfpath.exists():
try:
with open(pfpath, "r", encoding="utf-8") as _pf:
prefill_messages = json.load(_pf)
prefill_messages = _json.load(_pf)
if not isinstance(prefill_messages, list):
prefill_messages = None
except Exception as e:
@@ -917,7 +877,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
providers_ignored=pr.get("ignore"),
providers_order=pr.get("order"),
provider_sort=pr.get("sort"),
enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
disabled_toolsets=["cronjob", "messaging", "clarify"],
quiet_mode=True,
skip_context_files=True, # Don't inject SOUL.md/AGENTS.md from scheduler cwd
@@ -1004,12 +963,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
f"— last activity: {_last_desc}"
)
# Guard against non-dict returns from run_conversation under error conditions
if not isinstance(result, dict):
raise RuntimeError(
f"agent.run_conversation returned {type(result).__name__} instead of dict: {result!r}"
)
final_response = result.get("final_response", "") or ""
# Strip leaked placeholder text that upstream may inject on empty completions.
if final_response.strip() == "(No response generated)":
@@ -1059,8 +1012,16 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
return False, output, "", error_msg
finally:
# Clean up ContextVar session/delivery state for this job.
clear_session_vars(_ctx_tokens)
# Clean up injected env vars so they don't leak to other jobs
for key in (
"HERMES_SESSION_PLATFORM",
"HERMES_SESSION_CHAT_ID",
"HERMES_SESSION_CHAT_NAME",
"HERMES_CRON_AUTO_DELIVER_PLATFORM",
"HERMES_CRON_AUTO_DELIVER_CHAT_ID",
"HERMES_CRON_AUTO_DELIVER_THREAD_ID",
):
os.environ.pop(key, None)
if _session_db:
try:
_session_db.end_session(_cron_session_id, "cron_complete")
@@ -1113,41 +1074,15 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
if verbose:
logger.info("%s - %s job(s) due", _hermes_now().strftime('%H:%M:%S'), len(due_jobs))
# Advance next_run_at for all recurring jobs FIRST, under the file lock,
# before any execution begins. This preserves at-most-once semantics.
executed = 0
for job in due_jobs:
advance_next_run(job["id"])
# Resolve max parallel workers: env var > config.yaml > unbounded.
# Set HERMES_CRON_MAX_PARALLEL=1 to restore old serial behaviour.
_max_workers: Optional[int] = None
try:
_env_par = os.getenv("HERMES_CRON_MAX_PARALLEL", "").strip()
if _env_par:
_max_workers = int(_env_par) or None
except (ValueError, TypeError):
logger.warning("Invalid HERMES_CRON_MAX_PARALLEL value; defaulting to unbounded")
if _max_workers is None:
try:
_ucfg = load_config() or {}
_cfg_par = (
_ucfg.get("cron", {}) if isinstance(_ucfg, dict) else {}
).get("max_parallel_jobs")
if _cfg_par is not None:
_max_workers = int(_cfg_par) or None
except Exception:
pass
# For recurring jobs (cron/interval), advance next_run_at to the
# next future occurrence BEFORE execution. This way, if the
# process crashes mid-run, the job won't re-fire on restart.
# One-shot jobs are left alone so they can retry on restart.
advance_next_run(job["id"])
if verbose:
logger.info(
"Running %d job(s) in parallel (max_workers=%s)",
len(due_jobs),
_max_workers if _max_workers else "unbounded",
)
def _process_job(job: dict) -> bool:
"""Run one due job end-to-end: execute, save, deliver, mark."""
try:
success, output, final_response, error = run_job(job)
output_file = save_job_output(job["id"], output)
@@ -1179,23 +1114,13 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"
mark_job_run(job["id"], success, error, delivery_error=delivery_error)
return True
executed += 1
except Exception as e:
logger.error("Error processing job %s: %s", job['id'], e)
mark_job_run(job["id"], False, str(e))
return False
# Run all due jobs concurrently, each in its own ContextVar copy
# so session/delivery state stays isolated per-thread.
with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
_futures = []
for job in due_jobs:
_ctx = contextvars.copy_context()
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
_results = [f.result() for f in _futures]
return sum(_results)
return executed
finally:
if fcntl:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
-22
View File
@@ -58,13 +58,6 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
fi
# Ensure the main config file remains accessible to the hermes runtime user
# even if it was edited on the host after initial ownership setup.
if [ -f "$HERMES_HOME/config.yaml" ]; then
chown hermes:hermes "$HERMES_HOME/config.yaml"
chmod 640 "$HERMES_HOME/config.yaml"
fi
# SOUL.md
if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
@@ -75,19 +68,4 @@ if [ -d "$INSTALL_DIR/skills" ]; then
python3 "$INSTALL_DIR/tools/skills_sync.py"
fi
# Final exec: two supported invocation patterns.
#
# docker run <image> -> exec `hermes` with no args (legacy default)
# docker run <image> chat -q "..." -> exec `hermes chat -q "..."` (legacy wrap)
# docker run <image> sleep infinity -> exec `sleep infinity` directly
# docker run <image> bash -> exec `bash` directly
#
# If the first positional arg resolves to an executable on PATH, we assume the
# caller wants to run it directly (needed by the launcher which runs long-lived
# `sleep infinity` sandbox containers — see tools/environments/docker.py).
# Otherwise we treat the args as a hermes subcommand and wrap with `hermes`,
# preserving the documented `docker run <image> <subcommand>` behavior.
if [ $# -gt 0 ] && command -v "$1" >/dev/null 2>&1; then
exec "$@"
fi
exec hermes "$@"
+1
View File
@@ -53,6 +53,7 @@ def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str)
try:
loop = asyncio.get_running_loop()
# We're in an async context -- need to run in thread
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(
handle_function_call, tool_name, arguments, task_id
+3 -25
View File
@@ -576,14 +576,6 @@ def load_gateway_config() -> GatewayConfig:
bridged["free_response_channels"] = platform_cfg["free_response_channels"]
if "mention_patterns" in platform_cfg:
bridged["mention_patterns"] = platform_cfg["mention_patterns"]
if "dm_policy" in platform_cfg:
bridged["dm_policy"] = platform_cfg["dm_policy"]
if "allow_from" in platform_cfg:
bridged["allow_from"] = platform_cfg["allow_from"]
if "group_policy" in platform_cfg:
bridged["group_policy"] = platform_cfg["group_policy"]
if "group_allow_from" in platform_cfg:
bridged["group_allow_from"] = platform_cfg["group_allow_from"]
if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
if "channel_prompts" in platform_cfg:
@@ -616,8 +608,6 @@ def load_gateway_config() -> GatewayConfig:
if isinstance(frc, list):
frc = ",".join(str(v) for v in frc)
os.environ["SLACK_FREE_RESPONSE_CHANNELS"] = str(frc)
if "reactions" in slack_cfg and not os.getenv("SLACK_REACTIONS"):
os.environ["SLACK_REACTIONS"] = str(slack_cfg["reactions"]).lower()
# Discord settings → env vars (env vars take precedence)
discord_cfg = yaml_cfg.get("discord", {})
@@ -672,7 +662,8 @@ def load_gateway_config() -> GatewayConfig:
if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"):
os.environ["TELEGRAM_REQUIRE_MENTION"] = str(telegram_cfg["require_mention"]).lower()
if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"):
os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"])
import json as _json
os.environ["TELEGRAM_MENTION_PATTERNS"] = _json.dumps(telegram_cfg["mention_patterns"])
frc = telegram_cfg.get("free_response_chats")
if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"):
if isinstance(frc, list):
@@ -709,20 +700,6 @@ def load_gateway_config() -> GatewayConfig:
if isinstance(frc, list):
frc = ",".join(str(v) for v in frc)
os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc)
if "dm_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_DM_POLICY"):
os.environ["WHATSAPP_DM_POLICY"] = str(whatsapp_cfg["dm_policy"]).lower()
af = whatsapp_cfg.get("allow_from")
if af is not None and not os.getenv("WHATSAPP_ALLOWED_USERS"):
if isinstance(af, list):
af = ",".join(str(v) for v in af)
os.environ["WHATSAPP_ALLOWED_USERS"] = str(af)
if "group_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_GROUP_POLICY"):
os.environ["WHATSAPP_GROUP_POLICY"] = str(whatsapp_cfg["group_policy"]).lower()
gaf = whatsapp_cfg.get("group_allow_from")
if gaf is not None and not os.getenv("WHATSAPP_GROUP_ALLOWED_USERS"):
if isinstance(gaf, list):
gaf = ",".join(str(v) for v in gaf)
os.environ["WHATSAPP_GROUP_ALLOWED_USERS"] = str(gaf)
# DingTalk settings → env vars (env vars take precedence)
dingtalk_cfg = yaml_cfg.get("dingtalk", {})
@@ -1260,6 +1237,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
if legacy_home:
qq_home = legacy_home
qq_home_name_env = "QQ_HOME_CHANNEL_NAME"
import logging
logging.getLogger(__name__).warning(
"QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL "
"in your .env for consistency with the platform key."
+11 -44
View File
@@ -135,22 +135,9 @@ class HookRegistry:
except Exception as e:
print(f"[hooks] Error loading hook {hook_dir.name}: {e}", flush=True)
def _resolve_handlers(self, event_type: str) -> List[Callable]:
"""Return all handlers that should fire for ``event_type``.
Exact matches fire first, followed by wildcard matches (e.g.
``command:*`` matches ``command:reset``).
"""
handlers = list(self._handlers.get(event_type, []))
if ":" in event_type:
base = event_type.split(":")[0]
wildcard_key = f"{base}:*"
handlers.extend(self._handlers.get(wildcard_key, []))
return handlers
async def emit(self, event_type: str, context: Optional[Dict[str, Any]] = None) -> None:
"""
Fire all handlers registered for an event, discarding return values.
Fire all handlers registered for an event.
Supports wildcard matching: handlers registered for "command:*" will
fire for any "command:..." event. Handlers registered for a base type
@@ -164,7 +151,16 @@ class HookRegistry:
if context is None:
context = {}
for fn in self._resolve_handlers(event_type):
# Collect handlers: exact match + wildcard match
handlers = list(self._handlers.get(event_type, []))
# Check for wildcard patterns (e.g., "command:*" matches "command:reset")
if ":" in event_type:
base = event_type.split(":")[0]
wildcard_key = f"{base}:*"
handlers.extend(self._handlers.get(wildcard_key, []))
for fn in handlers:
try:
result = fn(event_type, context)
# Support both sync and async handlers
@@ -172,32 +168,3 @@ class HookRegistry:
await result
except Exception as e:
print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
async def emit_collect(
self,
event_type: str,
context: Optional[Dict[str, Any]] = None,
) -> List[Any]:
"""Fire handlers and return their non-None return values in order.
Like :meth:`emit` but captures each handler's return value. Used for
decision-style hooks (e.g. ``command:<name>`` policies that want to
allow/deny/rewrite the command before normal dispatch).
Exceptions from individual handlers are logged but do not abort the
remaining handlers.
"""
if context is None:
context = {}
results: List[Any] = []
for fn in self._resolve_handlers(event_type):
try:
result = fn(event_type, context)
if asyncio.iscoroutine(result):
result = await result
if result is not None:
results.append(result)
except Exception as e:
print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
return results
+60 -237
View File
@@ -117,160 +117,6 @@ def _normalize_chat_content(
return ""
# Content part type aliases used by the OpenAI Chat Completions and Responses
# APIs. We accept both spellings on input and emit a single canonical internal
# shape (``{"type": "text", ...}`` / ``{"type": "image_url", ...}``) that the
# rest of the agent pipeline already understands.
_TEXT_PART_TYPES = frozenset({"text", "input_text", "output_text"})
_IMAGE_PART_TYPES = frozenset({"image_url", "input_image"})
_FILE_PART_TYPES = frozenset({"file", "input_file"})
def _normalize_multimodal_content(content: Any) -> Any:
"""Validate and normalize multimodal content for the API server.
Returns a plain string when the content is text-only, or a list of
``{"type": "text"|"image_url", ...}`` parts when images are present.
The output shape is the native OpenAI Chat Completions vision format,
which the agent pipeline accepts verbatim (OpenAI-wire providers) or
converts (``_preprocess_anthropic_content`` for Anthropic).
Raises ``ValueError`` with an OpenAI-style code on invalid input:
* ``unsupported_content_type`` file/input_file/file_id parts, or
non-image ``data:`` URLs.
* ``invalid_image_url`` missing URL or unsupported scheme.
* ``invalid_content_part`` malformed text/image objects.
Callers translate the ValueError into a 400 response.
"""
# Scalar passthrough mirrors ``_normalize_chat_content``.
if content is None:
return ""
if isinstance(content, str):
return content[:MAX_NORMALIZED_TEXT_LENGTH] if len(content) > MAX_NORMALIZED_TEXT_LENGTH else content
if not isinstance(content, list):
# Mirror the legacy text-normalizer's fallback so callers that
# pre-existed image support still get a string back.
return _normalize_chat_content(content)
items = content[:MAX_CONTENT_LIST_SIZE] if len(content) > MAX_CONTENT_LIST_SIZE else content
normalized_parts: List[Dict[str, Any]] = []
text_accum_len = 0
for part in items:
if isinstance(part, str):
if part:
trimmed = part[:MAX_NORMALIZED_TEXT_LENGTH]
normalized_parts.append({"type": "text", "text": trimmed})
text_accum_len += len(trimmed)
continue
if not isinstance(part, dict):
# Ignore unknown scalars for forward compatibility with future
# Responses API additions (e.g. ``refusal``). The same policy
# the text normalizer applies.
continue
raw_type = part.get("type")
part_type = str(raw_type or "").strip().lower()
if part_type in _TEXT_PART_TYPES:
text = part.get("text")
if text is None:
continue
if not isinstance(text, str):
text = str(text)
if text:
trimmed = text[:MAX_NORMALIZED_TEXT_LENGTH]
normalized_parts.append({"type": "text", "text": trimmed})
text_accum_len += len(trimmed)
continue
if part_type in _IMAGE_PART_TYPES:
detail = part.get("detail")
image_ref = part.get("image_url")
# OpenAI Responses sends ``input_image`` with a top-level
# ``image_url`` string; Chat Completions sends ``image_url`` as
# ``{"url": "...", "detail": "..."}``. Support both.
if isinstance(image_ref, dict):
url_value = image_ref.get("url")
detail = image_ref.get("detail", detail)
else:
url_value = image_ref
if not isinstance(url_value, str) or not url_value.strip():
raise ValueError("invalid_image_url:Image parts must include a non-empty image URL.")
url_value = url_value.strip()
lowered = url_value.lower()
if lowered.startswith("data:"):
if not lowered.startswith("data:image/") or "," not in url_value:
raise ValueError(
"unsupported_content_type:Only image data URLs are supported. "
"Non-image data payloads are not supported."
)
elif not (lowered.startswith("http://") or lowered.startswith("https://")):
raise ValueError(
"invalid_image_url:Image inputs must use http(s) URLs or data:image/... URLs."
)
image_part: Dict[str, Any] = {"type": "image_url", "image_url": {"url": url_value}}
if detail is not None:
if not isinstance(detail, str) or not detail.strip():
raise ValueError("invalid_content_part:Image detail must be a non-empty string when provided.")
image_part["image_url"]["detail"] = detail.strip()
normalized_parts.append(image_part)
continue
if part_type in _FILE_PART_TYPES:
raise ValueError(
"unsupported_content_type:Inline image inputs are supported, "
"but uploaded files and document inputs are not supported on this endpoint."
)
# Unknown part type — reject explicitly so clients get a clear error
# instead of a silently dropped turn.
raise ValueError(
f"unsupported_content_type:Unsupported content part type {raw_type!r}. "
"Only text and image_url/input_image parts are supported."
)
if not normalized_parts:
return ""
# Text-only: collapse to a plain string so downstream logging/trajectory
# code sees the native shape and prompt caching on text-only turns is
# unaffected.
if all(p.get("type") == "text" for p in normalized_parts):
return "\n".join(p["text"] for p in normalized_parts if p.get("text"))
return normalized_parts
def _content_has_visible_payload(content: Any) -> bool:
"""True when content has any text or image attachment. Used to reject empty turns."""
if isinstance(content, str):
return bool(content.strip())
if isinstance(content, list):
for part in content:
if isinstance(part, dict):
ptype = str(part.get("type") or "").strip().lower()
if ptype in _TEXT_PART_TYPES and str(part.get("text") or "").strip():
return True
if ptype in _IMAGE_PART_TYPES:
return True
return False
def _multimodal_validation_error(exc: ValueError, *, param: str) -> "web.Response":
"""Translate a ``_normalize_multimodal_content`` ValueError into a 400 response."""
raw = str(exc)
code, _, message = raw.partition(":")
if not message:
code, message = "invalid_content_part", raw
return web.json_response(
_openai_error(message, code=code, param=param),
status=400,
)
def check_api_server_requirements() -> bool:
"""Check if API server dependencies are available."""
return AIOHTTP_AVAILABLE
@@ -323,6 +169,7 @@ class ResponseStore:
).fetchone()
if row is None:
return None
import time
self._conn.execute(
"UPDATE responses SET accessed_at = ? WHERE response_id = ?",
(time.time(), response_id),
@@ -332,6 +179,7 @@ class ResponseStore:
def put(self, response_id: str, data: Dict[str, Any]) -> None:
"""Store a response, evicting the oldest if at capacity."""
import time
self._conn.execute(
"INSERT OR REPLACE INTO responses (response_id, data, accessed_at) VALUES (?, ?, ?)",
(response_id, json.dumps(data, default=str), time.time()),
@@ -467,12 +315,12 @@ class _IdempotencyCache:
def __init__(self, max_items: int = 1000, ttl_seconds: int = 300):
from collections import OrderedDict
self._store = OrderedDict()
self._inflight: Dict[tuple[str, str], "asyncio.Task[Any]"] = {}
self._ttl = ttl_seconds
self._max = max_items
def _purge(self):
now = time.time()
import time as _t
now = _t.time()
expired = [k for k, v in self._store.items() if now - v["ts"] > self._ttl]
for k in expired:
self._store.pop(k, None)
@@ -484,27 +332,11 @@ class _IdempotencyCache:
item = self._store.get(key)
if item and item["fp"] == fingerprint:
return item["resp"]
inflight_key = (key, fingerprint)
task = self._inflight.get(inflight_key)
if task is None:
async def _compute_and_store():
resp = await compute_coro()
import time as _t
self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
self._purge()
return resp
task = asyncio.create_task(_compute_and_store())
self._inflight[inflight_key] = task
def _clear_inflight(done_task: "asyncio.Task[Any]") -> None:
if self._inflight.get(inflight_key) is done_task:
self._inflight.pop(inflight_key, None)
task.add_done_callback(_clear_inflight)
return await asyncio.shield(task)
resp = await compute_coro()
import time as _t
self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
self._purge()
return resp
_idem_cache = _IdempotencyCache()
@@ -534,30 +366,6 @@ def _derive_chat_session_id(
return f"api-{digest}"
_CRON_AVAILABLE = False
try:
from cron.jobs import (
list_jobs as _cron_list,
get_job as _cron_get,
create_job as _cron_create,
update_job as _cron_update,
remove_job as _cron_remove,
pause_job as _cron_pause,
resume_job as _cron_resume,
trigger_job as _cron_trigger,
)
_CRON_AVAILABLE = True
except ImportError:
_cron_list = None
_cron_get = None
_cron_create = None
_cron_update = None
_cron_remove = None
_cron_pause = None
_cron_resume = None
_cron_trigger = None
class APIServerAdapter(BasePlatformAdapter):
"""
OpenAI-compatible HTTP API server adapter.
@@ -829,32 +637,26 @@ class APIServerAdapter(BasePlatformAdapter):
system_prompt = None
conversation_messages: List[Dict[str, str]] = []
for idx, msg in enumerate(messages):
for msg in messages:
role = msg.get("role", "")
raw_content = msg.get("content", "")
content = _normalize_chat_content(msg.get("content", ""))
if role == "system":
# System messages don't support images (Anthropic rejects, OpenAI
# text-model systems don't render them). Flatten to text.
content = _normalize_chat_content(raw_content)
# Accumulate system messages
if system_prompt is None:
system_prompt = content
else:
system_prompt = system_prompt + "\n" + content
elif role in ("user", "assistant"):
try:
content = _normalize_multimodal_content(raw_content)
except ValueError as exc:
return _multimodal_validation_error(exc, param=f"messages[{idx}].content")
conversation_messages.append({"role": role, "content": content})
# Extract the last user message as the primary input
user_message: Any = ""
user_message = ""
history = []
if conversation_messages:
user_message = conversation_messages[-1].get("content", "")
history = conversation_messages[:-1]
if not _content_has_visible_payload(user_message):
if not user_message:
return web.json_response(
{"error": {"message": "No user message found in messages", "type": "invalid_request_error"}},
status=400,
@@ -1622,19 +1424,16 @@ class APIServerAdapter(BasePlatformAdapter):
# No error if conversation doesn't exist yet — it's a new conversation
# Normalize input to message list
input_messages: List[Dict[str, Any]] = []
input_messages: List[Dict[str, str]] = []
if isinstance(raw_input, str):
input_messages = [{"role": "user", "content": raw_input}]
elif isinstance(raw_input, list):
for idx, item in enumerate(raw_input):
for item in raw_input:
if isinstance(item, str):
input_messages.append({"role": "user", "content": item})
elif isinstance(item, dict):
role = item.get("role", "user")
try:
content = _normalize_multimodal_content(item.get("content", ""))
except ValueError as exc:
return _multimodal_validation_error(exc, param=f"input[{idx}].content")
content = _normalize_chat_content(item.get("content", ""))
input_messages.append({"role": role, "content": content})
else:
return web.json_response(_openai_error("'input' must be a string or array"), status=400)
@@ -1643,7 +1442,7 @@ class APIServerAdapter(BasePlatformAdapter):
# This lets stateless clients supply their own history instead of
# relying on server-side response chaining via previous_response_id.
# Precedence: explicit conversation_history > previous_response_id.
conversation_history: List[Dict[str, Any]] = []
conversation_history: List[Dict[str, str]] = []
raw_history = body.get("conversation_history")
if raw_history:
if not isinstance(raw_history, list):
@@ -1657,11 +1456,7 @@ class APIServerAdapter(BasePlatformAdapter):
_openai_error(f"conversation_history[{i}] must have 'role' and 'content' fields"),
status=400,
)
try:
entry_content = _normalize_multimodal_content(entry["content"])
except ValueError as exc:
return _multimodal_validation_error(exc, param=f"conversation_history[{i}].content")
conversation_history.append({"role": str(entry["role"]), "content": entry_content})
conversation_history.append({"role": str(entry["role"]), "content": str(entry["content"])})
if previous_response_id:
logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")
@@ -1681,8 +1476,8 @@ class APIServerAdapter(BasePlatformAdapter):
conversation_history.append(msg)
# Last input message is the user_message
user_message: Any = input_messages[-1].get("content", "") if input_messages else ""
if not _content_has_visible_payload(user_message):
user_message = input_messages[-1].get("content", "") if input_messages else ""
if not user_message:
return web.json_response(_openai_error("No user message found in input"), status=400)
# Truncation support
@@ -1887,16 +1682,44 @@ class APIServerAdapter(BasePlatformAdapter):
# Cron jobs API
# ------------------------------------------------------------------
# Check cron module availability once (not per-request)
_CRON_AVAILABLE = False
try:
from cron.jobs import (
list_jobs as _cron_list,
get_job as _cron_get,
create_job as _cron_create,
update_job as _cron_update,
remove_job as _cron_remove,
pause_job as _cron_pause,
resume_job as _cron_resume,
trigger_job as _cron_trigger,
)
# Wrap as staticmethod to prevent descriptor binding — these are plain
# module functions, not instance methods. Without this, self._cron_*()
# injects ``self`` as the first positional argument and every call
# raises TypeError.
_cron_list = staticmethod(_cron_list)
_cron_get = staticmethod(_cron_get)
_cron_create = staticmethod(_cron_create)
_cron_update = staticmethod(_cron_update)
_cron_remove = staticmethod(_cron_remove)
_cron_pause = staticmethod(_cron_pause)
_cron_resume = staticmethod(_cron_resume)
_cron_trigger = staticmethod(_cron_trigger)
_CRON_AVAILABLE = True
except ImportError:
pass
_JOB_ID_RE = __import__("re").compile(r"[a-f0-9]{12}")
# Allowed fields for update — prevents clients injecting arbitrary keys
_UPDATE_ALLOWED_FIELDS = {"name", "schedule", "prompt", "deliver", "skills", "skill", "repeat", "enabled"}
_MAX_NAME_LENGTH = 200
_MAX_PROMPT_LENGTH = 5000
@staticmethod
def _check_jobs_available() -> Optional["web.Response"]:
def _check_jobs_available(self) -> Optional["web.Response"]:
"""Return error response if cron module isn't available."""
if not _CRON_AVAILABLE:
if not self._CRON_AVAILABLE:
return web.json_response(
{"error": "Cron module not available"}, status=501,
)
@@ -1921,7 +1744,7 @@ class APIServerAdapter(BasePlatformAdapter):
return cron_err
try:
include_disabled = request.query.get("include_disabled", "").lower() in ("true", "1")
jobs = _cron_list(include_disabled=include_disabled)
jobs = self._cron_list(include_disabled=include_disabled)
return web.json_response({"jobs": jobs})
except Exception as e:
return web.json_response({"error": str(e)}, status=500)
@@ -1969,7 +1792,7 @@ class APIServerAdapter(BasePlatformAdapter):
if repeat is not None:
kwargs["repeat"] = repeat
job = _cron_create(**kwargs)
job = self._cron_create(**kwargs)
return web.json_response({"job": job})
except Exception as e:
return web.json_response({"error": str(e)}, status=500)
@@ -1986,7 +1809,7 @@ class APIServerAdapter(BasePlatformAdapter):
if id_err:
return id_err
try:
job = _cron_get(job_id)
job = self._cron_get(job_id)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"job": job})
@@ -2019,7 +1842,7 @@ class APIServerAdapter(BasePlatformAdapter):
return web.json_response(
{"error": f"Prompt must be ≤ {self._MAX_PROMPT_LENGTH} characters"}, status=400,
)
job = _cron_update(job_id, sanitized)
job = self._cron_update(job_id, sanitized)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"job": job})
@@ -2038,7 +1861,7 @@ class APIServerAdapter(BasePlatformAdapter):
if id_err:
return id_err
try:
success = _cron_remove(job_id)
success = self._cron_remove(job_id)
if not success:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"ok": True})
@@ -2057,7 +1880,7 @@ class APIServerAdapter(BasePlatformAdapter):
if id_err:
return id_err
try:
job = _cron_pause(job_id)
job = self._cron_pause(job_id)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"job": job})
@@ -2076,7 +1899,7 @@ class APIServerAdapter(BasePlatformAdapter):
if id_err:
return id_err
try:
job = _cron_resume(job_id)
job = self._cron_resume(job_id)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"job": job})
@@ -2095,7 +1918,7 @@ class APIServerAdapter(BasePlatformAdapter):
if id_err:
return id_err
try:
job = _cron_trigger(job_id)
job = self._cron_trigger(job_id)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"job": job})
+38 -310
View File
@@ -19,8 +19,6 @@ import uuid
from abc import ABC, abstractmethod
from urllib.parse import urlsplit
from utils import normalize_proxy_url
logger = logging.getLogger(__name__)
@@ -161,13 +159,13 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
if platform_env_var:
value = (os.environ.get(platform_env_var) or "").strip()
if value:
return normalize_proxy_url(value)
return value
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
"https_proxy", "http_proxy", "all_proxy"):
value = (os.environ.get(key) or "").strip()
if value:
return normalize_proxy_url(value)
return normalize_proxy_url(_detect_macos_system_proxy())
return value
return _detect_macos_system_proxy()
def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
@@ -393,9 +391,12 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
if not is_safe_url(url):
raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")
import asyncio
import httpx
_log = logging.getLogger(__name__)
import logging as _logging
_log = _logging.getLogger(__name__)
last_exc = None
async with httpx.AsyncClient(
timeout=30.0,
follow_redirects=True,
@@ -413,6 +414,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
response.raise_for_status()
return cache_image_from_bytes(response.content, ext)
except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
last_exc = exc
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
raise
if attempt < retries:
@@ -428,6 +430,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
await asyncio.sleep(wait)
continue
raise
raise last_exc
def cleanup_image_cache(max_age_hours: int = 24) -> int:
@@ -507,9 +510,12 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
if not is_safe_url(url):
raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")
import asyncio
import httpx
_log = logging.getLogger(__name__)
import logging as _logging
_log = _logging.getLogger(__name__)
last_exc = None
async with httpx.AsyncClient(
timeout=30.0,
follow_redirects=True,
@@ -527,6 +533,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
response.raise_for_status()
return cache_audio_from_bytes(response.content, ext)
except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
last_exc = exc
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
raise
if attempt < retries:
@@ -542,39 +549,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
await asyncio.sleep(wait)
continue
raise
# ---------------------------------------------------------------------------
# Video cache utilities
#
# Same pattern as image/audio cache -- videos from platforms are downloaded
# here so the agent can reference them by local file path.
# ---------------------------------------------------------------------------
VIDEO_CACHE_DIR = get_hermes_dir("cache/videos", "video_cache")
SUPPORTED_VIDEO_TYPES = {
".mp4": "video/mp4",
".mov": "video/quicktime",
".webm": "video/webm",
".mkv": "video/x-matroska",
".avi": "video/x-msvideo",
}
def get_video_cache_dir() -> Path:
"""Return the video cache directory, creating it if it doesn't exist."""
VIDEO_CACHE_DIR.mkdir(parents=True, exist_ok=True)
return VIDEO_CACHE_DIR
def cache_video_from_bytes(data: bytes, ext: str = ".mp4") -> str:
"""Save raw video bytes to the cache and return the absolute file path."""
cache_dir = get_video_cache_dir()
filename = f"video_{uuid.uuid4().hex[:12]}{ext}"
filepath = cache_dir / filename
filepath.write_bytes(data)
return str(filepath)
raise last_exc
# ---------------------------------------------------------------------------
@@ -752,10 +727,7 @@ class MessageEvent:
if not self.is_command():
return self.text
parts = self.text.split(maxsplit=1)
args = parts[1] if len(parts) > 1 else ""
# iOS auto-corrects -- to — (em dash) and - to (en dash)
args = args.replace("\u2014\u2014", "--").replace("\u2014", "--").replace("\u2013", "-")
return args
return parts[1] if len(parts) > 1 else ""
@dataclass
@@ -900,16 +872,10 @@ class BasePlatformAdapter(ABC):
self._fatal_error_retryable = True
self._fatal_error_handler: Optional[Callable[["BasePlatformAdapter"], Awaitable[None] | None]] = None
# Track active message handlers per session for interrupt support.
# _active_sessions stores the per-session interrupt Event; _session_tasks
# maps session → the specific Task currently processing it so that
# session-terminating commands (/stop, /new, /reset) can cancel the
# right task and release the adapter-level guard deterministically.
# Without the owner-task map, an old task's finally block could delete
# a newer task's guard, leaving stale busy state.
# Track active message handlers per session for interrupt support
# Key: session_key (e.g., chat_id), Value: (event, asyncio.Event for interrupt)
self._active_sessions: Dict[str, asyncio.Event] = {}
self._pending_messages: Dict[str, MessageEvent] = {}
self._session_tasks: Dict[str, asyncio.Task] = {}
# Background message-processing tasks spawned by handle_message().
# Gateway shutdown cancels these so an old gateway instance doesn't keep
# working on a task after --replace or manual restarts.
@@ -1352,7 +1318,7 @@ class BasePlatformAdapter(ABC):
# Extract MEDIA:<path> tags, allowing optional whitespace after the colon
# and quoted/backticked paths for LLM-formatted outputs.
media_pattern = re.compile(
r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
)
for match in media_pattern.finditer(content):
path = match.group("path").strip()
@@ -1686,222 +1652,6 @@ class BasePlatformAdapter(ABC):
return f"{existing_text}\n\n{new_text}".strip()
return existing_text
# ------------------------------------------------------------------
# Session task + guard ownership helpers
# ------------------------------------------------------------------
# These were introduced together with the _session_tasks owner map to
# make session lifecycle reconciliation deterministic across (a) the
# normal completion path, (b) /stop/ /new/ /reset bypass commands,
# and (c) stale-lock self-heal on the next inbound message.
def _release_session_guard(
self,
session_key: str,
*,
guard: Optional[asyncio.Event] = None,
) -> None:
"""Release the adapter-level guard for a session.
When ``guard`` is provided, only release the entry if it still points
at that exact Event. This lets reset-like commands swap in a temporary
guard while the old processing task unwinds, without having the old
task's cleanup accidentally clear the replacement guard.
"""
current_guard = self._active_sessions.get(session_key)
if current_guard is None:
return
if guard is not None and current_guard is not guard:
return
del self._active_sessions[session_key]
def _session_task_is_stale(self, session_key: str) -> bool:
"""Return True if the owner task for ``session_key`` is done/cancelled.
A lock is "stale" when the adapter still has ``_active_sessions[key]``
AND a known owner task in ``_session_tasks`` that has already exited.
When there is no owner task at all, that usually means the guard was
installed by some path other than handle_message() (tests sometimes
install guards directly) don't treat that as stale. The on-entry
self-heal only needs to handle the production split-brain case where
an owner task was recorded, then exited without clearing its guard.
"""
task = self._session_tasks.get(session_key)
if task is None:
return False
done = getattr(task, "done", None)
return bool(done and done())
def _heal_stale_session_lock(self, session_key: str) -> bool:
"""Clear a stale session lock if the owner task is already gone.
Returns True if a stale lock was healed. Returns False if there is
no lock, or the owner task is still alive (the normal busy case).
This is the on-entry safety net sidbin's issue #11016 analysis calls
for: without it, a split-brain adapter still thinks the session is
active, but nothing is actually processing traps the chat in
infinite "Interrupting current task..." until the gateway is
restarted.
"""
if session_key not in self._active_sessions:
return False
if not self._session_task_is_stale(session_key):
return False
logger.warning(
"[%s] Healing stale session lock for %s (owner task is done/absent)",
self.name,
session_key,
)
self._active_sessions.pop(session_key, None)
self._pending_messages.pop(session_key, None)
self._session_tasks.pop(session_key, None)
return True
def _start_session_processing(
self,
event: MessageEvent,
session_key: str,
*,
interrupt_event: Optional[asyncio.Event] = None,
) -> bool:
"""Spawn a background processing task under the given session guard.
Returns True on success. If the runtime stubs ``create_task`` with a
non-Task sentinel (some tests do this), the guard is rolled back and
False is returned so the caller isn't left holding a half-installed
session lock.
"""
guard = interrupt_event or asyncio.Event()
self._active_sessions[session_key] = guard
task = asyncio.create_task(self._process_message_background(event, session_key))
self._session_tasks[session_key] = task
try:
self._background_tasks.add(task)
except TypeError:
# Tests stub create_task() with lightweight sentinels that are not
# hashable and do not support lifecycle callbacks.
self._session_tasks.pop(session_key, None)
self._release_session_guard(session_key, guard=guard)
return False
if hasattr(task, "add_done_callback"):
task.add_done_callback(self._background_tasks.discard)
task.add_done_callback(self._expected_cancelled_tasks.discard)
return True
async def cancel_session_processing(
self,
session_key: str,
*,
release_guard: bool = True,
discard_pending: bool = True,
) -> None:
"""Cancel in-flight processing for a single session.
``release_guard=False`` keeps the adapter-level session guard in place
so reset-like commands can finish atomically before follow-up messages
are allowed to start a fresh background task.
"""
task = self._session_tasks.pop(session_key, None)
if task is not None and not task.done():
logger.debug(
"[%s] Cancelling active processing for session %s",
self.name,
session_key,
)
self._expected_cancelled_tasks.add(task)
task.cancel()
try:
await task
except asyncio.CancelledError:
pass
except Exception:
logger.debug(
"[%s] Session cancellation raised while unwinding %s",
self.name,
session_key,
exc_info=True,
)
if discard_pending:
self._pending_messages.pop(session_key, None)
if release_guard:
self._release_session_guard(session_key)
async def _drain_pending_after_session_command(
self,
session_key: str,
command_guard: asyncio.Event,
) -> None:
"""Resume the latest queued follow-up once a session command completes.
Called at the tail of /stop, /new, and /reset dispatch. Releases the
command-scoped guard, then if a follow-up message landed while the
command was running spawns a fresh processing task for it.
"""
pending_event = self._pending_messages.pop(session_key, None)
self._release_session_guard(session_key, guard=command_guard)
if pending_event is None:
return
self._start_session_processing(pending_event, session_key)
async def _dispatch_active_session_command(
self,
event: MessageEvent,
session_key: str,
cmd: str,
) -> None:
"""Dispatch a reset-like bypass command while preserving guard ordering.
/stop, /new, and /reset must:
1. Keep the session guard installed while the runner processes the
command (so a racing follow-up message stays queued, not
dispatched as a second parallel run).
2. Cancel the old in-flight adapter task only AFTER the runner has
finished handling the command (so the runner sees consistent
state and its response is sent in order).
3. Release the command-scoped guard and drain the latest queued
follow-up exactly once, after 1 and 2 complete.
"""
logger.debug(
"[%s] Command '/%s' bypassing active-session guard for %s",
self.name,
cmd,
session_key,
)
current_guard = self._active_sessions.get(session_key)
command_guard = asyncio.Event()
self._active_sessions[session_key] = command_guard
thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
try:
response = await self._message_handler(event)
# Old adapter task (if any) is cancelled AFTER the runner has
# fully handled the command — keeps ordering deterministic.
await self.cancel_session_processing(
session_key,
release_guard=False,
discard_pending=False,
)
if response:
await self._send_with_retry(
chat_id=event.source.chat_id,
content=response,
reply_to=event.message_id,
metadata=thread_meta,
)
except Exception:
# On failure, restore the original guard if one still exists so
# we don't leave the session in a half-reset state.
if self._active_sessions.get(session_key) is command_guard:
if session_key in self._session_tasks and current_guard is not None:
self._active_sessions[session_key] = current_guard
else:
self._release_session_guard(session_key, guard=command_guard)
raise
await self._drain_pending_after_session_command(session_key, command_guard)
async def handle_message(self, event: MessageEvent) -> None:
"""
Process an incoming message.
@@ -1918,15 +1668,7 @@ class BasePlatformAdapter(ABC):
group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
)
# On-entry self-heal: if the adapter still has an _active_sessions
# entry for this key but the owner task has already exited (done or
# cancelled), the lock is stale. Clear it and fall through to
# normal dispatch so the user isn't trapped behind a dead guard —
# this is the split-brain tail described in issue #11016.
if session_key in self._active_sessions:
self._heal_stale_session_lock(session_key)
# Check if there's already an active handler for this session
if session_key in self._active_sessions:
# Certain commands must bypass the active-session guard and be
@@ -1943,23 +1685,6 @@ class BasePlatformAdapter(ABC):
from hermes_cli.commands import should_bypass_active_session
if should_bypass_active_session(cmd):
# /stop, /new, /reset must cancel the in-flight adapter task
# and preserve ordering of queued follow-ups. Route those
# through the dedicated handoff path that serializes
# cancellation + runner response + pending drain.
if cmd in ("stop", "new", "reset"):
try:
await self._dispatch_active_session_command(event, session_key, cmd)
except Exception as e:
logger.error(
"[%s] Command '/%s' dispatch failed: %s",
self.name, cmd, e, exc_info=True,
)
return
# Other bypass commands (/approve, /deny, /status,
# /background, /restart) just need direct dispatch — they
# don't cancel the running task.
logger.debug(
"[%s] Command '/%s' bypassing active-session guard for %s",
self.name, cmd, session_key,
@@ -2005,9 +1730,19 @@ class BasePlatformAdapter(ABC):
# starts would also pass the _active_sessions check and spawn a
# duplicate task. (grammY sequentialize / aiogram EventIsolation
# pattern — set the guard synchronously, not inside the task.)
# _start_session_processing installs the guard AND the owner-task
# mapping atomically so stale-lock detection works.
self._start_session_processing(event, session_key)
self._active_sessions[session_key] = asyncio.Event()
# Spawn background task to process this message
task = asyncio.create_task(self._process_message_background(event, session_key))
try:
self._background_tasks.add(task)
except TypeError:
# Some tests stub create_task() with lightweight sentinels that are not
# hashable and do not support lifecycle callbacks.
return
if hasattr(task, "add_done_callback"):
task.add_done_callback(self._background_tasks.discard)
task.add_done_callback(self._expected_cancelled_tasks.discard)
@staticmethod
def _get_human_delay() -> float:
@@ -2019,6 +1754,8 @@ class BasePlatformAdapter(ABC):
HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode)
HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode)
"""
import random
mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
if mode == "off":
return 0.0
@@ -2367,9 +2104,6 @@ class BasePlatformAdapter(ABC):
drain_task = asyncio.create_task(
self._process_message_background(late_pending, session_key)
)
# Hand ownership of the session to the drain task so stale-lock
# detection keeps working while it runs.
self._session_tasks[session_key] = drain_task
try:
self._background_tasks.add(drain_task)
drain_task.add_done_callback(self._background_tasks.discard)
@@ -2379,14 +2113,9 @@ class BasePlatformAdapter(ABC):
# Leave _active_sessions[session_key] populated — the drain
# task's own lifecycle will clean it up.
else:
# Clean up session tracking. Guard-match both deletes so a
# reset-like command that already swapped in its own
# command_guard (and cancelled us) can't be accidentally
# cleared by our unwind. The command owns the session now.
current_task = asyncio.current_task()
if current_task is not None and self._session_tasks.get(session_key) is current_task:
del self._session_tasks[session_key]
self._release_session_guard(session_key, guard=interrupt_event)
# Clean up session tracking
if session_key in self._active_sessions:
del self._active_sessions[session_key]
async def cancel_background_tasks(self) -> None:
"""Cancel any in-flight background message-processing tasks.
@@ -2416,7 +2145,6 @@ class BasePlatformAdapter(ABC):
# will be in self._background_tasks now. Re-check.
self._background_tasks.clear()
self._expected_cancelled_tasks.clear()
self._session_tasks.clear()
self._pending_messages.clear()
self._active_sessions.clear()
+1 -1
View File
@@ -75,7 +75,7 @@ def _redact(text: str) -> str:
def check_bluebubbles_requirements() -> bool:
try:
import aiohttp # noqa: F401
import httpx # noqa: F401
import httpx as _httpx # noqa: F401
except ImportError:
return False
return True
+47 -285
View File
@@ -23,7 +23,6 @@ from typing import Callable, Dict, Optional, Any
logger = logging.getLogger(__name__)
VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}
try:
import discord
@@ -528,7 +527,6 @@ class DiscordAdapter(BasePlatformAdapter):
# Reply threading mode: "off" (no replies), "first" (reply on first
# chunk only, default), "all" (reply-reference on every chunk).
self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
self._slash_commands: bool = self.config.extra.get("slash_commands", True)
async def connect(self) -> bool:
"""Connect to Discord and start receiving events."""
@@ -543,6 +541,7 @@ class DiscordAdapter(BasePlatformAdapter):
# ctypes.util.find_library fails on macOS with Homebrew-installed libs,
# so fall back to known Homebrew paths if needed.
if not opus_path:
import sys
_homebrew_paths = (
"/opt/homebrew/lib/libopus.dylib", # Apple Silicon
"/usr/local/lib/libopus.dylib", # Intel Mac
@@ -746,8 +745,7 @@ class DiscordAdapter(BasePlatformAdapter):
)
# Register slash commands
if self._slash_commands:
self._register_slash_commands()
self._register_slash_commands()
# Start the bot in background
self._bot_task = asyncio.create_task(self._client.start(self.config.token))
@@ -803,27 +801,8 @@ class DiscordAdapter(BasePlatformAdapter):
if not self._client:
return
try:
sync_policy = self._get_discord_command_sync_policy()
if sync_policy == "off":
logger.info("[%s] Skipping Discord slash command sync (policy=off)", self.name)
return
if sync_policy == "bulk":
synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
return
summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
logger.info(
"[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
self.name,
summary["total"],
summary["unchanged"],
summary["updated"],
summary["recreated"],
summary["created"],
summary["deleted"],
)
synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
logger.info("[%s] Synced %d slash command(s)", self.name, len(synced))
except asyncio.TimeoutError:
logger.warning("[%s] Slash command sync timed out after 30s", self.name)
except asyncio.CancelledError:
@@ -831,183 +810,6 @@ class DiscordAdapter(BasePlatformAdapter):
except Exception as e: # pragma: no cover - defensive logging
logger.warning("[%s] Slash command sync failed: %s", self.name, e, exc_info=True)
def _get_discord_command_sync_policy(self) -> str:
raw = str(os.getenv("DISCORD_COMMAND_SYNC_POLICY", "safe") or "").strip().lower()
if raw in _DISCORD_COMMAND_SYNC_POLICIES:
return raw
if raw:
logger.warning(
"[%s] Invalid DISCORD_COMMAND_SYNC_POLICY=%r; falling back to 'safe'",
self.name,
raw,
)
return "safe"
def _canonicalize_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
"""Reduce command payloads to the semantic fields Hermes manages."""
contexts = payload.get("contexts")
integration_types = payload.get("integration_types")
return {
"type": int(payload.get("type", 1) or 1),
"name": str(payload.get("name", "") or ""),
"description": str(payload.get("description", "") or ""),
"default_member_permissions": self._normalize_permissions(
payload.get("default_member_permissions")
),
"dm_permission": bool(payload.get("dm_permission", True)),
"nsfw": bool(payload.get("nsfw", False)),
"contexts": sorted(int(c) for c in contexts) if contexts else None,
"integration_types": (
sorted(int(i) for i in integration_types) if integration_types else None
),
"options": [
self._canonicalize_app_command_option(item)
for item in payload.get("options", []) or []
if isinstance(item, dict)
],
}
@staticmethod
def _normalize_permissions(value: Any) -> Optional[str]:
"""Discord emits default_member_permissions as str server-side but discord.py
sets it as int locally. Normalize to str-or-None so the comparison is stable."""
if value is None:
return None
return str(value)
def _existing_command_to_payload(self, command: Any) -> Dict[str, Any]:
"""Build a canonical-ready dict from an AppCommand.
discord.py's AppCommand.to_dict() does NOT include nsfw,
dm_permission, or default_member_permissions (they live only on the
attributes). Pull them from the attributes so the canonicalizer sees
the real server-side values instead of defaults otherwise any
command using non-default permissions would diff on every startup.
"""
payload = dict(command.to_dict())
nsfw = getattr(command, "nsfw", None)
if nsfw is not None:
payload["nsfw"] = bool(nsfw)
guild_only = getattr(command, "guild_only", None)
if guild_only is not None:
payload["dm_permission"] = not bool(guild_only)
default_permissions = getattr(command, "default_member_permissions", None)
if default_permissions is not None:
payload["default_member_permissions"] = getattr(
default_permissions, "value", default_permissions
)
return payload
def _canonicalize_app_command_option(self, payload: Dict[str, Any]) -> Dict[str, Any]:
return {
"type": int(payload.get("type", 0) or 0),
"name": str(payload.get("name", "") or ""),
"description": str(payload.get("description", "") or ""),
"required": bool(payload.get("required", False)),
"autocomplete": bool(payload.get("autocomplete", False)),
"choices": [
{
"name": str(choice.get("name", "") or ""),
"value": choice.get("value"),
}
for choice in payload.get("choices", []) or []
if isinstance(choice, dict)
],
"channel_types": list(payload.get("channel_types", []) or []),
"min_value": payload.get("min_value"),
"max_value": payload.get("max_value"),
"min_length": payload.get("min_length"),
"max_length": payload.get("max_length"),
"options": [
self._canonicalize_app_command_option(item)
for item in payload.get("options", []) or []
if isinstance(item, dict)
],
}
def _patchable_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
"""Fields supported by discord.py's edit_global_command route."""
canonical = self._canonicalize_app_command_payload(payload)
return {
"name": canonical["name"],
"description": canonical["description"],
"options": canonical["options"],
}
async def _safe_sync_slash_commands(self) -> Dict[str, int]:
"""Diff existing global commands and only mutate the commands that changed."""
if not self._client:
return {
"total": 0,
"unchanged": 0,
"updated": 0,
"recreated": 0,
"created": 0,
"deleted": 0,
}
tree = self._client.tree
app_id = getattr(self._client, "application_id", None) or getattr(getattr(self._client, "user", None), "id", None)
if not app_id:
raise RuntimeError("Discord application ID is unavailable for slash command sync")
desired_payloads = [command.to_dict(tree) for command in tree.get_commands()]
desired_by_key = {
(int(payload.get("type", 1) or 1), str(payload.get("name", "") or "").lower()): payload
for payload in desired_payloads
}
existing_commands = await tree.fetch_commands()
existing_by_key = {
(
int(getattr(getattr(command, "type", None), "value", getattr(command, "type", 1)) or 1),
str(command.name or "").lower(),
): command
for command in existing_commands
}
unchanged = 0
updated = 0
recreated = 0
created = 0
deleted = 0
http = self._client.http
for key, desired in desired_by_key.items():
current = existing_by_key.pop(key, None)
if current is None:
await http.upsert_global_command(app_id, desired)
created += 1
continue
current_existing_payload = self._existing_command_to_payload(current)
current_payload = self._canonicalize_app_command_payload(current_existing_payload)
desired_payload = self._canonicalize_app_command_payload(desired)
if current_payload == desired_payload:
unchanged += 1
continue
if self._patchable_app_command_payload(current_existing_payload) == self._patchable_app_command_payload(desired):
await http.delete_global_command(app_id, current.id)
await http.upsert_global_command(app_id, desired)
recreated += 1
continue
await http.edit_global_command(app_id, current.id, desired)
updated += 1
for current in existing_by_key.values():
await http.delete_global_command(app_id, current.id)
deleted += 1
return {
"total": len(desired_payloads),
"unchanged": unchanged,
"updated": updated,
"recreated": recreated,
"created": created,
"deleted": deleted,
}
async def _add_reaction(self, message: Any, emoji: str) -> bool:
"""Add an emoji reaction to a Discord message."""
if not message or not hasattr(message, "add_reaction"):
@@ -1620,7 +1422,8 @@ class DiscordAdapter(BasePlatformAdapter):
speaking_user_ids: set = set()
receiver = self._voice_receivers.get(guild_id)
if receiver:
now = time.monotonic()
import time as _time
now = _time.monotonic()
with receiver._lock:
for ssrc, last_t in receiver._last_packet_time.items():
# Consider "speaking" if audio received within last 2 seconds
@@ -2328,42 +2131,10 @@ class DiscordAdapter(BasePlatformAdapter):
# This ensures new commands added to COMMAND_REGISTRY in
# hermes_cli/commands.py automatically appear as Discord slash
# commands without needing a manual entry here.
def _build_auto_slash_command(_name: str, _description: str, _args_hint: str = ""):
"""Build a discord.app_commands.Command that proxies to _run_simple_slash."""
discord_name = _name.lower()[:32]
desc = (_description or f"Run /{_name}")[:100]
has_args = bool(_args_hint)
if has_args:
def _make_args_handler(__name: str, __hint: str):
@discord.app_commands.describe(args=f"Arguments: {__hint}"[:100])
async def _handler(interaction: discord.Interaction, args: str = ""):
await self._run_simple_slash(
interaction, f"/{__name} {args}".strip()
)
_handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
return _handler
handler = _make_args_handler(_name, _args_hint)
else:
def _make_simple_handler(__name: str):
async def _handler(interaction: discord.Interaction):
await self._run_simple_slash(interaction, f"/{__name}")
_handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
return _handler
handler = _make_simple_handler(_name)
return discord.app_commands.Command(
name=discord_name,
description=desc,
callback=handler,
)
already_registered: set[str] = set()
try:
from hermes_cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates
already_registered = set()
try:
already_registered = {cmd.name for cmd in tree.get_commands()}
except Exception:
@@ -2378,10 +2149,38 @@ class DiscordAdapter(BasePlatformAdapter):
discord_name = cmd_def.name.lower()[:32]
if discord_name in already_registered:
continue
auto_cmd = _build_auto_slash_command(
cmd_def.name,
cmd_def.description,
cmd_def.args_hint,
# Skip aliases that overlap with already-registered names
# (aliases for explicitly registered commands are handled above).
desc = (cmd_def.description or f"Run /{cmd_def.name}")[:100]
has_args = bool(cmd_def.args_hint)
if has_args:
# Command takes optional arguments — create handler with
# an optional ``args`` string parameter.
def _make_args_handler(_name: str, _hint: str):
@discord.app_commands.describe(args=f"Arguments: {_hint}"[:100])
async def _handler(interaction: discord.Interaction, args: str = ""):
await self._run_simple_slash(
interaction, f"/{_name} {args}".strip()
)
_handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
return _handler
handler = _make_args_handler(cmd_def.name, cmd_def.args_hint)
else:
# Parameterless command.
def _make_simple_handler(_name: str):
async def _handler(interaction: discord.Interaction):
await self._run_simple_slash(interaction, f"/{_name}")
_handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
return _handler
handler = _make_simple_handler(cmd_def.name)
auto_cmd = discord.app_commands.Command(
name=discord_name,
description=desc,
callback=handler,
)
try:
tree.add_command(auto_cmd)
@@ -2398,35 +2197,6 @@ class DiscordAdapter(BasePlatformAdapter):
except Exception as e:
logger.warning("Discord auto-register from COMMAND_REGISTRY failed: %s", e)
# ── Plugin-registered slash commands ──
# Plugins register via PluginContext.register_command(); we mirror
# those into Discord's native slash picker so users get the same
# autocomplete UX as for built-in commands. No per-platform plugin
# API needed — plugin commands are platform-agnostic.
try:
from hermes_cli.commands import _iter_plugin_command_entries
for plugin_name, plugin_desc, plugin_args_hint in _iter_plugin_command_entries():
discord_name = plugin_name.lower()[:32]
if discord_name in already_registered:
continue
auto_cmd = _build_auto_slash_command(
plugin_name,
plugin_desc,
plugin_args_hint,
)
try:
tree.add_command(auto_cmd)
already_registered.add(discord_name)
except Exception:
# Silently skip commands that fail registration (e.g.
# name conflict with a subcommand group).
pass
except Exception as e:
logger.warning(
"Discord auto-register from plugin commands failed: %s", e
)
# Register skills under a single /skill command group with category
# subcommand groups. This uses 1 top-level slot instead of N,
# supporting up to 25 categories × 25 skills = 625 skills.
@@ -3192,17 +2962,6 @@ class DiscordAdapter(BasePlatformAdapter):
parent_channel_id = self._get_parent_channel_id(message.channel)
is_voice_linked_channel = False
# Save mention-stripped text before auto-threading since create_thread()
# can clobber message.content, breaking /command detection in channels.
raw_content = message.content.strip()
normalized_content = raw_content
mention_prefix = False
if self._client.user and self._client.user in message.mentions:
mention_prefix = True
normalized_content = normalized_content.replace(f"<@{self._client.user.id}>", "").strip()
normalized_content = normalized_content.replace(f"<@!{self._client.user.id}>", "").strip()
message.content = normalized_content
if not isinstance(message.channel, discord.DMChannel):
channel_ids = {str(message.channel.id)}
if parent_channel_id:
@@ -3240,8 +2999,13 @@ class DiscordAdapter(BasePlatformAdapter):
in_bot_thread = is_thread and thread_id in self._threads
if require_mention and not is_free_channel and not in_bot_thread:
if self._client.user not in message.mentions and not mention_prefix:
if self._client.user not in message.mentions:
return
if self._client.user and self._client.user in message.mentions:
message.content = message.content.replace(f"<@{self._client.user.id}>", "").strip()
message.content = message.content.replace(f"<@!{self._client.user.id}>", "").strip()
# Auto-thread: when enabled, automatically create a thread for every
# @mention in a text channel so each conversation is isolated (like Slack).
# Messages already inside threads or DMs are unaffected.
@@ -3263,7 +3027,7 @@ class DiscordAdapter(BasePlatformAdapter):
# Determine message type
msg_type = MessageType.TEXT
if normalized_content.startswith("/"):
if message.content.startswith("/"):
msg_type = MessageType.COMMAND
elif message.attachments:
# Check attachment types
@@ -3403,9 +3167,7 @@ class DiscordAdapter(BasePlatformAdapter):
att.filename, e, exc_info=True,
)
# Use normalized_content (saved before auto-threading) instead of message.content,
# to detect /slash commands in channel messages.
event_text = normalized_content
event_text = message.content
if pending_text_injection:
event_text = f"{pending_text_injection}\n\n{event_text}" if event_text else pending_text_injection
-1
View File
@@ -545,7 +545,6 @@ class EmailAdapter(BasePlatformAdapter):
caption: Optional[str] = None,
file_name: Optional[str] = None,
reply_to: Optional[str] = None,
**kwargs,
) -> SendResult:
"""Send a file as an email attachment."""
try:
+80 -349
View File
@@ -14,35 +14,6 @@ Supports:
- Interactive card button-click events routed as synthetic COMMAND events
- Webhook anomaly tracking (matches openclaw createWebhookAnomalyTracker)
- Verification token validation as second auth layer (matches openclaw)
Feishu identity model
---------------------
Feishu uses three user-ID tiers (official docs:
https://open.feishu.cn/document/home/user-identity-introduction/introduction):
open_id (ou_xxx) **App-scoped**. The same person gets a different
open_id under each Feishu app. Always available in
event payloads without extra permissions.
user_id (u_xxx) **Tenant-scoped**. Stable within a company but
requires the ``contact:user.employee_id:readonly``
scope. May not be present.
union_id (on_xxx) **Developer-scoped**. Same across all apps owned by
one developer/ISV. Best cross-app stable ID.
For bots specifically:
app_id The application's canonical credential identifier.
bot open_id Returned by ``/bot/v3/info``. This is the bot's own
open_id *within its app context* and is what Feishu
puts in ``mentions[].id.open_id`` when someone
@-mentions the bot. Used for mention gating only.
In single-bot mode (what Hermes currently supports), open_id works as a
de-facto unique user identifier since there is only one app context.
Session-key participant isolation prefers ``union_id`` (via user_id_alt)
over ``open_id`` (via user_id) so that sessions stay stable if the same
user is seen through different apps in the future.
"""
from __future__ import annotations
@@ -64,7 +35,7 @@ from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from types import SimpleNamespace
from typing import Any, Dict, List, Optional, Sequence
from typing import Any, Dict, List, Optional
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import Request, urlopen
@@ -102,9 +73,7 @@ try:
UpdateMessageRequest,
UpdateMessageRequestBody,
)
from lark_oapi.core import AccessTokenType, HttpMethod
from lark_oapi.core.const import FEISHU_DOMAIN, LARK_DOMAIN
from lark_oapi.core.model import BaseRequest
from lark_oapi.event.callback.model.p2_card_action_trigger import (
CallBackCard,
P2CardActionTriggerResponse,
@@ -265,8 +234,6 @@ FALLBACK_ATTACHMENT_TEXT = "[Attachment]"
_PREFERRED_LOCALES = ("zh_cn", "en_us")
_MARKDOWN_SPECIAL_CHARS_RE = re.compile(r"([\\`*_{}\[\]()#+\-!|>~])")
_MENTION_PLACEHOLDER_RE = re.compile(r"@_user_\d+")
_MENTION_BOUNDARY_CHARS = frozenset(" \t\n\r.,;:!?、,。;:!?()[]{}<>\"'`")
_TRAILING_TERMINAL_PUNCT = frozenset(" \t\n\r.!?。!?")
_WHITESPACE_RE = re.compile(r"\s+")
_SUPPORTED_CARD_TEXT_KEYS = (
"title",
@@ -310,36 +277,12 @@ class FeishuPostMediaRef:
resource_type: str = "file"
@dataclass(frozen=True)
class FeishuMentionRef:
name: str = ""
open_id: str = ""
is_all: bool = False
is_self: bool = False
@dataclass(frozen=True)
class _FeishuBotIdentity:
open_id: str = ""
user_id: str = ""
name: str = ""
def matches(self, *, open_id: str, user_id: str, name: str) -> bool:
# Precedence: open_id > user_id > name. IDs are authoritative when both
# sides have them; the next tier is only considered when either side
# lacks the current one.
if open_id and self.open_id:
return open_id == self.open_id
if user_id and self.user_id:
return user_id == self.user_id
return bool(self.name) and name == self.name
@dataclass(frozen=True)
class FeishuPostParseResult:
text_content: str
image_keys: List[str] = field(default_factory=list)
media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
mentioned_ids: List[str] = field(default_factory=list)
@dataclass(frozen=True)
@@ -349,14 +292,14 @@ class FeishuNormalizedMessage:
preferred_message_type: str = "text"
image_keys: List[str] = field(default_factory=list)
media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
mentions: List[FeishuMentionRef] = field(default_factory=list)
mentioned_ids: List[str] = field(default_factory=list)
relation_kind: str = "plain"
metadata: Dict[str, Any] = field(default_factory=dict)
@dataclass(frozen=True)
class FeishuAdapterSettings:
app_id: str # Canonical bot/app identifier (credential, not from event payloads)
app_id: str
app_secret: str
domain_name: str
connection_mode: str
@@ -364,11 +307,7 @@ class FeishuAdapterSettings:
verification_token: str
group_policy: str
allowed_group_users: frozenset[str]
# Bot's own open_id (app-scoped) — returned by /bot/v3/info. Used only for
# @mention matching: Feishu puts this value in mentions[].id.open_id when
# a user @-mentions the bot in a group chat.
bot_open_id: str
# Bot's user_id (tenant-scoped) — optional, used as fallback mention match.
bot_user_id: str
bot_name: str
dedup_cache_size: int
@@ -566,17 +505,14 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
return rows or [[{"tag": "md", "text": content}]]
def parse_feishu_post_payload(
payload: Any,
*,
mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
) -> FeishuPostParseResult:
def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
resolved = _resolve_post_payload(payload)
if not resolved:
return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT)
image_keys: List[str] = []
media_refs: List[FeishuPostMediaRef] = []
mentioned_ids: List[str] = []
parts: List[str] = []
title = _normalize_feishu_text(str(resolved.get("title", "")).strip())
@@ -587,10 +523,7 @@ def parse_feishu_post_payload(
if not isinstance(row, list):
continue
row_text = _normalize_feishu_text(
"".join(
_render_post_element(item, image_keys, media_refs, mentions_map)
for item in row
)
"".join(_render_post_element(item, image_keys, media_refs, mentioned_ids) for item in row)
)
if row_text:
parts.append(row_text)
@@ -599,6 +532,7 @@ def parse_feishu_post_payload(
text_content="\n".join(parts).strip() or FALLBACK_POST_TEXT,
image_keys=image_keys,
media_refs=media_refs,
mentioned_ids=mentioned_ids,
)
@@ -650,7 +584,7 @@ def _render_post_element(
element: Any,
image_keys: List[str],
media_refs: List[FeishuPostMediaRef],
mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
mentioned_ids: List[str],
) -> str:
if isinstance(element, str):
return element
@@ -668,21 +602,19 @@ def _render_post_element(
escaped_label = _escape_markdown_text(label)
return f"[{escaped_label}]({href})" if href else escaped_label
if tag == "at":
# Post <at>.user_id is a placeholder ("@_user_N" or "@_all"); look up
# the real ref in mentions_map for the display name.
placeholder = str(element.get("user_id", "")).strip()
if placeholder == "@_all":
# Feishu SDK sometimes omits @_all from the top-level mentions
# payload; record it here so the caller's mention list stays complete.
if mentions_map is not None and "@_all" not in mentions_map:
mentions_map["@_all"] = FeishuMentionRef(is_all=True)
return "@all"
ref = (mentions_map or {}).get(placeholder)
if ref is not None:
display_name = ref.name or ref.open_id or "user"
else:
display_name = str(element.get("user_name", "")).strip() or "user"
return f"@{_escape_markdown_text(display_name)}"
mentioned_id = (
str(element.get("open_id", "")).strip()
or str(element.get("user_id", "")).strip()
)
if mentioned_id and mentioned_id not in mentioned_ids:
mentioned_ids.append(mentioned_id)
display_name = (
str(element.get("user_name", "")).strip()
or str(element.get("name", "")).strip()
or str(element.get("text", "")).strip()
or mentioned_id
)
return f"@{_escape_markdown_text(display_name)}" if display_name else "@"
if tag in {"img", "image"}:
image_key = str(element.get("image_key", "")).strip()
if image_key and image_key not in image_keys:
@@ -720,7 +652,8 @@ def _render_post_element(
nested_parts: List[str] = []
for key in ("text", "title", "content", "children", "elements"):
extracted = _render_nested_post(element.get(key), image_keys, media_refs, mentions_map)
value = element.get(key)
extracted = _render_nested_post(value, image_keys, media_refs, mentioned_ids)
if extracted:
nested_parts.append(extracted)
return " ".join(part for part in nested_parts if part)
@@ -730,7 +663,7 @@ def _render_nested_post(
value: Any,
image_keys: List[str],
media_refs: List[FeishuPostMediaRef],
mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
mentioned_ids: List[str],
) -> str:
if isinstance(value, str):
return _escape_markdown_text(value)
@@ -738,17 +671,17 @@ def _render_nested_post(
return " ".join(
part
for item in value
for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
if part
)
if isinstance(value, dict):
direct = _render_post_element(value, image_keys, media_refs, mentions_map)
direct = _render_post_element(value, image_keys, media_refs, mentioned_ids)
if direct:
return direct
return " ".join(
part
for item in value.values()
for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
if part
)
return ""
@@ -759,48 +692,31 @@ def _render_nested_post(
# ---------------------------------------------------------------------------
def normalize_feishu_message(
*,
message_type: str,
raw_content: str,
mentions: Optional[Sequence[Any]] = None,
bot: _FeishuBotIdentity = _FeishuBotIdentity(),
) -> FeishuNormalizedMessage:
def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNormalizedMessage:
normalized_type = str(message_type or "").strip().lower()
payload = _load_feishu_payload(raw_content)
mentions_map = _build_mentions_map(mentions, bot)
if normalized_type == "text":
text = str(payload.get("text", "") or "")
# Feishu SDK sometimes omits @_all from the mentions payload even when
# the text literal contains it (confirmed via im.v1.message.get).
if "@_all" in text and "@_all" not in mentions_map:
mentions_map["@_all"] = FeishuMentionRef(is_all=True)
return FeishuNormalizedMessage(
raw_type=normalized_type,
text_content=_normalize_feishu_text(text, mentions_map),
mentions=list(mentions_map.values()),
text_content=_normalize_feishu_text(str(payload.get("text", "") or "")),
)
if normalized_type == "post":
# The walker writes back to mentions_map if it encounters
# <at user_id="@_all">, so reading .values() after parsing is enough.
parsed_post = parse_feishu_post_payload(payload, mentions_map=mentions_map)
parsed_post = parse_feishu_post_payload(payload)
return FeishuNormalizedMessage(
raw_type=normalized_type,
text_content=parsed_post.text_content,
image_keys=list(parsed_post.image_keys),
media_refs=list(parsed_post.media_refs),
mentions=list(mentions_map.values()),
mentioned_ids=list(parsed_post.mentioned_ids),
relation_kind="post",
)
mention_refs = list(mentions_map.values())
if normalized_type == "image":
image_key = str(payload.get("image_key", "") or "").strip()
alt_text = _normalize_feishu_text(
str(payload.get("text", "") or "")
or str(payload.get("alt", "") or "")
or FALLBACK_IMAGE_TEXT,
mentions_map,
or FALLBACK_IMAGE_TEXT
)
return FeishuNormalizedMessage(
raw_type=normalized_type,
@@ -808,7 +724,6 @@ def normalize_feishu_message(
preferred_message_type="photo",
image_keys=[image_key] if image_key else [],
relation_kind="image",
mentions=mention_refs,
)
if normalized_type in {"file", "audio", "media"}:
media_ref = _build_media_ref_from_payload(payload, resource_type=normalized_type)
@@ -820,7 +735,6 @@ def normalize_feishu_message(
media_refs=[media_ref] if media_ref.file_key else [],
relation_kind=normalized_type,
metadata={"placeholder_text": placeholder},
mentions=mention_refs,
)
if normalized_type == "merge_forward":
return _normalize_merge_forward_message(payload)
@@ -1095,20 +1009,8 @@ def _first_non_empty_text(*values: Any) -> str:
# ---------------------------------------------------------------------------
def _normalize_feishu_text(
text: str,
mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
) -> str:
def _sub(match: "re.Match[str]") -> str:
key = match.group(0)
ref = (mentions_map or {}).get(key)
if ref is None:
return " "
name = ref.name or ref.open_id or "user"
return f"@{name}"
cleaned = _MENTION_PLACEHOLDER_RE.sub(_sub, text or "")
cleaned = cleaned.replace("@_all", "@all")
def _normalize_feishu_text(text: str) -> str:
cleaned = _MENTION_PLACEHOLDER_RE.sub(" ", text or "")
cleaned = cleaned.replace("\r\n", "\n").replace("\r", "\n")
cleaned = "\n".join(_WHITESPACE_RE.sub(" ", line).strip() for line in cleaned.split("\n"))
cleaned = "\n".join(line for line in cleaned.split("\n") if line)
@@ -1127,117 +1029,6 @@ def _unique_lines(lines: List[str]) -> List[str]:
return unique
# ---------------------------------------------------------------------------
# Mention helpers
# ---------------------------------------------------------------------------
def _extract_mention_ids(mention: Any) -> tuple[str, str]:
# Returns (open_id, user_id). im.v1.message.get hands back id as a string
# plus id_type discriminator; event payloads hand back a nested UserId
# object carrying both fields.
mention_id = getattr(mention, "id", None)
if isinstance(mention_id, str):
id_type = str(getattr(mention, "id_type", "") or "").lower()
if id_type == "open_id":
return mention_id, ""
if id_type == "user_id":
return "", mention_id
return "", ""
if mention_id is None:
return "", ""
return (
str(getattr(mention_id, "open_id", "") or ""),
str(getattr(mention_id, "user_id", "") or ""),
)
def _build_mentions_map(
mentions: Optional[Sequence[Any]],
bot: _FeishuBotIdentity,
) -> Dict[str, FeishuMentionRef]:
result: Dict[str, FeishuMentionRef] = {}
for mention in mentions or []:
key = str(getattr(mention, "key", "") or "")
if not key:
continue
if key == "@_all":
result[key] = FeishuMentionRef(is_all=True)
continue
open_id, user_id = _extract_mention_ids(mention)
name = str(getattr(mention, "name", "") or "").strip()
result[key] = FeishuMentionRef(
name=name,
open_id=open_id,
is_self=bot.matches(open_id=open_id, user_id=user_id, name=name),
)
return result
def _build_mention_hint(mentions: Sequence[FeishuMentionRef]) -> str:
parts: List[str] = []
seen: set = set()
for ref in mentions:
if ref.is_self:
continue
signature = (ref.is_all, ref.open_id, ref.name)
if signature in seen:
continue
seen.add(signature)
if ref.is_all:
parts.append("@all")
elif ref.open_id:
parts.append(f"{ref.name or 'unknown'} (open_id={ref.open_id})")
else:
parts.append(ref.name or "unknown")
return f"[Mentioned: {', '.join(parts)}]" if parts else ""
def _strip_edge_self_mentions(
text: str,
mentions: Sequence[FeishuMentionRef],
) -> str:
# Leading: strip consecutive self-mentions unconditionally.
# Trailing: strip only when followed by whitespace/terminal punct, so
# mid-sentence references ("don't @Bot again") stay intact.
# Leading word-boundary prevents @Al from eating @Alice.
if not text:
return text
self_names = [
f"@{ref.name or ref.open_id or 'user'}"
for ref in mentions
if ref.is_self
]
if not self_names:
return text
remaining = text.lstrip()
while True:
for nm in self_names:
if not remaining.startswith(nm):
continue
after = remaining[len(nm):]
if after and after[0] not in _MENTION_BOUNDARY_CHARS:
continue
remaining = after.lstrip()
break
else:
break
while True:
i = len(remaining)
while i > 0 and remaining[i - 1] in _TRAILING_TERMINAL_PUNCT:
i -= 1
body = remaining[:i]
tail = remaining[i:]
for nm in self_names:
if body.endswith(nm):
remaining = body[: -len(nm)].rstrip() + tail
break
else:
return remaining
def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None:
"""Run the official Lark WS client in its own thread-local event loop."""
import lark_oapi.ws.client as ws_client_module
@@ -1700,7 +1491,6 @@ class FeishuAdapter(BasePlatformAdapter):
if not self._client:
return SendResult(success=False, error="Not connected")
content = self.format_message(content)
try:
msg_type, payload = self._build_outbound_payload(content)
body = self._build_update_message_body(msg_type=msg_type, content=payload)
@@ -2680,22 +2470,13 @@ class FeishuAdapter(BasePlatformAdapter):
chat_type: str,
message_id: str,
) -> None:
text, inbound_type, media_urls, media_types, mentions = await self._extract_message_content(message)
if inbound_type == MessageType.TEXT:
text = _strip_edge_self_mentions(text, mentions)
if text.startswith("/"):
inbound_type = MessageType.COMMAND
# Guard runs post-strip so a pure "@Bot" message (stripped to "") is dropped.
text, inbound_type, media_urls, media_types = await self._extract_message_content(message)
if inbound_type == MessageType.TEXT and not text and not media_urls:
logger.debug("[Feishu] Ignoring empty text message id=%s", message_id)
logger.debug("[Feishu] Ignoring unsupported or empty message type: %s", getattr(message, "message_type", ""))
return
if inbound_type != MessageType.COMMAND:
hint = _build_mention_hint(mentions)
if hint:
text = f"{hint}\n\n{text}" if text else hint
if inbound_type == MessageType.TEXT and text.startswith("/"):
inbound_type = MessageType.COMMAND
reply_to_message_id = (
getattr(message, "parent_id", None)
@@ -3154,20 +2935,14 @@ class FeishuAdapter(BasePlatformAdapter):
# Message content extraction and resource download
# =========================================================================
async def _extract_message_content(
self, message: Any
) -> tuple[str, MessageType, List[str], List[str], List[FeishuMentionRef]]:
async def _extract_message_content(self, message: Any) -> tuple[str, MessageType, List[str], List[str]]:
"""Extract text and cached media from a normalized Feishu message."""
raw_content = getattr(message, "content", "") or ""
raw_type = getattr(message, "message_type", "") or ""
message_id = str(getattr(message, "message_id", "") or "")
logger.info("[Feishu] Received raw message type=%s message_id=%s", raw_type, message_id)
normalized = normalize_feishu_message(
message_type=raw_type,
raw_content=raw_content,
mentions=getattr(message, "mentions", None),
bot=self._bot_identity(),
)
normalized = normalize_feishu_message(message_type=raw_type, raw_content=raw_content)
media_urls, media_types = await self._download_feishu_message_resources(
message_id=message_id,
normalized=normalized,
@@ -3184,7 +2959,7 @@ class FeishuAdapter(BasePlatformAdapter):
if injected:
text = injected
return text, inbound_type, media_urls, media_types, list(normalized.mentions)
return text, inbound_type, media_urls, media_types
async def _download_feishu_message_resources(
self,
@@ -3448,22 +3223,10 @@ class FeishuAdapter(BasePlatformAdapter):
return "group"
async def _resolve_sender_profile(self, sender_id: Any) -> Dict[str, Optional[str]]:
"""Map Feishu's three-tier user IDs onto Hermes' SessionSource fields.
Preference order for the primary ``user_id`` field:
1. user_id (tenant-scoped, most stable requires permission scope)
2. open_id (app-scoped, always available different per bot app)
``user_id_alt`` carries the union_id (developer-scoped, stable across
all apps by the same developer). Session-key generation prefers
user_id_alt when present, so participant isolation stays stable even
if the primary ID is the app-scoped open_id.
"""
open_id = getattr(sender_id, "open_id", None) or None
user_id = getattr(sender_id, "user_id", None) or None
union_id = getattr(sender_id, "union_id", None) or None
# Prefer tenant-scoped user_id; fall back to app-scoped open_id.
primary_id = user_id or open_id
primary_id = open_id or user_id
display_name = await self._resolve_sender_name_from_api(primary_id or union_id)
return {
"user_id": primary_id,
@@ -3545,31 +3308,15 @@ class FeishuAdapter(BasePlatformAdapter):
body = getattr(parent, "body", None)
msg_type = getattr(parent, "msg_type", "") or ""
raw_content = getattr(body, "content", "") or ""
parent_mentions = getattr(parent, "mentions", None) if parent else None
text = self._extract_text_from_raw_content(
msg_type=msg_type,
raw_content=raw_content,
mentions=parent_mentions,
)
text = self._extract_text_from_raw_content(msg_type=msg_type, raw_content=raw_content)
self._message_text_cache[message_id] = text
return text
except Exception:
logger.warning("[Feishu] Failed to fetch parent message %s", message_id, exc_info=True)
return None
def _extract_text_from_raw_content(
self,
*,
msg_type: str,
raw_content: str,
mentions: Optional[Sequence[Any]] = None,
) -> Optional[str]:
normalized = normalize_feishu_message(
message_type=msg_type,
raw_content=raw_content,
mentions=mentions,
bot=self._bot_identity(),
)
def _extract_text_from_raw_content(self, *, msg_type: str, raw_content: str) -> Optional[str]:
normalized = normalize_feishu_message(message_type=msg_type, raw_content=raw_content)
if normalized.text_content:
return normalized.text_content
placeholder = normalized.metadata.get("placeholder_text") if isinstance(normalized.metadata, dict) else None
@@ -3639,10 +3386,10 @@ class FeishuAdapter(BasePlatformAdapter):
normalized = normalize_feishu_message(
message_type=getattr(message, "message_type", "") or "",
raw_content=raw_content,
mentions=getattr(message, "mentions", None),
bot=self._bot_identity(),
)
return self._post_mentions_bot(normalized.mentions)
if normalized.mentioned_ids:
return self._post_mentions_bot(normalized.mentioned_ids)
return False
def _is_self_sent_bot_message(self, event: Any) -> bool:
"""Return True only for Feishu events emitted by this Hermes bot."""
@@ -3662,37 +3409,30 @@ class FeishuAdapter(BasePlatformAdapter):
return False
def _message_mentions_bot(self, mentions: List[Any]) -> bool:
# IDs trump names: when both sides have open_id (or both user_id),
# match requires equal IDs. Name fallback only when either side
# lacks an ID.
"""Check whether any mention targets the configured or inferred bot identity."""
for mention in mentions:
mention_id = getattr(mention, "id", None)
mention_open_id = (getattr(mention_id, "open_id", None) or "").strip()
mention_user_id = (getattr(mention_id, "user_id", None) or "").strip()
mention_open_id = getattr(mention_id, "open_id", None)
mention_user_id = getattr(mention_id, "user_id", None)
mention_name = (getattr(mention, "name", None) or "").strip()
if mention_open_id and self._bot_open_id:
if mention_open_id == self._bot_open_id:
return True
continue # IDs differ — not the bot; skip name fallback.
if mention_user_id and self._bot_user_id:
if mention_user_id == self._bot_user_id:
return True
continue
if self._bot_open_id and mention_open_id == self._bot_open_id:
return True
if self._bot_user_id and mention_user_id == self._bot_user_id:
return True
if self._bot_name and mention_name == self._bot_name:
return True
return False
def _post_mentions_bot(self, mentions: List[FeishuMentionRef]) -> bool:
return any(m.is_self for m in mentions)
def _bot_identity(self) -> _FeishuBotIdentity:
return _FeishuBotIdentity(
open_id=self._bot_open_id,
user_id=self._bot_user_id,
name=self._bot_name,
)
def _post_mentions_bot(self, mentioned_ids: List[str]) -> bool:
if not mentioned_ids:
return False
if self._bot_open_id and self._bot_open_id in mentioned_ids:
return True
if self._bot_user_id and self._bot_user_id in mentioned_ids:
return True
return False
async def _hydrate_bot_identity(self) -> None:
"""Best-effort discovery of bot identity for precise group mention gating
@@ -3717,15 +3457,14 @@ class FeishuAdapter(BasePlatformAdapter):
# uses via probe_bot().
if not self._bot_open_id or not self._bot_name:
try:
req = (
BaseRequest.builder()
.http_method(HttpMethod.GET)
.uri("/open-apis/bot/v3/info")
.token_types({AccessTokenType.TENANT})
.build()
resp = await asyncio.to_thread(
self._client.request,
method="GET",
url="/open-apis/bot/v3/info",
body=None,
raw_response=True,
)
resp = await asyncio.to_thread(self._client.request, req)
content = getattr(getattr(resp, "raw", None), "content", None)
content = getattr(resp, "content", None)
if content:
payload = json.loads(content)
parsed = _parse_bot_response(payload) or {}
@@ -4473,9 +4212,6 @@ def probe_bot(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
Uses lark_oapi SDK when available, falls back to raw HTTP otherwise.
Returns {"bot_name": ..., "bot_open_id": ...} on success, None on failure.
Note: ``bot_open_id`` here is the bot's app-scoped open_id — the same ID
that Feishu puts in @mention payloads. It is NOT the app_id.
"""
if FEISHU_AVAILABLE:
return _probe_bot_sdk(app_id, app_secret, domain)
@@ -4496,12 +4232,12 @@ def _build_onboard_client(app_id: str, app_secret: str, domain: str) -> Any:
def _parse_bot_response(data: dict) -> Optional[dict]:
# /bot/v3/info returns bot.app_name; legacy paths used bot_name — accept both.
"""Extract bot_name and bot_open_id from a /bot/v3/info response."""
if data.get("code") != 0:
return None
bot = data.get("bot") or data.get("data", {}).get("bot") or {}
return {
"bot_name": bot.get("app_name") or bot.get("bot_name"),
"bot_name": bot.get("bot_name"),
"bot_open_id": bot.get("open_id"),
}
@@ -4510,18 +4246,13 @@ def _probe_bot_sdk(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
"""Probe bot info using lark_oapi SDK."""
try:
client = _build_onboard_client(app_id, app_secret, domain)
req = (
BaseRequest.builder()
.http_method(HttpMethod.GET)
.uri("/open-apis/bot/v3/info")
.token_types({AccessTokenType.TENANT})
.build()
resp = client.request(
method="GET",
url="/open-apis/bot/v3/info",
body=None,
raw_response=True,
)
resp = client.request(req)
content = getattr(getattr(resp, "raw", None), "content", None)
if content is None:
return None
return _parse_bot_response(json.loads(content))
return _parse_bot_response(json.loads(resp.content))
except Exception as exc:
logger.debug("[Feishu onboard] SDK probe failed: %s", exc)
return None
+1
View File
@@ -410,6 +410,7 @@ class MattermostAdapter(BasePlatformAdapter):
logger.warning("Mattermost: blocked unsafe URL (SSRF protection)")
return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to)
import asyncio
import aiohttp
last_exc = None
+4 -2
View File
@@ -26,8 +26,9 @@ from .adapter import ( # noqa: F401
# -- Onboard (QR-code scan-to-configure) -----------------------------------
from .onboard import ( # noqa: F401
BindStatus,
create_bind_task,
poll_bind_result,
build_connect_url,
qr_register,
)
from .crypto import decrypt_secret, generate_bind_key # noqa: F401
@@ -43,8 +44,9 @@ __all__ = [
"_ssrf_redirect_guard",
# onboard
"BindStatus",
"create_bind_task",
"poll_bind_result",
"build_connect_url",
"qr_register",
# crypto
"decrypt_secret",
"generate_bind_key",
+8 -6
View File
@@ -535,9 +535,6 @@ class QQAdapter(BasePlatformAdapter):
quick_disconnect_count = 0
else:
backoff_idx += 1
if backoff_idx >= MAX_RECONNECT_ATTEMPTS:
logger.error("[%s] Max reconnect attempts reached (QQCloseError)", self._log_tag)
return
except Exception as exc:
if not self._running:
@@ -1089,8 +1086,11 @@ class QQAdapter(BasePlatformAdapter):
return MessageType.VIDEO
if "image" in first_type or "photo" in first_type:
return MessageType.PHOTO
# Unknown content type with an attachment — don't assume PHOTO
# to prevent non-image files from being sent to vision analysis.
logger.debug(
"Unknown media content_type '%s', defaulting to TEXT",
"[%s] Unknown media content_type '%s', defaulting to TEXT",
self._log_tag,
first_type,
)
return MessageType.TEXT
@@ -1826,12 +1826,14 @@ class QQAdapter(BasePlatformAdapter):
body["file_name"] = file_name
# Retry transient upload failures
last_exc = None
for attempt in range(3):
try:
return await self._api_request(
"POST", path, body, timeout=FILE_UPLOAD_TIMEOUT
)
except RuntimeError as exc:
last_exc = exc
err_msg = str(exc)
if any(
kw in err_msg
@@ -1840,8 +1842,8 @@ class QQAdapter(BasePlatformAdapter):
raise
if attempt < 2:
await asyncio.sleep(1.5 * (attempt + 1))
else:
raise
raise last_exc # type: ignore[misc]
# Maximum time (seconds) to wait for reconnection before giving up on send.
_RECONNECT_WAIT_SECONDS = 15.0
+21 -117
View File
@@ -1,10 +1,6 @@
"""
QQBot scan-to-configure (QR code onboard) module.
Mirrors the Feishu onboarding pattern: synchronous HTTP + a single public
entry-point ``qr_register()`` that handles the full flow (create task
display QR code poll decrypt credentials).
Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to
generate a QR-code URL and poll for scan completion. On success the caller
receives the bot's *app_id*, *client_secret* (decrypted locally), and the
@@ -16,20 +12,18 @@ Reference: https://bot.q.qq.com/wiki/develop/api-v2/
from __future__ import annotations
import logging
import time
from enum import IntEnum
from typing import Optional, Tuple
from typing import Tuple
from urllib.parse import quote
from .constants import (
ONBOARD_API_TIMEOUT,
ONBOARD_CREATE_PATH,
ONBOARD_POLL_INTERVAL,
ONBOARD_POLL_PATH,
PORTAL_HOST,
QR_URL_TEMPLATE,
)
from .crypto import decrypt_secret, generate_bind_key
from .crypto import generate_bind_key
from .utils import get_api_headers
logger = logging.getLogger(__name__)
@@ -41,7 +35,7 @@ logger = logging.getLogger(__name__)
class BindStatus(IntEnum):
"""Status codes returned by ``_poll_bind_result``."""
"""Status codes returned by ``poll_bind_result``."""
NONE = 0
PENDING = 1
@@ -50,40 +44,18 @@ class BindStatus(IntEnum):
# ---------------------------------------------------------------------------
# QR rendering
# ---------------------------------------------------------------------------
try:
import qrcode as _qrcode_mod
except (ImportError, TypeError):
_qrcode_mod = None # type: ignore[assignment]
def _render_qr(url: str) -> bool:
"""Try to render a QR code in the terminal. Returns True if successful."""
if _qrcode_mod is None:
return False
try:
qr = _qrcode_mod.QRCode(
error_correction=_qrcode_mod.constants.ERROR_CORRECT_M,
border=2,
)
qr.add_data(url)
qr.make(fit=True)
qr.print_ascii(invert=True)
return True
except Exception:
return False
# ---------------------------------------------------------------------------
# Synchronous HTTP helpers (mirrors Feishu _post_registration pattern)
# Public API
# ---------------------------------------------------------------------------
def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
async def create_bind_task(
timeout: float = ONBOARD_API_TIMEOUT,
) -> Tuple[str, str]:
"""Create a bind task and return *(task_id, aes_key_base64)*.
The AES key is generated locally and sent to the server so it can
encrypt the bot credentials before returning them.
Raises:
RuntimeError: If the API returns a non-zero ``retcode``.
"""
@@ -92,8 +64,8 @@ def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}"
key = generate_bind_key()
with httpx.Client(timeout=timeout, follow_redirects=True) as client:
resp = client.post(url, json={"key": key}, headers=get_api_headers())
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
resp = await client.post(url, json={"key": key}, headers=get_api_headers())
resp.raise_for_status()
data = resp.json()
@@ -108,7 +80,7 @@ def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
return task_id, key
def _poll_bind_result(
async def poll_bind_result(
task_id: str,
timeout: float = ONBOARD_API_TIMEOUT,
) -> Tuple[BindStatus, str, str, str]:
@@ -117,6 +89,12 @@ def _poll_bind_result(
Returns:
A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``.
* ``bot_encrypt_secret`` is AES-256-GCM encrypted decrypt it with
:func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the
key from :func:`create_bind_task`.
* ``user_openid`` is the OpenID of the person who scanned the code
(available when ``status == COMPLETED``).
Raises:
RuntimeError: If the API returns a non-zero ``retcode``.
"""
@@ -124,8 +102,8 @@ def _poll_bind_result(
url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}"
with httpx.Client(timeout=timeout, follow_redirects=True) as client:
resp = client.post(url, json={"task_id": task_id}, headers=get_api_headers())
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers())
resp.raise_for_status()
data = resp.json()
@@ -144,77 +122,3 @@ def _poll_bind_result(
def build_connect_url(task_id: str) -> str:
"""Build the QR-code target URL for a given *task_id*."""
return QR_URL_TEMPLATE.format(task_id=quote(task_id))
# ---------------------------------------------------------------------------
# Public entry-point
# ---------------------------------------------------------------------------
_MAX_REFRESHES = 3
def qr_register(timeout_seconds: int = 600) -> Optional[dict]:
"""Run the QQBot scan-to-configure QR registration flow.
Mirrors ``feishu.qr_register()``: handles create display poll
decrypt in one call. Unexpected errors propagate to the caller.
:returns:
``{"app_id": ..., "client_secret": ..., "user_openid": ...}`` on
success, or ``None`` on failure / expiry / cancellation.
"""
deadline = time.monotonic() + timeout_seconds
for refresh_count in range(_MAX_REFRESHES + 1):
# ── Create bind task ──
try:
task_id, aes_key = _create_bind_task()
except Exception as exc:
logger.warning("[QQBot onboard] Failed to create bind task: %s", exc)
return None
url = build_connect_url(task_id)
# ── Display QR code + URL ──
print()
if _render_qr(url):
print(f" Scan the QR code above, or open this URL directly:\n {url}")
else:
print(f" Open this URL in QQ on your phone:\n {url}")
print(" Tip: pip install qrcode to display a scannable QR code here")
print()
# ── Poll loop ──
while time.monotonic() < deadline:
try:
status, app_id, encrypted_secret, user_openid = _poll_bind_result(task_id)
except Exception:
time.sleep(ONBOARD_POLL_INTERVAL)
continue
if status == BindStatus.COMPLETED:
client_secret = decrypt_secret(encrypted_secret, aes_key)
print()
print(f" QR scan complete! (App ID: {app_id})")
if user_openid:
print(f" Scanner's OpenID: {user_openid}")
return {
"app_id": app_id,
"client_secret": client_secret,
"user_openid": user_openid,
}
if status == BindStatus.EXPIRED:
if refresh_count >= _MAX_REFRESHES:
logger.warning("[QQBot onboard] QR code expired %d times — giving up", _MAX_REFRESHES)
return None
print(f"\n QR code expired, refreshing... ({refresh_count + 1}/{_MAX_REFRESHES})")
break # next for-loop iteration creates a new task
time.sleep(ONBOARD_POLL_INTERVAL)
else:
# deadline reached without completing
logger.warning("[QQBot onboard] Poll timed out after %ds", timeout_seconds)
return None
return None
+15 -57
View File
@@ -38,7 +38,6 @@ from gateway.platforms.base import (
BasePlatformAdapter,
MessageEvent,
MessageType,
ProcessingOutcome,
SendResult,
SUPPORTED_DOCUMENT_TYPES,
safe_url_for_log,
@@ -114,11 +113,6 @@ class SlackAdapter(BasePlatformAdapter):
# Cache for _fetch_thread_context results: cache_key → _ThreadContextCache
self._thread_context_cache: Dict[str, _ThreadContextCache] = {}
self._THREAD_CACHE_TTL = 60.0
# Track message IDs that should get reaction lifecycle (DMs / @mentions).
self._reacting_message_ids: set = set()
# Track active assistant thread status indicators so stop_typing can
# clear them (chat_id → thread_ts).
self._active_status_threads: Dict[str, str] = {}
async def connect(self) -> bool:
"""Connect to Slack via Socket Mode."""
@@ -368,7 +362,6 @@ class SlackAdapter(BasePlatformAdapter):
if not thread_ts:
return # Can only set status in a thread context
self._active_status_threads[chat_id] = thread_ts
try:
await self._get_client(chat_id).assistant_threads_setStatus(
channel_id=chat_id,
@@ -380,22 +373,6 @@ class SlackAdapter(BasePlatformAdapter):
# in an assistant-enabled context. Falls back to reactions.
logger.debug("[Slack] assistant.threads.setStatus failed: %s", e)
async def stop_typing(self, chat_id: str) -> None:
"""Clear the assistant thread status indicator."""
if not self._app:
return
thread_ts = self._active_status_threads.pop(chat_id, None)
if not thread_ts:
return
try:
await self._get_client(chat_id).assistant_threads_setStatus(
channel_id=chat_id,
thread_ts=thread_ts,
status="",
)
except Exception as e:
logger.debug("[Slack] assistant.threads.setStatus clear failed: %s", e)
def _dm_top_level_threads_as_sessions(self) -> bool:
"""Whether top-level Slack DMs get per-message session threads.
@@ -607,38 +584,6 @@ class SlackAdapter(BasePlatformAdapter):
logger.debug("[Slack] reactions.remove failed (%s): %s", emoji, e)
return False
def _reactions_enabled(self) -> bool:
"""Check if message reactions are enabled via config/env."""
return os.getenv("SLACK_REACTIONS", "true").lower() not in ("false", "0", "no")
async def on_processing_start(self, event: MessageEvent) -> None:
"""Add an in-progress reaction when message processing begins."""
if not self._reactions_enabled():
return
ts = getattr(event, "message_id", None)
if not ts or ts not in self._reacting_message_ids:
return
channel_id = getattr(event.source, "chat_id", None)
if channel_id:
await self._add_reaction(channel_id, ts, "eyes")
async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None:
"""Swap the in-progress reaction for a final success/failure reaction."""
if not self._reactions_enabled():
return
ts = getattr(event, "message_id", None)
if not ts or ts not in self._reacting_message_ids:
return
self._reacting_message_ids.discard(ts)
channel_id = getattr(event.source, "chat_id", None)
if not channel_id:
return
await self._remove_reaction(channel_id, ts, "eyes")
if outcome == ProcessingOutcome.SUCCESS:
await self._add_reaction(channel_id, ts, "white_check_mark")
elif outcome == ProcessingOutcome.FAILURE:
await self._add_reaction(channel_id, ts, "x")
# ----- User identity resolution -----
async def _resolve_user_name(self, user_id: str, chat_id: str = "") -> str:
@@ -1268,12 +1213,17 @@ class SlackAdapter(BasePlatformAdapter):
# Only react when bot is directly addressed (DM or @mention).
# In listen-all channels (require_mention=false), reacting to every
# casual message would be noisy.
_should_react = (is_dm or is_mentioned) and self._reactions_enabled()
_should_react = is_dm or is_mentioned
if _should_react:
self._reacting_message_ids.add(ts)
await self._add_reaction(channel_id, ts, "eyes")
await self.handle_message(msg_event)
if _should_react:
await self._remove_reaction(channel_id, ts, "eyes")
await self._add_reaction(channel_id, ts, "white_check_mark")
# ----- Approval button support (Block Kit) -----
async def send_exec_approval(
@@ -1650,9 +1600,11 @@ class SlackAdapter(BasePlatformAdapter):
async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str:
"""Download a Slack file using the bot token for auth, with retry."""
import asyncio
import httpx
bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
last_exc = None
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
for attempt in range(3):
@@ -1682,6 +1634,7 @@ class SlackAdapter(BasePlatformAdapter):
from gateway.platforms.base import cache_image_from_bytes
return cache_image_from_bytes(response.content, ext)
except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
last_exc = exc
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
raise
if attempt < 2:
@@ -1690,12 +1643,15 @@ class SlackAdapter(BasePlatformAdapter):
await asyncio.sleep(1.5 * (attempt + 1))
continue
raise
raise last_exc
async def _download_slack_file_bytes(self, url: str, team_id: str = "") -> bytes:
"""Download a Slack file and return raw bytes, with retry."""
import asyncio
import httpx
bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
last_exc = None
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
for attempt in range(3):
@@ -1707,6 +1663,7 @@ class SlackAdapter(BasePlatformAdapter):
response.raise_for_status()
return response.content
except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
last_exc = exc
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
raise
if attempt < 2:
@@ -1715,6 +1672,7 @@ class SlackAdapter(BasePlatformAdapter):
await asyncio.sleep(1.5 * (attempt + 1))
continue
raise
raise last_exc
# ── Channel mention gating ─────────────────────────────────────────────
+10 -71
View File
@@ -71,10 +71,8 @@ from gateway.platforms.base import (
SendResult,
cache_image_from_bytes,
cache_audio_from_bytes,
cache_video_from_bytes,
cache_document_from_bytes,
resolve_proxy_url,
SUPPORTED_VIDEO_TYPES,
SUPPORTED_DOCUMENT_TYPES,
utf16_len,
_prefix_within_utf16_limit,
@@ -496,13 +494,6 @@ class TelegramAdapter(BasePlatformAdapter):
"[%s] DM topic '%s' already exists in chat %s (will be mapped from incoming messages)",
self.name, name, chat_id,
)
elif "not a forum" in error_text or "forums_disabled" in error_text:
logger.warning(
"[%s] Cannot create DM topic '%s' in chat %s: Topics mode is not enabled. "
"The user must open the DM with this bot in Telegram, tap the bot name "
"at the top, and enable 'Topics' in chat settings before topics can be created.",
self.name, name, chat_id,
)
else:
logger.warning(
"[%s] Failed to create DM topic '%s' in chat %s: %s",
@@ -794,28 +785,8 @@ class TelegramAdapter(BasePlatformAdapter):
# Telegram pushes updates to our HTTP endpoint. This
# enables cloud platforms (Fly.io, Railway) to auto-wake
# suspended machines on inbound HTTP traffic.
#
# SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it,
# python-telegram-bot passes secret_token=None and the
# webhook endpoint accepts any HTTP POST — attackers can
# inject forged updates as if from Telegram. Refuse to
# start rather than silently run in fail-open mode.
# See GHSA-3vpc-7q5r-276h.
webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443"))
webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip()
if not webhook_secret:
raise RuntimeError(
"TELEGRAM_WEBHOOK_SECRET is required when "
"TELEGRAM_WEBHOOK_URL is set. Without it, the "
"webhook endpoint accepts forged updates from "
"anyone who can reach it — see "
"https://github.com/NousResearch/hermes-agent/"
"security/advisories/GHSA-3vpc-7q5r-276h.\n\n"
"Generate a secret and set it in your .env:\n"
" export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n"
"Then register it with Telegram when setting the "
"webhook via setWebhook's secret_token parameter."
)
webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None
from urllib.parse import urlparse
webhook_path = urlparse(webhook_url).path or "/telegram"
@@ -1733,6 +1704,7 @@ class TelegramAdapter(BasePlatformAdapter):
return SendResult(success=False, error="Not connected")
try:
import os
if not os.path.exists(audio_path):
return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
@@ -1781,6 +1753,7 @@ class TelegramAdapter(BasePlatformAdapter):
return SendResult(success=False, error="Not connected")
try:
import os
if not os.path.exists(image_path):
return SendResult(success=False, error=self._missing_media_path_error("Image", image_path))
@@ -2093,7 +2066,7 @@ class TelegramAdapter(BasePlatformAdapter):
url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
return _ph(f'[{display}]({url})')
text = re.sub(r'\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)', _convert_link, text)
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
# 4) Convert markdown headers (## Title) → bold *Title*
def _convert_header(m):
@@ -2353,16 +2326,10 @@ class TelegramAdapter(BasePlatformAdapter):
DMs remain unrestricted. Group/supergroup messages are accepted when:
- the chat is explicitly allowlisted in ``free_response_chats``
- ``require_mention`` is disabled
- the message is a command
- the message replies to the bot
- the bot is @mentioned
- the text/caption matches a configured regex wake-word pattern
When ``require_mention`` is enabled, slash commands are not given
special treatment they must pass the same mention/reply checks
as any other group message. Users can still trigger commands via
the Telegram bot menu (``/command@botname``) or by explicitly
mentioning the bot (``@botname /command``), both of which are
recognised as mentions by :meth:`_message_mentions_bot`.
"""
if not self._is_group_chat(message):
return True
@@ -2377,6 +2344,8 @@ class TelegramAdapter(BasePlatformAdapter):
return True
if not self._telegram_require_mention():
return True
if is_command:
return True
if self._is_reply_to_bot(message):
return True
if self._message_mentions_bot(message):
@@ -2659,23 +2628,6 @@ class TelegramAdapter(BasePlatformAdapter):
except Exception as e:
logger.warning("[Telegram] Failed to cache audio: %s", e, exc_info=True)
elif msg.video:
try:
file_obj = await msg.video.get_file()
video_bytes = await file_obj.download_as_bytearray()
ext = ".mp4"
if getattr(file_obj, "file_path", None):
for candidate in SUPPORTED_VIDEO_TYPES:
if file_obj.file_path.lower().endswith(candidate):
ext = candidate
break
cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext)
event.media_urls = [cached_path]
event.media_types = [SUPPORTED_VIDEO_TYPES.get(ext, "video/mp4")]
logger.info("[Telegram] Cached user video at %s", cached_path)
except Exception as e:
logger.warning("[Telegram] Failed to cache video: %s", e, exc_info=True)
# Download document files to cache for agent processing
elif msg.document:
doc = msg.document
@@ -2692,21 +2644,6 @@ class TelegramAdapter(BasePlatformAdapter):
mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
ext = mime_to_ext.get(doc.mime_type, "")
if not ext and doc.mime_type:
video_mime_to_ext = {v: k for k, v in SUPPORTED_VIDEO_TYPES.items()}
ext = video_mime_to_ext.get(doc.mime_type, "")
if ext in SUPPORTED_VIDEO_TYPES:
file_obj = await doc.get_file()
video_bytes = await file_obj.download_as_bytearray()
cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext)
event.media_urls = [cached_path]
event.media_types = [SUPPORTED_VIDEO_TYPES[ext]]
event.message_type = MessageType.VIDEO
logger.info("[Telegram] Cached user video document at %s", cached_path)
await self.handle_message(event)
return
# Check if supported
if ext not in SUPPORTED_DOCUMENT_TYPES:
supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))
@@ -2845,11 +2782,13 @@ class TelegramAdapter(BasePlatformAdapter):
logger.info("[Telegram] Analyzing sticker at %s", cached_path)
from tools.vision_tools import vision_analyze_tool
import json as _json
result_json = await vision_analyze_tool(
image_url=cached_path,
user_prompt=STICKER_VISION_PROMPT,
)
result = json.loads(result_json)
result = _json.loads(result_json)
if result.get("success"):
description = result.get("analysis", "a sticker")
+5 -146
View File
@@ -508,11 +508,6 @@ class WeComAdapter(BasePlatformAdapter):
self._remember_chat_req_id(chat_id, self._payload_req_id(payload))
text, reply_text = self._extract_text(body)
# Strip leading @mention in group chats so slash commands like
# "@BotName /approve" are correctly recognized as "/approve".
# Mirrors what the Telegram adapter does (re.sub @botname).
if is_group and text:
text = re.sub(r"^@\S+\s*", "", text).strip()
media_urls, media_types = await self._extract_media(body)
message_type = self._derive_message_type(body, text, media_types)
has_reply_context = bool(reply_text and (text or media_urls))
@@ -629,16 +624,13 @@ class WeComAdapter(BasePlatformAdapter):
msgtype = str(body.get("msgtype") or "").lower()
if msgtype == "mixed":
_raw_mixed = body.get("mixed")
mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
_raw_items = mixed.get("msg_item")
items = _raw_items if isinstance(_raw_items, list) else []
mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
for item in items:
if not isinstance(item, dict):
continue
if str(item.get("msgtype") or "").lower() == "text":
_raw_text = item.get("text")
text_block = _raw_text if isinstance(_raw_text, dict) else {}
text_block = item.get("text") if isinstance(item.get("text"), dict) else {}
content = str(text_block.get("content") or "").strip()
if content:
text_parts.append(content)
@@ -680,10 +672,8 @@ class WeComAdapter(BasePlatformAdapter):
msgtype = str(body.get("msgtype") or "").lower()
if msgtype == "mixed":
_raw_mixed = body.get("mixed")
mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
_raw_items = mixed.get("msg_item")
items = _raw_items if isinstance(_raw_items, list) else []
mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
for item in items:
if not isinstance(item, dict):
continue
@@ -1469,134 +1459,3 @@ class WeComAdapter(BasePlatformAdapter):
"name": chat_id,
"type": "group" if chat_id and chat_id.lower().startswith("group") else "dm",
}
# ------------------------------------------------------------------
# QR code scan flow for obtaining bot credentials
# ------------------------------------------------------------------
_QR_GENERATE_URL = "https://work.weixin.qq.com/ai/qc/generate"
_QR_QUERY_URL = "https://work.weixin.qq.com/ai/qc/query_result"
_QR_CODE_PAGE = "https://work.weixin.qq.com/ai/qc/gen?source=hermes&scode="
_QR_POLL_INTERVAL = 3 # seconds
_QR_POLL_TIMEOUT = 300 # 5 minutes
def qr_scan_for_bot_info(
*,
timeout_seconds: int = _QR_POLL_TIMEOUT,
) -> Optional[Dict[str, str]]:
"""Run the WeCom QR scan flow to obtain bot_id and secret.
Fetches a QR code from WeCom, renders it in the terminal, and polls
until the user scans it or the timeout expires.
Returns ``{"bot_id": ..., "secret": ...}`` on success, ``None`` on
failure or timeout.
Note: the ``work.weixin.qq.com/ai/qc/{generate,query_result}`` endpoints
used here are not part of WeCom's public developer API — they back the
admin-console web UI's bot-creation flow and may change without notice.
The same pattern is used by the feishu/dingtalk QR setup wizards.
"""
try:
import urllib.request
import urllib.parse
except ImportError: # pragma: no cover
logger.error("urllib is required for WeCom QR scan")
return None
generate_url = f"{_QR_GENERATE_URL}?source=hermes"
# ── Step 1: Fetch QR code ──
print(" Connecting to WeCom...", end="", flush=True)
try:
req = urllib.request.Request(generate_url, headers={"User-Agent": "HermesAgent/1.0"})
with urllib.request.urlopen(req, timeout=15) as resp:
raw = json.loads(resp.read().decode("utf-8"))
except Exception as exc:
logger.error("WeCom QR: failed to fetch QR code: %s", exc)
print(f" failed: {exc}")
return None
data = raw.get("data") or {}
scode = str(data.get("scode") or "").strip()
auth_url = str(data.get("auth_url") or "").strip()
if not scode or not auth_url:
logger.error("WeCom QR: unexpected response format: %s", raw)
print(" failed: unexpected response format")
return None
print(" done.")
# ── Step 2: Render QR code in terminal ──
print()
qr_rendered = False
try:
import qrcode as _qrcode
qr = _qrcode.QRCode()
qr.add_data(auth_url)
qr.make(fit=True)
qr.print_ascii(invert=True)
qr_rendered = True
except ImportError:
pass
except Exception:
pass
page_url = f"{_QR_CODE_PAGE}{urllib.parse.quote(scode)}"
if qr_rendered:
print(f"\n Scan the QR code above, or open this URL directly:\n {page_url}")
else:
print(f" Open this URL in WeCom on your phone:\n\n {page_url}\n")
print(" Tip: pip install qrcode to display a scannable QR code here next time")
print()
print(" Fetching configuration results...", end="", flush=True)
# ── Step 3: Poll for result ──
import time
deadline = time.time() + timeout_seconds
query_url = f"{_QR_QUERY_URL}?scode={urllib.parse.quote(scode)}"
poll_count = 0
while time.time() < deadline:
try:
req = urllib.request.Request(query_url, headers={"User-Agent": "HermesAgent/1.0"})
with urllib.request.urlopen(req, timeout=10) as resp:
result = json.loads(resp.read().decode("utf-8"))
except Exception as exc:
logger.debug("WeCom QR poll error: %s", exc)
time.sleep(_QR_POLL_INTERVAL)
continue
poll_count += 1
# Print a dot on every poll so progress is visible within 3s.
print(".", end="", flush=True)
result_data = result.get("data") or {}
status = str(result_data.get("status") or "").lower()
if status == "success":
print() # newline after "Fetching configuration results..." dots
bot_info = result_data.get("bot_info") or {}
bot_id = str(bot_info.get("botid") or bot_info.get("bot_id") or "").strip()
secret = str(bot_info.get("secret") or "").strip()
if bot_id and secret:
return {"bot_id": bot_id, "secret": secret}
logger.warning(
"WeCom QR: scan reported success but bot_info missing or incomplete: %s",
result_data,
)
print(
" QR scan reported success but no bot credentials were returned.\n"
" This usually means the bot was not actually created on the WeCom side.\n"
" Falling back to manual credential entry."
)
return None
time.sleep(_QR_POLL_INTERVAL)
print() # newline after dots
print(f" QR scan timed out ({timeout_seconds // 60} minutes). Please try again.")
return None
+12 -90
View File
@@ -66,37 +66,6 @@ def _kill_port_process(port: int) -> None:
except Exception:
pass
def _terminate_bridge_process(proc, *, force: bool = False) -> None:
"""Terminate the bridge process using process-tree semantics where possible."""
if _IS_WINDOWS:
cmd = ["taskkill", "/PID", str(proc.pid), "/T"]
if force:
cmd.append("/F")
try:
result = subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=10,
)
except FileNotFoundError:
if force:
proc.kill()
else:
proc.terminate()
return
if result.returncode != 0:
details = (result.stderr or result.stdout or "").strip()
raise OSError(details or f"taskkill failed for PID {proc.pid}")
return
import signal
sig = signal.SIGTERM if not force else signal.SIGKILL
os.killpg(os.getpgid(proc.pid), sig)
import sys
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
@@ -149,10 +118,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
- bridge_script: Path to the Node.js bridge script
- bridge_port: Port for HTTP communication (default: 3000)
- session_path: Path to store WhatsApp session data
- dm_policy: "open" | "allowlist" | "disabled" how DMs are handled (default: "open")
- allow_from: List of sender IDs allowed in DMs (when dm_policy="allowlist")
- group_policy: "open" | "allowlist" | "disabled" which groups are processed (default: "open")
- group_allow_from: List of group JIDs allowed (when group_policy="allowlist")
"""
# WhatsApp message limits — practical UX limit, not protocol max.
@@ -175,10 +140,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
get_hermes_dir("platforms/whatsapp/session", "whatsapp/session")
))
self._reply_prefix: Optional[str] = config.extra.get("reply_prefix")
self._dm_policy = str(config.extra.get("dm_policy") or os.getenv("WHATSAPP_DM_POLICY", "open")).strip().lower()
self._allow_from = self._coerce_allow_list(config.extra.get("allow_from") or config.extra.get("allowFrom"))
self._group_policy = str(config.extra.get("group_policy") or os.getenv("WHATSAPP_GROUP_POLICY", "open")).strip().lower()
self._group_allow_from = self._coerce_allow_list(config.extra.get("group_allow_from") or config.extra.get("groupAllowFrom"))
self._mention_patterns = self._compile_mention_patterns()
self._message_queue: asyncio.Queue = asyncio.Queue()
self._bridge_log_fh = None
@@ -202,33 +163,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
return {str(part).strip() for part in raw if str(part).strip()}
return {part.strip() for part in str(raw).split(",") if part.strip()}
@staticmethod
def _coerce_allow_list(raw) -> set[str]:
"""Parse allow_from / group_allow_from from config or env var."""
if raw is None:
return set()
if isinstance(raw, list):
return {str(part).strip() for part in raw if str(part).strip()}
return {part.strip() for part in str(raw).split(",") if part.strip()}
def _is_dm_allowed(self, sender_id: str) -> bool:
"""Check whether a DM from the given sender should be processed."""
if self._dm_policy == "disabled":
return False
if self._dm_policy == "allowlist":
return sender_id in self._allow_from
# "open" — all DMs allowed
return True
def _is_group_allowed(self, chat_id: str) -> bool:
"""Check whether a group chat should be processed."""
if self._group_policy == "disabled":
return False
if self._group_policy == "allowlist":
return chat_id in self._group_allow_from
# "open" — all groups allowed
return True
def _compile_mention_patterns(self):
patterns = self.config.extra.get("mention_patterns")
if patterns is None:
@@ -321,18 +255,8 @@ class WhatsAppAdapter(BasePlatformAdapter):
return cleaned.strip() or text
def _should_process_message(self, data: Dict[str, Any]) -> bool:
is_group = data.get("isGroup", False)
if is_group:
chat_id = str(data.get("chatId") or "")
if not self._is_group_allowed(chat_id):
return False
else:
sender_id = str(data.get("senderId") or data.get("from") or "")
if not self._is_dm_allowed(sender_id):
return False
# DMs that pass the policy gate are always processed
if not data.get("isGroup"):
return True
# Group messages: check mention / free-response settings
chat_id = str(data.get("chatId") or "")
if chat_id in self._whatsapp_free_response_chats():
return True
@@ -399,6 +323,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
# Check if bridge is already running and connected
import aiohttp
import asyncio
try:
async with aiohttp.ClientSession() as session:
async with session.get(
@@ -567,14 +492,22 @@ class WhatsAppAdapter(BasePlatformAdapter):
"""Stop the WhatsApp bridge and clean up any orphaned processes."""
if self._bridge_process:
try:
# Kill the entire process group so child node processes die too
import signal
try:
_terminate_bridge_process(self._bridge_process, force=False)
if _IS_WINDOWS:
self._bridge_process.terminate()
else:
os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGTERM)
except (ProcessLookupError, PermissionError):
self._bridge_process.terminate()
await asyncio.sleep(1)
if self._bridge_process.poll() is None:
try:
_terminate_bridge_process(self._bridge_process, force=True)
if _IS_WINDOWS:
self._bridge_process.kill()
else:
os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGKILL)
except (ProcessLookupError, PermissionError):
self._bridge_process.kill()
except Exception as e:
@@ -840,17 +773,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
"""Send a video natively via bridge — plays inline in WhatsApp."""
return await self._send_media_to_bridge(chat_id, video_path, "video", caption)
async def send_voice(
self,
chat_id: str,
audio_path: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
**kwargs,
) -> SendResult:
"""Send an audio file as a WhatsApp voice message via bridge."""
return await self._send_media_to_bridge(chat_id, audio_path, "audio", caption)
async def send_document(
self,
chat_id: str,
+154 -450
View File
File diff suppressed because it is too large Load Diff
+12 -43
View File
@@ -80,7 +80,7 @@ class SessionSource:
user_name: Optional[str] = None
thread_id: Optional[str] = None # For forum topics, Discord threads, etc.
chat_topic: Optional[str] = None # Channel topic/description (Discord, Slack)
user_id_alt: Optional[str] = None # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
user_id_alt: Optional[str] = None # Signal UUID (alternative to phone number)
chat_id_alt: Optional[str] = None # Signal group internal ID
is_bot: bool = False # True when the message author is a bot/webhook (Discord)
@@ -152,7 +152,6 @@ class SessionContext:
source: SessionSource
connected_platforms: List[Platform]
home_channels: Dict[Platform, HomeChannel]
shared_multi_user_session: bool = False
# Session metadata
session_key: str = ""
@@ -167,7 +166,6 @@ class SessionContext:
"home_channels": {
p.value: hc.to_dict() for p, hc in self.home_channels.items()
},
"shared_multi_user_session": self.shared_multi_user_session,
"session_key": self.session_key,
"session_id": self.session_id,
"created_at": self.created_at.isoformat() if self.created_at else None,
@@ -242,16 +240,18 @@ def build_session_context_prompt(
lines.append(f"**Channel Topic:** {context.source.chat_topic}")
# User identity.
# In shared multi-user sessions (shared threads OR shared non-thread groups
# when group_sessions_per_user=False), multiple users contribute to the same
# conversation. Don't pin a single user name in the system prompt — it
# changes per-turn and would bust the prompt cache. Instead, note that
# this is a multi-user session; individual sender names are prefixed on
# each user message by the gateway.
if context.shared_multi_user_session:
session_label = "Multi-user thread" if context.source.thread_id else "Multi-user session"
# In shared thread sessions (non-DM with thread_id), multiple users
# contribute to the same conversation. Don't pin a single user name
# in the system prompt — it changes per-turn and would bust the prompt
# cache. Instead, note that this is a multi-user thread; individual
# sender names are prefixed on each user message by the gateway.
_is_shared_thread = (
context.source.chat_type != "dm"
and context.source.thread_id
)
if _is_shared_thread:
lines.append(
f"**Session type:** {session_label} — messages are prefixed "
"**Session type:** Multi-user thread — messages are prefixed "
"with [sender name]. Multiple users may participate."
)
elif context.source.user_name:
@@ -467,27 +467,6 @@ class SessionEntry:
)
def is_shared_multi_user_session(
source: SessionSource,
*,
group_sessions_per_user: bool = True,
thread_sessions_per_user: bool = False,
) -> bool:
"""Return True when a non-DM session is shared across participants.
Mirrors the isolation rules in :func:`build_session_key`:
- DMs are never shared.
- Threads are shared unless ``thread_sessions_per_user`` is True.
- Non-thread group/channel sessions are shared unless
``group_sessions_per_user`` is True (default: True = isolated).
"""
if source.chat_type == "dm":
return False
if source.thread_id:
return not thread_sessions_per_user
return not group_sessions_per_user
def build_session_key(
source: SessionSource,
group_sessions_per_user: bool = True,
@@ -1147,10 +1126,6 @@ class SessionStore:
tool_name=message.get("tool_name"),
tool_calls=message.get("tool_calls"),
tool_call_id=message.get("tool_call_id"),
reasoning=message.get("reasoning") if message.get("role") == "assistant" else None,
reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
)
except Exception as e:
logger.debug("Session DB operation failed: %s", e)
@@ -1180,7 +1155,6 @@ class SessionStore:
tool_calls=msg.get("tool_calls"),
tool_call_id=msg.get("tool_call_id"),
reasoning=msg.get("reasoning") if role == "assistant" else None,
reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
)
@@ -1264,11 +1238,6 @@ def build_session_context(
source=source,
connected_platforms=connected,
home_channels=home_channels,
shared_multi_user_session=is_shared_multi_user_session(
source,
group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False),
),
)
if session_entry:
-9
View File
@@ -56,12 +56,6 @@ _SESSION_USER_ID: ContextVar = ContextVar("HERMES_SESSION_USER_ID", default=_UNS
_SESSION_USER_NAME: ContextVar = ContextVar("HERMES_SESSION_USER_NAME", default=_UNSET)
_SESSION_KEY: ContextVar = ContextVar("HERMES_SESSION_KEY", default=_UNSET)
# Cron auto-delivery vars — set per-job in run_job() so concurrent jobs
# don't clobber each other's delivery targets.
_CRON_AUTO_DELIVER_PLATFORM: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_PLATFORM", default=_UNSET)
_CRON_AUTO_DELIVER_CHAT_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_CHAT_ID", default=_UNSET)
_CRON_AUTO_DELIVER_THREAD_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_THREAD_ID", default=_UNSET)
_VAR_MAP = {
"HERMES_SESSION_PLATFORM": _SESSION_PLATFORM,
"HERMES_SESSION_CHAT_ID": _SESSION_CHAT_ID,
@@ -70,9 +64,6 @@ _VAR_MAP = {
"HERMES_SESSION_USER_ID": _SESSION_USER_ID,
"HERMES_SESSION_USER_NAME": _SESSION_USER_NAME,
"HERMES_SESSION_KEY": _SESSION_KEY,
"HERMES_CRON_AUTO_DELIVER_PLATFORM": _CRON_AUTO_DELIVER_PLATFORM,
"HERMES_CRON_AUTO_DELIVER_CHAT_ID": _CRON_AUTO_DELIVER_CHAT_ID,
"HERMES_CRON_AUTO_DELIVER_THREAD_ID": _CRON_AUTO_DELIVER_THREAD_ID,
}
+32 -230
View File
@@ -22,18 +22,11 @@ from pathlib import Path
from hermes_constants import get_hermes_home
from typing import Any, Optional
if sys.platform == "win32":
import msvcrt
else:
import fcntl
_GATEWAY_KIND = "hermes-gateway"
_RUNTIME_STATUS_FILE = "gateway_state.json"
_LOCKS_DIRNAME = "gateway-locks"
_IS_WINDOWS = sys.platform == "win32"
_UNSET = object()
_GATEWAY_LOCK_FILENAME = "gateway.lock"
_gateway_lock_handle = None
def _get_pid_path() -> Path:
@@ -42,14 +35,6 @@ def _get_pid_path() -> Path:
return home / "gateway.pid"
def _get_gateway_lock_path(pid_path: Optional[Path] = None) -> Path:
"""Return the path to the runtime gateway lock file."""
if pid_path is not None:
return pid_path.with_name(_GATEWAY_LOCK_FILENAME)
home = get_hermes_home()
return home / _GATEWAY_LOCK_FILENAME
def _get_runtime_status_path() -> Path:
"""Return the persisted runtime health/status file path."""
return _get_pid_path().with_name(_RUNTIME_STATUS_FILE)
@@ -113,11 +98,6 @@ def _get_process_start_time(pid: int) -> Optional[int]:
return None
def get_process_start_time(pid: int) -> Optional[int]:
"""Public wrapper for retrieving a process start time when available."""
return _get_process_start_time(pid)
def _read_process_cmdline(pid: int) -> Optional[str]:
"""Return the process command line as a space-separated string."""
cmdline_path = Path(f"/proc/{pid}/cmdline")
@@ -141,7 +121,6 @@ def _looks_like_gateway_process(pid: int) -> bool:
"hermes_cli.main gateway",
"hermes_cli/main.py gateway",
"hermes gateway",
"hermes-gateway",
"gateway/run.py",
)
return any(pattern in cmdline for pattern in patterns)
@@ -233,160 +212,21 @@ def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
return None
def _read_gateway_lock_record(lock_path: Optional[Path] = None) -> Optional[dict[str, Any]]:
return _read_pid_record(lock_path or _get_gateway_lock_path())
def _pid_from_record(record: Optional[dict[str, Any]]) -> Optional[int]:
if not record:
return None
try:
return int(record["pid"])
except (KeyError, TypeError, ValueError):
return None
def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:
"""Delete a stale gateway PID file (and its sibling lock metadata).
Called from ``get_running_pid()`` after the runtime lock has already been
confirmed inactive, so the on-disk metadata is known to belong to a dead
process. Unlike ``remove_pid_file()`` (which defensively refuses to delete
a PID file whose ``pid`` field differs from ``os.getpid()`` to protect
``--replace`` handoffs), this path force-unlinks both files so the next
startup sees a clean slate.
"""
if not cleanup_stale:
return
try:
pid_path.unlink(missing_ok=True)
if pid_path == _get_pid_path():
remove_pid_file()
else:
pid_path.unlink(missing_ok=True)
except Exception:
pass
try:
_get_gateway_lock_path(pid_path).unlink(missing_ok=True)
except Exception:
pass
def _write_gateway_lock_record(handle) -> None:
handle.seek(0)
handle.truncate()
json.dump(_build_pid_record(), handle)
handle.flush()
try:
os.fsync(handle.fileno())
except OSError:
pass
def _try_acquire_file_lock(handle) -> bool:
try:
if _IS_WINDOWS:
handle.seek(0, os.SEEK_END)
if handle.tell() == 0:
handle.write("\n")
handle.flush()
handle.seek(0)
msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
else:
fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
return True
except (BlockingIOError, OSError):
return False
def _release_file_lock(handle) -> None:
try:
if _IS_WINDOWS:
handle.seek(0)
msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
else:
fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
except OSError:
pass
def acquire_gateway_runtime_lock() -> bool:
"""Claim the cross-process runtime lock for the gateway.
Unlike the PID file, the lock is owned by the live process itself. If the
process dies abruptly, the OS releases the lock automatically.
"""
global _gateway_lock_handle
if _gateway_lock_handle is not None:
return True
path = _get_gateway_lock_path()
path.parent.mkdir(parents=True, exist_ok=True)
handle = open(path, "a+", encoding="utf-8")
if not _try_acquire_file_lock(handle):
handle.close()
return False
_write_gateway_lock_record(handle)
_gateway_lock_handle = handle
return True
def release_gateway_runtime_lock() -> None:
"""Release the gateway runtime lock when owned by this process."""
global _gateway_lock_handle
handle = _gateway_lock_handle
if handle is None:
return
_gateway_lock_handle = None
_release_file_lock(handle)
try:
handle.close()
except OSError:
pass
def is_gateway_runtime_lock_active(lock_path: Optional[Path] = None) -> bool:
"""Return True when some process currently owns the gateway runtime lock."""
global _gateway_lock_handle
resolved_lock_path = lock_path or _get_gateway_lock_path()
if _gateway_lock_handle is not None and resolved_lock_path == _get_gateway_lock_path():
return True
if not resolved_lock_path.exists():
return False
handle = open(resolved_lock_path, "a+", encoding="utf-8")
try:
if _try_acquire_file_lock(handle):
_release_file_lock(handle)
return False
return True
finally:
try:
handle.close()
except OSError:
pass
def write_pid_file() -> None:
"""Write the current process PID and metadata to the gateway PID file.
Uses atomic O_CREAT | O_EXCL creation so that concurrent --replace
invocations race: exactly one process wins and the rest get
FileExistsError.
"""
path = _get_pid_path()
path.parent.mkdir(parents=True, exist_ok=True)
record = json.dumps(_build_pid_record())
try:
fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
except FileExistsError:
raise # Let caller decide: another gateway is racing us
try:
with os.fdopen(fd, "w", encoding="utf-8") as f:
f.write(record)
except Exception:
try:
path.unlink(missing_ok=True)
except OSError:
pass
raise
"""Write the current process PID and metadata to the gateway PID file."""
_write_json_file(_get_pid_path(), _build_pid_record())
def write_runtime_status(
@@ -501,8 +341,7 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
if not stale:
try:
os.kill(existing_pid, 0)
except (ProcessLookupError, PermissionError, OSError):
# Windows raises OSError with WinError 87 for invalid pid check
except (ProcessLookupError, PermissionError):
stale = True
else:
current_start = _get_process_start_time(existing_pid)
@@ -567,43 +406,17 @@ def release_scoped_lock(scope: str, identity: str) -> None:
pass
def release_all_scoped_locks(
*,
owner_pid: Optional[int] = None,
owner_start_time: Optional[int] = None,
) -> int:
"""Remove scoped lock files in the lock directory.
def release_all_scoped_locks() -> int:
"""Remove all scoped lock files in the lock directory.
Called during --replace to clean up stale locks left by stopped/killed
gateway processes that did not release their locks gracefully. When an
``owner_pid`` is provided, only lock records belonging to that gateway
process are removed. ``owner_start_time`` further narrows the match to
protect against PID reuse.
When no owner is provided, preserves the legacy behavior and removes every
scoped lock file in the directory.
gateway processes that did not release their locks gracefully.
Returns the number of lock files removed.
"""
lock_dir = _get_lock_dir()
removed = 0
if lock_dir.exists():
for lock_file in lock_dir.glob("*.lock"):
if owner_pid is not None:
record = _read_json_file(lock_file)
if not isinstance(record, dict):
continue
try:
record_pid = int(record.get("pid"))
except (TypeError, ValueError):
continue
if record_pid != owner_pid:
continue
if (
owner_start_time is not None
and record.get("start_time") != owner_start_time
):
continue
try:
lock_file.unlink(missing_ok=True)
removed += 1
@@ -750,46 +563,35 @@ def get_running_pid(
Cleans up stale PID files automatically.
"""
resolved_pid_path = pid_path or _get_pid_path()
resolved_lock_path = _get_gateway_lock_path(resolved_pid_path)
lock_active = is_gateway_runtime_lock_active(resolved_lock_path)
if not lock_active:
record = _read_pid_record(resolved_pid_path)
if not record:
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
return None
primary_record = _read_pid_record(resolved_pid_path)
fallback_record = _read_gateway_lock_record(resolved_lock_path)
try:
pid = int(record["pid"])
except (KeyError, TypeError, ValueError):
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
return None
for record in (primary_record, fallback_record):
pid = _pid_from_record(record)
if pid is None:
continue
try:
os.kill(pid, 0) # signal 0 = existence check, no actual signal sent
except (ProcessLookupError, PermissionError):
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
return None
try:
os.kill(pid, 0) # signal 0 = existence check, no actual signal sent
except ProcessLookupError:
continue
except PermissionError:
# The process exists but belongs to another user/service scope.
# With the runtime lock still held, prefer keeping it visible
# rather than deleting the PID file as "stale".
if _record_looks_like_gateway(record):
return pid
continue
except OSError:
# Windows raises OSError with WinError 87 for an invalid pid
# (process is definitely gone). Treat as "process doesn't exist".
continue
recorded_start = record.get("start_time")
current_start = _get_process_start_time(pid)
if recorded_start is not None and current_start is not None and current_start != recorded_start:
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
return None
recorded_start = record.get("start_time")
current_start = _get_process_start_time(pid)
if recorded_start is not None and current_start is not None and current_start != recorded_start:
continue
if not _looks_like_gateway_process(pid):
if not _record_looks_like_gateway(record):
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
return None
if _looks_like_gateway_process(pid) or _record_looks_like_gateway(record):
return pid
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
return None
return pid
def is_gateway_running(
+2 -2
View File
@@ -11,5 +11,5 @@ Provides subcommands for:
- hermes cron - Manage cron jobs
"""
__version__ = "0.11.0"
__release_date__ = "2026.4.23"
__version__ = "0.10.0"
__release_date__ = "2026.4.16"
+7 -45
View File
@@ -72,8 +72,6 @@ DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1"
STEPFUN_STEP_PLAN_INTL_BASE_URL = "https://api.stepfun.ai/step_plan/v1"
STEPFUN_STEP_PLAN_CN_BASE_URL = "https://api.stepfun.com/step_plan/v1"
CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
@@ -170,11 +168,8 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
id="kimi-coding",
name="Kimi / Moonshot",
auth_type="api_key",
# Legacy platform.moonshot.ai keys use this endpoint (OpenAI-compat).
# sk-kimi- (Kimi Code) keys are auto-redirected to api.kimi.com/coding
# by _resolve_kimi_base_url() below.
inference_base_url="https://api.moonshot.ai/v1",
api_key_env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"),
api_key_env_vars=("KIMI_API_KEY",),
base_url_env_var="KIMI_BASE_URL",
),
"kimi-coding-cn": ProviderConfig(
@@ -184,14 +179,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
inference_base_url="https://api.moonshot.cn/v1",
api_key_env_vars=("KIMI_CN_API_KEY",),
),
"stepfun": ProviderConfig(
id="stepfun",
name="StepFun Step Plan",
auth_type="api_key",
inference_base_url=STEPFUN_STEP_PLAN_INTL_BASE_URL,
api_key_env_vars=("STEPFUN_API_KEY",),
base_url_env_var="STEPFUN_BASE_URL",
),
"arcee": ProviderConfig(
id="arcee",
name="Arcee AI",
@@ -214,7 +201,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
auth_type="api_key",
inference_base_url="https://api.anthropic.com",
api_key_env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"),
base_url_env_var="ANTHROPIC_BASE_URL",
),
"alibaba": ProviderConfig(
id="alibaba",
@@ -354,16 +340,10 @@ def get_anthropic_key() -> str:
# =============================================================================
# Kimi Code (kimi.com/code) issues keys prefixed "sk-kimi-" that only work
# on api.kimi.com/coding. Legacy keys from platform.moonshot.ai work on
# api.moonshot.ai/v1 (the old default). Auto-detect when user hasn't set
# on api.kimi.com/coding/v1. Legacy keys from platform.moonshot.ai work on
# api.moonshot.ai/v1 (the default). Auto-detect when user hasn't set
# KIMI_BASE_URL explicitly.
#
# Note: the base URL intentionally has NO /v1 suffix. The /coding endpoint
# speaks the Anthropic Messages protocol, and the anthropic SDK appends
# "/v1/messages" internally — so "/coding" + SDK suffix → "/coding/v1/messages"
# (the correct target). Using "/coding/v1" here would produce
# "/coding/v1/v1/messages" (a 404).
KIMI_CODE_BASE_URL = "https://api.kimi.com/coding"
KIMI_CODE_BASE_URL = "https://api.kimi.com/coding/v1"
def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> str:
@@ -619,25 +599,7 @@ def _oauth_trace(event: str, *, sequence_id: Optional[str] = None, **fields: Any
# =============================================================================
def _auth_file_path() -> Path:
path = get_hermes_home() / "auth.json"
# Seat belt: if pytest is running and HERMES_HOME resolves to the real
# user's auth store, refuse rather than silently corrupt it. This catches
# tests that forgot to monkeypatch HERMES_HOME, tests invoked without the
# hermetic conftest, or sandbox escapes via threads/subprocesses. In
# production (no PYTEST_CURRENT_TEST) this is a single dict lookup.
if os.environ.get("PYTEST_CURRENT_TEST"):
real_home_auth = (Path.home() / ".hermes" / "auth.json").resolve(strict=False)
try:
resolved = path.resolve(strict=False)
except Exception:
resolved = path
if resolved == real_home_auth:
raise RuntimeError(
f"Refusing to touch real user auth store during test run: {path}. "
"Set HERMES_HOME to a tmp_path in your test fixture, or run "
"via scripts/run_tests.sh for hermetic CI-parity env."
)
return path
return get_hermes_home() / "auth.json"
def _auth_lock_path() -> Path:
@@ -1021,7 +983,6 @@ def resolve_provider(
"x-ai": "xai", "x.ai": "xai", "grok": "xai",
"kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
"kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
"step": "stepfun", "stepfun-coding-plan": "stepfun",
"arcee-ai": "arcee", "arceeai": "arcee",
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
"claude": "anthropic", "claude-code": "anthropic",
@@ -3414,7 +3375,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
)
from hermes_cli.models import (
_PROVIDER_MODELS, get_pricing_for_provider,
_PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models,
check_nous_free_tier, partition_nous_models_by_tier,
)
model_ids = _PROVIDER_MODELS.get("nous", [])
@@ -3423,6 +3384,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
unavailable_models: list = []
if model_ids:
pricing = get_pricing_for_provider("nous")
model_ids = filter_nous_free_models(model_ids, pricing)
free_tier = check_nous_free_tier()
if free_tier:
model_ids, unavailable_models = partition_nous_models_by_tier(
+63 -37
View File
@@ -152,23 +152,6 @@ def auth_add_command(args) -> None:
pool = load_pool(provider)
# Clear ALL suppressions for this provider — re-adding a credential is
# a strong signal the user wants auth re-enabled. This covers env:*
# (shell-exported vars), gh_cli (copilot), claude_code, qwen-cli,
# device_code (codex), etc. One consistent re-engagement pattern.
# Matches the Codex device_code re-link pattern that predates this.
if not provider.startswith(CUSTOM_POOL_PREFIX):
try:
from hermes_cli.auth import (
_load_auth_store,
unsuppress_credential_source,
)
suppressed = _load_auth_store().get("suppressed_sources", {})
for src in list(suppressed.get(provider, []) or []):
unsuppress_credential_source(provider, src)
except Exception:
pass
if requested_type == AUTH_TYPE_API_KEY:
token = (getattr(args, "api_key", None) or "").strip()
if not token:
@@ -355,28 +338,71 @@ def auth_remove_command(args) -> None:
raise SystemExit(f'No credential matching "{target}" for provider {provider}.')
print(f"Removed {provider} credential #{index} ({removed.label})")
# Unified removal dispatch. Every credential source Hermes reads from
# (env vars, external OAuth files, auth.json blocks, custom config)
# has a RemovalStep registered in agent.credential_sources. The step
# handles its source-specific cleanup and we centralise suppression +
# user-facing output here so every source behaves identically from
# the user's perspective.
from agent.credential_sources import find_removal_step
from hermes_cli.auth import suppress_credential_source
# If this was an env-seeded credential, also clear the env var from .env
# so it doesn't get re-seeded on the next load_pool() call.
if removed.source.startswith("env:"):
env_var = removed.source[len("env:"):]
if env_var:
from hermes_cli.config import remove_env_value
cleared = remove_env_value(env_var)
if cleared:
print(f"Cleared {env_var} from .env")
step = find_removal_step(provider, removed.source)
if step is None:
# Unregistered source — e.g. "manual", which has nothing external
# to clean up. The pool entry is already gone; we're done.
return
# If this was a singleton-seeded credential (OAuth device_code, hermes_pkce),
# clear the underlying auth store / credential file so it doesn't get
# re-seeded on the next load_pool() call.
elif provider == "openai-codex" and (
removed.source == "device_code" or removed.source.endswith(":device_code")
):
# Codex tokens live in TWO places: the Hermes auth store and
# ~/.codex/auth.json (the Codex CLI shared file). On every refresh,
# refresh_codex_oauth_pure() writes to both. So clearing only the
# Hermes auth store is not enough — _seed_from_singletons() will
# auto-import from ~/.codex/auth.json on the next load_pool() and
# the removal is instantly undone. Mark the source as suppressed
# so auto-import is skipped; leave ~/.codex/auth.json untouched so
# the Codex CLI itself keeps working.
from hermes_cli.auth import (
_load_auth_store, _save_auth_store, _auth_store_lock,
suppress_credential_source,
)
with _auth_store_lock():
auth_store = _load_auth_store()
providers_dict = auth_store.get("providers")
if isinstance(providers_dict, dict) and provider in providers_dict:
del providers_dict[provider]
_save_auth_store(auth_store)
print(f"Cleared {provider} OAuth tokens from auth store")
suppress_credential_source(provider, "device_code")
print("Suppressed openai-codex device_code source — it will not be re-seeded.")
print("Note: Codex CLI credentials still live in ~/.codex/auth.json")
print("Run `hermes auth add openai-codex` to re-enable if needed.")
result = step.remove_fn(provider, removed)
for line in result.cleaned:
print(line)
if result.suppress:
suppress_credential_source(provider, removed.source)
for line in result.hints:
print(line)
elif removed.source == "device_code" and provider == "nous":
from hermes_cli.auth import (
_load_auth_store, _save_auth_store, _auth_store_lock,
)
with _auth_store_lock():
auth_store = _load_auth_store()
providers_dict = auth_store.get("providers")
if isinstance(providers_dict, dict) and provider in providers_dict:
del providers_dict[provider]
_save_auth_store(auth_store)
print(f"Cleared {provider} OAuth tokens from auth store")
elif removed.source == "hermes_pkce" and provider == "anthropic":
from hermes_constants import get_hermes_home
oauth_file = get_hermes_home() / ".anthropic_oauth.json"
if oauth_file.exists():
oauth_file.unlink()
print("Cleared Hermes Anthropic OAuth credentials")
elif removed.source == "claude_code" and provider == "anthropic":
from hermes_cli.auth import suppress_credential_source
suppress_credential_source(provider, "claude_code")
print("Suppressed claude_code credential — it will not be re-seeded.")
print("Note: Claude Code credentials still live in ~/.claude/.credentials.json")
print("Run `hermes auth add anthropic` to re-enable if needed.")
def auth_reset_command(args) -> None:
+1 -1
View File
@@ -249,7 +249,7 @@ def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
state_path = child / state_name
if state_path.exists():
kind = "directory" if state_path.is_dir() else "file"
rel = state_path.relative_to(source_dir).as_posix()
rel = state_path.relative_to(source_dir)
findings.append((state_path, f"Workspace {kind}: {rel}"))
return findings
+1 -2
View File
@@ -12,7 +12,6 @@ import os
logger = logging.getLogger(__name__)
DEFAULT_CODEX_MODELS: List[str] = [
"gpt-5.5",
"gpt-5.4-mini",
"gpt-5.4",
"gpt-5.3-codex",
@@ -22,10 +21,10 @@ DEFAULT_CODEX_MODELS: List[str] = [
]
_FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [
("gpt-5.5", ("gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex")),
("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")),
("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")),
("gpt-5.3-codex", ("gpt-5.2-codex",)),
("gpt-5.3-codex-spark", ("gpt-5.3-codex", "gpt-5.2-codex")),
]
+11 -91
View File
@@ -260,26 +260,6 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset(
)
def is_gateway_known_command(name: str | None) -> bool:
"""Return True if ``name`` resolves to a gateway-dispatchable slash command.
This covers both built-in commands (``GATEWAY_KNOWN_COMMANDS`` derived
from ``COMMAND_REGISTRY``) and plugin-registered commands, which are
looked up lazily so importing this module never forces plugin
discovery. Gateway code uses this to decide whether to emit
``command:<name>`` hooks plugin commands get the same lifecycle
events as built-ins.
"""
if not name:
return False
if name in GATEWAY_KNOWN_COMMANDS:
return True
for plugin_name, _description, _args_hint in _iter_plugin_command_entries():
if plugin_name == name:
return True
return False
# Commands with explicit Level-2 running-agent handlers in gateway/run.py.
# Listed here for introspection / tests; semantically a subset of
# "all resolvable commands" — which is the real bypass set (see
@@ -391,47 +371,12 @@ def gateway_help_lines() -> list[str]:
return lines
def _iter_plugin_command_entries() -> list[tuple[str, str, str]]:
"""Yield (name, description, args_hint) tuples for all plugin slash commands.
Plugin commands are registered via
:func:`hermes_cli.plugins.PluginContext.register_command`. They behave
like ``CommandDef`` entries for gateway surfacing: they appear in the
Telegram command menu, in Slack's ``/hermes`` subcommand mapping, and
(via :func:`gateway.platforms.discord._register_slash_commands`) in
Discord's native slash command picker.
Lookup is lazy so importing this module never forces plugin discovery
(which can trigger filesystem scans and environment-dependent
behavior).
"""
try:
from hermes_cli.plugins import get_plugin_commands
except Exception:
return []
try:
commands = get_plugin_commands() or {}
except Exception:
return []
entries: list[tuple[str, str, str]] = []
for name, meta in commands.items():
if not isinstance(name, str) or not isinstance(meta, dict):
continue
description = str(meta.get("description") or f"Run /{name}")
args_hint = str(meta.get("args_hint") or "").strip()
entries.append((name, description, args_hint))
return entries
def telegram_bot_commands() -> list[tuple[str, str]]:
"""Return (command_name, description) pairs for Telegram setMyCommands.
Telegram command names cannot contain hyphens, so they are replaced with
underscores. Aliases are skipped -- Telegram shows one menu entry per
canonical command.
Plugin-registered slash commands are included so plugins get native
autocomplete in Telegram without touching core code.
"""
overrides = _resolve_config_gates()
result: list[tuple[str, str]] = []
@@ -441,10 +386,6 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
tg_name = _sanitize_telegram_name(cmd.name)
if tg_name:
result.append((tg_name, cmd.description))
for name, description, _args_hint in _iter_plugin_command_entries():
tg_name = _sanitize_telegram_name(name)
if tg_name:
result.append((tg_name, description))
return result
@@ -556,8 +497,9 @@ def _collect_gateway_skill_entries(
# --- Tier 1: Plugin slash commands (never trimmed) ---------------------
plugin_pairs: list[tuple[str, str]] = []
try:
from hermes_cli.plugins import get_plugin_commands
plugin_cmds = get_plugin_commands()
from hermes_cli.plugins import get_plugin_manager
pm = get_plugin_manager()
plugin_cmds = getattr(pm, "_plugin_commands", {})
for cmd_name in sorted(plugin_cmds):
name = sanitize_name(cmd_name) if sanitize_name else cmd_name
if not name:
@@ -809,9 +751,6 @@ def slack_subcommand_map() -> dict[str, str]:
Maps both canonical names and aliases so /hermes bg do stuff works
the same as /hermes background do stuff.
Plugin-registered slash commands are included so ``/hermes <plugin-cmd>``
routes through the plugin handler.
"""
overrides = _resolve_config_gates()
mapping: dict[str, str] = {}
@@ -821,9 +760,6 @@ def slack_subcommand_map() -> dict[str, str]:
mapping[cmd.name] = f"/{cmd.name}"
for alias in cmd.aliases:
mapping[alias] = f"/{alias}"
for name, _description, _args_hint in _iter_plugin_command_entries():
if name not in mapping:
mapping[name] = f"/{name}"
return mapping
@@ -989,22 +925,12 @@ class SlashCommandCompleter(Completer):
display_meta=meta,
)
# If the user typed @file: / @folder: (or just @file / @folder with
# no colon yet), delegate to path completions. Accepting the bare
# form lets the picker surface directories as soon as the user has
# typed `@folder`, without requiring them to first accept the static
# `@folder:` hint and re-trigger completion.
# If the user typed @file: or @folder:, delegate to path completions
for prefix in ("@file:", "@folder:"):
bare = prefix[:-1]
if word == bare or word.startswith(prefix):
want_dir = prefix == "@folder:"
path_part = '' if word == bare else word[len(prefix):]
if word.startswith(prefix):
path_part = word[len(prefix):] or "."
expanded = os.path.expanduser(path_part)
if not expanded or expanded == ".":
search_dir, match_prefix = ".", ""
elif expanded.endswith("/"):
if expanded.endswith("/"):
search_dir, match_prefix = expanded, ""
else:
search_dir = os.path.dirname(expanded) or "."
@@ -1020,21 +946,15 @@ class SlashCommandCompleter(Completer):
for entry in sorted(entries):
if match_prefix and not entry.lower().startswith(prefix_lower):
continue
full_path = os.path.join(search_dir, entry)
is_dir = os.path.isdir(full_path)
# `@folder:` must only surface directories; `@file:` only
# regular files. Without this filter `@folder:` listed
# every .env / .gitignore in the cwd, defeating the
# explicit prefix and confusing users expecting a
# directory picker.
if want_dir != is_dir:
continue
if count >= limit:
break
full_path = os.path.join(search_dir, entry)
is_dir = os.path.isdir(full_path)
display_path = os.path.relpath(full_path)
suffix = "/" if is_dir else ""
kind = "folder" if is_dir else "file"
meta = "dir" if is_dir else _file_size_label(full_path)
completion = f"{prefix}{display_path}{suffix}"
completion = f"@{kind}:{display_path}{suffix}"
yield Completion(
completion,
start_position=-len(word),
+11 -295
View File
@@ -13,7 +13,6 @@ This module provides:
"""
import copy
import logging
import os
import platform
import re
@@ -25,7 +24,6 @@ from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Any, Optional, List, Tuple
logger = logging.getLogger(__name__)
_IS_WINDOWS = platform.system() == "Windows"
_ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
@@ -361,15 +359,6 @@ DEFAULT_CONFIG = {
# to finish, then interrupts any remaining runs after the timeout.
# 0 = no drain, interrupt immediately.
"restart_drain_timeout": 60,
# Max app-level retry attempts for API errors (connection drops,
# provider timeouts, 5xx, etc.) before the agent surfaces the
# failure. The OpenAI SDK already does its own low-level retries
# (max_retries=2 default) for transient network errors; this is
# the Hermes-level retry loop that wraps the whole call. Lower
# this to 1 if you use fallback providers and want fast failover
# on flaky primaries; raise it if you prefer to tolerate longer
# provider hiccups on a single provider.
"api_max_retries": 3,
"service_tier": "",
# Tool-use enforcement: injects system prompt guidance that tells the
# model to actually call tools instead of describing intended actions.
@@ -384,11 +373,7 @@ DEFAULT_CONFIG = {
# Periodic "still working" notification interval (seconds).
# Sends a status message every N seconds so the user knows the
# agent hasn't died during long tasks. 0 = disable notifications.
# Lower values mean faster feedback on slow tasks but more chat
# noise; 180s is a compromise that catches spinning weak-model runs
# (60+ tool iterations with tiny output) before users assume the
# bot is dead and /restart.
"gateway_notify_interval": 180,
"gateway_notify_interval": 600,
},
"terminal": {
@@ -400,32 +385,6 @@ DEFAULT_CONFIG = {
# (terminal and execute_code). Skill-declared required_environment_variables
# are passed through automatically; this list is for non-skill use cases.
"env_passthrough": [],
# Extra files to source in the login shell when building the
# per-session environment snapshot. Use this when tools like nvm,
# pyenv, asdf, or custom PATH entries are registered by files that
# a bash login shell would skip — most commonly ``~/.bashrc``
# (bash doesn't source bashrc in non-interactive login mode) or
# zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``.
# Paths support ``~`` / ``${VAR}``. Missing files are silently
# skipped. When empty, Hermes auto-sources ``~/.profile``,
# ``~/.bash_profile``, and ``~/.bashrc`` (in that order) if the
# snapshot shell is bash (this is the ``auto_source_bashrc``
# behaviour — disable with that key if you want strict login-only
# semantics).
"shell_init_files": [],
# When true (default), Hermes sources the user's shell rc files
# (``~/.profile``, ``~/.bash_profile``, ``~/.bashrc``) in the
# login shell used to build the environment snapshot. This
# captures PATH additions, shell functions, and aliases — which a
# plain ``bash -l -c`` would otherwise miss because bash skips
# bashrc in non-interactive login mode, and because a default
# Debian/Ubuntu ``~/.bashrc`` short-circuits on non-interactive
# sources. ``~/.profile`` and ``~/.bash_profile`` are tried first
# because ``n`` / ``nvm`` / ``asdf`` installers typically write
# their PATH exports there without an interactivity guard. Turn
# this off if your rc files misbehave when sourced
# non-interactively (e.g. one that hard-exits on TTY checks).
"auto_source_bashrc": True,
"docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
"docker_forward_env": [],
# Explicit environment variables to set inside Docker containers.
@@ -486,27 +445,7 @@ DEFAULT_CONFIG = {
# exceed this are rejected with guidance to use offset+limit.
# 100K chars ≈ 2535K tokens across typical tokenisers.
"file_read_max_chars": 100_000,
# Tool-output truncation thresholds. When terminal output or a
# single read_file page exceeds these limits, Hermes truncates the
# payload sent to the model (keeping head + tail for terminal,
# enforcing pagination for read_file). Tuning these trades context
# footprint against how much raw output the model can see in one
# shot. Ported from anomalyco/opencode PR #23770.
#
# - max_bytes: terminal_tool output cap, in chars
# (default 50_000 ≈ 12-15K tokens).
# - max_lines: read_file pagination cap — the maximum `limit`
# a single read_file call can request before
# being clamped (default 2000).
# - max_line_length: per-line cap applied when read_file emits a
# line-numbered view (default 2000 chars).
"tool_output": {
"max_bytes": 50_000,
"max_lines": 2000,
"max_line_length": 2000,
},
"compression": {
"enabled": True,
"threshold": 0.50, # compress when context usage exceeds this ratio
@@ -652,10 +591,6 @@ DEFAULT_CONFIG = {
},
# Text-to-speech configuration
# Each provider supports an optional `max_text_length:` override for the
# per-request input-character cap. Omit it to use the provider's documented
# limit (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k model-aware,
# Gemini 5000, Edge 5000, Mistral 4000, NeuTTS/KittenTTS 2000).
"tts": {
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
"edge": {
@@ -708,7 +643,6 @@ DEFAULT_CONFIG = {
"record_key": "ctrl+b",
"max_recording_seconds": 120,
"auto_tts": False,
"beep_enabled": True, # Play record start/stop beeps in CLI voice mode
"silence_threshold": 200, # RMS below this = silence (0-32767)
"silence_duration": 3.0, # Seconds of silence before auto-stop
},
@@ -751,26 +685,10 @@ DEFAULT_CONFIG = {
"provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials)
"base_url": "", # direct OpenAI-compatible endpoint for subagents
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
# When delegate_task narrows child toolsets explicitly, preserve any
# MCP toolsets the parent already has enabled. On by default so
# narrowing (e.g. toolsets=["web","browser"]) expresses "I want these
# extras" without silently stripping MCP tools the parent already has.
# Set to false for strict intersection.
"inherit_mcp_toolsets": True,
"max_iterations": 50, # per-subagent iteration cap (each subagent gets its own budget,
# independent of the parent's max_iterations)
"child_timeout_seconds": 600, # wall-clock timeout for each child agent (floor 30s,
# no ceiling). High-reasoning models on large tasks
# (e.g. gpt-5.5 xhigh, opus-4.6) need generous budgets;
# raise if children time out before producing output.
"reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium",
# "low", "minimal", "none" (empty = inherit parent's level)
"max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling
# Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth
# and _get_orchestrator_enabled). Values are clamped to [1, 3] with a
# warning log if out of range.
"max_spawn_depth": 1, # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
"orchestrator_enabled": True, # kill switch for role="orchestrator"
},
# Ephemeral prefill messages file — JSON list of {role, content} dicts
@@ -783,31 +701,6 @@ DEFAULT_CONFIG = {
# always goes to ~/.hermes/skills/.
"skills": {
"external_dirs": [], # e.g. ["~/.agents/skills", "/shared/team-skills"]
# Substitute ${HERMES_SKILL_DIR} and ${HERMES_SESSION_ID} in SKILL.md
# content with the absolute skill directory and the active session id
# before the agent sees it. Lets skill authors reference bundled
# scripts without the agent having to join paths.
"template_vars": True,
# Pre-execute inline shell snippets written as !`cmd` in SKILL.md
# body. Their stdout is inlined into the skill message before the
# agent reads it, so skills can inject dynamic context (dates, git
# state, detected tool versions, …). Off by default because any
# content from the skill author runs on the host without approval;
# only enable for skill sources you trust.
"inline_shell": False,
# Timeout (seconds) for each !`cmd` snippet when inline_shell is on.
"inline_shell_timeout": 10,
# Run the keyword/pattern security scanner on skills the agent
# writes via skill_manage (create/edit/patch). Off by default
# because the agent can already execute the same code paths via
# terminal() with no gate, so the scan adds friction (blocks
# skills that mention risky keywords in prose) without meaningful
# security. Turn on if you want the belt-and-suspenders — a
# dangerous verdict will then surface as a tool error to the
# agent, which can retry with the flagged content removed.
# External hub installs (trusted/community sources) are always
# scanned regardless of this setting.
"guard_agent_created": False,
},
# Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
@@ -878,21 +771,6 @@ DEFAULT_CONFIG = {
"command_allowlist": [],
# User-defined quick commands that bypass the agent loop (type: exec only)
"quick_commands": {},
# Shell-script hooks — declarative bridge that invokes shell scripts
# on plugin-hook events (pre_tool_call, post_tool_call, pre_llm_call,
# subagent_stop, etc.). Each entry maps an event name to a list of
# {matcher, command, timeout} dicts. First registration of a new
# command prompts the user for consent; subsequent runs reuse the
# stored approval from ~/.hermes/shell-hooks-allowlist.json.
# See `website/docs/user-guide/features/hooks.md` for schema + examples.
"hooks": {},
# Auto-accept shell-hook registrations without a TTY prompt. Also
# toggleable per-invocation via --accept-hooks or HERMES_ACCEPT_HOOKS=1.
# Gateway / cron / non-interactive runs need this (or one of the other
# channels) to pick up newly-added hooks.
"hooks_auto_accept": False,
# Custom personalities — add your own entries here
# Supports string format: {"name": "system prompt"}
# Or dict format: {"name": {"description": "...", "system_prompt": "...", "tone": "...", "style": "..."}}
@@ -900,7 +778,6 @@ DEFAULT_CONFIG = {
# Pre-exec security scanning via tirith
"security": {
"allow_private_urls": False, # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs)
"redact_secrets": True,
"tirith_enabled": True,
"tirith_path": "tirith",
@@ -917,11 +794,6 @@ DEFAULT_CONFIG = {
# Wrap delivered cron responses with a header (task name) and footer
# ("The agent cannot see this message"). Set to false for clean output.
"wrap_response": True,
# Maximum number of due jobs to run in parallel per tick.
# null/0 = unbounded (limited only by thread count).
# 1 = serial (pre-v0.9 behaviour).
# Also overridable via HERMES_CRON_MAX_PARALLEL env var.
"max_parallel_jobs": None,
},
# execute_code settings — controls the tool used for programmatic tool calls.
@@ -954,36 +826,8 @@ DEFAULT_CONFIG = {
"force_ipv4": False,
},
# Session storage — controls automatic cleanup of ~/.hermes/state.db.
# state.db accumulates every session, message, tool call, and FTS5 index
# entry forever. Without auto-pruning, a heavy user (gateway + cron)
# reports 384MB+ databases with 68K+ messages, which slows down FTS5
# inserts, /resume listing, and insights queries.
"sessions": {
# When true, prune ended sessions older than retention_days once
# per (roughly) min_interval_hours at CLI/gateway/cron startup.
# Only touches ended sessions — active sessions are always preserved.
# Default false: session history is valuable for search recall, and
# silently deleting it could surprise users. Opt in explicitly.
"auto_prune": False,
# How many days of ended-session history to keep. Matches the
# default of ``hermes sessions prune``.
"retention_days": 90,
# VACUUM after a prune that actually deleted rows. SQLite does not
# reclaim disk space on DELETE — freed pages are just reused on
# subsequent INSERTs — so without VACUUM the file stays bloated
# even after pruning. VACUUM blocks writes for a few seconds per
# 100MB, so it only runs at startup, and only when prune deleted
# ≥1 session.
"vacuum_after_prune": True,
# Minimum hours between auto-maintenance runs (avoids repeating
# the sweep on every CLI invocation). Tracked via state_meta in
# state.db itself, so it's shared across all processes.
"min_interval_hours": 24,
},
# Config schema version - bump this when adding new required fields
"_config_version": 22,
"_config_version": 20,
}
# =============================================================================
@@ -1139,22 +983,6 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"STEPFUN_API_KEY": {
"description": "StepFun Step Plan API key",
"prompt": "StepFun Step Plan API key",
"url": "https://platform.stepfun.com/",
"password": True,
"category": "provider",
"advanced": True,
},
"STEPFUN_BASE_URL": {
"description": "StepFun Step Plan base URL override",
"prompt": "StepFun Step Plan base URL (leave empty for default)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"ARCEEAI_API_KEY": {
"description": "Arcee AI API key",
"prompt": "Arcee AI API key",
@@ -1328,7 +1156,7 @@ OPTIONAL_ENV_VARS = {
"advanced": True,
},
"XIAOMI_API_KEY": {
"description": "Xiaomi MiMo API key for MiMo models (mimo-v2.5-pro, mimo-v2.5, mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
"description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
"prompt": "Xiaomi MiMo API Key",
"url": "https://platform.xiaomimimo.com",
"password": True,
@@ -2022,53 +1850,12 @@ def _normalize_custom_provider_entry(
if not isinstance(entry, dict):
return None
# Accept camelCase aliases commonly used in hand-written configs.
_CAMEL_ALIASES: Dict[str, str] = {
"apiKey": "api_key",
"baseUrl": "base_url",
"apiMode": "api_mode",
"keyEnv": "key_env",
"defaultModel": "default_model",
"contextLength": "context_length",
"rateLimitDelay": "rate_limit_delay",
}
_KNOWN_KEYS = {
"name", "api", "url", "base_url", "api_key", "key_env",
"api_mode", "transport", "model", "default_model", "models",
"context_length", "rate_limit_delay",
}
for camel, snake in _CAMEL_ALIASES.items():
if camel in entry and snake not in entry:
logger.warning(
"providers.%s: camelCase key '%s' auto-mapped to '%s' "
"(use snake_case to avoid this warning)",
provider_key or "?", camel, snake,
)
entry[snake] = entry[camel]
unknown = set(entry.keys()) - _KNOWN_KEYS - set(_CAMEL_ALIASES.keys())
if unknown:
logger.warning(
"providers.%s: unknown config keys ignored: %s",
provider_key or "?", ", ".join(sorted(unknown)),
)
from urllib.parse import urlparse
base_url = ""
for url_key in ("base_url", "url", "api"):
for url_key in ("api", "url", "base_url"):
raw_url = entry.get(url_key)
if isinstance(raw_url, str) and raw_url.strip():
candidate = raw_url.strip()
parsed = urlparse(candidate)
if parsed.scheme and parsed.netloc:
base_url = candidate
break
else:
logger.warning(
"providers.%s: '%s' value '%s' is not a valid URL "
"(no scheme or host) — skipped",
provider_key or "?", url_key, candidate,
)
base_url = raw_url.strip()
break
if not base_url:
return None
@@ -2109,14 +1896,6 @@ def _normalize_custom_provider_entry(
models = entry.get("models")
if isinstance(models, dict) and models:
normalized["models"] = models
elif isinstance(models, list) and models:
# Hand-edited configs (and older Hermes versions) write ``models`` as
# a plain list of model ids. Preserve them by converting to the dict
# shape downstream code expects; otherwise normalize silently drops
# the list and /model shows the provider with (0) models.
normalized["models"] = {
str(m): {} for m in models if isinstance(m, str) and m.strip()
}
context_length = entry.get("context_length")
if isinstance(context_length, int) and context_length > 0:
@@ -2215,7 +1994,6 @@ _KNOWN_ROOT_KEYS = {
"fallback_providers", "credential_pool_strategies", "toolsets",
"agent", "terminal", "display", "compression", "delegation",
"auxiliary", "custom_providers", "context", "memory", "gateway",
"sessions",
}
# Valid fields inside a custom_providers list entry
@@ -2373,6 +2151,7 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None:
if not issues:
return
import sys
lines = ["\033[33m⚠ Config issues detected in config.yaml:\033[0m"]
for ci in issues:
marker = "\033[31m✗\033[0m" if ci.severity == "error" else "\033[33m⚠\033[0m"
@@ -2387,6 +2166,7 @@ def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> Non
These env vars are deprecated the canonical setting is terminal.cwd
in config.yaml. Prints a migration hint to stderr.
"""
import os, sys
messaging_cwd = os.environ.get("MESSAGING_CWD")
terminal_cwd_env = os.environ.get("TERMINAL_CWD")
@@ -2704,71 +2484,6 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
else:
print(" ✓ Removed unused compression.summary_* keys")
# ── Version 20 → 21: plugins are now opt-in; grandfather existing user plugins ──
# The loader now requires plugins to appear in ``plugins.enabled`` before
# loading. Existing installs had all discovered plugins loading by default
# (minus anything in ``plugins.disabled``). To avoid silently breaking
# those setups on upgrade, populate ``plugins.enabled`` with the set of
# currently-installed user plugins that aren't already disabled.
#
# Bundled plugins (shipped in the repo itself) are NOT grandfathered —
# they ship off for everyone, including existing users, so any user who
# wants one has to opt in explicitly.
if current_ver < 21:
config = read_raw_config()
plugins_cfg = config.get("plugins")
if not isinstance(plugins_cfg, dict):
plugins_cfg = {}
# Only migrate if the enabled allow-list hasn't been set yet.
if "enabled" not in plugins_cfg:
disabled = plugins_cfg.get("disabled", []) or []
if not isinstance(disabled, list):
disabled = []
disabled_set = set(disabled)
# Scan ``$HERMES_HOME/plugins/`` for currently installed user plugins.
grandfathered: List[str] = []
try:
user_plugins_dir = get_hermes_home() / "plugins"
if user_plugins_dir.is_dir():
for child in sorted(user_plugins_dir.iterdir()):
if not child.is_dir():
continue
manifest_file = child / "plugin.yaml"
if not manifest_file.exists():
manifest_file = child / "plugin.yml"
if not manifest_file.exists():
continue
try:
with open(manifest_file) as _mf:
manifest = yaml.safe_load(_mf) or {}
except Exception:
manifest = {}
name = manifest.get("name") or child.name
if name in disabled_set:
continue
grandfathered.append(name)
except Exception:
grandfathered = []
plugins_cfg["enabled"] = grandfathered
config["plugins"] = plugins_cfg
save_config(config)
results["config_added"].append(
f"plugins.enabled (opt-in allow-list, {len(grandfathered)} grandfathered)"
)
if not quiet:
if grandfathered:
print(
f" ✓ Plugins now opt-in: grandfathered "
f"{len(grandfathered)} existing plugin(s) into plugins.enabled"
)
else:
print(
" ✓ Plugins now opt-in: no existing plugins to grandfather. "
"Use `hermes plugins enable <name>` to activate."
)
if current_ver < latest_ver and not quiet:
print(f"Config version: {current_ver}{latest_ver}")
@@ -3231,7 +2946,7 @@ def save_config(config: Dict[str, Any]):
if not sec or sec.get("redact_secrets") is None:
parts.append(_SECURITY_COMMENT)
fb = normalized.get("fallback_model", {})
if not fb or not isinstance(fb, dict) or not (fb.get("provider") and fb.get("model")):
if not fb or not (fb.get("provider") and fb.get("model")):
parts.append(_FALLBACK_COMMENT)
atomic_yaml_write(
@@ -3394,6 +3109,7 @@ def _check_non_ascii_credential(key: str, value: str) -> str:
bad_chars.append(f" position {i}: {ch!r} (U+{ord(ch):04X})")
sanitized = value.encode("ascii", errors="ignore").decode("ascii")
import sys
print(
f"\n Warning: {key} contains non-ASCII characters that will break API requests.\n"
f" This usually happens when copy-pasting from a PDF, rich-text editor,\n"
+48 -128
View File
@@ -13,7 +13,6 @@ import time
import urllib.error
import urllib.parse
import urllib.request
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
@@ -148,14 +147,6 @@ def _sweep_expired_pastes(now: Optional[float] = None) -> tuple[int, int]:
return (deleted, len(remaining))
def _best_effort_sweep_expired_pastes() -> None:
"""Attempt pending-paste cleanup without letting /debug fail offline."""
try:
_sweep_expired_pastes()
except Exception:
pass
# ---------------------------------------------------------------------------
# Privacy / delete helpers
# ---------------------------------------------------------------------------
@@ -323,128 +314,72 @@ def upload_to_pastebin(content: str, expiry_days: int = 7) -> str:
# Log file reading
# ---------------------------------------------------------------------------
@dataclass
class LogSnapshot:
"""Single-read snapshot of a log file used by debug-share."""
path: Optional[Path]
tail_text: str
full_text: Optional[str]
def _primary_log_path(log_name: str) -> Optional[Path]:
"""Where *log_name* would live if present. Doesn't check existence."""
from hermes_cli.logs import LOG_FILES
filename = LOG_FILES.get(log_name)
return (get_hermes_home() / "logs" / filename) if filename else None
def _resolve_log_path(log_name: str) -> Optional[Path]:
"""Find the log file for *log_name*, falling back to the .1 rotation.
Returns the first non-empty candidate (primary, then .1), or None.
Callers distinguish 'empty primary' from 'truly missing' via
:func:`_primary_log_path`.
Returns the path if found, or None.
"""
primary = _primary_log_path(log_name)
if primary is None:
from hermes_cli.logs import LOG_FILES
filename = LOG_FILES.get(log_name)
if not filename:
return None
log_dir = get_hermes_home() / "logs"
primary = log_dir / filename
if primary.exists() and primary.stat().st_size > 0:
return primary
rotated = primary.parent / f"{primary.name}.1"
# Fall back to the most recent rotated file (.1).
rotated = log_dir / f"{filename}.1"
if rotated.exists() and rotated.stat().st_size > 0:
return rotated
return None
def _capture_log_snapshot(
log_name: str,
*,
tail_lines: int,
max_bytes: int = _MAX_LOG_BYTES,
) -> LogSnapshot:
"""Capture a log once and derive summary/full-log views from it.
def _read_log_tail(log_name: str, num_lines: int) -> str:
"""Read the last *num_lines* from a log file, or return a placeholder."""
from hermes_cli.logs import _read_last_n_lines
The report tail and standalone log upload must come from the same file
snapshot. Otherwise a rotation/truncate between reads can make the report
look newer than the uploaded ``agent.log`` paste.
log_path = _resolve_log_path(log_name)
if log_path is None:
return "(file not found)"
try:
lines = _read_last_n_lines(log_path, num_lines)
return "".join(lines).rstrip("\n")
except Exception as exc:
return f"(error reading: {exc})"
def _read_full_log(log_name: str, max_bytes: int = _MAX_LOG_BYTES) -> Optional[str]:
"""Read a log file for standalone upload.
Returns the file content (last *max_bytes* if truncated), or None if the
file doesn't exist or is empty.
"""
log_path = _resolve_log_path(log_name)
if log_path is None:
primary = _primary_log_path(log_name)
tail = "(file empty)" if primary and primary.exists() else "(file not found)"
return LogSnapshot(path=None, tail_text=tail, full_text=None)
return None
try:
size = log_path.stat().st_size
if size == 0:
# race: file was truncated between _resolve_log_path and stat
return LogSnapshot(path=log_path, tail_text="(file empty)", full_text=None)
return None
if size <= max_bytes:
return log_path.read_text(encoding="utf-8", errors="replace")
# File is larger than max_bytes — read the tail.
with open(log_path, "rb") as f:
if size <= max_bytes:
raw = f.read()
truncated = False
else:
# Read from the end until we have enough bytes for the
# standalone upload and enough newline context to render the
# summary tail from the same snapshot.
chunk_size = 8192
pos = size
chunks: list[bytes] = []
total = 0
newline_count = 0
while pos > 0 and (total < max_bytes or newline_count <= tail_lines + 1) and total < max_bytes * 2:
read_size = min(chunk_size, pos)
pos -= read_size
f.seek(pos)
chunk = f.read(read_size)
chunks.insert(0, chunk)
total += len(chunk)
newline_count += chunk.count(b"\n")
chunk_size = min(chunk_size * 2, 65536)
raw = b"".join(chunks)
truncated = pos > 0
full_raw = raw
if truncated and len(full_raw) > max_bytes:
cut = len(full_raw) - max_bytes
# Check whether the cut lands exactly on a line boundary. If the
# byte just before the cut position is a newline the first retained
# byte starts a complete line and we should keep it. Only drop a
# partial first line when we're genuinely mid-line.
on_boundary = cut > 0 and full_raw[cut - 1 : cut] == b"\n"
full_raw = full_raw[cut:]
if not on_boundary and b"\n" in full_raw:
full_raw = full_raw.split(b"\n", 1)[1]
all_text = raw.decode("utf-8", errors="replace")
tail_text = "".join(all_text.splitlines(keepends=True)[-tail_lines:]).rstrip("\n")
full_text = full_raw.decode("utf-8", errors="replace")
if truncated:
full_text = f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{full_text}"
return LogSnapshot(path=log_path, tail_text=tail_text, full_text=full_text)
except Exception as exc:
return LogSnapshot(path=log_path, tail_text=f"(error reading: {exc})", full_text=None)
def _capture_default_log_snapshots(log_lines: int) -> dict[str, LogSnapshot]:
"""Capture all logs used by debug-share exactly once."""
errors_lines = min(log_lines, 100)
return {
"agent": _capture_log_snapshot("agent", tail_lines=log_lines),
"errors": _capture_log_snapshot("errors", tail_lines=errors_lines),
"gateway": _capture_log_snapshot("gateway", tail_lines=errors_lines),
}
f.seek(size - max_bytes)
# Skip partial line at the seek point.
f.readline()
content = f.read().decode("utf-8", errors="replace")
return f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{content}"
except Exception:
return None
# ---------------------------------------------------------------------------
@@ -470,12 +405,7 @@ def _capture_dump() -> str:
return capture.getvalue()
def collect_debug_report(
*,
log_lines: int = 200,
dump_text: str = "",
log_snapshots: Optional[dict[str, LogSnapshot]] = None,
) -> str:
def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
"""Build the summary debug report: system dump + log tails.
Parameters
@@ -494,22 +424,19 @@ def collect_debug_report(
dump_text = _capture_dump()
buf.write(dump_text)
if log_snapshots is None:
log_snapshots = _capture_default_log_snapshots(log_lines)
# ── Recent log tails (summary only) ──────────────────────────────────
buf.write("\n\n")
buf.write(f"--- agent.log (last {log_lines} lines) ---\n")
buf.write(log_snapshots["agent"].tail_text)
buf.write(_read_log_tail("agent", log_lines))
buf.write("\n\n")
errors_lines = min(log_lines, 100)
buf.write(f"--- errors.log (last {errors_lines} lines) ---\n")
buf.write(log_snapshots["errors"].tail_text)
buf.write(_read_log_tail("errors", errors_lines))
buf.write("\n\n")
buf.write(f"--- gateway.log (last {errors_lines} lines) ---\n")
buf.write(log_snapshots["gateway"].tail_text)
buf.write(_read_log_tail("gateway", errors_lines))
buf.write("\n")
return buf.getvalue()
@@ -521,8 +448,6 @@ def collect_debug_report(
def run_debug_share(args):
"""Collect debug report + full logs, upload each, print URLs."""
_best_effort_sweep_expired_pastes()
log_lines = getattr(args, "lines", 200)
expiry = getattr(args, "expire", 7)
local_only = getattr(args, "local", False)
@@ -534,15 +459,10 @@ def run_debug_share(args):
# Capture dump once — prepended to every paste for context.
dump_text = _capture_dump()
log_snapshots = _capture_default_log_snapshots(log_lines)
report = collect_debug_report(
log_lines=log_lines,
dump_text=dump_text,
log_snapshots=log_snapshots,
)
agent_log = log_snapshots["agent"].full_text
gateway_log = log_snapshots["gateway"].full_text
report = collect_debug_report(log_lines=log_lines, dump_text=dump_text)
agent_log = _read_full_log("agent")
gateway_log = _read_full_log("gateway")
# Prepend dump header to each full log so every paste is self-contained.
if agent_log:
+5 -11
View File
@@ -30,7 +30,6 @@ load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")
from hermes_cli.colors import Colors, color
from hermes_constants import OPENROUTER_MODELS_URL
from utils import base_url_host_matches
_PROVIDER_ENV_HINTS = (
@@ -912,7 +911,6 @@ def run_doctor(args):
_apikey_providers = [
("Z.AI / GLM", ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
("Kimi / Moonshot", ("KIMI_API_KEY",), "https://api.moonshot.ai/v1/models", "KIMI_BASE_URL", True),
("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
@@ -944,22 +942,18 @@ def run_doctor(args):
try:
import httpx
_base = os.getenv(_base_env, "") if _base_env else ""
# Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1
# (OpenAI-compat surface, which exposes /models for health check).
# Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com
if not _base and _key.startswith("sk-kimi-"):
_base = "https://api.kimi.com/coding/v1"
# Anthropic-compat endpoints (/anthropic, api.kimi.com/coding
# with no /v1) don't support /models. Rewrite to the OpenAI-compat
# /v1 surface for health checks.
# Anthropic-compat endpoints (/anthropic) don't support /models.
# Rewrite to the OpenAI-compat /v1 surface for health checks.
if _base and _base.rstrip("/").endswith("/anthropic"):
from agent.auxiliary_client import _to_openai_base_url
_base = _to_openai_base_url(_base)
if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"):
_base = _base.rstrip("/") + "/v1"
_url = (_base.rstrip("/") + "/models") if _base else _default_url
_headers = {"Authorization": f"Bearer {_key}"}
if base_url_host_matches(_base, "api.kimi.com"):
_headers["User-Agent"] = "claude-code/0.1.0"
if "api.kimi.com" in _url.lower():
_headers["User-Agent"] = "KimiCLI/1.30.0"
_resp = httpx.get(
_url,
headers=_headers,
+1 -52
View File
@@ -3,7 +3,6 @@
from __future__ import annotations
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
@@ -15,26 +14,6 @@ from dotenv import load_dotenv
# pure ASCII (they become HTTP header values).
_CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
# Names we've already warned about during this process, so repeated
# load_hermes_dotenv() calls (user env + project env, gateway hot-reload,
# tests) don't spam the same warning multiple times.
_WARNED_KEYS: set[str] = set()
def _format_offending_chars(value: str, limit: int = 3) -> str:
"""Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints."""
seen: list[str] = []
for ch in value:
if ord(ch) > 127:
label = f"U+{ord(ch):04X}"
if ch.isprintable():
label += f" ({ch!r})"
if label not in seen:
seen.append(label)
if len(seen) >= limit:
break
return ", ".join(seen)
def _sanitize_loaded_credentials() -> None:
"""Strip non-ASCII characters from credential env vars in os.environ.
@@ -42,42 +21,14 @@ def _sanitize_loaded_credentials() -> None:
Called after dotenv loads so the rest of the codebase never sees
non-ASCII API keys. Only touches env vars whose names end with
known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.).
Emits a one-line warning to stderr when characters are stripped.
Silent stripping would mask copy-paste corruption (Unicode lookalike
glyphs from PDFs / rich-text editors, ZWSP from web pages) as opaque
provider-side "invalid API key" errors (see #6843).
"""
for key, value in list(os.environ.items()):
if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES):
continue
try:
value.encode("ascii")
continue
except UnicodeEncodeError:
pass
cleaned = value.encode("ascii", errors="ignore").decode("ascii")
os.environ[key] = cleaned
if key in _WARNED_KEYS:
continue
_WARNED_KEYS.add(key)
stripped = len(value) - len(cleaned)
detail = _format_offending_chars(value) or "non-printable"
print(
f" Warning: {key} contained {stripped} non-ASCII character"
f"{'s' if stripped != 1 else ''} ({detail}) — stripped so the "
f"key can be sent as an HTTP header.",
file=sys.stderr,
)
print(
" This usually means the key was copy-pasted from a PDF, "
"rich-text editor, or web page that substituted lookalike\n"
" Unicode glyphs for ASCII letters. If authentication fails "
"(e.g. \"API key not valid\"), re-copy the key from the\n"
" provider's dashboard and run `hermes setup` (or edit the "
".env file in a plain-text editor).",
file=sys.stderr,
)
os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii")
def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
@@ -160,8 +111,6 @@ def load_hermes_dotenv(
# Fix corrupted .env files before python-dotenv parses them (#8908).
if user_env.exists():
_sanitize_env_file_if_needed(user_env)
if project_env_path and project_env_path.exists():
_sanitize_env_file_if_needed(project_env_path)
if user_env.exists():
_load_dotenv_with_fallback(user_env, override=True)
+153 -545
View File
@@ -175,60 +175,6 @@ def _request_gateway_self_restart(pid: int) -> bool:
return True
def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool:
"""Send SIGUSR1 to a gateway PID and wait for it to exit gracefully.
SIGUSR1 is wired in gateway/run.py to ``request_restart(via_service=True)``
which drains in-flight agent runs (up to ``agent.restart_drain_timeout``
seconds), then exits with code 75. Both systemd (``Restart=on-failure``
+ ``RestartForceExitStatus=75``) and launchd (``KeepAlive.SuccessfulExit
= false``) relaunch the process after the graceful exit.
This is the drain-aware alternative to ``systemctl restart`` / ``SIGTERM``,
which SIGKILL in-flight agents after a short timeout.
Args:
pid: Gateway process PID (systemd MainPID, launchd PID, or bare
process PID).
drain_timeout: Seconds to wait for the process to exit after sending
SIGUSR1. Should be slightly larger than the gateway's
``agent.restart_drain_timeout`` to allow the drain loop to
finish cleanly.
Returns:
True if the PID was signalled and exited within the timeout.
False if SIGUSR1 couldn't be sent or the process didn't exit in
time (caller should fall back to a harder restart path).
"""
if not hasattr(signal, "SIGUSR1"):
return False
if pid <= 0:
return False
try:
os.kill(pid, signal.SIGUSR1)
except ProcessLookupError:
# Already gone — nothing to drain.
return True
except (PermissionError, OSError):
return False
import time as _time
deadline = _time.monotonic() + max(drain_timeout, 1.0)
while _time.monotonic() < deadline:
try:
os.kill(pid, 0) # signal 0 — probe liveness
except ProcessLookupError:
return True
except PermissionError:
# Process still exists but we can't signal it. Treat as alive
# so the caller falls back.
pass
_time.sleep(0.5)
# Drain didn't finish in time.
return False
def _append_unique_pid(pids: list[int], pid: int | None, exclude_pids: set[int]) -> None:
if pid is None or pid <= 0:
return
@@ -387,147 +333,6 @@ def _probe_systemd_service_running(system: bool = False) -> tuple[bool, bool]:
return selected_system, result.stdout.strip() == "active"
def _read_systemd_unit_properties(
system: bool = False,
properties: tuple[str, ...] = (
"ActiveState",
"SubState",
"Result",
"ExecMainStatus",
),
) -> dict[str, str]:
"""Return selected ``systemctl show`` properties for the gateway unit."""
selected_system = _select_systemd_scope(system)
try:
result = _run_systemctl(
[
"show",
get_service_name(),
"--no-pager",
"--property",
",".join(properties),
],
system=selected_system,
capture_output=True,
text=True,
timeout=10,
)
except (RuntimeError, subprocess.TimeoutExpired, OSError):
return {}
if result.returncode != 0:
return {}
parsed: dict[str, str] = {}
for line in result.stdout.splitlines():
if "=" not in line:
continue
key, value = line.split("=", 1)
parsed[key] = value.strip()
return parsed
def _wait_for_systemd_service_restart(
*,
system: bool = False,
previous_pid: int | None = None,
timeout: float = 60.0,
) -> bool:
"""Wait for the gateway service to become active after a restart handoff."""
import time
svc = get_service_name()
scope_label = _service_scope_label(system).capitalize()
deadline = time.time() + timeout
while time.time() < deadline:
props = _read_systemd_unit_properties(system=system)
active_state = props.get("ActiveState", "")
sub_state = props.get("SubState", "")
new_pid = None
try:
from gateway.status import get_running_pid
new_pid = get_running_pid()
except Exception:
new_pid = None
if active_state == "active":
if new_pid and (previous_pid is None or new_pid != previous_pid):
print(f"{scope_label} service restarted (PID {new_pid})")
return True
if previous_pid is None:
print(f"{scope_label} service restarted")
return True
if active_state == "activating" and sub_state == "auto-restart":
time.sleep(1)
continue
time.sleep(2)
print(
f"{scope_label} service did not become active within {int(timeout)}s.\n"
f" Check status: {'sudo ' if system else ''}hermes gateway status\n"
f" Check logs: journalctl {'--user ' if not system else ''}-u {svc} -l --since '2 min ago'"
)
return False
def _recover_pending_systemd_restart(system: bool = False, previous_pid: int | None = None) -> bool:
"""Recover a planned service restart that is stuck in systemd state."""
props = _read_systemd_unit_properties(system=system)
if not props:
return False
try:
from gateway.status import read_runtime_status
except Exception:
return False
runtime_state = read_runtime_status() or {}
if not runtime_state.get("restart_requested"):
return False
active_state = props.get("ActiveState", "")
sub_state = props.get("SubState", "")
exec_main_status = props.get("ExecMainStatus", "")
result = props.get("Result", "")
if active_state == "activating" and sub_state == "auto-restart":
print("⏳ Service restart already pending — waiting for systemd relaunch...")
return _wait_for_systemd_service_restart(
system=system,
previous_pid=previous_pid,
)
if active_state == "failed" and (
exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE)
or result == "exit-code"
):
svc = get_service_name()
scope_label = _service_scope_label(system).capitalize()
print(f"↻ Clearing failed state for pending {scope_label.lower()} service restart...")
_run_systemctl(
["reset-failed", svc],
system=system,
check=False,
timeout=30,
)
_run_systemctl(
["start", svc],
system=system,
check=False,
timeout=90,
)
return _wait_for_systemd_service_restart(
system=system,
previous_pid=previous_pid,
)
return False
def _probe_launchd_service_running() -> bool:
if not get_launchd_plist_path().exists():
return False
@@ -665,8 +470,7 @@ def stop_profile_gateway() -> bool:
except (ProcessLookupError, PermissionError):
break
if get_running_pid() is None:
remove_pid_file()
remove_pid_file()
return True
@@ -815,21 +619,6 @@ def get_systemd_unit_path(system: bool = False) -> Path:
return Path.home() / ".config" / "systemd" / "user" / f"{name}.service"
class UserSystemdUnavailableError(RuntimeError):
"""Raised when ``systemctl --user`` cannot reach the user D-Bus session.
Typically hit on fresh RHEL/Debian SSH sessions where linger is disabled
and no user@.service is running, so ``/run/user/$UID/bus`` never exists.
Carries a user-facing remediation message in ``args[0]``.
"""
def _user_dbus_socket_path() -> Path:
"""Return the expected per-user D-Bus socket path (regardless of existence)."""
xdg = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}"
return Path(xdg) / "bus"
def _ensure_user_systemd_env() -> None:
"""Ensure DBUS_SESSION_BUS_ADDRESS and XDG_RUNTIME_DIR are set for systemctl --user.
@@ -852,126 +641,6 @@ def _ensure_user_systemd_env() -> None:
os.environ["DBUS_SESSION_BUS_ADDRESS"] = f"unix:path={bus_path}"
def _wait_for_user_dbus_socket(timeout: float = 3.0) -> bool:
"""Poll for the user D-Bus socket to appear, up to ``timeout`` seconds.
Linger-enabled user@.service can take a second or two to spawn the socket
after ``loginctl enable-linger`` runs. Returns True once the socket exists.
"""
import time
deadline = time.monotonic() + timeout
while time.monotonic() < deadline:
if _user_dbus_socket_path().exists():
_ensure_user_systemd_env()
return True
time.sleep(0.2)
return _user_dbus_socket_path().exists()
def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
"""Ensure ``systemctl --user`` will reach the user D-Bus session bus.
No-op when the bus socket is already there (the common case on desktops
and linger-enabled servers). On fresh SSH sessions where the socket is
missing:
* If linger is already enabled, wait briefly for user@.service to spawn
the socket.
* If linger is disabled and ``auto_enable_linger`` is True, try
``loginctl enable-linger $USER`` (works as non-root when polkit permits
it, otherwise needs sudo).
* If the socket is still missing afterwards, raise
:class:`UserSystemdUnavailableError` with a precise remediation message.
Callers should treat the exception as a terminal condition for user-scope
systemd operations and surface the message to the user.
"""
_ensure_user_systemd_env()
bus_path = _user_dbus_socket_path()
if bus_path.exists():
return
import getpass
username = getpass.getuser()
linger_enabled, linger_detail = get_systemd_linger_status()
if linger_enabled is True:
if _wait_for_user_dbus_socket(timeout=3.0):
return
# Linger is on but socket still missing — unusual; fall through to error.
_raise_user_systemd_unavailable(
username,
reason="User D-Bus socket is missing even though linger is enabled.",
fix_hint=(
f" systemctl start user@{os.getuid()}.service\n"
" (may require sudo; try again after the command succeeds)"
),
)
if auto_enable_linger and shutil.which("loginctl"):
try:
result = subprocess.run(
["loginctl", "enable-linger", username],
capture_output=True,
text=True,
check=False,
timeout=30,
)
except Exception as exc:
_raise_user_systemd_unavailable(
username,
reason=f"loginctl enable-linger failed ({exc}).",
fix_hint=f" sudo loginctl enable-linger {username}",
)
else:
if result.returncode == 0:
if _wait_for_user_dbus_socket(timeout=5.0):
print(f"✓ Enabled linger for {username} — user D-Bus now available")
return
# enable-linger succeeded but the socket never appeared.
_raise_user_systemd_unavailable(
username,
reason="Linger was enabled, but the user D-Bus socket did not appear.",
fix_hint=(
" Log out and log back in, then re-run the command.\n"
f" Or reboot and run: systemctl --user start {get_service_name()}"
),
)
detail = (result.stderr or result.stdout or f"exit {result.returncode}").strip()
_raise_user_systemd_unavailable(
username,
reason=f"loginctl enable-linger was denied: {detail}",
fix_hint=f" sudo loginctl enable-linger {username}",
)
_raise_user_systemd_unavailable(
username,
reason=(
"User D-Bus session is not available "
f"({linger_detail or 'linger disabled'})."
),
fix_hint=f" sudo loginctl enable-linger {username}",
)
def _raise_user_systemd_unavailable(username: str, *, reason: str, fix_hint: str) -> None:
"""Build a user-facing error message and raise UserSystemdUnavailableError."""
msg = (
f"{reason}\n"
" systemctl --user cannot reach the user D-Bus session in this shell.\n"
"\n"
" To fix:\n"
f"{fix_hint}\n"
"\n"
" Alternative: run the gateway in the foreground (stays up until\n"
" you exit / close the terminal):\n"
" hermes gateway run"
)
raise UserSystemdUnavailableError(msg)
def _systemctl_cmd(system: bool = False) -> list[str]:
if not system:
_ensure_user_systemd_env()
@@ -1325,6 +994,8 @@ def get_systemd_linger_status() -> tuple[bool | None, str]:
if not is_linux():
return None, "not supported on this platform"
import shutil
if not shutil.which("loginctl"):
return None, "loginctl not found"
@@ -1523,14 +1194,7 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
path_entries.append(resolved_node_dir)
common_bin_paths = ["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"]
# systemd's TimeoutStopSec must exceed the gateway's drain_timeout so
# there's budget left for post-interrupt cleanup (tool subprocess kill,
# adapter disconnect, session DB close) before systemd escalates to
# SIGKILL on the cgroup — otherwise bash/sleep tool-call children left
# by a force-interrupted agent get reaped by systemd instead of us
# (#8202). 30s of headroom covers the worst case we've observed.
_drain_timeout = int(_get_restart_drain_timeout() or 0)
restart_timeout = max(60, _drain_timeout) + 30
restart_timeout = max(60, int(_get_restart_drain_timeout() or 0))
if system:
username, group_name, home_dir = _system_service_identity(run_as_user)
@@ -1683,6 +1347,7 @@ def _ensure_linger_enabled() -> None:
return
import getpass
import shutil
username = getpass.getuser()
linger_file = Path(f"/var/lib/systemd/linger/{username}")
@@ -1819,11 +1484,6 @@ def systemd_start(system: bool = False):
system = _select_systemd_scope(system)
if system:
_require_root_for_system_service("start")
else:
# Fail fast with actionable guidance if the user D-Bus session is not
# reachable (common on fresh RHEL/Debian SSH sessions without linger).
# Raises UserSystemdUnavailableError with a remediation message.
_preflight_user_systemd()
refresh_systemd_unit_if_needed(system=system)
_run_systemctl(["start", get_service_name()], system=system, check=True, timeout=30)
print(f"{_service_scope_label(system).capitalize()} service started")
@@ -1843,16 +1503,19 @@ def systemd_restart(system: bool = False):
system = _select_systemd_scope(system)
if system:
_require_root_for_system_service("restart")
else:
_preflight_user_systemd()
refresh_systemd_unit_if_needed(system=system)
from gateway.status import get_running_pid
pid = get_running_pid()
if pid is not None and _request_gateway_self_restart(pid):
# SIGUSR1 sent — the gateway will drain active agents, exit with
# code 75, and systemd will restart it after RestartSec (30s).
# Wait for the old process to die and the new one to become active
# so the CLI doesn't return while the service is still restarting.
import time
scope_label = _service_scope_label(system).capitalize()
svc = get_service_name()
scope_cmd = _systemctl_cmd(system)
# Phase 1: wait for old process to exit (drain + shutdown)
print(f"{scope_label} service draining active work...")
@@ -1866,41 +1529,48 @@ def systemd_restart(system: bool = False):
else:
print(f"⚠ Old process (PID {pid}) still alive after 90s")
# The gateway exits with code 75 for a planned service restart.
# systemd can sit in the RestartSec window or even wedge itself into a
# failed/rate-limited state if the operator asks for another restart in
# the middle of that handoff. Clear any stale failed state and kick the
# unit immediately so `hermes gateway restart` behaves idempotently.
_run_systemctl(
["reset-failed", svc],
system=system,
check=False,
timeout=30,
)
_run_systemctl(
["start", svc],
system=system,
check=False,
timeout=90,
)
_wait_for_systemd_service_restart(system=system, previous_pid=pid)
return
# Phase 2: wait for systemd to start the new process
print(f"⏳ Waiting for {svc} to restart...")
deadline = time.time() + 60
while time.time() < deadline:
try:
result = subprocess.run(
scope_cmd + ["is-active", svc],
capture_output=True, text=True, timeout=5,
)
if result.stdout.strip() == "active":
# Verify it's a NEW process, not the old one somehow
new_pid = get_running_pid()
if new_pid and new_pid != pid:
print(f"{scope_label} service restarted (PID {new_pid})")
return
except (subprocess.TimeoutExpired, FileNotFoundError):
pass
time.sleep(2)
if _recover_pending_systemd_restart(system=system, previous_pid=pid):
# Timed out — check final state
try:
result = subprocess.run(
scope_cmd + ["is-active", svc],
capture_output=True, text=True, timeout=5,
)
if result.stdout.strip() == "active":
print(f"{scope_label} service restarted")
return
except Exception:
pass
print(
f"{scope_label} service did not become active within 60s.\n"
f" Check status: {'sudo ' if system else ''}hermes gateway status\n"
f" Check logs: journalctl {'--user ' if not system else ''}-u {svc} --since '2 min ago'"
)
return
_run_systemctl(
["reset-failed", get_service_name()],
system=system,
check=False,
timeout=30,
)
_run_systemctl(["reload-or-restart", get_service_name()], system=system, check=True, timeout=90)
print(f"{_service_scope_label(system).capitalize()} service restarted")
def systemd_status(deep: bool = False, system: bool = False, full: bool = False):
def systemd_status(deep: bool = False, system: bool = False):
system = _select_systemd_scope(system)
unit_path = get_systemd_unit_path(system=system)
scope_flag = " --system" if system else ""
@@ -1923,12 +1593,8 @@ def systemd_status(deep: bool = False, system: bool = False, full: bool = False)
print(f" Run: {'sudo ' if system else ''}hermes gateway restart{scope_flag} # auto-refreshes the unit")
print()
status_cmd = ["status", get_service_name(), "--no-pager"]
if full:
status_cmd.append("-l")
_run_systemctl(
status_cmd,
["status", get_service_name(), "--no-pager"],
system=system,
capture_output=False,
timeout=10,
@@ -1961,19 +1627,6 @@ def systemd_status(deep: bool = False, system: bool = False, full: bool = False)
for line in runtime_lines:
print(f" {line}")
unit_props = _read_systemd_unit_properties(system=system)
active_state = unit_props.get("ActiveState", "")
sub_state = unit_props.get("SubState", "")
exec_main_status = unit_props.get("ExecMainStatus", "")
result_code = unit_props.get("Result", "")
if active_state == "activating" and sub_state == "auto-restart":
print(" ⏳ Restart pending: systemd is waiting to relaunch the gateway")
elif active_state == "failed" and exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE):
print(" ⚠ Planned restart is stuck in systemd failed state (exit 75)")
print(f" Run: systemctl {'--user ' if not system else ''}reset-failed {get_service_name()} && {'sudo ' if system else ''}hermes gateway start{scope_flag}")
elif active_state == "failed" and result_code:
print(f" ⚠ Systemd unit result: {result_code}")
if system:
print("✓ System service starts at boot without requiring systemd linger")
elif deep:
@@ -1989,10 +1642,7 @@ def systemd_status(deep: bool = False, system: bool = False, full: bool = False)
if deep:
print()
print("Recent logs:")
log_cmd = _journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"]
if full:
log_cmd.append("-l")
subprocess.run(log_cmd, timeout=10)
subprocess.run(_journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"], timeout=10)
# =============================================================================
@@ -2006,6 +1656,7 @@ def get_launchd_label() -> str:
def _launchd_domain() -> str:
import os
return f"gui/{os.getuid()}"
@@ -2992,120 +2643,9 @@ def _setup_dingtalk():
def _setup_wecom():
"""Interactive setup for WeCom — scan QR code or manual credential input."""
print()
print(color(" ─── 💬 WeCom (Enterprise WeChat) Setup ───", Colors.CYAN))
existing_bot_id = get_env_value("WECOM_BOT_ID")
existing_secret = get_env_value("WECOM_SECRET")
if existing_bot_id and existing_secret:
print()
print_success("WeCom is already configured.")
if not prompt_yes_no(" Reconfigure WeCom?", False):
return
# ── Choose setup method ──
print()
method_choices = [
"Scan QR code to obtain Bot ID and Secret automatically (recommended)",
"Enter existing Bot ID and Secret manually",
]
method_idx = prompt_choice(" How would you like to set up WeCom?", method_choices, 0)
bot_id = None
secret = None
if method_idx == 0:
# ── QR scan flow ──
try:
from gateway.platforms.wecom import qr_scan_for_bot_info
except Exception as exc:
print_error(f" WeCom QR scan import failed: {exc}")
qr_scan_for_bot_info = None
if qr_scan_for_bot_info is not None:
try:
credentials = qr_scan_for_bot_info()
except KeyboardInterrupt:
print()
print_warning(" WeCom setup cancelled.")
return
except Exception as exc:
print_warning(f" QR scan failed: {exc}")
credentials = None
if credentials:
bot_id = credentials.get("bot_id", "")
secret = credentials.get("secret", "")
print_success(" ✔ QR scan successful! Bot ID and Secret obtained.")
if not bot_id or not secret:
print_info(" QR scan did not complete. Continuing with manual input.")
bot_id = None
secret = None
# ── Manual credential input ──
if not bot_id or not secret:
print()
print_info(" 1. Go to WeCom Application → Workspace → Smart Robot -> Create smart robots")
print_info(" 2. Select API Mode")
print_info(" 3. Copy the Bot ID and Secret from the bot's credentials info")
print_info(" 4. The bot connects via WebSocket — no public endpoint needed")
print()
bot_id = prompt(" Bot ID", password=False)
if not bot_id:
print_warning(" Skipped — WeCom won't work without a Bot ID.")
return
secret = prompt(" Secret", password=True)
if not secret:
print_warning(" Skipped — WeCom won't work without a Secret.")
return
# ── Save core credentials ──
save_env_value("WECOM_BOT_ID", bot_id)
save_env_value("WECOM_SECRET", secret)
# ── Allowed users (deny-by-default security) ──
print()
print_info(" The gateway DENIES all users by default for security.")
print_info(" Enter user IDs to create an allowlist, or leave empty.")
allowed = prompt(" Allowed user IDs (comma-separated, or empty)", password=False)
if allowed:
cleaned = allowed.replace(" ", "")
save_env_value("WECOM_ALLOWED_USERS", cleaned)
print_success(" Saved — only these users can interact with the bot.")
else:
print()
access_choices = [
"Enable open access (anyone can message the bot)",
"Use DM pairing (unknown users request access, you approve with 'hermes pairing approve')",
"Disable direct messages",
"Skip for now (bot will deny all users until configured)",
]
access_idx = prompt_choice(" How should unauthorized users be handled?", access_choices, 1)
if access_idx == 0:
save_env_value("WECOM_DM_POLICY", "open")
save_env_value("GATEWAY_ALLOW_ALL_USERS", "true")
print_warning(" Open access enabled — anyone can use your bot!")
elif access_idx == 1:
save_env_value("WECOM_DM_POLICY", "pairing")
print_success(" DM pairing mode — users will receive a code to request access.")
print_info(" Approve with: hermes pairing approve <platform> <code>")
elif access_idx == 2:
save_env_value("WECOM_DM_POLICY", "disabled")
print_warning(" Direct messages disabled.")
else:
print_info(" Skipped — configure later with 'hermes gateway setup'")
# ── Home channel (optional) ──
print()
print_info(" Chat ID for scheduled results and notifications.")
home = prompt(" Home chat ID (optional, for cron/notifications)", password=False)
if home:
save_env_value("WECOM_HOME_CHANNEL", home)
print_success(f" Home channel set to {home}")
print()
print_success("💬 WeCom configured!")
"""Configure WeCom (Enterprise WeChat) via the standard platform setup."""
wecom_platform = next(p for p in _PLATFORMS if p["key"] == "wecom")
_setup_standard_platform(wecom_platform)
def _is_service_installed() -> bool:
@@ -3485,8 +3025,7 @@ def _setup_qqbot():
if method_idx == 0:
# ── QR scan-to-configure ──
try:
from gateway.platforms.qqbot import qr_register
credentials = qr_register()
credentials = _qqbot_qr_flow()
except KeyboardInterrupt:
print()
print_warning(" QQ Bot setup cancelled.")
@@ -3568,6 +3107,106 @@ def _setup_qqbot():
print_info(f" App ID: {credentials['app_id']}")
def _qqbot_render_qr(url: str) -> bool:
"""Try to render a QR code in the terminal. Returns True if successful."""
try:
import qrcode as _qr
qr = _qr.QRCode(border=1,error_correction=_qr.constants.ERROR_CORRECT_L)
qr.add_data(url)
qr.make(fit=True)
qr.print_ascii(invert=True)
return True
except Exception:
return False
def _qqbot_qr_flow():
"""Run the QR-code scan-to-configure flow.
Returns a dict with app_id, client_secret, user_openid on success,
or None on failure/cancel.
"""
try:
from gateway.platforms.qqbot import (
create_bind_task, poll_bind_result, build_connect_url,
decrypt_secret, BindStatus,
)
from gateway.platforms.qqbot.constants import ONBOARD_POLL_INTERVAL
except Exception as exc:
print_error(f" QQBot onboard import failed: {exc}")
return None
import asyncio
import time
MAX_REFRESHES = 3
refresh_count = 0
while refresh_count <= MAX_REFRESHES:
loop = asyncio.new_event_loop()
# ── Create bind task ──
try:
task_id, aes_key = loop.run_until_complete(create_bind_task())
except Exception as e:
print_warning(f" Failed to create bind task: {e}")
loop.close()
return None
url = build_connect_url(task_id)
# ── Display QR code + URL ──
print()
if _qqbot_render_qr(url):
print(f" Scan the QR code above, or open this URL directly:\n {url}")
else:
print(f" Open this URL in QQ on your phone:\n {url}")
print_info(" Tip: pip install qrcode to show a scannable QR code here")
# ── Poll loop (silent — keep QR visible at bottom) ──
try:
while True:
try:
status, app_id, encrypted_secret, user_openid = loop.run_until_complete(
poll_bind_result(task_id)
)
except Exception:
time.sleep(ONBOARD_POLL_INTERVAL)
continue
if status == BindStatus.COMPLETED:
client_secret = decrypt_secret(encrypted_secret, aes_key)
print()
print_success(f" QR scan complete! (App ID: {app_id})")
if user_openid:
print_info(f" Scanner's OpenID: {user_openid}")
return {
"app_id": app_id,
"client_secret": client_secret,
"user_openid": user_openid,
}
if status == BindStatus.EXPIRED:
refresh_count += 1
if refresh_count > MAX_REFRESHES:
print()
print_warning(f" QR code expired {MAX_REFRESHES} times — giving up.")
return None
print()
print_warning(f" QR code expired, refreshing... ({refresh_count}/{MAX_REFRESHES})")
loop.close()
break # outer while creates a new task
time.sleep(ONBOARD_POLL_INTERVAL)
except KeyboardInterrupt:
loop.close()
raise
finally:
loop.close()
return None
def _setup_signal():
"""Interactive setup for Signal messenger."""
import shutil
@@ -3719,10 +3358,6 @@ def gateway_setup():
systemd_start()
elif is_macos():
launchd_start()
except UserSystemdUnavailableError as e:
print_error(" Failed to start — user systemd not reachable:")
for line in str(e).splitlines():
print(f" {line}")
except subprocess.CalledProcessError as e:
print_error(f" Failed to start: {e}")
else:
@@ -3759,8 +3394,6 @@ def gateway_setup():
_setup_feishu()
elif platform["key"] == "qqbot":
_setup_qqbot()
elif platform["key"] == "wecom":
_setup_wecom()
else:
_setup_standard_platform(platform)
@@ -3787,10 +3420,6 @@ def gateway_setup():
else:
stop_profile_gateway()
print_info("Start manually: hermes gateway")
except UserSystemdUnavailableError as e:
print_error(" Restart failed — user systemd not reachable:")
for line in str(e).splitlines():
print(f" {line}")
except subprocess.CalledProcessError as e:
print_error(f" Restart failed: {e}")
elif service_installed:
@@ -3800,10 +3429,6 @@ def gateway_setup():
systemd_start()
elif is_macos():
launchd_start()
except UserSystemdUnavailableError as e:
print_error(" Start failed — user systemd not reachable:")
for line in str(e).splitlines():
print(f" {line}")
except subprocess.CalledProcessError as e:
print_error(f" Start failed: {e}")
else:
@@ -3827,10 +3452,6 @@ def gateway_setup():
systemd_start(system=installed_scope == "system")
else:
launchd_start()
except UserSystemdUnavailableError as e:
print_error(" Start failed — user systemd not reachable:")
for line in str(e).splitlines():
print(f" {line}")
except subprocess.CalledProcessError as e:
print_error(f" Start failed: {e}")
except subprocess.CalledProcessError as e:
@@ -3868,18 +3489,6 @@ def gateway_setup():
def gateway_command(args):
"""Handle gateway subcommands."""
try:
return _gateway_command_inner(args)
except UserSystemdUnavailableError as e:
# Clean, actionable message instead of a traceback when the user D-Bus
# session is unreachable (fresh SSH shell, no linger, container, etc.).
print_error("User systemd not reachable:")
for line in str(e).splitlines():
print(f" {line}")
sys.exit(1)
def _gateway_command_inner(args):
subcmd = getattr(args, 'gateway_command', None)
# Default to run if no subcommand
@@ -4143,13 +3752,12 @@ def _gateway_command_inner(args):
elif subcmd == "status":
deep = getattr(args, 'deep', False)
full = getattr(args, 'full', False)
system = getattr(args, 'system', False)
snapshot = get_gateway_runtime_snapshot(system=system)
# Check for service first
if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
systemd_status(deep, system=system, full=full)
systemd_status(deep, system=system)
_print_gateway_process_mismatch(snapshot)
elif is_macos() and get_launchd_plist_path().exists():
launchd_status(deep)
-385
View File
@@ -1,385 +0,0 @@
"""hermes hooks — inspect and manage shell-script hooks.
Usage::
hermes hooks list
hermes hooks test <event> [--for-tool X] [--payload-file F]
hermes hooks revoke <command>
hermes hooks doctor
Consent records live under ``~/.hermes/shell-hooks-allowlist.json`` and
hook definitions come from the ``hooks:`` block in ``~/.hermes/config.yaml``
(the same config read by the CLI / gateway at startup).
This module is a thin CLI shell over :mod:`agent.shell_hooks`; every
shared concern (payload serialisation, response parsing, allowlist
format) lives there.
"""
from __future__ import annotations
import json
import os
from pathlib import Path
from typing import Any, Dict, List, Optional
def hooks_command(args) -> None:
"""Entry point for ``hermes hooks`` — dispatches to the requested action."""
sub = getattr(args, "hooks_action", None)
if not sub:
print("Usage: hermes hooks {list|test|revoke|doctor}")
print("Run 'hermes hooks --help' for details.")
return
if sub in ("list", "ls"):
_cmd_list(args)
elif sub == "test":
_cmd_test(args)
elif sub in ("revoke", "remove", "rm"):
_cmd_revoke(args)
elif sub == "doctor":
_cmd_doctor(args)
else:
print(f"Unknown hooks subcommand: {sub}")
# ---------------------------------------------------------------------------
# list
# ---------------------------------------------------------------------------
def _cmd_list(_args) -> None:
from hermes_cli.config import load_config
from agent import shell_hooks
specs = shell_hooks.iter_configured_hooks(load_config())
if not specs:
print("No shell hooks configured in ~/.hermes/config.yaml.")
print("See `hermes hooks --help` or")
print(" website/docs/user-guide/features/hooks.md")
print("for the config schema and worked examples.")
return
by_event: Dict[str, List] = {}
for spec in specs:
by_event.setdefault(spec.event, []).append(spec)
allowlist = shell_hooks.load_allowlist()
approved = {
(e.get("event"), e.get("command"))
for e in allowlist.get("approvals", [])
if isinstance(e, dict)
}
print(f"Configured shell hooks ({len(specs)} total):\n")
for event in sorted(by_event.keys()):
print(f" [{event}]")
for spec in by_event[event]:
is_approved = (spec.event, spec.command) in approved
status = "✓ allowed" if is_approved else "✗ not allowlisted"
matcher_part = f" matcher={spec.matcher!r}" if spec.matcher else ""
print(
f" - {spec.command}{matcher_part} "
f"(timeout={spec.timeout}s, {status})"
)
if is_approved:
entry = shell_hooks.allowlist_entry_for(spec.event, spec.command)
if entry and entry.get("approved_at"):
print(f" approved_at: {entry['approved_at']}")
mtime_now = shell_hooks.script_mtime_iso(spec.command)
mtime_at = entry.get("script_mtime_at_approval")
if mtime_now and mtime_at and mtime_now > mtime_at:
print(
f" ⚠ script modified since approval "
f"(was {mtime_at}, now {mtime_now}) — "
f"run `hermes hooks doctor` to re-validate"
)
print()
# ---------------------------------------------------------------------------
# test
# ---------------------------------------------------------------------------
# Synthetic kwargs matching the real invoke_hook() call sites — these are
# passed verbatim to agent.shell_hooks.run_once(), which routes them through
# the same _serialize_payload() that production firings use. That way the
# stdin a script sees under `hermes hooks test` and `hermes hooks doctor`
# is identical in shape to what it will see at runtime.
_DEFAULT_PAYLOADS = {
"pre_tool_call": {
"tool_name": "terminal",
"args": {"command": "echo hello"},
"session_id": "test-session",
"task_id": "test-task",
"tool_call_id": "test-call",
},
"post_tool_call": {
"tool_name": "terminal",
"args": {"command": "echo hello"},
"session_id": "test-session",
"task_id": "test-task",
"tool_call_id": "test-call",
"result": '{"output": "hello"}',
},
"pre_llm_call": {
"session_id": "test-session",
"user_message": "What is the weather?",
"conversation_history": [],
"is_first_turn": True,
"model": "gpt-4",
"platform": "cli",
},
"post_llm_call": {
"session_id": "test-session",
"model": "gpt-4",
"platform": "cli",
},
"on_session_start": {"session_id": "test-session"},
"on_session_end": {"session_id": "test-session"},
"on_session_finalize": {"session_id": "test-session"},
"on_session_reset": {"session_id": "test-session"},
"pre_api_request": {
"session_id": "test-session",
"task_id": "test-task",
"platform": "cli",
"model": "claude-sonnet-4-6",
"provider": "anthropic",
"base_url": "https://api.anthropic.com",
"api_mode": "anthropic_messages",
"api_call_count": 1,
"message_count": 4,
"tool_count": 12,
"approx_input_tokens": 2048,
"request_char_count": 8192,
"max_tokens": 4096,
},
"post_api_request": {
"session_id": "test-session",
"task_id": "test-task",
"platform": "cli",
"model": "claude-sonnet-4-6",
"provider": "anthropic",
"base_url": "https://api.anthropic.com",
"api_mode": "anthropic_messages",
"api_call_count": 1,
"api_duration": 1.234,
"finish_reason": "stop",
"message_count": 4,
"response_model": "claude-sonnet-4-6",
"usage": {"input_tokens": 2048, "output_tokens": 512},
"assistant_content_chars": 1200,
"assistant_tool_call_count": 0,
},
"subagent_stop": {
"parent_session_id": "parent-sess",
"child_role": None,
"child_summary": "Synthetic summary for hooks test",
"child_status": "completed",
"duration_ms": 1234,
},
}
def _cmd_test(args) -> None:
from hermes_cli.config import load_config
from hermes_cli.plugins import VALID_HOOKS
from agent import shell_hooks
event = args.event
if event not in VALID_HOOKS:
print(f"Unknown event: {event!r}")
print(f"Valid events: {', '.join(sorted(VALID_HOOKS))}")
return
# Synthetic kwargs in the same shape invoke_hook() would pass. Merged
# with --for-tool (overrides tool_name) and --payload-file (extra kwargs).
payload = dict(_DEFAULT_PAYLOADS.get(event, {"session_id": "test-session"}))
if getattr(args, "for_tool", None):
payload["tool_name"] = args.for_tool
if getattr(args, "payload_file", None):
try:
custom = json.loads(Path(args.payload_file).read_text())
if isinstance(custom, dict):
payload.update(custom)
else:
print(f"Warning: {args.payload_file} is not a JSON object; ignoring")
except Exception as exc:
print(f"Error reading payload file: {exc}")
return
specs = shell_hooks.iter_configured_hooks(load_config())
specs = [s for s in specs if s.event == event]
if getattr(args, "for_tool", None):
specs = [
s for s in specs
if s.event not in ("pre_tool_call", "post_tool_call")
or s.matches_tool(args.for_tool)
]
if not specs:
print(f"No shell hooks configured for event: {event}")
if getattr(args, "for_tool", None):
print(f"(with matcher filter --for-tool={args.for_tool})")
return
print(f"Firing {len(specs)} hook(s) for event '{event}':\n")
for spec in specs:
print(f"{spec.command}")
result = shell_hooks.run_once(spec, payload)
_print_run_result(result)
print()
def _print_run_result(result: Dict[str, Any]) -> None:
if result.get("error"):
print(f" ✗ error: {result['error']}")
return
if result.get("timed_out"):
print(f" ✗ timed out after {result['elapsed_seconds']}s")
return
rc = result.get("returncode")
elapsed = result.get("elapsed_seconds", 0)
print(f" exit={rc} elapsed={elapsed}s")
stdout = (result.get("stdout") or "").strip()
stderr = (result.get("stderr") or "").strip()
if stdout:
print(f" stdout: {_truncate(stdout, 400)}")
if stderr:
print(f" stderr: {_truncate(stderr, 400)}")
parsed = result.get("parsed")
if parsed:
print(f" parsed (Hermes wire shape): {json.dumps(parsed)}")
else:
print(" parsed: <none — hook contributed nothing to the dispatcher>")
def _truncate(s: str, n: int) -> str:
return s if len(s) <= n else s[: n - 3] + "..."
# ---------------------------------------------------------------------------
# revoke
# ---------------------------------------------------------------------------
def _cmd_revoke(args) -> None:
from agent import shell_hooks
removed = shell_hooks.revoke(args.command)
if removed == 0:
print(f"No allowlist entry found for command: {args.command}")
return
print(f"Removed {removed} allowlist entry/entries for: {args.command}")
print(
"Note: currently running CLI / gateway processes keep their "
"already-registered callbacks until they restart."
)
# ---------------------------------------------------------------------------
# doctor
# ---------------------------------------------------------------------------
def _cmd_doctor(_args) -> None:
from hermes_cli.config import load_config
from agent import shell_hooks
specs = shell_hooks.iter_configured_hooks(load_config())
if not specs:
print("No shell hooks configured — nothing to check.")
return
print(f"Checking {len(specs)} configured shell hook(s)...\n")
problems = 0
for spec in specs:
print(f" [{spec.event}] {spec.command}")
problems += _doctor_one(spec, shell_hooks)
print()
if problems:
print(f"{problems} issue(s) found. Fix before relying on these hooks.")
else:
print("All shell hooks look healthy.")
def _doctor_one(spec, shell_hooks) -> int:
problems = 0
# 1. Script exists and is executable
if shell_hooks.script_is_executable(spec.command):
print(" ✓ script exists and is executable")
else:
problems += 1
print(" ✗ script missing or not executable "
"(chmod +x the file, or fix the path)")
# 2. Allowlist status
entry = shell_hooks.allowlist_entry_for(spec.event, spec.command)
if entry:
print(f" ✓ allowlisted (approved {entry.get('approved_at', '?')})")
else:
problems += 1
print(" ✗ not allowlisted — hook will NOT fire at runtime "
"(run with --accept-hooks once, or confirm at the TTY prompt)")
# 3. Mtime drift
if entry and entry.get("script_mtime_at_approval"):
mtime_now = shell_hooks.script_mtime_iso(spec.command)
mtime_at = entry["script_mtime_at_approval"]
if mtime_now and mtime_at and mtime_now > mtime_at:
problems += 1
print(f" ⚠ script modified since approval "
f"(was {mtime_at}, now {mtime_now}) — review changes, "
f"then `hermes hooks revoke` + re-approve to refresh")
elif mtime_now and mtime_at and mtime_now == mtime_at:
print(" ✓ script unchanged since approval")
# 4. Produces valid JSON for a synthetic payload — only when the entry
# is already allowlisted. Otherwise `hermes hooks doctor` would execute
# every script listed in a freshly-pulled config before the user has
# reviewed them, which directly contradicts the documented workflow
# ("spot newly-added hooks *before they register*").
if not entry:
print(" skipped JSON smoke test — not allowlisted yet. "
"Approve the hook first (via TTY prompt or --accept-hooks), "
"then re-run `hermes hooks doctor`.")
elif shell_hooks.script_is_executable(spec.command):
payload = _DEFAULT_PAYLOADS.get(spec.event, {"extra": {}})
result = shell_hooks.run_once(spec, payload)
if result.get("timed_out"):
problems += 1
print(f" ✗ timed out after {result['elapsed_seconds']}s "
f"on synthetic payload (timeout={spec.timeout}s)")
elif result.get("error"):
problems += 1
print(f" ✗ execution error: {result['error']}")
else:
rc = result.get("returncode")
elapsed = result.get("elapsed_seconds", 0)
stdout = (result.get("stdout") or "").strip()
if stdout:
try:
json.loads(stdout)
print(f" ✓ produced valid JSON on synthetic payload "
f"(exit={rc}, {elapsed}s)")
except json.JSONDecodeError:
problems += 1
print(f" ✗ stdout was not valid JSON (exit={rc}, "
f"{elapsed}s): {_truncate(stdout, 120)}")
else:
print(f" ✓ ran clean with empty stdout "
f"(exit={rc}, {elapsed}s) — hook is observer-only")
return problems
+94 -586
View File
@@ -51,19 +51,6 @@ import sys
from pathlib import Path
from typing import Optional
def _add_accept_hooks_flag(parser) -> None:
"""Attach the ``--accept-hooks`` flag. Shared across every agent
subparser so the flag works regardless of CLI position."""
parser.add_argument(
"--accept-hooks",
action="store_true",
default=argparse.SUPPRESS,
help=(
"Auto-approve unseen shell hooks without a TTY prompt "
"(equivalent to HERMES_ACCEPT_HOOKS=1 / hooks_auto_accept: true)."
),
)
def _require_tty(command_name: str) -> None:
"""Exit with a clear error if stdin is not a terminal.
@@ -193,7 +180,7 @@ import time as _time
from datetime import datetime
from hermes_cli import __version__, __release_date__
from hermes_constants import AI_GATEWAY_BASE_URL, OPENROUTER_BASE_URL
from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
@@ -618,6 +605,7 @@ def _exec_in_container(container_info: dict, cli_args: list):
container_info: dict with backend, container_name, exec_user, hermes_bin
cli_args: the original CLI arguments (everything after 'hermes')
"""
import shutil
backend = container_info["backend"]
container_name = container_info["container_name"]
@@ -1015,17 +1003,6 @@ def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
)
env.setdefault("HERMES_PYTHON", sys.executable)
env.setdefault("HERMES_CWD", os.getcwd())
# Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
# ~1.54GB depending on version and can fatal-OOM on long sessions with
# large transcripts / reasoning blobs. Token-level merge: respect any
# user-supplied --max-old-space-size (they may have set it higher) and
# avoid duplicating --expose-gc.
_tokens = env.get("NODE_OPTIONS", "").split()
if not any(t.startswith("--max-old-space-size=") for t in _tokens):
_tokens.append("--max-old-space-size=8192")
if "--expose-gc" not in _tokens:
_tokens.append("--expose-gc")
env["NODE_OPTIONS"] = " ".join(_tokens)
if resume_session_id:
env["HERMES_TUI_RESUME"] = resume_session_id
@@ -1131,20 +1108,6 @@ def cmd_chat(args):
if getattr(args, "yolo", False):
os.environ["HERMES_YOLO_MODE"] = "1"
# --ignore-user-config: make load_cli_config() / load_config() skip the
# user's ~/.hermes/config.yaml and return built-in defaults. Set BEFORE
# importing cli (which runs `CLI_CONFIG = load_cli_config()` at module
# import time). Credentials in .env are still loaded — this flag only
# ignores behavioral/config settings.
if getattr(args, "ignore_user_config", False):
os.environ["HERMES_IGNORE_USER_CONFIG"] = "1"
# --ignore-rules: skip auto-injection of AGENTS.md/SOUL.md/.cursorrules
# (rules), memory entries, and any preloaded skills coming from user config.
# Maps to AIAgent(skip_context_files=True, skip_memory=True).
if getattr(args, "ignore_rules", False):
os.environ["HERMES_IGNORE_RULES"] = "1"
# --source: tag session source for filtering (e.g. 'tool' for third-party integrations)
if getattr(args, "source", None):
os.environ["HERMES_SESSION_SOURCE"] = args.source
@@ -1173,8 +1136,6 @@ def cmd_chat(args):
"checkpoints": getattr(args, "checkpoints", False),
"pass_session_id": getattr(args, "pass_session_id", False),
"max_turns": getattr(args, "max_turns", None),
"ignore_rules": getattr(args, "ignore_rules", False),
"ignore_user_config": getattr(args, "ignore_user_config", False),
}
# Filter out None values
kwargs = {k: v for k, v in kwargs.items() if v is not None}
@@ -1196,6 +1157,8 @@ def cmd_gateway(args):
def cmd_whatsapp(args):
"""Set up WhatsApp: choose mode, configure, install bridge, pair via QR."""
_require_tty("whatsapp")
import subprocess
from pathlib import Path
from hermes_cli.config import get_env_value, save_env_value
print()
@@ -1304,27 +1267,16 @@ def cmd_whatsapp(args):
return
if not (bridge_dir / "node_modules").exists():
print("\n→ Installing WhatsApp bridge dependencies (this can take a few minutes)...")
npm = shutil.which("npm")
if not npm:
print(" ✗ npm not found on PATH — install Node.js first")
return
try:
result = subprocess.run(
[npm, "install", "--no-fund", "--no-audit", "--progress=false"],
cwd=str(bridge_dir),
stdout=subprocess.DEVNULL,
stderr=subprocess.PIPE,
text=True,
)
except KeyboardInterrupt:
print("\n ✗ Install cancelled")
return
print("\n→ Installing WhatsApp bridge dependencies...")
result = subprocess.run(
["npm", "install"],
cwd=str(bridge_dir),
capture_output=True,
text=True,
timeout=120,
)
if result.returncode != 0:
err = (result.stderr or "").strip()
preview = "\n".join(err.splitlines()[-30:]) if err else "(no output)"
print(" ✗ npm install failed:")
print(preview)
print(f" ✗ npm install failed: {result.stderr}")
return
print(" ✓ Dependencies installed")
else:
@@ -1343,6 +1295,8 @@ def cmd_whatsapp(args):
except (EOFError, KeyboardInterrupt):
response = "n"
if response.lower() in ("y", "yes"):
import shutil
shutil.rmtree(session_dir, ignore_errors=True)
session_dir.mkdir(parents=True, exist_ok=True)
print(" ✓ Session cleared")
@@ -1438,6 +1392,8 @@ def select_provider_and_model(args=None):
# Read effective provider the same way the CLI does at startup:
# config.yaml model.provider > env var > auto-detect
import os
config_provider = None
model_cfg = config.get("model")
if isinstance(model_cfg, dict):
@@ -1548,8 +1504,6 @@ def select_provider_and_model(args=None):
# Step 2: Provider-specific setup + model selection
if selected_provider == "openrouter":
_model_flow_openrouter(config, current_model)
elif selected_provider == "ai-gateway":
_model_flow_ai_gateway(config, current_model)
elif selected_provider == "nous":
_model_flow_nous(config, current_model, args=args)
elif selected_provider == "openai-codex":
@@ -1582,8 +1536,6 @@ def select_provider_and_model(args=None):
_model_flow_anthropic(config, current_model)
elif selected_provider == "kimi-coding":
_model_flow_kimi(config, current_model)
elif selected_provider == "stepfun":
_model_flow_stepfun(config, current_model)
elif selected_provider == "bedrock":
_model_flow_bedrock(config, current_model)
elif selected_provider in (
@@ -1597,6 +1549,7 @@ def select_provider_and_model(args=None):
"kilocode",
"opencode-zen",
"opencode-go",
"ai-gateway",
"alibaba",
"huggingface",
"xiaomi",
@@ -2068,63 +2021,6 @@ def _model_flow_openrouter(config, current_model=""):
print("No change.")
def _model_flow_ai_gateway(config, current_model=""):
"""Vercel AI Gateway provider: ensure API key, then pick model with pricing."""
from hermes_cli.auth import (
_prompt_model_selection,
_save_model_choice,
deactivate_provider,
)
from hermes_cli.config import get_env_value, save_env_value
api_key = get_env_value("AI_GATEWAY_API_KEY")
if not api_key:
print("No Vercel AI Gateway API key configured.")
print("Create API key here: https://vercel.com/d?to=%2F%5Bteam%5D%2F%7E%2Fai-gateway&title=AI+Gateway")
print("Add a payment method to get $5 in free credits.")
print()
try:
import getpass
key = getpass.getpass("AI Gateway API key (or Enter to cancel): ").strip()
except (KeyboardInterrupt, EOFError):
print()
return
if not key:
print("Cancelled.")
return
save_env_value("AI_GATEWAY_API_KEY", key)
print("API key saved.")
print()
from hermes_cli.models import ai_gateway_model_ids, get_pricing_for_provider
models_list = ai_gateway_model_ids(force_refresh=True)
pricing = get_pricing_for_provider("ai-gateway", force_refresh=True)
selected = _prompt_model_selection(
models_list, current_model=current_model, pricing=pricing
)
if selected:
_save_model_choice(selected)
from hermes_cli.config import load_config, save_config
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = "ai-gateway"
model["base_url"] = AI_GATEWAY_BASE_URL
model["api_mode"] = "chat_completions"
save_config(cfg)
deactivate_provider()
print(f"Default model set to: {selected} (via Vercel AI Gateway)")
else:
print("No change.")
def _model_flow_nous(config, current_model="", args=None):
"""Nous Portal provider: ensure logged in, then pick model."""
from hermes_cli.auth import (
@@ -2145,6 +2041,7 @@ def _model_flow_nous(config, current_model="", args=None):
save_env_value,
)
from hermes_cli.nous_subscription import prompt_enable_tool_gateway
import argparse
state = get_provider_auth_state("nous")
if not state or not state.get("access_token"):
@@ -2183,6 +2080,7 @@ def _model_flow_nous(config, current_model="", args=None):
from hermes_cli.models import (
_PROVIDER_MODELS,
get_pricing_for_provider,
filter_nous_free_models,
check_nous_free_tier,
partition_nous_models_by_tier,
)
@@ -2225,8 +2123,10 @@ def _model_flow_nous(config, current_model="", args=None):
# Check if user is on free tier
free_tier = check_nous_free_tier()
# For free users: partition models into selectable/unavailable based on
# whether they are free per the Portal-reported pricing.
# For both tiers: apply the allowlist filter first (removes non-allowlisted
# free models and allowlist models that aren't actually free).
# Then for free users: partition remaining models into selectable/unavailable.
model_ids = filter_nous_free_models(model_ids, pricing)
unavailable_models: list[str] = []
if free_tier:
model_ids, unavailable_models = partition_nous_models_by_tier(
@@ -2309,6 +2209,7 @@ def _model_flow_openai_codex(config, current_model=""):
DEFAULT_CODEX_BASE_URL,
)
from hermes_cli.codex_models import get_codex_model_ids
import argparse
status = get_codex_auth_status()
if not status.get("logged_in"):
@@ -3439,9 +3340,8 @@ def _model_flow_kimi(config, current_model=""):
# Step 3: Model selection — show appropriate models for the endpoint
if is_coding_plan:
# Coding Plan models (kimi-k2.6 first)
# Coding Plan models (kimi-k2.5 first)
model_list = [
"kimi-k2.6",
"kimi-k2.5",
"kimi-for-coding",
"kimi-k2-thinking",
@@ -3480,140 +3380,6 @@ def _model_flow_kimi(config, current_model=""):
print("No change.")
def _infer_stepfun_region(base_url: str) -> str:
"""Infer the current StepFun region from the configured endpoint."""
normalized = (base_url or "").strip().lower()
if "api.stepfun.com" in normalized:
return "china"
return "international"
def _stepfun_base_url_for_region(region: str) -> str:
from hermes_cli.auth import (
STEPFUN_STEP_PLAN_CN_BASE_URL,
STEPFUN_STEP_PLAN_INTL_BASE_URL,
)
return (
STEPFUN_STEP_PLAN_CN_BASE_URL
if region == "china"
else STEPFUN_STEP_PLAN_INTL_BASE_URL
)
def _model_flow_stepfun(config, current_model=""):
"""StepFun Step Plan flow with region-specific endpoints."""
from hermes_cli.auth import (
PROVIDER_REGISTRY,
_prompt_model_selection,
_save_model_choice,
deactivate_provider,
)
from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
from hermes_cli.models import fetch_api_models
provider_id = "stepfun"
pconfig = PROVIDER_REGISTRY[provider_id]
key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
base_url_env = pconfig.base_url_env_var or ""
existing_key = ""
for ev in pconfig.api_key_env_vars:
existing_key = get_env_value(ev) or os.getenv(ev, "")
if existing_key:
break
if not existing_key:
print(f"No {pconfig.name} API key configured.")
if key_env:
try:
import getpass
new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
except (KeyboardInterrupt, EOFError):
print()
return
if not new_key:
print("Cancelled.")
return
save_env_value(key_env, new_key)
existing_key = new_key
print("API key saved.")
print()
else:
print(f" {pconfig.name} API key: {existing_key[:8]}... ✓")
print()
current_base = ""
if base_url_env:
current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
if not current_base:
model_cfg = config.get("model")
if isinstance(model_cfg, dict):
current_base = str(model_cfg.get("base_url") or "").strip()
current_region = _infer_stepfun_region(current_base or pconfig.inference_base_url)
region_choices = [
("international", f"International ({_stepfun_base_url_for_region('international')})"),
("china", f"China ({_stepfun_base_url_for_region('china')})"),
]
ordered_regions = []
for region_key, label in region_choices:
if region_key == current_region:
ordered_regions.insert(0, (region_key, f"{label} ← currently active"))
else:
ordered_regions.append((region_key, label))
ordered_regions.append(("cancel", "Cancel"))
region_idx = _prompt_provider_choice([label for _, label in ordered_regions])
if region_idx is None or ordered_regions[region_idx][0] == "cancel":
print("No change.")
return
selected_region = ordered_regions[region_idx][0]
effective_base = _stepfun_base_url_for_region(selected_region)
if base_url_env:
save_env_value(base_url_env, effective_base)
live_models = fetch_api_models(existing_key, effective_base)
if live_models:
model_list = live_models
print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
else:
model_list = _PROVIDER_MODELS.get(provider_id, [])
if model_list:
print(
f" Could not auto-detect models from {pconfig.name} API — "
"showing Step Plan fallback catalog."
)
if model_list:
selected = _prompt_model_selection(model_list, current_model=current_model)
else:
try:
selected = input("Model name: ").strip()
except (KeyboardInterrupt, EOFError):
selected = None
if selected:
_save_model_choice(selected)
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = provider_id
model["base_url"] = effective_base
model.pop("api_mode", None)
save_config(cfg)
deactivate_provider()
config["model"] = dict(model)
print(f"Default model set to: {selected} (via {pconfig.name})")
else:
print("No change.")
def _model_flow_bedrock_api_key(config, region, current_model=""):
"""Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint.
@@ -3984,18 +3750,7 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
pass
if mdev_models:
# Merge models.dev with curated list so newly added models
# (not yet in models.dev) still appear in the picker.
if curated:
seen = {m.lower() for m in mdev_models}
merged = list(mdev_models)
for m in curated:
if m.lower() not in seen:
merged.append(m)
seen.add(m.lower())
model_list = merged
else:
model_list = mdev_models
model_list = mdev_models
print(f" Found {len(model_list)} model(s) from models.dev registry")
elif curated and len(curated) >= 8:
# Curated list is substantial — use it directly, skip live probe
@@ -4325,12 +4080,6 @@ def cmd_webhook(args):
webhook_command(args)
def cmd_hooks(args):
"""Shell-hook inspection and management."""
from hermes_cli.hooks import hooks_command
hooks_command(args)
def cmd_doctor(args):
"""Check configuration and dependencies."""
from hermes_cli.doctor import run_doctor
@@ -4440,7 +4189,9 @@ def _clear_bytecode_cache(root: Path) -> int:
]
if os.path.basename(dirpath) == "__pycache__":
try:
shutil.rmtree(dirpath)
import shutil as _shutil
_shutil.rmtree(dirpath)
removed += 1
except OSError:
pass
@@ -4479,6 +4230,8 @@ def _gateway_prompt(prompt_text: str, default: str = "", timeout: float = 300.0)
tmp.replace(prompt_path)
# Poll for response
import time as _time
deadline = _time.monotonic() + timeout
while _time.monotonic() < deadline:
if response_path.exists():
@@ -4510,6 +4263,7 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
"""
if not (web_dir / "package.json").exists():
return True
import shutil
npm = shutil.which("npm")
if not npm:
@@ -4546,6 +4300,7 @@ def _update_via_zip(args):
Used on Windows when git file I/O is broken (antivirus, NTFS filter
drivers causing 'Invalid argument' errors on file creation).
"""
import shutil
import tempfile
import zipfile
from urllib.request import urlretrieve
@@ -4622,6 +4377,7 @@ def _update_via_zip(args):
# breaks on this machine, keep base deps and reinstall the remaining extras
# individually so update does not silently strip working capabilities.
print("→ Updating Python dependencies...")
import subprocess
uv_bin = shutil.which("uv")
if uv_bin:
@@ -5372,11 +5128,9 @@ def _install_hangup_protection(gateway_mode: bool = False):
# (2) Mirror output to update.log and wrap stdio for broken-pipe
# tolerance. Any failure here is non-fatal; we just skip the wrap.
try:
# Late-bound import so tests can monkeypatch
# hermes_cli.config.get_hermes_home to simulate setup failure.
from hermes_cli.config import get_hermes_home as _get_hermes_home
from hermes_cli.config import get_hermes_home
logs_dir = _get_hermes_home() / "logs"
logs_dir = get_hermes_home() / "logs"
logs_dir.mkdir(parents=True, exist_ok=True)
log_path = logs_dir / "update.log"
log_file = open(log_path, "a", buffering=1, encoding="utf-8")
@@ -5864,15 +5618,12 @@ def _cmd_update_impl(args, gateway_mode: bool):
# Write exit code *before* the gateway restart attempt.
# When running as ``hermes update --gateway`` (spawned by the gateway's
# /update command), this process lives inside the gateway's systemd
# cgroup. A graceful SIGUSR1 restart keeps the drain loop alive long
# enough for the exit-code marker to be written below, but the
# fallback ``systemctl restart`` path (see below) kills everything in
# the cgroup (KillMode=mixed → SIGKILL to remaining processes),
# including us and the wrapping bash shell. The shell never reaches
# its ``printf $status > .update_exit_code`` epilogue, so the
# exit-code marker file would never be created. The new gateway's
# update watcher would then poll for 30 minutes and send a spurious
# timeout message.
# cgroup. ``systemctl restart hermes-gateway`` kills everything in the
# cgroup (KillMode=mixed → SIGKILL to remaining processes), including
# us and the wrapping bash shell. The shell never reaches its
# ``printf $status > .update_exit_code`` epilogue, so the exit-code
# marker file is never created. The new gateway's update watcher then
# polls for 30 minutes and sends a spurious timeout message.
#
# Writing the marker here — after git pull + pip install succeed but
# before we attempt the restart — ensures the new gateway sees it
@@ -5894,37 +5645,9 @@ def _cmd_update_impl(args, gateway_mode: bool):
_ensure_user_systemd_env,
find_gateway_pids,
_get_service_pids,
_graceful_restart_via_sigusr1,
)
import signal as _signal
# Drain budget for graceful SIGUSR1 restarts. The gateway drains
# for up to ``agent.restart_drain_timeout`` (default 60s) before
# exiting with code 75; we wait slightly longer so the drain
# completes before we fall back to a hard restart. On older
# systemd units without SIGUSR1 wiring this wait just times out
# and we fall back to ``systemctl restart`` (the old behaviour).
try:
from hermes_constants import (
DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT as _DEFAULT_DRAIN,
)
except Exception:
_DEFAULT_DRAIN = 60.0
_cfg_drain = None
try:
from hermes_cli.config import load_config
_cfg_agent = (load_config().get("agent") or {})
_cfg_drain = _cfg_agent.get("restart_drain_timeout")
except Exception:
pass
try:
_drain_budget = float(_cfg_drain) if _cfg_drain is not None else float(_DEFAULT_DRAIN)
except (TypeError, ValueError):
_drain_budget = float(_DEFAULT_DRAIN)
# Add a 15s margin so the drain loop + final exit finish before
# we escalate to ``systemctl restart`` / SIGTERM.
_drain_budget = max(_drain_budget, 30.0) + 15.0
restarted_services = []
killed_pids = set()
@@ -5971,114 +5694,61 @@ def _cmd_update_impl(args, gateway_mode: bool):
text=True,
timeout=5,
)
if check.stdout.strip() != "active":
continue
# Prefer a graceful SIGUSR1 restart so in-flight
# agent runs drain instead of being SIGKILLed.
# The gateway's SIGUSR1 handler calls
# request_restart(via_service=True) → drain →
# exit(75); systemd's Restart=on-failure (and
# RestartForceExitStatus=75) respawns the unit.
_main_pid = 0
try:
_show = subprocess.run(
scope_cmd + [
"show", svc_name,
"--property=MainPID", "--value",
],
capture_output=True, text=True, timeout=5,
)
_main_pid = int((_show.stdout or "").strip() or 0)
except (ValueError, subprocess.TimeoutExpired, FileNotFoundError):
_main_pid = 0
_graceful_ok = False
if _main_pid > 0:
print(
f"{svc_name}: draining (up to {int(_drain_budget)}s)..."
)
_graceful_ok = _graceful_restart_via_sigusr1(
_main_pid, drain_timeout=_drain_budget,
)
if _graceful_ok:
# Gateway exited 75; systemd should relaunch
# via Restart=on-failure. Verify the new
# process came up.
_time.sleep(3)
verify = subprocess.run(
scope_cmd + ["is-active", svc_name],
capture_output=True, text=True, timeout=5,
)
if verify.stdout.strip() == "active":
restarted_services.append(svc_name)
continue
# Process exited but wasn't respawned (older
# unit without Restart=on-failure or
# RestartForceExitStatus=75). Fall through
# to systemctl start/restart.
print(
f"{svc_name} drained but didn't relaunch — forcing restart"
)
# Fallback: blunt systemctl restart. This is
# what the old code always did; we get here only
# when the graceful path failed (unit missing
# SIGUSR1 wiring, drain exceeded the budget,
# restart-policy mismatch).
restart = subprocess.run(
scope_cmd + ["restart", svc_name],
capture_output=True,
text=True,
timeout=15,
)
if restart.returncode == 0:
# Verify the service actually survived the
# restart. systemctl restart returns 0 even
# if the new process crashes immediately.
_time.sleep(3)
verify = subprocess.run(
scope_cmd + ["is-active", svc_name],
if check.stdout.strip() == "active":
restart = subprocess.run(
scope_cmd + ["restart", svc_name],
capture_output=True,
text=True,
timeout=5,
timeout=15,
)
if verify.stdout.strip() == "active":
restarted_services.append(svc_name)
else:
# Retry once — transient startup failures
# (stale module cache, import race) often
# resolve on the second attempt.
print(
f"{svc_name} died after restart, retrying..."
)
retry = subprocess.run(
scope_cmd + ["restart", svc_name],
capture_output=True,
text=True,
timeout=15,
)
if restart.returncode == 0:
# Verify the service actually survived the
# restart. systemctl restart returns 0 even
# if the new process crashes immediately.
import time as _time
_time.sleep(3)
verify2 = subprocess.run(
verify = subprocess.run(
scope_cmd + ["is-active", svc_name],
capture_output=True,
text=True,
timeout=5,
)
if verify2.stdout.strip() == "active":
if verify.stdout.strip() == "active":
restarted_services.append(svc_name)
print(f"{svc_name} recovered on retry")
else:
# Retry once — transient startup failures
# (stale module cache, import race) often
# resolve on the second attempt.
print(
f" {svc_name} failed to stay running after restart.\n"
f" Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n"
f" Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}"
f" {svc_name} died after restart, retrying..."
)
else:
print(
f" ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}"
)
retry = subprocess.run(
scope_cmd + ["restart", svc_name],
capture_output=True,
text=True,
timeout=15,
)
_time.sleep(3)
verify2 = subprocess.run(
scope_cmd + ["is-active", svc_name],
capture_output=True,
text=True,
timeout=5,
)
if verify2.stdout.strip() == "active":
restarted_services.append(svc_name)
print(f"{svc_name} recovered on retry")
else:
print(
f"{svc_name} failed to stay running after restart.\n"
f" Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n"
f" Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}"
)
else:
print(
f" ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}"
)
except (FileNotFoundError, subprocess.TimeoutExpired):
pass
@@ -6689,17 +6359,6 @@ For more help on a command:
default=False,
help="Run in an isolated git worktree (for parallel agents)",
)
parser.add_argument(
"--accept-hooks",
action="store_true",
default=False,
help=(
"Auto-approve any unseen shell hooks declared in config.yaml "
"without a TTY prompt. Equivalent to HERMES_ACCEPT_HOOKS=1 or "
"hooks_auto_accept: true in config.yaml. Use on CI / headless "
"runs that can't prompt."
),
)
parser.add_argument(
"--skills",
"-s",
@@ -6719,18 +6378,6 @@ For more help on a command:
default=False,
help="Include the session ID in the agent's system prompt",
)
parser.add_argument(
"--ignore-user-config",
action="store_true",
default=False,
help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded)",
)
parser.add_argument(
"--ignore-rules",
action="store_true",
default=False,
help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills",
)
parser.add_argument(
"--tui",
action="store_true",
@@ -6791,7 +6438,6 @@ For more help on a command:
"zai",
"kimi-coding",
"kimi-coding-cn",
"stepfun",
"minimax",
"minimax-cn",
"kilocode",
@@ -6835,16 +6481,6 @@ For more help on a command:
default=argparse.SUPPRESS,
help="Run in an isolated git worktree (for parallel agents on the same repo)",
)
chat_parser.add_argument(
"--accept-hooks",
action="store_true",
default=argparse.SUPPRESS,
help=(
"Auto-approve any unseen shell hooks declared in config.yaml "
"without a TTY prompt (see also HERMES_ACCEPT_HOOKS env var and "
"hooks_auto_accept: in config.yaml)."
),
)
chat_parser.add_argument(
"--checkpoints",
action="store_true",
@@ -6870,18 +6506,6 @@ For more help on a command:
default=argparse.SUPPRESS,
help="Include the session ID in the agent's system prompt",
)
chat_parser.add_argument(
"--ignore-user-config",
action="store_true",
default=argparse.SUPPRESS,
help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded). Useful for isolated CI runs, reproduction, and third-party integrations.",
)
chat_parser.add_argument(
"--ignore-rules",
action="store_true",
default=argparse.SUPPRESS,
help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills. Combine with --ignore-user-config for a fully isolated run.",
)
chat_parser.add_argument(
"--source",
default=None,
@@ -6976,8 +6600,6 @@ For more help on a command:
action="store_true",
help="Replace any existing gateway instance (useful for systemd)",
)
_add_accept_hooks_flag(gateway_run)
_add_accept_hooks_flag(gateway_parser)
# gateway start
gateway_start = gateway_subparsers.add_parser(
@@ -7025,12 +6647,6 @@ For more help on a command:
# gateway status
gateway_status = gateway_subparsers.add_parser("status", help="Show gateway status")
gateway_status.add_argument("--deep", action="store_true", help="Deep status check")
gateway_status.add_argument(
"-l",
"--full",
action="store_true",
help="Show full, untruncated service/log output where supported",
)
gateway_status.add_argument(
"--system",
action="store_true",
@@ -7348,7 +6964,6 @@ For more help on a command:
"run", help="Run a job on the next scheduler tick"
)
cron_run.add_argument("job_id", help="Job ID to trigger")
_add_accept_hooks_flag(cron_run)
cron_remove = cron_subparsers.add_parser(
"remove", aliases=["rm", "delete"], help="Remove a scheduled job"
@@ -7359,9 +6974,8 @@ For more help on a command:
cron_subparsers.add_parser("status", help="Check if cron scheduler is running")
# cron tick (mostly for debugging)
cron_tick = cron_subparsers.add_parser("tick", help="Run due jobs once and exit")
_add_accept_hooks_flag(cron_tick)
_add_accept_hooks_flag(cron_parser)
cron_subparsers.add_parser("tick", help="Run due jobs once and exit")
cron_parser.set_defaults(func=cmd_cron)
# =========================================================================
@@ -7428,67 +7042,6 @@ For more help on a command:
webhook_parser.set_defaults(func=cmd_webhook)
# =========================================================================
# hooks command — shell-hook inspection and management
# =========================================================================
hooks_parser = subparsers.add_parser(
"hooks",
help="Inspect and manage shell-script hooks",
description=(
"Inspect shell-script hooks declared in ~/.hermes/config.yaml, "
"test them against synthetic payloads, and manage the first-use "
"consent allowlist at ~/.hermes/shell-hooks-allowlist.json."
),
)
hooks_subparsers = hooks_parser.add_subparsers(dest="hooks_action")
hooks_subparsers.add_parser(
"list", aliases=["ls"],
help="List configured hooks with matcher, timeout, and consent status",
)
_hk_test = hooks_subparsers.add_parser(
"test",
help="Fire every hook matching <event> against a synthetic payload",
)
_hk_test.add_argument(
"event",
help="Hook event name (e.g. pre_tool_call, pre_llm_call, subagent_stop)",
)
_hk_test.add_argument(
"--for-tool", dest="for_tool", default=None,
help=(
"Only fire hooks whose matcher matches this tool name "
"(used for pre_tool_call / post_tool_call)"
),
)
_hk_test.add_argument(
"--payload-file", dest="payload_file", default=None,
help=(
"Path to a JSON file whose contents are merged into the "
"synthetic payload before execution"
),
)
_hk_revoke = hooks_subparsers.add_parser(
"revoke", aliases=["remove", "rm"],
help="Remove a command's allowlist entries (takes effect on next restart)",
)
_hk_revoke.add_argument(
"command",
help="The exact command string to revoke (as declared in config.yaml)",
)
hooks_subparsers.add_parser(
"doctor",
help=(
"Check each configured hook: exec bit, allowlist, mtime drift, "
"JSON validity, and synthetic run timing"
),
)
hooks_parser.set_defaults(func=cmd_hooks)
# =========================================================================
# doctor command
# =========================================================================
@@ -7896,17 +7449,6 @@ Examples:
action="store_true",
help="Remove existing plugin and reinstall",
)
_install_enable_group = plugins_install.add_mutually_exclusive_group()
_install_enable_group.add_argument(
"--enable",
action="store_true",
help="Auto-enable the plugin after install (skip confirmation prompt)",
)
_install_enable_group.add_argument(
"--no-enable",
action="store_true",
help="Install disabled (skip confirmation prompt); enable later with `hermes plugins enable <name>`",
)
plugins_update = plugins_subparsers.add_parser(
"update", help="Pull latest changes for an installed plugin"
@@ -7954,7 +7496,9 @@ Examples:
)
cmd_info["setup_fn"](plugin_parser)
except Exception as _exc:
logging.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc)
import logging as _log
_log.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc)
# =========================================================================
# memory command
@@ -8160,7 +7704,6 @@ Examples:
action="store_true",
help="Enable verbose logging on stderr",
)
_add_accept_hooks_flag(mcp_serve_p)
mcp_add_p = mcp_sub.add_parser(
"add", help="Add an MCP server (discovery-first install)"
@@ -8199,8 +7742,6 @@ Examples:
)
mcp_login_p.add_argument("name", help="Server name to re-authenticate")
_add_accept_hooks_flag(mcp_parser)
def cmd_mcp(args):
from hermes_cli.mcp_config import mcp_command
@@ -8339,6 +7880,7 @@ Examples:
return
line = _json.dumps(data, ensure_ascii=False) + "\n"
if args.output == "-":
import sys
sys.stdout.write(line)
else:
@@ -8348,6 +7890,7 @@ Examples:
else:
sessions = db.export_all(source=args.source)
if args.output == "-":
import sys
for s in sessions:
sys.stdout.write(_json.dumps(s, ensure_ascii=False) + "\n")
@@ -8418,6 +7961,8 @@ Examples:
# Launch hermes --resume <id> by replacing the current process
print(f"Resuming session: {selected_id}")
import shutil
hermes_bin = shutil.which("hermes")
if hermes_bin:
os.execvp(hermes_bin, ["hermes", "--resume", selected_id])
@@ -8608,7 +8153,6 @@ Examples:
help="Run Hermes Agent as an ACP (Agent Client Protocol) server",
description="Start Hermes Agent in ACP mode for editor integration (VS Code, Zed, JetBrains)",
)
_add_accept_hooks_flag(acp_parser)
def cmd_acp(args):
"""Launch Hermes Agent as an ACP server."""
@@ -8882,42 +8426,6 @@ Examples:
cmd_version(args)
return
# Discover Python plugins and register shell hooks once, before any
# command that can fire lifecycle hooks. Both are idempotent; gated
# so introspection/management commands (hermes hooks list, cron
# list, gateway status, mcp add, ...) don't pay discovery cost or
# trigger consent prompts for hooks the user is still inspecting.
# Groups with mixed admin/CRUD vs. agent-running entries narrow via
# the nested subcommand (dest varies by parser).
_AGENT_COMMANDS = {None, "chat", "acp", "rl"}
_AGENT_SUBCOMMANDS = {
"cron": ("cron_command", {"run", "tick"}),
"gateway": ("gateway_command", {"run"}),
"mcp": ("mcp_action", {"serve"}),
}
_sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
if (
args.command in _AGENT_COMMANDS
or (_sub_attr and getattr(args, _sub_attr, None) in _sub_set)
):
_accept_hooks = bool(getattr(args, "accept_hooks", False))
try:
from hermes_cli.plugins import discover_plugins
discover_plugins()
except Exception:
logger.debug(
"plugin discovery failed at CLI startup", exc_info=True,
)
try:
from hermes_cli.config import load_config
from agent.shell_hooks import register_from_config
register_from_config(load_config(), accept_hooks=_accept_hooks)
except Exception:
logger.debug(
"shell-hook registration failed at CLI startup",
exc_info=True,
)
# Handle top-level --resume / --continue as shortcut to chat
if (args.resume or args.continue_last) and args.command is None:
args.command = "chat"
+49 -238
View File
@@ -143,7 +143,7 @@ MODEL_ALIASES: dict[str, ModelIdentity] = {
# Z.AI / GLM
"glm": ModelIdentity("z-ai", "glm"),
# Step Plan (StepFun)
# StepFun
"step": ModelIdentity("stepfun", "step"),
# Xiaomi
@@ -304,113 +304,6 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
# Alias resolution
# ---------------------------------------------------------------------------
def _model_sort_key(model_id: str, prefix: str) -> tuple:
"""Sort key for model version preference.
Extracts version numbers after the family prefix and returns a sort key
that prefers higher versions. Suffix tokens (``pro``, ``omni``, etc.)
are used as tiebreakers, with common quality indicators ranked.
Examples (with prefix ``"mimo"``)::
mimo-v2.5-pro (-2.5, 0, 'pro') # highest version wins
mimo-v2.5 (-2.5, 1, '') # no suffix = lower than pro
mimo-v2-pro (-2.0, 0, 'pro')
mimo-v2-omni (-2.0, 1, 'omni')
mimo-v2-flash (-2.0, 1, 'flash')
"""
# Strip the prefix (and optional "/" separator for aggregator slugs)
rest = model_id[len(prefix):]
if rest.startswith("/"):
rest = rest[1:]
rest = rest.lstrip("-").strip()
# Parse version and suffix from the remainder.
# "v2.5-pro" → version [2.5], suffix "pro"
# "-omni" → version [], suffix "omni"
# State machine: start → in_version → between → in_suffix
nums: list[float] = []
suffix_buf = ""
state = "start"
num_buf = ""
for ch in rest:
if state == "start":
if ch in "vV":
state = "in_version"
elif ch.isdigit():
state = "in_version"
num_buf += ch
elif ch in "-_.":
pass # skip separators before any content
else:
state = "in_suffix"
suffix_buf += ch
elif state == "in_version":
if ch.isdigit():
num_buf += ch
elif ch == ".":
if "." in num_buf:
# Second dot — flush current number, start new component
try:
nums.append(float(num_buf.rstrip(".")))
except ValueError:
pass
num_buf = ""
else:
num_buf += ch
elif ch in "-_.":
if num_buf:
try:
nums.append(float(num_buf.rstrip(".")))
except ValueError:
pass
num_buf = ""
state = "between"
else:
if num_buf:
try:
nums.append(float(num_buf.rstrip(".")))
except ValueError:
pass
num_buf = ""
state = "in_suffix"
suffix_buf += ch
elif state == "between":
if ch.isdigit():
state = "in_version"
num_buf = ch
elif ch in "vV":
state = "in_version"
elif ch in "-_.":
pass
else:
state = "in_suffix"
suffix_buf += ch
elif state == "in_suffix":
suffix_buf += ch
# Flush remaining buffer (strip trailing dots — "5.4." → "5.4")
if num_buf and state == "in_version":
try:
nums.append(float(num_buf.rstrip(".")))
except ValueError:
pass
suffix = suffix_buf.lower().strip("-_.")
suffix = suffix.strip()
# Negate versions so higher → sorts first
version_key = tuple(-n for n in nums)
# Suffix quality ranking: pro/max > (no suffix) > omni/flash/mini/lite
# Lower number = preferred
_SUFFIX_RANK = {"pro": 0, "max": 0, "plus": 0, "turbo": 0}
suffix_rank = _SUFFIX_RANK.get(suffix, 1)
return version_key + (suffix_rank, suffix)
def resolve_alias(
raw_input: str,
current_provider: str,
@@ -418,9 +311,9 @@ def resolve_alias(
"""Resolve a short alias against the current provider's catalog.
Looks up *raw_input* in :data:`MODEL_ALIASES`, then searches the
current provider's models.dev catalog for the model whose ID starts
with ``vendor/family`` (or just ``family`` for non-aggregator
providers) and has the **highest version**.
current provider's models.dev catalog for the first model whose ID
starts with ``vendor/family`` (or just ``family`` for non-aggregator
providers).
Returns:
``(provider, resolved_model_id, alias_name)`` if a match is
@@ -448,44 +341,28 @@ def resolve_alias(
vendor, family = identity
# Build catalog from models.dev, then merge in static _PROVIDER_MODELS
# entries that models.dev may be missing (e.g. newly added models not
# yet synced to the registry).
# Search the provider's catalog from models.dev
catalog = list_provider_models(current_provider)
try:
from hermes_cli.models import _PROVIDER_MODELS
static = _PROVIDER_MODELS.get(current_provider, [])
if static:
seen = {m.lower() for m in catalog}
for m in static:
if m.lower() not in seen:
catalog.append(m)
except Exception:
pass
if not catalog:
return None
# For aggregators, models are vendor/model-name format
aggregator = is_aggregator(current_provider)
if aggregator:
prefix = f"{vendor}/{family}".lower()
matches = [
mid for mid in catalog
if mid.lower().startswith(prefix)
]
else:
family_lower = family.lower()
matches = [
mid for mid in catalog
if mid.lower().startswith(family_lower)
]
for model_id in catalog:
mid_lower = model_id.lower()
if aggregator:
# Match vendor/family prefix -- e.g. "anthropic/claude-sonnet"
prefix = f"{vendor}/{family}".lower()
if mid_lower.startswith(prefix):
return (current_provider, model_id, key)
else:
# Non-aggregator: bare names -- e.g. "claude-sonnet-4-6"
family_lower = family.lower()
if mid_lower.startswith(family_lower):
return (current_provider, model_id, key)
if not matches:
return None
# Sort by version descending — prefer the latest/highest version
prefix_for_sort = f"{vendor}/{family}" if aggregator else family
matches.sort(key=lambda m: _model_sort_key(m, prefix_for_sort))
return (current_provider, matches[0], key)
return None
def get_authenticated_provider_slugs(
@@ -801,7 +678,6 @@ def switch_model(
_da = DIRECT_ALIASES.get(resolved_alias)
if _da is not None and _da.base_url:
base_url = _da.base_url
api_mode = "" # clear so determine_api_mode re-detects from URL
if not api_key:
api_key = "no-key-required"
@@ -905,7 +781,6 @@ def switch_model(
def list_authenticated_providers(
current_provider: str = "",
current_base_url: str = "",
user_providers: dict = None,
custom_providers: list | None = None,
max_models: int = 8,
@@ -934,10 +809,7 @@ def list_authenticated_providers(
get_provider_info as _mdev_pinfo,
)
from hermes_cli.auth import PROVIDER_REGISTRY
from hermes_cli.models import (
OPENROUTER_MODELS, _PROVIDER_MODELS,
_MODELS_DEV_PREFERRED, _merge_with_models_dev,
)
from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS
results: List[dict] = []
seen_slugs: set = set() # lowercase-normalized to catch case variants (#9545)
@@ -971,10 +843,6 @@ def list_authenticated_providers(
# source of truth. models.dev can have wrong mappings (e.g.
# minimax-cn → MINIMAX_API_KEY instead of MINIMAX_CN_API_KEY).
pconfig = PROVIDER_REGISTRY.get(hermes_id)
# Skip non-API-key auth providers here — they are handled in
# section 2 (HERMES_OVERLAYS) with proper auth store checking.
if pconfig and pconfig.auth_type != "api_key":
continue
if pconfig and pconfig.api_key_env_vars:
env_vars = list(pconfig.api_key_env_vars)
else:
@@ -987,13 +855,8 @@ def list_authenticated_providers(
if not has_creds:
continue
# Use curated list, falling back to models.dev if no curated list.
# For preferred providers, merge models.dev entries into the curated
# catalog so newly released models (e.g. mimo-v2.5-pro on opencode-go)
# show up in the picker without requiring a Hermes release.
# Use curated list, falling back to models.dev if no curated list
model_ids = curated.get(hermes_id, [])
if hermes_id in _MODELS_DEV_PREFERRED:
model_ids = _merge_with_models_dev(hermes_id, model_ids)
total = len(model_ids)
top = model_ids[:max_models]
@@ -1097,9 +960,6 @@ def list_authenticated_providers(
# Use curated list — look up by Hermes slug, fall back to overlay key
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
# Merge with models.dev for preferred providers (same rationale as above).
if hermes_slug in _MODELS_DEV_PREFERRED:
model_ids = _merge_with_models_dev(hermes_slug, model_ids)
total = len(model_ids)
top = model_ids[:max_models]
@@ -1235,7 +1095,6 @@ def list_authenticated_providers(
"api_url": api_url,
})
seen_slugs.add(ep_name.lower())
seen_slugs.add(custom_provider_slug(display_name).lower())
_pair = (
str(display_name).strip().lower(),
str(api_url).strip().rstrip("/").lower(),
@@ -1245,113 +1104,66 @@ def list_authenticated_providers(
# --- 4. Saved custom providers from config ---
# Each ``custom_providers`` entry represents one model under a named
# provider. Entries sharing the same endpoint (``base_url`` + ``api_key``)
# are grouped into a single picker row, so e.g. four Ollama entries
# pointing at ``http://localhost:11434/v1`` with per-model display names
# ("Ollama — GLM 5.1", "Ollama — Qwen3-coder", ...) appear as one
# "Ollama" row with four models inside instead of four near-duplicates
# that differ only by suffix. Entries with distinct endpoints still
# produce separate rows.
#
# When the grouped endpoint matches ``current_base_url`` the group's
# slug becomes ``current_provider`` so that selecting a model from the
# picker flows back through the runtime provider that already holds
# valid credentials — no re-resolution needed.
# provider. Entries sharing the same provider name are grouped into a
# single picker row so that e.g. four Ollama Cloud entries
# (qwen3-coder, glm-5.1, kimi-k2, minimax-m2.7) appear as one
# "Ollama Cloud" row with four models inside instead of four
# duplicate "Ollama Cloud" rows. Entries with distinct provider names
# still produce separate rows (e.g. Ollama Cloud vs Moonshot).
if custom_providers and isinstance(custom_providers, list):
from collections import OrderedDict
# Key by (base_url, api_key) instead of slug: names frequently
# differ per model ("Ollama — X") while the endpoint stays the
# same. Slug-based grouping left them as separate rows.
groups: "OrderedDict[tuple, dict]" = OrderedDict()
groups: "OrderedDict[str, dict]" = OrderedDict()
for entry in custom_providers:
if not isinstance(entry, dict):
continue
raw_name = (entry.get("name") or "").strip()
display_name = (entry.get("name") or "").strip()
api_url = (
entry.get("base_url", "")
or entry.get("url", "")
or entry.get("api", "")
or ""
).strip().rstrip("/")
if not raw_name or not api_url:
).strip()
if not display_name or not api_url:
continue
api_key = (entry.get("api_key") or "").strip()
group_key = (api_url, api_key)
if group_key not in groups:
# Strip per-model suffix so "Ollama — GLM 5.1" becomes
# "Ollama" for the grouped row. Em dash is the convention
# Hermes's own writer uses; a hyphen variant is accepted
# for hand-edited configs.
display_name = raw_name
for sep in ("", " - "):
if sep in display_name:
display_name = display_name.split(sep)[0].strip()
break
if not display_name:
display_name = raw_name
# If this endpoint matches the currently active one, use
# ``current_provider`` as the slug so picker-driven switches
# route through the live credential pipeline.
if (
current_base_url
and api_url == current_base_url.strip().rstrip("/")
):
slug = current_provider or custom_provider_slug(display_name)
else:
slug = custom_provider_slug(display_name)
groups[group_key] = {
"slug": slug,
slug = custom_provider_slug(display_name)
if slug not in groups:
groups[slug] = {
"name": display_name,
"api_url": api_url,
"models": [],
}
# The singular ``model:`` field only holds the currently
# active model. Hermes's own writer (main.py::_save_custom_provider)
# stores every configured model as a dict under ``models:``;
# downstream readers (agent/models_dev.py, gateway/run.py,
# run_agent.py, hermes_cli/config.py) already consume that dict.
# The /model picker previously ignored it, so multi-model
# custom providers appeared to have only the active model.
default_model = (entry.get("model") or "").strip()
if default_model and default_model not in groups[group_key]["models"]:
groups[group_key]["models"].append(default_model)
if default_model and default_model not in groups[slug]["models"]:
groups[slug]["models"].append(default_model)
cfg_models = entry.get("models", {})
if isinstance(cfg_models, dict):
for m in cfg_models:
if m and m not in groups[group_key]["models"]:
groups[group_key]["models"].append(m)
if m and m not in groups[slug]["models"]:
groups[slug]["models"].append(m)
elif isinstance(cfg_models, list):
for m in cfg_models:
if m and m not in groups[group_key]["models"]:
groups[group_key]["models"].append(m)
if m and m not in groups[slug]["models"]:
groups[slug]["models"].append(m)
_section4_emitted_slugs: set = set()
for grp in groups.values():
slug = grp["slug"]
# If the slug is already claimed by a built-in / overlay /
# user-provider row (sections 1-3), skip this custom group
# to avoid shadowing a real provider.
if slug.lower() in seen_slugs and slug.lower() not in _section4_emitted_slugs:
for slug, grp in groups.items():
if slug.lower() in seen_slugs:
continue
# If a prior section-4 group already used this slug (two custom
# endpoints with the same cleaned name — e.g. two OpenAI-
# compatible gateways named identically with different keys),
# append a counter so both rows stay visible in the picker.
if slug.lower() in _section4_emitted_slugs:
base_slug = slug
n = 2
while f"{base_slug}-{n}".lower() in seen_slugs:
n += 1
slug = f"{base_slug}-{n}"
grp["slug"] = slug
# Skip if section 3 already emitted this endpoint under its
# ``providers:`` dict key — matches on (display_name, base_url).
# Prevents two picker rows labelled identically when callers
# pass both ``user_providers`` and a compatibility-merged
# ``custom_providers`` list.
# ``providers:`` dict key — matches on (display_name, base_url),
# the tuple section 4 groups by. Prevents two picker rows
# labelled identically when callers pass both ``user_providers``
# and a compatibility-merged ``custom_providers`` list.
_pair_key = (
str(grp["name"]).strip().lower(),
str(grp["api_url"]).strip().rstrip("/").lower(),
@@ -1369,7 +1181,6 @@ def list_authenticated_providers(
"api_url": grp["api_url"],
})
seen_slugs.add(slug.lower())
_section4_emitted_slugs.add(slug.lower())
# Sort: current provider first, then by model count descending
results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))
+77 -557
View File
@@ -16,12 +16,6 @@ from difflib import get_close_matches
from pathlib import Path
from typing import Any, NamedTuple, Optional
from hermes_cli import __version__ as _HERMES_VERSION
# Identify ourselves so endpoints fronted by Cloudflare's Browser Integrity
# Check (error 1010) don't reject the default ``Python-urllib/*`` signature.
_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"
COPILOT_BASE_URL = "https://api.githubcopilot.com"
COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models"
COPILOT_EDITOR_VERSION = "vscode/1.104.1"
@@ -32,7 +26,7 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
# Fallback OpenRouter snapshot used when the live catalog is unavailable.
# (model_id, display description shown in menus)
OPENROUTER_MODELS: list[tuple[str, str]] = [
("moonshotai/kimi-k2.6", "recommended"),
("moonshotai/kimi-k2.5", "recommended"),
("anthropic/claude-opus-4.7", ""),
("anthropic/claude-opus-4.6", ""),
("anthropic/claude-sonnet-4.6", ""),
@@ -42,8 +36,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
("openrouter/elephant-alpha", "free"),
("openai/gpt-5.4", ""),
("openai/gpt-5.4-mini", ""),
("xiaomi/mimo-v2.5-pro", ""),
("xiaomi/mimo-v2.5", ""),
("xiaomi/mimo-v2-pro", ""),
("openai/gpt-5.3-codex", ""),
("google/gemini-3-pro-image-preview", ""),
("google/gemini-3-flash-preview", ""),
@@ -54,7 +47,6 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
("stepfun/step-3.5-flash", ""),
("minimax/minimax-m2.7", ""),
("minimax/minimax-m2.5", ""),
("minimax/minimax-m2.5:free", "free"),
("z-ai/glm-5.1", ""),
("z-ai/glm-5v-turbo", ""),
("z-ai/glm-5-turbo", ""),
@@ -70,31 +62,6 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
_openrouter_catalog_cache: list[tuple[str, str]] | None = None
# Fallback Vercel AI Gateway snapshot used when the live catalog is unavailable.
# OSS / open-weight models prioritized first, then closed-source by family.
# Slugs match Vercel's actual /v1/models catalog (e.g. alibaba/ for Qwen,
# zai/ and xai/ without hyphens).
VERCEL_AI_GATEWAY_MODELS: list[tuple[str, str]] = [
("moonshotai/kimi-k2.6", "recommended"),
("alibaba/qwen3.6-plus", ""),
("zai/glm-5.1", ""),
("minimax/minimax-m2.7", ""),
("anthropic/claude-sonnet-4.6", ""),
("anthropic/claude-opus-4.7", ""),
("anthropic/claude-opus-4.6", ""),
("anthropic/claude-haiku-4.5", ""),
("openai/gpt-5.4", ""),
("openai/gpt-5.4-mini", ""),
("openai/gpt-5.3-codex", ""),
("google/gemini-3.1-pro-preview", ""),
("google/gemini-3-flash", ""),
("google/gemini-3.1-flash-lite-preview", ""),
("xai/grok-4.20-reasoning", ""),
]
_ai_gateway_catalog_cache: list[tuple[str, str]] | None = None
def _codex_curated_models() -> list[str]:
"""Derive the openai-codex curated list from codex_models.py.
@@ -108,9 +75,8 @@ def _codex_curated_models() -> list[str]:
_PROVIDER_MODELS: dict[str, list[str]] = {
"nous": [
"moonshotai/kimi-k2.6",
"xiaomi/mimo-v2.5-pro",
"xiaomi/mimo-v2.5",
"moonshotai/kimi-k2.5",
"xiaomi/mimo-v2-pro",
"anthropic/claude-opus-4.7",
"anthropic/claude-opus-4.6",
"anthropic/claude-sonnet-4.6",
@@ -128,15 +94,17 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"stepfun/step-3.5-flash",
"minimax/minimax-m2.7",
"minimax/minimax-m2.5",
"minimax/minimax-m2.5:free",
"z-ai/glm-5.1",
"z-ai/glm-5v-turbo",
"z-ai/glm-5-turbo",
"x-ai/grok-4.20-beta",
"nvidia/nemotron-3-super-120b-a12b",
"nvidia/nemotron-3-super-120b-a12b:free",
"arcee-ai/trinity-large-preview:free",
"arcee-ai/trinity-large-thinking",
"openai/gpt-5.4-pro",
"openai/gpt-5.4-nano",
"openrouter/elephant-alpha",
],
"openai-codex": _codex_curated_models(),
"copilot-acp": [
@@ -191,13 +159,12 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
# (map to OpenRouter defaults — users get familiar picks on NIM)
"qwen/qwen3.5-397b-a17b",
"deepseek-ai/deepseek-v3.2",
"moonshotai/kimi-k2.6",
"moonshotai/kimi-k2.5",
"minimaxai/minimax-m2.5",
"z-ai/glm5",
"openai/gpt-oss-120b",
],
"kimi-coding": [
"kimi-k2.6",
"kimi-k2.5",
"kimi-for-coding",
"kimi-k2-thinking",
@@ -206,18 +173,12 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"kimi-k2-0905-preview",
],
"kimi-coding-cn": [
"kimi-k2.6",
"kimi-k2.5",
"kimi-k2-thinking",
"kimi-k2-turbo-preview",
"kimi-k2-0905-preview",
],
"stepfun": [
"step-3.5-flash",
"step-3.5-flash-2603",
],
"moonshot": [
"kimi-k2.6",
"kimi-k2.5",
"kimi-k2-thinking",
"kimi-k2-turbo-preview",
@@ -250,8 +211,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"deepseek-reasoner",
],
"xiaomi": [
"mimo-v2.5-pro",
"mimo-v2.5",
"mimo-v2-pro",
"mimo-v2-omni",
"mimo-v2-flash",
@@ -266,6 +225,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"gpt-5.4-pro",
"gpt-5.4",
"gpt-5.3-codex",
"gpt-5.3-codex-spark",
"gpt-5.2",
"gpt-5.2-codex",
"gpt-5.1",
@@ -299,18 +259,27 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"big-pickle",
],
"opencode-go": [
"kimi-k2.6",
"kimi-k2.5",
"glm-5.1",
"glm-5",
"mimo-v2.5-pro",
"mimo-v2.5",
"mimo-v2-pro",
"mimo-v2-omni",
"minimax-m2.7",
"minimax-m2.5",
"qwen3.6-plus",
"qwen3.5-plus",
],
"ai-gateway": [
"anthropic/claude-opus-4.6",
"anthropic/claude-sonnet-4.6",
"anthropic/claude-sonnet-4.5",
"anthropic/claude-haiku-4.5",
"openai/gpt-5",
"openai/gpt-4.1",
"openai/gpt-4.1-mini",
"google/gemini-3-pro-preview",
"google/gemini-3-flash",
"google/gemini-2.5-pro",
"google/gemini-2.5-flash",
"deepseek/deepseek-v3.2",
],
"kilocode": [
"anthropic/claude-opus-4.6",
@@ -344,7 +313,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"zai-org/GLM-5",
"XiaomiMiMo/MiMo-V2-Flash",
"moonshotai/Kimi-K2-Thinking",
"moonshotai/Kimi-K2.6",
],
# AWS Bedrock — static fallback list used when dynamic discovery is
# unavailable (no boto3, no credentials, or API error). The agent
@@ -364,18 +332,18 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
],
}
# Vercel AI Gateway: derive the bare-model-id catalog from the curated
# ``VERCEL_AI_GATEWAY_MODELS`` snapshot so both the picker (tuples with descriptions)
# and the static fallback catalog (bare ids) stay in sync from a single
# source of truth.
_PROVIDER_MODELS["ai-gateway"] = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS]
# ---------------------------------------------------------------------------
# Nous Portal free-model helper
# Nous Portal free-model filtering
# ---------------------------------------------------------------------------
# The Nous Portal models endpoint is the source of truth for which models
# are currently offered (free or paid). We trust whatever it returns and
# surface it to users as-is — no local allowlist filtering.
# Models that are ALLOWED to appear when priced as free on Nous Portal.
# Any other free model is hidden — prevents promotional/temporary free models
# from cluttering the selection when users are paying subscribers.
# Models in this list are ALSO filtered out if they are NOT free (i.e. they
# should only appear in the menu when they are genuinely free).
_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({
"xiaomi/mimo-v2-pro",
"xiaomi/mimo-v2-omni",
})
def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
@@ -389,6 +357,35 @@ def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
return False
def filter_nous_free_models(
model_ids: list[str],
pricing: dict[str, dict[str, str]],
) -> list[str]:
"""Filter the Nous Portal model list according to free-model policy.
Rules:
Paid models that are NOT in the allowlist keep (normal case).
Free models that are NOT in the allowlist drop.
Allowlist models that ARE free keep.
Allowlist models that are NOT free drop.
"""
if not pricing:
return model_ids # no pricing data — can't filter, show everything
result: list[str] = []
for mid in model_ids:
free = _is_model_free(mid, pricing)
if mid in _NOUS_ALLOWED_FREE_MODELS:
# Allowlist model: only show when it's actually free
if free:
result.append(mid)
else:
# Regular model: keep only when it's NOT free
if not free:
result.append(mid)
return result
# ---------------------------------------------------------------------------
# Nous Portal account tier detection
# ---------------------------------------------------------------------------
@@ -452,7 +449,8 @@ def partition_nous_models_by_tier(
) -> tuple[list[str], list[str]]:
"""Split Nous models into (selectable, unavailable) based on user tier.
For paid-tier users: all models are selectable, none unavailable.
For paid-tier users: all models are selectable, none unavailable
(free-model filtering is handled separately by ``filter_nous_free_models``).
For free-tier users: only free models are selectable; paid models
are returned as unavailable (shown grayed out in the menu).
@@ -491,6 +489,8 @@ def check_nous_free_tier() -> bool:
Returns False (assume paid) on any error never blocks paying users.
"""
global _free_tier_cache
import time
now = time.monotonic()
if _free_tier_cache is not None:
cached_result, cached_at = _free_tier_cache
@@ -522,157 +522,6 @@ def check_nous_free_tier() -> bool:
return False # default to paid on error — don't block users
# ---------------------------------------------------------------------------
# Nous Portal recommended models
#
# The Portal publishes a curated list of suggested models (separated into
# paid and free tiers) plus dedicated recommendations for compaction (text
# summarisation / auxiliary) and vision tasks. We fetch it once per process
# with a TTL cache so callers can ask "what's the best aux model right now?"
# without hitting the network on every lookup.
#
# Shape of the response (fields we care about):
# {
# "paidRecommendedModels": [ {modelName, ...}, ... ],
# "freeRecommendedModels": [ {modelName, ...}, ... ],
# "paidRecommendedCompactionModel": {modelName, ...} | null,
# "paidRecommendedVisionModel": {modelName, ...} | null,
# "freeRecommendedCompactionModel": {modelName, ...} | null,
# "freeRecommendedVisionModel": {modelName, ...} | null,
# }
# ---------------------------------------------------------------------------
NOUS_RECOMMENDED_MODELS_PATH = "/api/nous/recommended-models"
_NOUS_RECOMMENDED_CACHE_TTL: int = 600 # seconds (10 minutes)
# (result_dict, timestamp) keyed by portal_base_url so staging vs prod don't collide.
_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {}
def fetch_nous_recommended_models(
portal_base_url: str = "",
timeout: float = 5.0,
*,
force_refresh: bool = False,
) -> dict[str, Any]:
"""Fetch the Nous Portal's curated recommended-models payload.
Hits ``<portal>/api/nous/recommended-models``. The endpoint is public
no auth is required. Results are cached per portal URL for
``_NOUS_RECOMMENDED_CACHE_TTL`` seconds; pass ``force_refresh=True`` to
bypass the cache.
Returns the parsed JSON dict on success, or ``{}`` on any failure
(network, parse, non-2xx). Callers must treat missing/null fields as
"no recommendation" and fall back to their own default.
"""
base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")
now = time.monotonic()
cached = _nous_recommended_cache.get(base)
if not force_refresh and cached is not None:
payload, cached_at = cached
if now - cached_at < _NOUS_RECOMMENDED_CACHE_TTL:
return payload
url = f"{base}{NOUS_RECOMMENDED_MODELS_PATH}"
try:
req = urllib.request.Request(
url,
headers={"Accept": "application/json"},
)
with urllib.request.urlopen(req, timeout=timeout) as resp:
data = json.loads(resp.read().decode())
if not isinstance(data, dict):
data = {}
except Exception:
data = {}
_nous_recommended_cache[base] = (data, now)
return data
def _resolve_nous_portal_url() -> str:
"""Best-effort lookup of the Portal base URL the user is authed against."""
try:
from hermes_cli.auth import (
DEFAULT_NOUS_PORTAL_URL,
get_provider_auth_state,
)
state = get_provider_auth_state("nous") or {}
portal = str(state.get("portal_base_url") or "").strip()
if portal:
return portal.rstrip("/")
return str(DEFAULT_NOUS_PORTAL_URL).rstrip("/")
except Exception:
return "https://portal.nousresearch.com"
def _extract_model_name(entry: Any) -> Optional[str]:
"""Pull the ``modelName`` field from a recommended-model entry, else None."""
if not isinstance(entry, dict):
return None
model_name = entry.get("modelName")
if isinstance(model_name, str) and model_name.strip():
return model_name.strip()
return None
def get_nous_recommended_aux_model(
*,
vision: bool = False,
free_tier: Optional[bool] = None,
portal_base_url: str = "",
force_refresh: bool = False,
) -> Optional[str]:
"""Return the Portal's recommended model name for an auxiliary task.
Picks the best field from the Portal's recommended-models payload:
* ``vision=True`` ``paidRecommendedVisionModel`` (paid tier) or
``freeRecommendedVisionModel`` (free tier)
* ``vision=False`` ``paidRecommendedCompactionModel`` or
``freeRecommendedCompactionModel``
When ``free_tier`` is ``None`` (default) the user's tier is auto-detected
via :func:`check_nous_free_tier`. Pass an explicit bool to bypass the
detection useful for tests or when the caller already knows the tier.
For paid-tier users we prefer the paid recommendation but gracefully fall
back to the free recommendation if the Portal returned ``null`` for the
paid field (common during the staged rollout of new paid models).
Returns ``None`` when every candidate is missing, null, or the fetch
fails callers should fall back to their own default (currently
``google/gemini-3-flash-preview``).
"""
base = portal_base_url or _resolve_nous_portal_url()
payload = fetch_nous_recommended_models(base, force_refresh=force_refresh)
if not payload:
return None
if free_tier is None:
try:
free_tier = check_nous_free_tier()
except Exception:
# On any detection error, assume paid — paid users see both fields
# anyway so this is a safe default that maximises model quality.
free_tier = False
if vision:
paid_key, free_key = "paidRecommendedVisionModel", "freeRecommendedVisionModel"
else:
paid_key, free_key = "paidRecommendedCompactionModel", "freeRecommendedCompactionModel"
# Preference order:
# free tier → free only
# paid tier → paid, then free (if paid field is null)
candidates = [free_key] if free_tier else [paid_key, free_key]
for key in candidates:
name = _extract_model_name(payload.get(key))
if name:
return name
return None
# ---------------------------------------------------------------------------
# Canonical provider list — single source of truth for provider identity.
# Every code path that lists, displays, or iterates providers derives from
@@ -693,10 +542,9 @@ class ProviderEntry(NamedTuple):
CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"),
ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"),
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
@@ -709,7 +557,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("zai", "Z.AI / GLM", "Z.AI / GLM (Zhipu AI direct API)"),
ProviderEntry("kimi-coding", "Kimi / Kimi Coding Plan", "Kimi Coding Plan (api.kimi.com) & Moonshot API"),
ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"),
ProviderEntry("stepfun", "StepFun Step Plan", "StepFun Step Plan (agent/coding models via Step Plan API)"),
ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"),
ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"),
ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
@@ -718,6 +565,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, pay-per-use)"),
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
]
@@ -744,8 +592,6 @@ _PROVIDER_ALIASES = {
"moonshot": "kimi-coding",
"kimi-cn": "kimi-coding-cn",
"moonshot-cn": "kimi-coding-cn",
"step": "stepfun",
"stepfun-coding-plan": "stepfun",
"arcee-ai": "arcee",
"arceeai": "arcee",
"minimax-china": "minimax-cn",
@@ -815,31 +661,6 @@ def _openrouter_model_is_free(pricing: Any) -> bool:
return False
def _openrouter_model_supports_tools(item: Any) -> bool:
"""Return True when the model's ``supported_parameters`` advertise tool calling.
hermes-agent is tool-calling-first every provider path assumes the model
can invoke tools. Models that don't advertise ``tools`` in their
``supported_parameters`` (e.g. image-only or completion-only models) cannot
be driven by the agent loop and would fail at the first tool call.
**Permissive when the field is missing.** Some OpenRouter-compatible gateways
(Nous Portal, private mirrors, older catalog snapshots) don't populate
``supported_parameters`` at all. Treat that as "unknown capability → allow"
so the picker doesn't silently empty for those users. Only hide models
whose ``supported_parameters`` is an explicit list that omits ``tools``.
Ported from Kilo-Org/kilocode#9068.
"""
if not isinstance(item, dict):
return True
params = item.get("supported_parameters")
if not isinstance(params, list):
# Field absent / malformed / None — be permissive.
return True
return "tools" in params
def fetch_openrouter_models(
timeout: float = 8.0,
*,
@@ -882,11 +703,6 @@ def fetch_openrouter_models(
live_item = live_by_id.get(preferred_id)
if live_item is None:
continue
# Hide models that don't advertise tool-calling support — hermes-agent
# requires it and surfacing them leads to immediate runtime failures
# when the user selects them. Ported from Kilo-Org/kilocode#9068.
if not _openrouter_model_supports_tools(live_item):
continue
desc = "free" if _openrouter_model_is_free(live_item.get("pricing")) else ""
curated.append((preferred_id, desc))
@@ -904,93 +720,6 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]
def _ai_gateway_model_is_free(pricing: Any) -> bool:
"""Return True if an AI Gateway model has $0 input AND output pricing."""
if not isinstance(pricing, dict):
return False
try:
return float(pricing.get("input", "0")) == 0 and float(pricing.get("output", "0")) == 0
except (TypeError, ValueError):
return False
def fetch_ai_gateway_models(
timeout: float = 8.0,
*,
force_refresh: bool = False,
) -> list[tuple[str, str]]:
"""Return the curated AI Gateway picker list, refreshed from the live catalog when possible."""
global _ai_gateway_catalog_cache
if _ai_gateway_catalog_cache is not None and not force_refresh:
return list(_ai_gateway_catalog_cache)
from hermes_constants import AI_GATEWAY_BASE_URL
fallback = list(VERCEL_AI_GATEWAY_MODELS)
preferred_ids = [mid for mid, _ in fallback]
try:
req = urllib.request.Request(
f"{AI_GATEWAY_BASE_URL.rstrip('/')}/models",
headers={"Accept": "application/json"},
)
with urllib.request.urlopen(req, timeout=timeout) as resp:
payload = json.loads(resp.read().decode())
except Exception:
return list(_ai_gateway_catalog_cache or fallback)
live_items = payload.get("data", [])
if not isinstance(live_items, list):
return list(_ai_gateway_catalog_cache or fallback)
live_by_id: dict[str, dict[str, Any]] = {}
for item in live_items:
if not isinstance(item, dict):
continue
mid = str(item.get("id") or "").strip()
if not mid:
continue
live_by_id[mid] = item
curated: list[tuple[str, str]] = []
for preferred_id in preferred_ids:
live_item = live_by_id.get(preferred_id)
if live_item is None:
continue
desc = "free" if _ai_gateway_model_is_free(live_item.get("pricing")) else ""
curated.append((preferred_id, desc))
if not curated:
return list(_ai_gateway_catalog_cache or fallback)
# If the live catalog offers a free Moonshot model, auto-promote it to
# position #1 as "recommended" — dynamic discovery without a PR.
free_moonshot = next(
(
mid
for mid, item in live_by_id.items()
if mid.startswith("moonshotai/")
and _ai_gateway_model_is_free(item.get("pricing"))
),
None,
)
if free_moonshot:
curated = [(mid, desc) for mid, desc in curated if mid != free_moonshot]
curated.insert(0, (free_moonshot, "recommended"))
else:
first_id, _ = curated[0]
curated[0] = (first_id, "recommended")
_ai_gateway_catalog_cache = curated
return list(curated)
def ai_gateway_model_ids(*, force_refresh: bool = False) -> list[str]:
"""Return just the AI Gateway model-id strings."""
return [mid for mid, _ in fetch_ai_gateway_models(force_refresh=force_refresh)]
# ---------------------------------------------------------------------------
@@ -1135,56 +864,6 @@ def fetch_models_with_pricing(
return result
def fetch_ai_gateway_pricing(
timeout: float = 8.0,
*,
force_refresh: bool = False,
) -> dict[str, dict[str, str]]:
"""Fetch Vercel AI Gateway /v1/models and return hermes-shaped pricing.
Vercel uses ``input`` / ``output`` field names; hermes's picker expects
``prompt`` / ``completion``. This translates. Cache read/write field names
already match.
"""
from hermes_constants import AI_GATEWAY_BASE_URL
cache_key = AI_GATEWAY_BASE_URL.rstrip("/")
if not force_refresh and cache_key in _pricing_cache:
return _pricing_cache[cache_key]
try:
req = urllib.request.Request(
f"{cache_key}/models",
headers={"Accept": "application/json"},
)
with urllib.request.urlopen(req, timeout=timeout) as resp:
payload = json.loads(resp.read().decode())
except Exception:
_pricing_cache[cache_key] = {}
return {}
result: dict[str, dict[str, str]] = {}
for item in payload.get("data", []):
if not isinstance(item, dict):
continue
mid = item.get("id")
pricing = item.get("pricing")
if not (mid and isinstance(pricing, dict)):
continue
entry: dict[str, str] = {
"prompt": str(pricing.get("input", "")),
"completion": str(pricing.get("output", "")),
}
if pricing.get("input_cache_read"):
entry["input_cache_read"] = str(pricing["input_cache_read"])
if pricing.get("input_cache_write"):
entry["input_cache_write"] = str(pricing["input_cache_write"])
result[mid] = entry
_pricing_cache[cache_key] = result
return result
def _resolve_openrouter_api_key() -> str:
"""Best-effort OpenRouter API key for pricing fetch."""
return os.getenv("OPENROUTER_API_KEY", "").strip()
@@ -1203,7 +882,7 @@ def _resolve_nous_pricing_credentials() -> tuple[str, str]:
def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]:
"""Return live pricing for providers that support it (openrouter, nous, ai-gateway)."""
"""Return live pricing for providers that support it (openrouter, nous)."""
normalized = normalize_provider(provider)
if normalized == "openrouter":
return fetch_models_with_pricing(
@@ -1211,8 +890,6 @@ def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> d
base_url="https://openrouter.ai/api",
force_refresh=force_refresh,
)
if normalized == "ai-gateway":
return fetch_ai_gateway_pricing(force_refresh=force_refresh)
if normalized == "nous":
api_key, base_url = _resolve_nous_pricing_credentials()
if base_url:
@@ -1417,6 +1094,7 @@ def detect_provider_for_model(
from hermes_cli.auth import PROVIDER_REGISTRY
pconfig = PROVIDER_REGISTRY.get(direct_match)
if pconfig:
import os
for env_var in pconfig.api_key_env_vars:
if os.getenv(env_var, "").strip():
has_creds = True
@@ -1593,84 +1271,11 @@ def _resolve_copilot_catalog_api_key() -> str:
return ""
# Providers where models.dev is treated as authoritative: curated static
# lists are kept only as an offline fallback and to capture custom additions
# the registry doesn't publish yet. Adding a provider here causes its
# curated list to be merged with fresh models.dev entries (fresh first, any
# curated-only names appended) for both the CLI and the gateway /model picker.
#
# DELIBERATELY EXCLUDED:
# - "openrouter": curated list is already a hand-picked agentic subset of
# OpenRouter's 400+ catalog. Blindly merging would dump everything.
# - "nous": curated list and Portal /models endpoint are the source of
# truth for the subscription tier.
# Also excluded: providers that already have dedicated live-endpoint
# branches below (copilot, anthropic, ai-gateway, ollama-cloud, custom,
# stepfun, openai-codex) — those paths handle freshness themselves.
_MODELS_DEV_PREFERRED: frozenset[str] = frozenset({
"opencode-go",
"opencode-zen",
"deepseek",
"kilocode",
"fireworks",
"mistral",
"togetherai",
"cohere",
"perplexity",
"groq",
"nvidia",
"huggingface",
"zai",
"gemini",
"google",
})
def _merge_with_models_dev(provider: str, curated: list[str]) -> list[str]:
"""Merge curated list with fresh models.dev entries for a preferred provider.
Returns models.dev entries first (in models.dev order), then any
curated-only entries appended. Preserves case for curated fallbacks
(e.g. ``MiniMax-M2.7``) while trusting models.dev for newer variants.
If models.dev is unreachable or returns nothing, the curated list is
returned unchanged this is the offline/CI fallback path.
"""
try:
from agent.models_dev import list_agentic_models
mdev = list_agentic_models(provider)
except Exception:
mdev = []
if not mdev:
return list(curated)
# Case-insensitive dedup while preserving order and curated casing.
seen_lower: set[str] = set()
merged: list[str] = []
for mid in mdev:
key = str(mid).lower()
if key in seen_lower:
continue
seen_lower.add(key)
merged.append(mid)
for mid in curated:
key = str(mid).lower()
if key in seen_lower:
continue
seen_lower.add(key)
merged.append(mid)
return merged
def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) -> list[str]:
"""Return the best known model catalog for a provider.
Tries live API endpoints for providers that support them (Codex, Nous),
falling back to static lists. For providers in ``_MODELS_DEV_PREFERRED``
(opencode-go/zen, xiaomi, deepseek, smaller inference providers, etc.),
models.dev entries are merged on top of curated so new models released
on the platform appear in ``/model`` without a Hermes release.
falling back to static lists.
"""
normalized = normalize_provider(provider)
if normalized == "openrouter":
@@ -1678,19 +1283,7 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
if normalized == "openai-codex":
from hermes_cli.codex_models import get_codex_model_ids
# Pass the live OAuth access token so the picker matches whatever
# ChatGPT lists for this account right now (new models appear without
# a Hermes release). Falls back to the hardcoded catalog if no token
# or the endpoint is unreachable.
access_token = None
try:
from hermes_cli.auth import resolve_codex_runtime_credentials
creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
access_token = creds.get("api_key")
except Exception:
access_token = None
return get_codex_model_ids(access_token=access_token)
return get_codex_model_ids()
if normalized in {"copilot", "copilot-acp"}:
try:
live = _fetch_github_models(_resolve_copilot_catalog_api_key())
@@ -1711,19 +1304,6 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
return live
except Exception:
pass
if normalized == "stepfun":
try:
from hermes_cli.auth import resolve_api_key_provider_credentials
creds = resolve_api_key_provider_credentials("stepfun")
api_key = str(creds.get("api_key") or "").strip()
base_url = str(creds.get("base_url") or "").strip()
if api_key and base_url:
live = fetch_api_models(api_key, base_url)
if live:
return live
except Exception:
pass
if normalized == "anthropic":
live = _fetch_anthropic_models()
if live:
@@ -1748,10 +1328,7 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
live = fetch_api_models(api_key, base_url)
if live:
return live
curated_static = list(_PROVIDER_MODELS.get(normalized, []))
if normalized in _MODELS_DEV_PREFERRED:
return _merge_with_models_dev(normalized, curated_static)
return curated_static
return list(_PROVIDER_MODELS.get(normalized, []))
def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
@@ -2192,7 +1769,7 @@ def probe_api_models(
candidates.append((alternate_base, True))
tried: list[str] = []
headers: dict[str, str] = {"User-Agent": _HERMES_USER_AGENT}
headers: dict[str, str] = {}
if api_key:
headers["Authorization"] = f"Bearer {api_key}"
if normalized.startswith(COPILOT_BASE_URL):
@@ -2654,70 +2231,13 @@ def validate_requested_model(
except Exception:
pass # Fall through to generic warning
# Static-catalog fallback: when the /models probe was unreachable,
# validate against the curated list from provider_model_ids() — same
# pattern as the openai-codex and minimax branches above. This fixes
# /model switches in the gateway for providers like opencode-go and
# opencode-zen whose /models endpoint returns 404 against the HTML
# marketing site. Without this block, validate_requested_model would
# reject every model on such providers, switch_model() would return
# success=False, and the gateway would never write to
# _session_model_overrides.
provider_label = _PROVIDER_LABELS.get(normalized, normalized)
try:
catalog_models = provider_model_ids(normalized)
except Exception:
catalog_models = []
if catalog_models:
catalog_lower = {m.lower(): m for m in catalog_models}
if requested_for_lookup.lower() in catalog_lower:
return {
"accepted": True,
"persist": True,
"recognized": True,
"message": None,
}
catalog_lower_list = list(catalog_lower.keys())
auto = get_close_matches(
requested_for_lookup.lower(), catalog_lower_list, n=1, cutoff=0.9
)
if auto:
corrected = catalog_lower[auto[0]]
return {
"accepted": True,
"persist": True,
"recognized": True,
"corrected_model": corrected,
"message": f"Auto-corrected `{requested}` → `{corrected}`",
}
suggestions = get_close_matches(
requested_for_lookup.lower(), catalog_lower_list, n=3, cutoff=0.5
)
suggestion_text = ""
if suggestions:
suggestion_text = "\n Similar models: " + ", ".join(
f"`{catalog_lower[s]}`" for s in suggestions
)
return {
"accepted": True,
"persist": True,
"recognized": False,
"message": (
f"Note: `{requested}` was not found in the {provider_label} curated catalog "
f"and the /models endpoint was unreachable.{suggestion_text}"
f"\n The model may still work if it exists on the provider."
),
}
# No catalog available — accept with a warning, matching the comment's
# stated intent ("Accept and persist, but warn").
return {
"accepted": True,
"persist": True,
"accepted": False,
"persist": False,
"recognized": False,
"message": (
f"Note: could not reach the {provider_label} API to validate `{requested}`. "
f"Could not reach the {provider_label} API to validate `{requested}`. "
f"If the service isn't down, this model may not be valid."
),
}
+4 -4
View File
@@ -10,7 +10,6 @@ from hermes_cli.auth import get_nous_auth_status
from hermes_cli.config import get_env_value, load_config
from tools.managed_tool_gateway import is_managed_tool_gateway_ready
from tools.tool_backend_helpers import (
fal_key_is_configured,
has_direct_modal_credentials,
managed_nous_tools_enabled,
normalize_browser_cloud_provider,
@@ -272,7 +271,7 @@ def get_nous_subscription_features(
direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
direct_tavily = bool(get_env_value("TAVILY_API_KEY"))
direct_fal = fal_key_is_configured()
direct_fal = bool(get_env_value("FAL_KEY"))
direct_openai_tts = bool(resolve_openai_audio_api_key())
direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY"))
direct_camofox = bool(get_env_value("CAMOFOX_URL"))
@@ -521,7 +520,7 @@ def apply_nous_managed_defaults(
browser_cfg["cloud_provider"] = "browser-use"
changed.add("browser")
if "image_gen" in selected_toolsets and not fal_key_is_configured():
if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"):
changed.add("image_gen")
return changed
@@ -549,7 +548,7 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]:
or get_env_value("TAVILY_API_KEY")
or get_env_value("EXA_API_KEY")
),
"image_gen": fal_key_is_configured(),
"image_gen": bool(get_env_value("FAL_KEY")),
"tts": bool(
resolve_openai_audio_api_key()
or get_env_value("ELEVENLABS_API_KEY")
@@ -587,6 +586,7 @@ def get_gateway_eligible_tools(
return [], [], []
if config is None:
from hermes_cli.config import load_config
config = load_config() or {}
# Quick provider check without the heavy get_nous_subscription_features call
+3 -3
View File
@@ -44,7 +44,7 @@ def _cmd_list(store):
for p in pending:
print(
f" {p['platform']:<12} {p['code']:<10} {p['user_id']:<20} "
f"{(p.get('user_name') or ''):<20} {p['age_minutes']}m ago"
f"{p.get('user_name', ''):<20} {p['age_minutes']}m ago"
)
else:
print("\n No pending pairing requests.")
@@ -54,7 +54,7 @@ def _cmd_list(store):
print(f" {'Platform':<12} {'User ID':<20} {'Name':<20}")
print(f" {'--------':<12} {'-------':<20} {'----':<20}")
for a in approved:
print(f" {a['platform']:<12} {a['user_id']:<20} {(a.get('user_name') or ''):<20}")
print(f" {a['platform']:<12} {a['user_id']:<20} {a.get('user_name', ''):<20}")
else:
print("\n No approved users.")
@@ -69,7 +69,7 @@ def _cmd_approve(store, platform: str, code: str):
result = store.approve_code(platform, code)
if result:
uid = result["user_id"]
name = result.get("user_name") or ""
name = result.get("user_name", "")
display = f"{name} ({uid})" if name else uid
print(f"\n Approved! User {display} on {platform} can now use the bot~")
print(" They'll be recognized automatically on their next message.\n")
-1
View File
@@ -38,7 +38,6 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
("qqbot", PlatformInfo(label="💬 QQBot", default_toolset="hermes-qqbot")),
("webhook", PlatformInfo(label="🔗 Webhook", default_toolset="hermes-webhook")),
("api_server", PlatformInfo(label="🌐 API Server", default_toolset="hermes-api-server")),
("cron", PlatformInfo(label="⏰ Cron", default_toolset="hermes-cron")),
])
+50 -379
View File
@@ -2,20 +2,14 @@
Hermes Plugin System
====================
Discovers, loads, and manages plugins from four sources:
Discovers, loads, and manages plugins from three sources:
1. **Bundled plugins** ``<repo>/plugins/<name>/`` (shipped with hermes-agent;
``memory/`` and ``context_engine/`` subdirs are excluded they have their
own discovery paths)
2. **User plugins** ``~/.hermes/plugins/<name>/``
3. **Project plugins** ``./.hermes/plugins/<name>/`` (opt-in via
1. **User plugins** ``~/.hermes/plugins/<name>/``
2. **Project plugins** ``./.hermes/plugins/<name>/`` (opt-in via
``HERMES_ENABLE_PROJECT_PLUGINS``)
4. **Pip plugins** packages that expose the ``hermes_agent.plugins``
3. **Pip plugins** packages that expose the ``hermes_agent.plugins``
entry-point group.
Later sources override earlier ones on name collision, so a user or project
plugin with the same name as a bundled plugin replaces it.
Each directory plugin must contain a ``plugin.yaml`` manifest **and** an
``__init__.py`` with a ``register(ctx)`` function.
@@ -70,7 +64,6 @@ VALID_HOOKS: Set[str] = {
"on_session_end",
"on_session_finalize",
"on_session_reset",
"subagent_stop",
}
ENTRY_POINTS_GROUP = "hermes_agent.plugins"
@@ -84,12 +77,7 @@ def _env_enabled(name: str) -> bool:
def _get_disabled_plugins() -> set:
"""Read the disabled plugins list from config.yaml.
Kept for backward compat and explicit deny-list semantics. A plugin
name in this set will never load, even if it appears in
``plugins.enabled``.
"""
"""Read the disabled plugins list from config.yaml."""
try:
from hermes_cli.config import load_config
config = load_config()
@@ -99,43 +87,10 @@ def _get_disabled_plugins() -> set:
return set()
def _get_enabled_plugins() -> Optional[set]:
"""Read the enabled-plugins allow-list from config.yaml.
Plugins are opt-in by default only plugins whose name appears in
this set are loaded. Returns:
* ``None`` the key is missing or malformed. Callers should treat
this as "nothing enabled yet" (the opt-in default); the first
``migrate_config`` run populates the key with a grandfathered set
of currently-installed user plugins so existing setups don't
break on upgrade.
* ``set()`` an empty list was explicitly set; nothing loads.
* ``set(...)`` the concrete allow-list.
"""
try:
from hermes_cli.config import load_config
config = load_config()
plugins_cfg = config.get("plugins")
if not isinstance(plugins_cfg, dict):
return None
if "enabled" not in plugins_cfg:
return None
enabled = plugins_cfg.get("enabled")
if not isinstance(enabled, list):
return None
return set(enabled)
except Exception:
return None
# ---------------------------------------------------------------------------
# Data classes
# ---------------------------------------------------------------------------
_VALID_PLUGIN_KINDS: Set[str] = {"standalone", "backend", "exclusive"}
@dataclass
class PluginManifest:
"""Parsed representation of a plugin.yaml manifest."""
@@ -149,23 +104,6 @@ class PluginManifest:
provides_hooks: List[str] = field(default_factory=list)
source: str = "" # "user", "project", or "entrypoint"
path: Optional[str] = None
# Plugin kind — see plugins.py module docstring for semantics.
# ``standalone`` (default): hooks/tools of its own; opt-in via
# ``plugins.enabled``.
# ``backend``: pluggable backend for an existing core tool (e.g.
# image_gen). Built-in (bundled) backends auto-load;
# user-installed still gated by ``plugins.enabled``.
# ``exclusive``: category with exactly one active provider (memory).
# Selection via ``<category>.provider`` config key; the
# category's own discovery system handles loading and the
# general scanner skips these.
kind: str = "standalone"
# Registry key — path-derived, used by ``plugins.enabled``/``disabled``
# lookups and by ``hermes plugins list``. For a flat plugin at
# ``plugins/disk-cleanup/`` the key is ``disk-cleanup``; for a nested
# category plugin at ``plugins/image_gen/openai/`` the key is
# ``image_gen/openai``. When empty, falls back to ``name``.
key: str = ""
@dataclass
@@ -283,7 +221,6 @@ class PluginContext:
name: str,
handler: Callable,
description: str = "",
args_hint: str = "",
) -> None:
"""Register a slash command (e.g. ``/lcm``) available in CLI and gateway sessions.
@@ -294,13 +231,6 @@ class PluginContext:
terminal commands), this registers in-session slash commands that users
invoke during a conversation.
``args_hint`` is an optional short string (e.g. ``"<file>"`` or
``"dias:7 formato:json"``) used by gateway adapters to surface the
command with an argument field for example Discord's native slash
command picker. Plugin commands without ``args_hint`` register as
parameterless in Discord and still accept trailing text when invoked
as free-form chat.
Names conflicting with built-in commands are rejected with a warning.
"""
clean = name.lower().strip().lstrip("/").replace(" ", "-")
@@ -328,7 +258,6 @@ class PluginContext:
"handler": handler,
"description": description or "Plugin command",
"plugin": self.manifest.name,
"args_hint": (args_hint or "").strip(),
}
logger.debug("Plugin %s registered command: /%s", self.manifest.name, clean)
@@ -395,33 +324,6 @@ class PluginContext:
self.manifest.name, engine.name,
)
# -- image gen provider registration ------------------------------------
def register_image_gen_provider(self, provider) -> None:
"""Register an image generation backend.
``provider`` must be an instance of
:class:`agent.image_gen_provider.ImageGenProvider`. The
``provider.name`` attribute is what ``image_gen.provider`` in
``config.yaml`` matches against when routing ``image_generate``
tool calls.
"""
from agent.image_gen_provider import ImageGenProvider
from agent.image_gen_registry import register_provider
if not isinstance(provider, ImageGenProvider):
logger.warning(
"Plugin '%s' tried to register an image_gen provider that does "
"not inherit from ImageGenProvider. Ignoring.",
self.manifest.name,
)
return
register_provider(provider)
logger.info(
"Plugin '%s' registered image_gen provider: %s",
self.manifest.name, provider.name,
)
# -- hook registration --------------------------------------------------
def register_hook(self, hook_name: str, callback: Callable) -> None:
@@ -512,124 +414,34 @@ class PluginManager:
# Public
# -----------------------------------------------------------------------
def discover_and_load(self, force: bool = False) -> None:
"""Scan all plugin sources and load each plugin found.
When ``force`` is true, clear cached discovery state first so config
changes or newly-added bundled backends become visible in long-lived
sessions without requiring a full agent restart.
"""
if self._discovered and not force:
def discover_and_load(self) -> None:
"""Scan all plugin sources and load each plugin found."""
if self._discovered:
return
if force:
self._plugins.clear()
self._hooks.clear()
self._plugin_tool_names.clear()
self._cli_commands.clear()
self._plugin_commands.clear()
self._plugin_skills.clear()
self._context_engine = None
self._discovered = True
manifests: List[PluginManifest] = []
# 1. Bundled plugins (<repo>/plugins/<name>/)
#
# Repo-shipped plugins live next to hermes_cli/. Two layouts are
# supported (see ``_scan_directory`` for details):
#
# - flat: ``plugins/disk-cleanup/plugin.yaml`` (standalone)
# - category: ``plugins/image_gen/openai/plugin.yaml`` (backend)
#
# ``memory/`` and ``context_engine/`` are skipped at the top level —
# they have their own discovery systems. Porting those to the
# category-namespace ``kind: exclusive`` model is a future PR.
repo_plugins = Path(__file__).resolve().parent.parent / "plugins"
manifests.extend(
self._scan_directory(
repo_plugins,
source="bundled",
skip_names={"memory", "context_engine"},
)
)
# 2. User plugins (~/.hermes/plugins/)
# 1. User plugins (~/.hermes/plugins/)
user_dir = get_hermes_home() / "plugins"
manifests.extend(self._scan_directory(user_dir, source="user"))
# 3. Project plugins (./.hermes/plugins/)
# 2. Project plugins (./.hermes/plugins/)
if _env_enabled("HERMES_ENABLE_PROJECT_PLUGINS"):
project_dir = Path.cwd() / ".hermes" / "plugins"
manifests.extend(self._scan_directory(project_dir, source="project"))
# 4. Pip / entry-point plugins
# 3. Pip / entry-point plugins
manifests.extend(self._scan_entry_points())
# Load each manifest (skip user-disabled plugins).
# Later sources override earlier ones on key collision — user
# plugins take precedence over bundled, project plugins take
# precedence over user. Dedup here so we only load the final
# winner. Keys are path-derived (``image_gen/openai``,
# ``disk-cleanup``) so ``tts/openai`` and ``image_gen/openai``
# don't collide even when both manifests say ``name: openai``.
# Load each manifest (skip user-disabled plugins)
disabled = _get_disabled_plugins()
enabled = _get_enabled_plugins() # None = opt-in default (nothing enabled)
winners: Dict[str, PluginManifest] = {}
for manifest in manifests:
winners[manifest.key or manifest.name] = manifest
for manifest in winners.values():
lookup_key = manifest.key or manifest.name
# Explicit disable always wins (matches on key or on legacy
# bare name for back-compat with existing user configs).
if lookup_key in disabled or manifest.name in disabled:
if manifest.name in disabled:
loaded = LoadedPlugin(manifest=manifest, enabled=False)
loaded.error = "disabled via config"
self._plugins[lookup_key] = loaded
logger.debug("Skipping disabled plugin '%s'", lookup_key)
continue
# Exclusive plugins (memory providers) have their own
# discovery/activation path. The general loader records the
# manifest for introspection but does not load the module.
if manifest.kind == "exclusive":
loaded = LoadedPlugin(manifest=manifest, enabled=False)
loaded.error = (
"exclusive plugin — activate via <category>.provider config"
)
self._plugins[lookup_key] = loaded
logger.debug(
"Skipping '%s' (exclusive, handled by category discovery)",
lookup_key,
)
continue
# Built-in backends auto-load — they ship with hermes and must
# just work. Selection among them (e.g. which image_gen backend
# services calls) is driven by ``<category>.provider`` config,
# enforced by the tool wrapper.
if manifest.kind == "backend" and manifest.source == "bundled":
self._load_plugin(manifest)
continue
# Everything else (standalone, user-installed backends,
# entry-point plugins) is opt-in via plugins.enabled.
# Accept both the path-derived key and the legacy bare name
# so existing configs keep working.
is_enabled = (
enabled is not None
and (lookup_key in enabled or manifest.name in enabled)
)
if not is_enabled:
loaded = LoadedPlugin(manifest=manifest, enabled=False)
loaded.error = (
"not enabled in config (run `hermes plugins enable {}` to activate)"
.format(lookup_key)
)
self._plugins[lookup_key] = loaded
logger.debug(
"Skipping '%s' (not in plugins.enabled)", lookup_key
)
self._plugins[manifest.name] = loaded
logger.debug("Skipping disabled plugin '%s'", manifest.name)
continue
self._load_plugin(manifest)
@@ -644,46 +456,8 @@ class PluginManager:
# Directory scanning
# -----------------------------------------------------------------------
def _scan_directory(
self,
path: Path,
source: str,
skip_names: Optional[Set[str]] = None,
) -> List[PluginManifest]:
"""Read ``plugin.yaml`` manifests from subdirectories of *path*.
Supports two layouts, mixed freely:
* **Flat** ``<root>/<plugin-name>/plugin.yaml``. Key is
``<plugin-name>`` (e.g. ``disk-cleanup``).
* **Category** ``<root>/<category>/<plugin-name>/plugin.yaml``,
where the ``<category>`` directory itself has no ``plugin.yaml``.
Key is ``<category>/<plugin-name>`` (e.g. ``image_gen/openai``).
Depth is capped at two segments.
*skip_names* is an optional allow-list of names to ignore at the
top level (kept for back-compat; the current call sites no longer
pass it now that categories are first-class).
"""
return self._scan_directory_level(
path, source, skip_names=skip_names, prefix="", depth=0
)
def _scan_directory_level(
self,
path: Path,
source: str,
*,
skip_names: Optional[Set[str]],
prefix: str,
depth: int,
) -> List[PluginManifest]:
"""Recursive implementation of :meth:`_scan_directory`.
``prefix`` is the category path already accumulated ("" at root,
"image_gen" one level in). ``depth`` is the recursion depth; we
cap at 2 so ``<root>/a/b/c/`` is ignored.
"""
def _scan_directory(self, path: Path, source: str) -> List[PluginManifest]:
"""Read ``plugin.yaml`` manifests from subdirectories of *path*."""
manifests: List[PluginManifest] = []
if not path.is_dir():
return manifests
@@ -691,112 +465,35 @@ class PluginManager:
for child in sorted(path.iterdir()):
if not child.is_dir():
continue
if depth == 0 and skip_names and child.name in skip_names:
continue
manifest_file = child / "plugin.yaml"
if not manifest_file.exists():
manifest_file = child / "plugin.yml"
if manifest_file.exists():
manifest = self._parse_manifest(
manifest_file, child, source, prefix
)
if manifest is not None:
manifests.append(manifest)
if not manifest_file.exists():
logger.debug("Skipping %s (no plugin.yaml)", child)
continue
# No manifest at this level. If we're still within the depth
# cap, treat this directory as a category namespace and recurse
# one level in looking for children with manifests.
if depth >= 1:
logger.debug("Skipping %s (no plugin.yaml, depth cap reached)", child)
continue
sub_prefix = f"{prefix}/{child.name}" if prefix else child.name
manifests.extend(
self._scan_directory_level(
child,
source,
skip_names=None,
prefix=sub_prefix,
depth=depth + 1,
try:
if yaml is None:
logger.warning("PyYAML not installed cannot load %s", manifest_file)
continue
data = yaml.safe_load(manifest_file.read_text()) or {}
manifest = PluginManifest(
name=data.get("name", child.name),
version=str(data.get("version", "")),
description=data.get("description", ""),
author=data.get("author", ""),
requires_env=data.get("requires_env", []),
provides_tools=data.get("provides_tools", []),
provides_hooks=data.get("provides_hooks", []),
source=source,
path=str(child),
)
)
manifests.append(manifest)
except Exception as exc:
logger.warning("Failed to parse %s: %s", manifest_file, exc)
return manifests
def _parse_manifest(
self,
manifest_file: Path,
plugin_dir: Path,
source: str,
prefix: str,
) -> Optional[PluginManifest]:
"""Parse a single ``plugin.yaml`` into a :class:`PluginManifest`.
Returns ``None`` on parse failure (logs a warning).
"""
try:
if yaml is None:
logger.warning("PyYAML not installed cannot load %s", manifest_file)
return None
data = yaml.safe_load(manifest_file.read_text()) or {}
name = data.get("name", plugin_dir.name)
key = f"{prefix}/{plugin_dir.name}" if prefix else name
raw_kind = data.get("kind", "standalone")
if not isinstance(raw_kind, str):
raw_kind = "standalone"
kind = raw_kind.strip().lower()
if kind not in _VALID_PLUGIN_KINDS:
logger.warning(
"Plugin %s: unknown kind '%s' (valid: %s); treating as 'standalone'",
key, raw_kind, ", ".join(sorted(_VALID_PLUGIN_KINDS)),
)
kind = "standalone"
# Auto-coerce user-installed memory providers to kind="exclusive"
# so they're routed to plugins/memory discovery instead of being
# loaded by the general PluginManager (which has no
# register_memory_provider on PluginContext). Mirrors the
# heuristic in plugins/memory/__init__.py:_is_memory_provider_dir.
# Bundled memory providers are already skipped via skip_names.
if kind == "standalone" and "kind" not in data:
init_file = plugin_dir / "__init__.py"
if init_file.exists():
try:
source_text = init_file.read_text(errors="replace")[:8192]
if (
"register_memory_provider" in source_text
or "MemoryProvider" in source_text
):
kind = "exclusive"
logger.debug(
"Plugin %s: detected memory provider, "
"treating as kind='exclusive'",
key,
)
except Exception:
pass
return PluginManifest(
name=name,
version=str(data.get("version", "")),
description=data.get("description", ""),
author=data.get("author", ""),
requires_env=data.get("requires_env", []),
provides_tools=data.get("provides_tools", []),
provides_hooks=data.get("provides_hooks", []),
source=source,
path=str(plugin_dir),
kind=kind,
key=key,
)
except Exception as exc:
logger.warning("Failed to parse %s: %s", manifest_file, exc)
return None
# -----------------------------------------------------------------------
# Entry-point scanning
# -----------------------------------------------------------------------
@@ -819,7 +516,6 @@ class PluginManager:
name=ep.name,
source="entrypoint",
path=ep.value,
key=ep.name,
)
manifests.append(manifest)
except Exception as exc:
@@ -836,7 +532,7 @@ class PluginManager:
loaded = LoadedPlugin(manifest=manifest)
try:
if manifest.source in ("user", "project", "bundled"):
if manifest.source in ("user", "project"):
module = self._load_directory_module(manifest)
else:
module = self._load_entrypoint_module(manifest)
@@ -881,16 +577,10 @@ class PluginManager:
loaded.error = str(exc)
logger.warning("Failed to load plugin '%s': %s", manifest.name, exc)
self._plugins[manifest.key or manifest.name] = loaded
self._plugins[manifest.name] = loaded
def _load_directory_module(self, manifest: PluginManifest) -> types.ModuleType:
"""Import a directory-based plugin as ``hermes_plugins.<slug>``.
The module slug is derived from ``manifest.key`` so category-namespaced
plugins (``image_gen/openai``) import as
``hermes_plugins.image_gen__openai`` without colliding with any
future ``tts/openai``.
"""
"""Import a directory-based plugin as ``hermes_plugins.<name>``."""
plugin_dir = Path(manifest.path) # type: ignore[arg-type]
init_file = plugin_dir / "__init__.py"
if not init_file.exists():
@@ -903,9 +593,7 @@ class PluginManager:
ns_pkg.__package__ = _NS_PARENT
sys.modules[_NS_PARENT] = ns_pkg
key = manifest.key or manifest.name
slug = key.replace("/", "__").replace("-", "_")
module_name = f"{_NS_PARENT}.{slug}"
module_name = f"{_NS_PARENT}.{manifest.name.replace('-', '_')}"
spec = importlib.util.spec_from_file_location(
module_name,
init_file,
@@ -986,12 +674,10 @@ class PluginManager:
def list_plugins(self) -> List[Dict[str, Any]]:
"""Return a list of info dicts for all discovered plugins."""
result: List[Dict[str, Any]] = []
for key, loaded in sorted(self._plugins.items()):
for name, loaded in sorted(self._plugins.items()):
result.append(
{
"name": loaded.manifest.name,
"key": loaded.manifest.key or loaded.manifest.name,
"kind": loaded.manifest.kind,
"name": name,
"version": loaded.manifest.version,
"description": loaded.manifest.description,
"source": loaded.manifest.source,
@@ -1042,13 +728,9 @@ def get_plugin_manager() -> PluginManager:
return _plugin_manager
def discover_plugins(force: bool = False) -> None:
"""Discover and load all plugins.
Default behavior is idempotent. Pass ``force=True`` to rescan plugin
manifests and reload state in the current process.
"""
get_plugin_manager().discover_and_load(force=force)
def discover_plugins() -> None:
"""Discover and load all plugins (idempotent)."""
get_plugin_manager().discover_and_load()
def invoke_hook(hook_name: str, **kwargs: Any) -> List[Any]:
@@ -1099,34 +781,23 @@ def get_pre_tool_call_block_message(
return None
def _ensure_plugins_discovered(force: bool = False) -> PluginManager:
"""Return the global manager after ensuring plugin discovery has run.
Pass ``force=True`` to rescan in the current process.
"""
manager = get_plugin_manager()
manager.discover_and_load(force=force)
return manager
def get_plugin_context_engine():
"""Return the plugin-registered context engine, or None."""
return _ensure_plugins_discovered()._context_engine
return get_plugin_manager()._context_engine
def get_plugin_command_handler(name: str) -> Optional[Callable]:
"""Return the handler for a plugin-registered slash command, or ``None``."""
entry = _ensure_plugins_discovered()._plugin_commands.get(name)
entry = get_plugin_manager()._plugin_commands.get(name)
return entry["handler"] if entry else None
def get_plugin_commands() -> Dict[str, dict]:
"""Return the full plugin commands dict (name → {handler, description, plugin}).
Triggers idempotent plugin discovery so callers can use plugin commands
before any explicit discover_plugins() call.
Safe to call before discovery returns an empty dict if no plugins loaded.
"""
return _ensure_plugins_discovered()._plugin_commands
return get_plugin_manager()._plugin_commands
def get_plugin_toolsets() -> List[tuple]:
+93 -245
View File
@@ -15,7 +15,6 @@ import shutil
import subprocess
import sys
from pathlib import Path
from typing import Optional
from hermes_constants import get_hermes_home
@@ -282,16 +281,8 @@ def _require_installed_plugin(name: str, plugins_dir: Path, console) -> Path:
# ---------------------------------------------------------------------------
def cmd_install(
identifier: str,
force: bool = False,
enable: Optional[bool] = None,
) -> None:
"""Install a plugin from a Git URL or owner/repo shorthand.
After install, prompt "Enable now? [y/N]" unless *enable* is provided
(True = auto-enable without prompting, False = install disabled).
"""
def cmd_install(identifier: str, force: bool = False) -> None:
"""Install a plugin from a Git URL or owner/repo shorthand."""
import tempfile
from rich.console import Console
@@ -400,40 +391,6 @@ def cmd_install(
_display_after_install(target, identifier)
# Determine the canonical plugin name for enable-list bookkeeping.
installed_name = installed_manifest.get("name") or target.name
# Decide whether to enable: explicit flag > interactive prompt > default off
should_enable = enable
if should_enable is None:
# Interactive prompt unless stdin isn't a TTY (scripted install).
if sys.stdin.isatty() and sys.stdout.isatty():
try:
answer = input(
f" Enable '{installed_name}' now? [y/N]: "
).strip().lower()
should_enable = answer in ("y", "yes")
except (EOFError, KeyboardInterrupt):
should_enable = False
else:
should_enable = False
if should_enable:
enabled = _get_enabled_set()
disabled = _get_disabled_set()
enabled.add(installed_name)
disabled.discard(installed_name)
_save_enabled_set(enabled)
_save_disabled_set(disabled)
console.print(
f"[green]✓[/green] Plugin [bold]{installed_name}[/bold] enabled."
)
else:
console.print(
f"[dim]Plugin installed but not enabled. "
f"Run `hermes plugins enable {installed_name}` to activate.[/dim]"
)
console.print("[dim]Restart the gateway for the plugin to take effect:[/dim]")
console.print("[dim] hermes gateway restart[/dim]")
console.print()
@@ -511,11 +468,7 @@ def cmd_remove(name: str) -> None:
def _get_disabled_set() -> set:
"""Read the disabled plugins set from config.yaml.
An explicit deny-list. A plugin name here never loads, even if also
listed in ``plugins.enabled``.
"""
"""Read the disabled plugins set from config.yaml."""
try:
from hermes_cli.config import load_config
config = load_config()
@@ -535,196 +488,103 @@ def _save_disabled_set(disabled: set) -> None:
save_config(config)
def _get_enabled_set() -> set:
"""Read the enabled plugins allow-list from config.yaml.
Plugins are opt-in: only names here are loaded. Returns ``set()`` if
the key is missing (same behaviour as "nothing enabled yet").
"""
try:
from hermes_cli.config import load_config
config = load_config()
plugins_cfg = config.get("plugins", {})
if not isinstance(plugins_cfg, dict):
return set()
enabled = plugins_cfg.get("enabled", [])
return set(enabled) if isinstance(enabled, list) else set()
except Exception:
return set()
def _save_enabled_set(enabled: set) -> None:
"""Write the enabled plugins list to config.yaml."""
from hermes_cli.config import load_config, save_config
config = load_config()
if "plugins" not in config:
config["plugins"] = {}
config["plugins"]["enabled"] = sorted(enabled)
save_config(config)
def cmd_enable(name: str) -> None:
"""Add a plugin to the enabled allow-list (and remove it from disabled)."""
"""Enable a previously disabled plugin."""
from rich.console import Console
console = Console()
# Discover the plugin — check installed (user) AND bundled.
if not _plugin_exists(name):
console.print(f"[red]Plugin '{name}' is not installed or bundled.[/red]")
plugins_dir = _plugins_dir()
# Verify the plugin exists
target = plugins_dir / name
if not target.is_dir():
console.print(f"[red]Plugin '{name}' is not installed.[/red]")
sys.exit(1)
enabled = _get_enabled_set()
disabled = _get_disabled_set()
if name in enabled and name not in disabled:
if name not in disabled:
console.print(f"[dim]Plugin '{name}' is already enabled.[/dim]")
return
enabled.add(name)
disabled.discard(name)
_save_enabled_set(enabled)
_save_disabled_set(disabled)
console.print(
f"[green]✓[/green] Plugin [bold]{name}[/bold] enabled. "
"Takes effect on next session."
)
console.print(f"[green]✓[/green] Plugin [bold]{name}[/bold] enabled. Takes effect on next session.")
def cmd_disable(name: str) -> None:
"""Remove a plugin from the enabled allow-list (and add to disabled)."""
"""Disable a plugin without removing it."""
from rich.console import Console
console = Console()
if not _plugin_exists(name):
console.print(f"[red]Plugin '{name}' is not installed or bundled.[/red]")
plugins_dir = _plugins_dir()
# Verify the plugin exists
target = plugins_dir / name
if not target.is_dir():
console.print(f"[red]Plugin '{name}' is not installed.[/red]")
sys.exit(1)
enabled = _get_enabled_set()
disabled = _get_disabled_set()
if name not in enabled and name in disabled:
if name in disabled:
console.print(f"[dim]Plugin '{name}' is already disabled.[/dim]")
return
enabled.discard(name)
disabled.add(name)
_save_enabled_set(enabled)
_save_disabled_set(disabled)
console.print(
f"[yellow]\u2298[/yellow] Plugin [bold]{name}[/bold] disabled. "
"Takes effect on next session."
)
console.print(f"[yellow]\u2298[/yellow] Plugin [bold]{name}[/bold] disabled. Takes effect on next session.")
def _plugin_exists(name: str) -> bool:
"""Return True if a plugin with *name* is installed (user) or bundled."""
# Installed: directory name or manifest name match in user plugins dir
user_dir = _plugins_dir()
if user_dir.is_dir():
if (user_dir / name).is_dir():
return True
for child in user_dir.iterdir():
if not child.is_dir():
continue
manifest = _read_manifest(child)
if manifest.get("name") == name:
return True
# Bundled: <repo>/plugins/<name>/
from pathlib import Path as _P
import hermes_cli
repo_plugins = _P(hermes_cli.__file__).resolve().parent.parent / "plugins"
if repo_plugins.is_dir():
candidate = repo_plugins / name
if candidate.is_dir() and (
(candidate / "plugin.yaml").exists()
or (candidate / "plugin.yml").exists()
):
return True
return False
def cmd_list() -> None:
"""List installed plugins."""
from rich.console import Console
from rich.table import Table
def _discover_all_plugins() -> list:
"""Return a list of (name, version, description, source, dir_path) for
every plugin the loader can see user + bundled + project.
Matches the ordering/dedup of ``PluginManager.discover_and_load``:
bundled first, then user, then project; user overrides bundled on
name collision.
"""
try:
import yaml
except ImportError:
yaml = None
seen: dict = {} # name -> (name, version, description, source, path)
# Bundled (<repo>/plugins/<name>/), excluding memory/ and context_engine/
import hermes_cli
repo_plugins = Path(hermes_cli.__file__).resolve().parent.parent / "plugins"
for base, source in ((repo_plugins, "bundled"), (_plugins_dir(), "user")):
if not base.is_dir():
continue
for d in sorted(base.iterdir()):
if not d.is_dir():
continue
if source == "bundled" and d.name in ("memory", "context_engine"):
continue
manifest_file = d / "plugin.yaml"
if not manifest_file.exists():
manifest_file = d / "plugin.yml"
if not manifest_file.exists():
continue
name = d.name
version = ""
description = ""
if yaml:
try:
with open(manifest_file) as f:
manifest = yaml.safe_load(f) or {}
name = manifest.get("name", d.name)
version = manifest.get("version", "")
description = manifest.get("description", "")
except Exception:
pass
# User plugins override bundled on name collision.
if name in seen and source == "bundled":
continue
src_label = source
if source == "user" and (d / ".git").exists():
src_label = "git"
seen[name] = (name, version, description, src_label, d)
return list(seen.values())
def cmd_list() -> None:
"""List all plugins (bundled + user) with enabled/disabled state."""
from rich.console import Console
from rich.table import Table
console = Console()
entries = _discover_all_plugins()
if not entries:
plugins_dir = _plugins_dir()
dirs = sorted(d for d in plugins_dir.iterdir() if d.is_dir())
if not dirs:
console.print("[dim]No plugins installed.[/dim]")
console.print("[dim]Install with:[/dim] hermes plugins install owner/repo")
return
enabled = _get_enabled_set()
disabled = _get_disabled_set()
table = Table(title="Plugins", show_lines=False)
table = Table(title="Installed Plugins", show_lines=False)
table.add_column("Name", style="bold")
table.add_column("Status")
table.add_column("Version", style="dim")
table.add_column("Description")
table.add_column("Source", style="dim")
for name, version, description, source, _dir in entries:
if name in disabled:
status = "[red]disabled[/red]"
elif name in enabled:
status = "[green]enabled[/green]"
else:
status = "[yellow]not enabled[/yellow]"
for d in dirs:
manifest_file = d / "plugin.yaml"
name = d.name
version = ""
description = ""
source = "local"
if manifest_file.exists() and yaml:
try:
with open(manifest_file) as f:
manifest = yaml.safe_load(f) or {}
name = manifest.get("name", d.name)
version = manifest.get("version", "")
description = manifest.get("description", "")
except Exception:
pass
# Check if it's a git repo (installed via hermes plugins install)
if (d / ".git").exists():
source = "git"
is_disabled = name in disabled or d.name in disabled
status = "[red]disabled[/red]" if is_disabled else "[green]enabled[/green]"
table.add_row(name, status, str(version), description, source)
console.print()
@@ -732,7 +592,6 @@ def cmd_list() -> None:
console.print()
console.print("[dim]Interactive toggle:[/dim] hermes plugins")
console.print("[dim]Enable/disable:[/dim] hermes plugins enable/disable <name>")
console.print("[dim]Plugins are opt-in by default — only 'enabled' plugins load.[/dim]")
# ---------------------------------------------------------------------------
@@ -883,25 +742,41 @@ def cmd_toggle() -> None:
"""Interactive composite UI — general plugins + provider plugin categories."""
from rich.console import Console
console = Console()
try:
import yaml
except ImportError:
yaml = None
# -- General plugins discovery (bundled + user) --
entries = _discover_all_plugins()
enabled_set = _get_enabled_set()
disabled_set = _get_disabled_set()
console = Console()
plugins_dir = _plugins_dir()
# -- General plugins discovery --
dirs = sorted(d for d in plugins_dir.iterdir() if d.is_dir())
disabled = _get_disabled_set()
plugin_names = []
plugin_labels = []
plugin_selected = set()
for i, (name, _version, description, source, _d) in enumerate(entries):
label = f"{name} \u2014 {description}" if description else name
if source == "bundled":
label = f"{label} [bundled]"
for i, d in enumerate(dirs):
manifest_file = d / "plugin.yaml"
name = d.name
description = ""
if manifest_file.exists() and yaml:
try:
with open(manifest_file) as f:
manifest = yaml.safe_load(f) or {}
name = manifest.get("name", d.name)
description = manifest.get("description", "")
except Exception:
pass
plugin_names.append(name)
label = f"{name} \u2014 {description}" if description else name
plugin_labels.append(label)
# Selected (enabled) when in enabled-set AND not in disabled-set
if name in enabled_set and name not in disabled_set:
if name not in disabled and d.name not in disabled:
plugin_selected.add(i)
# -- Provider categories --
@@ -929,10 +804,10 @@ def cmd_toggle() -> None:
try:
import curses
_run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
disabled_set, categories, console)
disabled, categories, console)
except ImportError:
_run_composite_fallback(plugin_names, plugin_labels, plugin_selected,
disabled_set, categories, console)
disabled, categories, console)
def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
@@ -1145,29 +1020,18 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
curses.wrapper(_draw)
flush_stdin()
# Persist general plugin changes. The new allow-list is the set of
# plugin names that were checked; anything not checked is explicitly
# disabled (written to disabled-list) so it remains off even if the
# plugin code does something clever like auto-enable in the future.
new_enabled: set = set()
new_disabled: set = set(disabled) # preserve existing disabled state for unseen plugins
# Persist general plugin changes
new_disabled = set()
for i, name in enumerate(plugin_names):
if i in chosen:
new_enabled.add(name)
new_disabled.discard(name)
else:
if i not in chosen:
new_disabled.add(name)
prev_enabled = _get_enabled_set()
enabled_changed = new_enabled != prev_enabled
disabled_changed = new_disabled != disabled
if enabled_changed or disabled_changed:
_save_enabled_set(new_enabled)
if new_disabled != disabled:
_save_disabled_set(new_disabled)
enabled_count = len(plugin_names) - len(new_disabled)
console.print(
f"\n[green]\u2713[/green] General plugins: {len(new_enabled)} enabled, "
f"{len(plugin_names) - len(new_enabled)} disabled."
f"\n[green]\u2713[/green] General plugins: {enabled_count} enabled, "
f"{len(new_disabled)} disabled."
)
elif n_plugins > 0:
console.print("\n[dim]General plugins unchanged.[/dim]")
@@ -1214,17 +1078,11 @@ def _run_composite_fallback(plugin_names, plugin_labels, plugin_selected,
return
print()
new_enabled: set = set()
new_disabled: set = set(disabled)
new_disabled = set()
for i, name in enumerate(plugin_names):
if i in chosen:
new_enabled.add(name)
new_disabled.discard(name)
else:
if i not in chosen:
new_disabled.add(name)
prev_enabled = _get_enabled_set()
if new_enabled != prev_enabled or new_disabled != disabled:
_save_enabled_set(new_enabled)
if new_disabled != disabled:
_save_disabled_set(new_disabled)
# Provider categories
@@ -1250,17 +1108,7 @@ def plugins_command(args) -> None:
action = getattr(args, "plugins_action", None)
if action == "install":
# Map argparse tri-state: --enable=True, --no-enable=False, neither=None (prompt)
enable_arg = None
if getattr(args, "enable", False):
enable_arg = True
elif getattr(args, "no_enable", False):
enable_arg = False
cmd_install(
args.identifier,
force=getattr(args, "force", False),
enable=enable_arg,
)
cmd_install(args.identifier, force=getattr(args, "force", False))
elif action == "update":
cmd_update(args.name)
elif action in ("remove", "rm", "uninstall"):
+15 -41
View File
@@ -863,15 +863,19 @@ def _safe_extract_profile_archive(archive: Path, destination: Path) -> None:
pass
def _inspect_profile_archive_roots(archive: Path) -> set[str]:
"""Return the archive's top-level directory names.
def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
"""Import a profile from a tar.gz archive.
Profile imports expect exactly one root directory. Inspecting the archive
before extraction lets us stage the import safely instead of mutating a
live profile tree first and reconciling names later.
If *name* is not given, infers it from the archive's top-level directory.
Returns the imported profile directory.
"""
import tarfile
archive = Path(archive_path)
if not archive.exists():
raise FileNotFoundError(f"Archive not found: {archive}")
# Peek at the archive to find the top-level directory name
with tarfile.open(archive, "r:gz") as tf:
top_dirs = {
parts[0]
@@ -885,33 +889,13 @@ def _inspect_profile_archive_roots(archive: Path) -> set[str]:
for member in tf.getmembers()
if member.isdir()
}
return top_dirs
def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
"""Import a profile from a tar.gz archive.
If *name* is not given, infers it from the archive's top-level directory.
Returns the imported profile directory.
"""
import tempfile
archive = Path(archive_path)
if not archive.exists():
raise FileNotFoundError(f"Archive not found: {archive}")
top_dirs = _inspect_profile_archive_roots(archive)
archive_root = top_dirs.pop() if len(top_dirs) == 1 else None
inferred_name = name or archive_root
inferred_name = name or (top_dirs.pop() if len(top_dirs) == 1 else None)
if not inferred_name:
raise ValueError(
"Cannot determine profile name from archive. "
"Specify it explicitly: hermes profile import <archive> --name <name>"
)
if archive_root is None:
raise ValueError(
"Profile archive must contain exactly one top-level directory."
)
# Archives exported from the default profile have "default/" as top-level
# dir. Importing as "default" would target ~/.hermes itself — disallow
@@ -930,22 +914,12 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
profiles_root = _get_profiles_root()
profiles_root.mkdir(parents=True, exist_ok=True)
with tempfile.TemporaryDirectory(prefix="hermes_profile_import_") as tmpdir:
staging_root = Path(tmpdir)
_safe_extract_profile_archive(archive, staging_root)
_safe_extract_profile_archive(archive, profiles_root)
extracted = staging_root / archive_root
if not extracted.is_dir():
raise ValueError(
f"Profile archive root is missing or invalid: {archive_root}"
)
final_source = extracted
if archive_root != inferred_name:
final_source = staging_root / inferred_name
extracted.rename(final_source)
shutil.move(str(final_source), str(profile_dir))
# If the archive extracted under a different name, rename
extracted = profiles_root / (top_dirs.pop() if top_dirs else inferred_name)
if extracted != profile_dir and extracted.exists():
extracted.rename(profile_dir)
return profile_dir
+3 -29
View File
@@ -23,8 +23,6 @@ import logging
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
from utils import base_url_host_matches, base_url_hostname
logger = logging.getLogger(__name__)
@@ -94,12 +92,6 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
transport="openai_chat",
base_url_env_var="KIMI_BASE_URL",
),
"stepfun": HermesOverlay(
transport="openai_chat",
extra_env_vars=("STEPFUN_API_KEY",),
base_url_override="https://api.stepfun.ai/step_plan/v1",
base_url_env_var="STEPFUN_BASE_URL",
),
"minimax": HermesOverlay(
transport="anthropic_messages",
base_url_env_var="MINIMAX_BASE_URL",
@@ -216,10 +208,6 @@ ALIASES: Dict[str, str] = {
"kimi-coding-cn": "kimi-for-coding",
"moonshot": "kimi-for-coding",
# stepfun
"step": "stepfun",
"stepfun-coding-plan": "stepfun",
# minimax-cn
"minimax-china": "minimax-cn",
"minimax_cn": "minimax-cn",
@@ -304,7 +292,6 @@ _LABEL_OVERRIDES: Dict[str, str] = {
"nous": "Nous Portal",
"openai-codex": "OpenAI Codex",
"copilot-acp": "GitHub Copilot ACP",
"stepfun": "StepFun Step Plan",
"xiaomi": "Xiaomi MiMo",
"local": "Local endpoint",
"bedrock": "AWS Bedrock",
@@ -438,16 +425,6 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
"""
pdef = get_provider(provider)
if pdef is not None:
# Even for known providers, check URL heuristics for special endpoints
# (e.g. kimi /coding endpoint needs anthropic_messages even on 'custom')
if base_url:
url_lower = base_url.rstrip("/").lower()
if "api.kimi.com/coding" in url_lower:
return "anthropic_messages"
if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower:
return "anthropic_messages"
if "api.openai.com" in url_lower:
return "codex_responses"
return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions")
# Direct provider checks for providers not in HERMES_OVERLAYS
@@ -457,14 +434,11 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
# URL-based heuristics for custom / unknown providers
if base_url:
url_lower = base_url.rstrip("/").lower()
hostname = base_url_hostname(base_url)
if url_lower.endswith("/anthropic") or hostname == "api.anthropic.com":
if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower:
return "anthropic_messages"
if hostname == "api.kimi.com" and "/coding" in url_lower:
return "anthropic_messages"
if hostname == "api.openai.com":
if "api.openai.com" in url_lower:
return "codex_responses"
if hostname.startswith("bedrock-runtime.") and base_url_host_matches(base_url, "amazonaws.com"):
if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower:
return "bedrock_converse"
return "chat_completions"
+9 -21
View File
@@ -29,7 +29,6 @@ from hermes_cli.auth import (
)
from hermes_cli.config import get_compatible_custom_providers, load_config
from hermes_constants import OPENROUTER_BASE_URL
from utils import base_url_host_matches, base_url_hostname
def _normalize_custom_provider_name(value: str) -> str:
@@ -46,20 +45,14 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
protocol under a ``/anthropic`` suffix treat those as
``anthropic_messages`` transport instead of the default
``chat_completions``.
- Kimi Code's ``api.kimi.com/coding`` endpoint also speaks the
Anthropic Messages protocol (the /coding route accepts Claude
Code's native request shape).
"""
normalized = (base_url or "").strip().lower().rstrip("/")
hostname = base_url_hostname(base_url)
if hostname == "api.x.ai":
if "api.x.ai" in normalized:
return "codex_responses"
if hostname == "api.openai.com":
if "api.openai.com" in normalized and "openrouter" not in normalized:
return "codex_responses"
if normalized.endswith("/anthropic"):
return "anthropic_messages"
if hostname == "api.kimi.com" and "/coding" in normalized:
return "anthropic_messages"
return None
@@ -210,8 +203,7 @@ def _resolve_runtime_from_pool_entry(
api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
else:
# Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
# Kimi /coding, api.openai.com → codex_responses, api.x.ai →
# codex_responses).
# api.openai.com → codex_responses, api.x.ai → codex_responses).
detected = _detect_api_mode_for_url(base_url)
if detected:
api_mode = detected
@@ -488,7 +480,7 @@ def _resolve_openrouter_runtime(
# When hitting a custom endpoint (e.g. Z.ai, local LLM), prefer
# OPENAI_API_KEY so the OpenRouter key doesn't leak to an unrelated
# provider (issues #420, #560).
_is_openrouter_url = base_url_host_matches(base_url, "openrouter.ai")
_is_openrouter_url = "openrouter.ai" in base_url
if _is_openrouter_url:
api_key_candidates = [
explicit_api_key,
@@ -498,12 +490,8 @@ def _resolve_openrouter_runtime(
else:
# Custom endpoint: use api_key from config when using config base_url (#1760).
# When the endpoint is Ollama Cloud, check OLLAMA_API_KEY — it's
# the canonical env var for ollama.com authentication. Match on
# HOST, not substring — a custom base_url whose path contains
# "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose
# hostname is a look-alike (ollama.com.attacker.test) must not
# receive the Ollama credential. See GHSA-76xc-57q6-vm5m.
_is_ollama_url = base_url_host_matches(base_url, "ollama.com")
# the canonical env var for ollama.com authentication.
_is_ollama_url = "ollama.com" in base_url.lower()
api_key_candidates = [
explicit_api_key,
(cfg_api_key if use_config_base_url else ""),
@@ -666,8 +654,7 @@ def _resolve_explicit_runtime(
if configured_mode:
api_mode = configured_mode
else:
# Auto-detect from URL (Anthropic /anthropic suffix,
# api.openai.com → Responses, Kimi /coding, etc.).
# Auto-detect Anthropic-compatible endpoints (/anthropic suffix).
detected = _detect_api_mode_for_url(base_url)
if detected:
api_mode = detected
@@ -917,7 +904,8 @@ def resolve_runtime_provider(
code="no_aws_credentials",
)
# Read bedrock-specific config from config.yaml
_bedrock_cfg = load_config().get("bedrock", {})
from hermes_cli.config import load_config as _load_bedrock_config
_bedrock_cfg = _load_bedrock_config().get("bedrock", {})
# Region priority: config.yaml bedrock.region → env var → us-east-1
region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region()
auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain"
+58 -182
View File
@@ -22,7 +22,6 @@ from typing import Optional, Dict, Any
from hermes_cli.nous_subscription import get_nous_subscription_features
from tools.tool_backend_helpers import managed_nous_tools_enabled
from utils import base_url_hostname
from hermes_constants import get_optional_skills_dir
logger = logging.getLogger(__name__)
@@ -94,16 +93,15 @@ _DEFAULT_PROVIDER_MODELS = {
"gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
],
"zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
"kimi-coding": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
"kimi-coding-cn": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
"stepfun": ["step-3.5-flash", "step-3.5-flash-2603"],
"kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
"kimi-coding-cn": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
"arcee": ["trinity-large-thinking", "trinity-large-preview", "trinity-mini"],
"minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
"minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
"opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
"opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2.5-pro", "mimo-v2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", "qwen3.6-plus", "qwen3.5-plus"],
"opencode-go": ["glm-5.1", "glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"],
"huggingface": [
"Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
"Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
@@ -409,36 +407,13 @@ def _print_setup_summary(config: dict, hermes_home):
("Browser Automation", False, missing_browser_hint)
)
# Image generation — FAL (direct or via Nous), or any plugin-registered
# provider (OpenAI, etc.)
# FAL (image generation)
if subscription_features.image_gen.managed_by_nous:
tool_status.append(("Image Generation (Nous subscription)", True, None))
elif subscription_features.image_gen.available:
tool_status.append(("Image Generation", True, None))
else:
# Fall back to probing plugin-registered providers so OpenAI-only
# setups don't show as "missing FAL_KEY".
_img_backend = None
try:
from agent.image_gen_registry import list_providers
from hermes_cli.plugins import _ensure_plugins_discovered
_ensure_plugins_discovered()
for _p in list_providers():
if _p.name == "fal":
continue
try:
if _p.is_available():
_img_backend = _p.display_name
break
except Exception:
continue
except Exception:
pass
if _img_backend:
tool_status.append((f"Image Generation ({_img_backend})", True, None))
else:
tool_status.append(("Image Generation", False, "FAL_KEY or OPENAI_API_KEY"))
tool_status.append(("Image Generation", False, "FAL_KEY"))
# TTS — show configured provider
tts_provider = config.get("tts", {}).get("provider", "edge")
@@ -458,6 +433,7 @@ def _print_setup_summary(config: dict, hermes_home):
tool_status.append(("Text-to-Speech (Google Gemini)", True, None))
elif tts_provider == "neutts":
try:
import importlib.util
neutts_ok = importlib.util.find_spec("neutts") is not None
except Exception:
neutts_ok = False
@@ -465,16 +441,6 @@ def _print_setup_summary(config: dict, hermes_home):
tool_status.append(("Text-to-Speech (NeuTTS local)", True, None))
else:
tool_status.append(("Text-to-Speech (NeuTTS — not installed)", False, "run 'hermes setup tts'"))
elif tts_provider == "kittentts":
try:
import importlib.util
kittentts_ok = importlib.util.find_spec("kittentts") is not None
except Exception:
kittentts_ok = False
if kittentts_ok:
tool_status.append(("Text-to-Speech (KittenTTS local)", True, None))
else:
tool_status.append(("Text-to-Speech (KittenTTS — not installed)", False, "run 'hermes setup tts'"))
else:
tool_status.append(("Text-to-Speech (Edge TTS)", True, None))
@@ -805,7 +771,6 @@ def setup_model_provider(config: dict, *, quick: bool = False):
"zai": "Z.AI / GLM",
"kimi-coding": "Kimi / Moonshot",
"kimi-coding-cn": "Kimi / Moonshot (China)",
"stepfun": "StepFun Step Plan",
"minimax": "MiniMax",
"minimax-cn": "MiniMax CN",
"anthropic": "Anthropic",
@@ -838,8 +803,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
elif _vision_idx == 1: # OpenAI-compatible endpoint
_base_url = prompt(" Base URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
_api_key_label = " API key"
_is_native_openai = base_url_hostname(_base_url) == "api.openai.com"
if _is_native_openai:
if "api.openai.com" in _base_url.lower():
_api_key_label = " OpenAI API key"
_oai_key = prompt(_api_key_label, password=True).strip()
if _oai_key:
@@ -847,7 +811,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
# Save vision base URL to config (not .env — only secrets go there)
_vaux = config.setdefault("auxiliary", {}).setdefault("vision", {})
_vaux["base_url"] = _base_url
if _is_native_openai:
if "api.openai.com" in _base_url.lower():
_oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
_vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"]
_vm_idx = prompt_choice("Select vision model:", _vm_choices, 0)
@@ -883,6 +847,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
def _check_espeak_ng() -> bool:
"""Check if espeak-ng is installed."""
import shutil
return shutil.which("espeak-ng") is not None or shutil.which("espeak") is not None
@@ -936,31 +901,6 @@ def _install_neutts_deps() -> bool:
return False
def _install_kittentts_deps() -> bool:
"""Install KittenTTS dependencies with user approval. Returns True on success."""
import subprocess
import sys
wheel_url = (
"https://github.com/KittenML/KittenTTS/releases/download/"
"0.8.1/kittentts-0.8.1-py3-none-any.whl"
)
print()
print_info("Installing kittentts Python package (~25-80MB model downloaded on first use)...")
print()
try:
subprocess.run(
[sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"],
check=True, timeout=300,
)
print_success("kittentts installed successfully")
return True
except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
print_error(f"Failed to install kittentts: {e}")
print_info(f"Try manually: python -m pip install -U '{wheel_url}' soundfile")
return False
def _setup_tts_provider(config: dict):
"""Interactive TTS provider selection with install flow for NeuTTS."""
tts_config = config.get("tts", {})
@@ -976,7 +916,6 @@ def _setup_tts_provider(config: dict):
"mistral": "Mistral Voxtral TTS",
"gemini": "Google Gemini TTS",
"neutts": "NeuTTS",
"kittentts": "KittenTTS",
}
current_label = provider_labels.get(current_provider, current_provider)
@@ -1000,10 +939,9 @@ def _setup_tts_provider(config: dict):
"Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
"Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)",
"NeuTTS (local on-device, free, ~300MB model download)",
"KittenTTS (local on-device, free, lightweight ~25-80MB ONNX)",
]
)
providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts", "kittentts"])
providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts"])
choices.append(f"Keep current ({current_label})")
keep_current_idx = len(choices) - 1
idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
@@ -1024,6 +962,7 @@ def _setup_tts_provider(config: dict):
if selected == "neutts":
# Check if already installed
try:
import importlib.util
already_installed = importlib.util.find_spec("neutts") is not None
except Exception:
already_installed = False
@@ -1122,29 +1061,6 @@ def _setup_tts_provider(config: dict):
print_warning("No API key provided. Falling back to Edge TTS.")
selected = "edge"
elif selected == "kittentts":
# Check if already installed
try:
import importlib.util
already_installed = importlib.util.find_spec("kittentts") is not None
except Exception:
already_installed = False
if already_installed:
print_success("KittenTTS is already installed")
else:
print()
print_info("KittenTTS is lightweight (~25-80MB, CPU-only, no API key required).")
print_info("Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo")
print()
if prompt_yes_no("Install KittenTTS now?", True):
if not _install_kittentts_deps():
print_warning("KittenTTS installation incomplete. Falling back to Edge TTS.")
selected = "edge"
else:
print_info("Skipping install. Set tts.provider to 'kittentts' after installing manually.")
selected = "edge"
# Save the selection
if "tts" not in config:
config["tts"] = {}
@@ -1166,6 +1082,8 @@ def setup_tts(config: dict):
def setup_terminal_backend(config: dict):
"""Configure the terminal execution backend."""
import platform as _platform
import shutil
print_header("Terminal Backend")
print_info("Choose where Hermes runs shell commands and code.")
print_info("This affects tool execution, file access, and isolation.")
@@ -2334,7 +2252,6 @@ def setup_gateway(config: dict):
launchd_install,
launchd_start,
launchd_restart,
UserSystemdUnavailableError,
)
service_installed = _is_service_installed()
@@ -2358,10 +2275,6 @@ def setup_gateway(config: dict):
systemd_restart()
elif _is_macos:
launchd_restart()
except UserSystemdUnavailableError as e:
print_error(" Restart failed — user systemd not reachable:")
for line in str(e).splitlines():
print(f" {line}")
except Exception as e:
print_error(f" Restart failed: {e}")
elif service_installed:
@@ -2371,10 +2284,6 @@ def setup_gateway(config: dict):
systemd_start()
elif _is_macos:
launchd_start()
except UserSystemdUnavailableError as e:
print_error(" Start failed — user systemd not reachable:")
for line in str(e).splitlines():
print(f" {line}")
except Exception as e:
print_error(f" Start failed: {e}")
elif supports_service_manager:
@@ -2398,10 +2307,6 @@ def setup_gateway(config: dict):
systemd_start(system=installed_scope == "system")
elif _is_macos:
launchd_start()
except UserSystemdUnavailableError as e:
print_error(" Start failed — user systemd not reachable:")
for line in str(e).splitlines():
print(f" {line}")
except Exception as e:
print_error(f" Start failed: {e}")
except Exception as e:
@@ -2453,74 +2358,6 @@ def setup_tools(config: dict, first_install: bool = False):
# =============================================================================
def _model_section_has_credentials(config: dict) -> bool:
"""Return True when any known inference provider has usable credentials.
Sources of truth:
* ``PROVIDER_REGISTRY`` in ``hermes_cli.auth`` lists every supported
provider along with its ``api_key_env_vars``.
* ``active_provider`` in the auth store covers OAuth device-code /
external-OAuth providers (Nous, Codex, Qwen, Gemini CLI, ...).
* The legacy OpenRouter aggregator env vars, which route generic
``OPENAI_API_KEY`` / ``OPENROUTER_API_KEY`` values through OpenRouter.
"""
try:
from hermes_cli.auth import get_active_provider
if get_active_provider():
return True
except Exception:
pass
try:
from hermes_cli.auth import PROVIDER_REGISTRY
except Exception:
PROVIDER_REGISTRY = {} # type: ignore[assignment]
def _has_key(pconfig) -> bool:
for env_var in pconfig.api_key_env_vars:
# CLAUDE_CODE_OAUTH_TOKEN is set by Claude Code itself, not by
# the user — mirrors is_provider_explicitly_configured in auth.py.
if env_var == "CLAUDE_CODE_OAUTH_TOKEN":
continue
if get_env_value(env_var):
return True
return False
# Prefer the provider declared in config.yaml, avoids false positives
# from stray env vars (GH_TOKEN, etc.) when the user has already picked
# a different provider.
model_cfg = config.get("model") if isinstance(config, dict) else None
if isinstance(model_cfg, dict):
provider_id = (model_cfg.get("provider") or "").strip().lower()
if provider_id in PROVIDER_REGISTRY:
if _has_key(PROVIDER_REGISTRY[provider_id]):
return True
if provider_id == "openrouter":
for env_var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY"):
if get_env_value(env_var):
return True
# OpenRouter aggregator fallback (no provider declared in config).
for env_var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY"):
if get_env_value(env_var):
return True
for pid, pconfig in PROVIDER_REGISTRY.items():
# Skip copilot in auto-detect: GH_TOKEN / GITHUB_TOKEN are
# commonly set for git tooling. Mirrors resolve_provider in auth.py.
if pid == "copilot":
continue
if _has_key(pconfig):
return True
return False
def _gateway_platform_short_label(label: str) -> str:
"""Strip trailing parenthetical qualifiers from a gateway platform label."""
base = label.split("(", 1)[0].strip()
return base or label
def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]:
"""Return a short summary if a setup section is already configured, else None.
@@ -2529,7 +2366,20 @@ def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]
so that test patches on ``setup_mod.get_env_value`` take effect.
"""
if section_key == "model":
if not _model_section_has_credentials(config):
has_key = bool(
get_env_value("OPENROUTER_API_KEY")
or get_env_value("OPENAI_API_KEY")
or get_env_value("ANTHROPIC_API_KEY")
)
if not has_key:
# Check for OAuth providers
try:
from hermes_cli.auth import get_active_provider
if get_active_provider():
has_key = True
except Exception:
pass
if not has_key:
return None
model = config.get("model")
if isinstance(model, str) and model.strip():
@@ -2547,11 +2397,37 @@ def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]
return f"max turns: {max_turns}"
elif section_key == "gateway":
platforms = [
_gateway_platform_short_label(label)
for label, env_var, _ in _GATEWAY_PLATFORMS
if get_env_value(env_var)
]
platforms = []
if get_env_value("TELEGRAM_BOT_TOKEN"):
platforms.append("Telegram")
if get_env_value("DISCORD_BOT_TOKEN"):
platforms.append("Discord")
if get_env_value("SLACK_BOT_TOKEN"):
platforms.append("Slack")
if get_env_value("SIGNAL_ACCOUNT"):
platforms.append("Signal")
if get_env_value("EMAIL_ADDRESS"):
platforms.append("Email")
if get_env_value("TWILIO_ACCOUNT_SID"):
platforms.append("SMS")
if get_env_value("MATRIX_ACCESS_TOKEN") or get_env_value("MATRIX_PASSWORD"):
platforms.append("Matrix")
if get_env_value("MATTERMOST_TOKEN"):
platforms.append("Mattermost")
if get_env_value("WHATSAPP_PHONE_NUMBER_ID"):
platforms.append("WhatsApp")
if get_env_value("DINGTALK_CLIENT_ID"):
platforms.append("DingTalk")
if get_env_value("FEISHU_APP_ID"):
platforms.append("Feishu")
if get_env_value("WECOM_BOT_ID"):
platforms.append("WeCom")
if get_env_value("WEIXIN_ACCOUNT_ID"):
platforms.append("Weixin")
if get_env_value("BLUEBUBBLES_SERVER_URL"):
platforms.append("BlueBubbles")
if get_env_value("WEBHOOK_ENABLED"):
platforms.append("Webhooks")
if platforms:
return ", ".join(platforms)
return None # No platforms configured — section must run
+7 -71
View File
@@ -30,14 +30,6 @@ All fields are optional. Missing values inherit from the ``default`` skin.
prompt: "#FFF8DC" # Prompt text color
input_rule: "#CD7F32" # Input area horizontal rule
response_border: "#FFD700" # Response box border (ANSI)
status_bar_bg: "#1a1a2e" # Status bar background
status_bar_text: "#C0C0C0" # Status bar default text
status_bar_strong: "#FFD700" # Status bar highlighted text
status_bar_dim: "#8B8682" # Status bar separators/muted text
status_bar_good: "#8FBC8F" # Healthy context usage
status_bar_warn: "#FFD700" # Warning context usage
status_bar_bad: "#FF8C00" # High context usage
status_bar_critical: "#FF6B6B" # Critical context usage
session_label: "#DAA520" # Session label color
session_border: "#8B8682" # Session ID dim color
status_bar_bg: "#1a1a2e" # TUI status/usage bar background
@@ -178,7 +170,6 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
"prompt": "#FFF8DC",
"input_rule": "#CD7F32",
"response_border": "#FFD700",
"status_bar_bg": "#1a1a2e",
"session_label": "#DAA520",
"session_border": "#8B8682",
},
@@ -212,14 +203,6 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
"prompt": "#F1E6CF",
"input_rule": "#9F1C1C",
"response_border": "#C7A96B",
"status_bar_bg": "#2A1212",
"status_bar_text": "#F1E6CF",
"status_bar_strong": "#C7A96B",
"status_bar_dim": "#6E584B",
"status_bar_good": "#7BC96F",
"status_bar_warn": "#C7A96B",
"status_bar_bad": "#DD4A3A",
"status_bar_critical": "#EF5350",
"session_label": "#C7A96B",
"session_border": "#6E584B",
},
@@ -284,14 +267,6 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
"prompt": "#c9d1d9",
"input_rule": "#444444",
"response_border": "#aaaaaa",
"status_bar_bg": "#1F1F1F",
"status_bar_text": "#C9D1D9",
"status_bar_strong": "#E6EDF3",
"status_bar_dim": "#777777",
"status_bar_good": "#B5B5B5",
"status_bar_warn": "#AAAAAA",
"status_bar_bad": "#D0D0D0",
"status_bar_critical": "#F0F0F0",
"session_label": "#888888",
"session_border": "#555555",
},
@@ -323,14 +298,6 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
"prompt": "#c9d1d9",
"input_rule": "#4169e1",
"response_border": "#7eb8f6",
"status_bar_bg": "#151C2F",
"status_bar_text": "#C9D1D9",
"status_bar_strong": "#7EB8F6",
"status_bar_dim": "#4B5563",
"status_bar_good": "#63D0A6",
"status_bar_warn": "#E6A855",
"status_bar_bad": "#F7A072",
"status_bar_critical": "#FF7A7A",
"session_label": "#7eb8f6",
"session_border": "#4b5563",
},
@@ -436,14 +403,6 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
"prompt": "#EAF7FF",
"input_rule": "#2A6FB9",
"response_border": "#5DB8F5",
"status_bar_bg": "#0F2440",
"status_bar_text": "#EAF7FF",
"status_bar_strong": "#A9DFFF",
"status_bar_dim": "#496884",
"status_bar_good": "#6ED7B0",
"status_bar_warn": "#5DB8F5",
"status_bar_bad": "#2A6FB9",
"status_bar_critical": "#D94F4F",
"session_label": "#A9DFFF",
"session_border": "#496884",
},
@@ -508,14 +467,6 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
"prompt": "#F5F5F5",
"input_rule": "#656565",
"response_border": "#B7B7B7",
"status_bar_bg": "#202020",
"status_bar_text": "#D3D3D3",
"status_bar_strong": "#F5F5F5",
"status_bar_dim": "#656565",
"status_bar_good": "#B7B7B7",
"status_bar_warn": "#D3D3D3",
"status_bar_bad": "#E7E7E7",
"status_bar_critical": "#F5F5F5",
"session_label": "#919191",
"session_border": "#656565",
},
@@ -581,14 +532,6 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
"prompt": "#FFF0D4",
"input_rule": "#C75B1D",
"response_border": "#F29C38",
"status_bar_bg": "#2B160E",
"status_bar_text": "#FFF0D4",
"status_bar_strong": "#FFD39A",
"status_bar_dim": "#6C4724",
"status_bar_good": "#6BCB77",
"status_bar_warn": "#F29C38",
"status_bar_bad": "#E2832B",
"status_bar_critical": "#EF5350",
"session_label": "#FFD39A",
"session_border": "#6C4724",
},
@@ -827,13 +770,6 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
warn = skin.get_color("ui_warn", "#FF8C00")
error = skin.get_color("ui_error", "#FF6B6B")
status_bg = skin.get_color("status_bar_bg", "#1a1a2e")
status_text = skin.get_color("status_bar_text", text)
status_strong = skin.get_color("status_bar_strong", title)
status_dim = skin.get_color("status_bar_dim", dim)
status_good = skin.get_color("status_bar_good", skin.get_color("ui_ok", "#8FBC8F"))
status_warn = skin.get_color("status_bar_warn", warn)
status_bad = skin.get_color("status_bar_bad", skin.get_color("banner_accent", warn))
status_critical = skin.get_color("status_bar_critical", error)
voice_bg = skin.get_color("voice_status_bg", status_bg)
menu_bg = skin.get_color("completion_menu_bg", "#1a1a2e")
menu_current_bg = skin.get_color("completion_menu_current_bg", "#333355")
@@ -846,13 +782,13 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
"prompt": prompt,
"prompt-working": f"{dim} italic",
"hint": f"{dim} italic",
"status-bar": f"bg:{status_bg} {status_text}",
"status-bar-strong": f"bg:{status_bg} {status_strong} bold",
"status-bar-dim": f"bg:{status_bg} {status_dim}",
"status-bar-good": f"bg:{status_bg} {status_good} bold",
"status-bar-warn": f"bg:{status_bg} {status_warn} bold",
"status-bar-bad": f"bg:{status_bg} {status_bad} bold",
"status-bar-critical": f"bg:{status_bg} {status_critical} bold",
"status-bar": f"bg:{status_bg} {text}",
"status-bar-strong": f"bg:{status_bg} {title} bold",
"status-bar-dim": f"bg:{status_bg} {dim}",
"status-bar-good": f"bg:{status_bg} {skin.get_color('ui_ok', '#8FBC8F')} bold",
"status-bar-warn": f"bg:{status_bg} {warn} bold",
"status-bar-bad": f"bg:{status_bg} {skin.get_color('banner_accent', warn)} bold",
"status-bar-critical": f"bg:{status_bg} {error} bold",
"input-rule": input_rule,
"image-badge": f"{label} bold",
"completion-menu": f"bg:{menu_bg} {text}",
-2
View File
@@ -122,7 +122,6 @@ def show_status(args):
"OpenAI": "OPENAI_API_KEY",
"Z.AI/GLM": "GLM_API_KEY",
"Kimi": "KIMI_API_KEY",
"StepFun Step Plan": "STEPFUN_API_KEY",
"MiniMax": "MINIMAX_API_KEY",
"MiniMax-CN": "MINIMAX_CN_API_KEY",
"Firecrawl": "FIRECRAWL_API_KEY",
@@ -253,7 +252,6 @@ def show_status(args):
apikey_providers = {
"Z.AI / GLM": ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"),
"Kimi / Moonshot": ("KIMI_API_KEY",),
"StepFun Step Plan": ("STEPFUN_API_KEY",),
"MiniMax": ("MINIMAX_API_KEY",),
"MiniMax (China)": ("MINIMAX_CN_API_KEY",),
}
+1 -2
View File
@@ -127,7 +127,7 @@ TIPS = [
# --- Tools & Capabilities ---
"execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.",
"delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.",
"delegate_task spawns up to 3 concurrent sub-agents with isolated contexts for parallel work.",
"web_extract works on PDF URLs — pass any PDF link and it converts to markdown.",
"search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.",
"patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.",
@@ -289,7 +289,6 @@ TIPS = [
"When a provider returns HTTP 402 (payment required), the auxiliary client auto-falls back to the next one.",
"agent.tool_use_enforcement steers models that describe actions instead of calling tools — auto for GPT/Codex.",
"agent.restart_drain_timeout (default 60s) lets running agents finish before a gateway restart takes effect.",
"agent.api_max_retries (default 3) controls how many times the agent retries a failed API call before surfacing the error — lower it for fast fallback.",
"The gateway caches AIAgent instances per session — destroying this cache breaks Anthropic prompt caching.",
"Any website can expose skills via /.well-known/skills/index.json — the skills hub discovers them automatically.",
"The skills audit log at ~/.hermes/skills/.hub/audit.log tracks every install and removal operation.",
+5 -284
View File
@@ -24,8 +24,7 @@ from hermes_cli.nous_subscription import (
apply_nous_managed_defaults,
get_nous_subscription_features,
)
from tools.tool_backend_helpers import fal_key_is_configured, managed_nous_tools_enabled
from utils import base_url_hostname
from tools.tool_backend_helpers import managed_nous_tools_enabled
logger = logging.getLogger(__name__)
@@ -182,14 +181,6 @@ TOOL_CATEGORIES = {
],
"tts_provider": "gemini",
},
{
"name": "KittenTTS",
"badge": "local · free",
"tag": "Lightweight local ONNX TTS (~25MB), no API key",
"env_vars": [],
"tts_provider": "kittentts",
"post_setup": "kittentts",
},
],
},
"web": {
@@ -431,36 +422,6 @@ def _run_post_setup(post_setup_key: str):
_print_warning(" Node.js not found. Install Camofox via Docker:")
_print_info(" docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser")
elif post_setup_key == "kittentts":
try:
__import__("kittentts")
_print_success(" kittentts is already installed")
return
except ImportError:
pass
import subprocess
_print_info(" Installing kittentts (~25-80MB model, CPU-only)...")
wheel_url = (
"https://github.com/KittenML/KittenTTS/releases/download/"
"0.8.1/kittentts-0.8.1-py3-none-any.whl"
)
try:
result = subprocess.run(
[sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"],
capture_output=True, text=True, timeout=300,
)
if result.returncode == 0:
_print_success(" kittentts installed")
_print_info(" Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo")
_print_info(" Models: KittenML/kitten-tts-nano-0.8-int8 (25MB), micro (41MB), mini (80MB)")
else:
_print_warning(" kittentts install failed:")
_print_info(f" {result.stderr.strip()[:300]}")
_print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile")
except subprocess.TimeoutExpired:
_print_warning(" kittentts install timed out (>5min)")
_print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile")
elif post_setup_key == "rl_training":
try:
__import__("tinker_atropos")
@@ -585,10 +546,6 @@ def _get_platform_tools(
ts_tools = set(resolve_toolset(ts_key))
if ts_tools and ts_tools.issubset(all_tool_names):
enabled_toolsets.add(ts_key)
default_off = set(_DEFAULT_OFF_TOOLSETS)
if platform in default_off:
default_off.remove(platform)
enabled_toolsets -= default_off
# Plugin toolsets: enabled by default unless explicitly disabled.
# A plugin toolset is "known" for a platform once `hermes tools`
@@ -847,51 +804,6 @@ def _configure_toolset(ts_key: str, config: dict):
_configure_simple_requirements(ts_key)
def _plugin_image_gen_providers() -> list[dict]:
"""Build picker-row dicts from plugin-registered image gen providers.
Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider
row but carries an ``image_gen_plugin_name`` marker so downstream
code (config writing, model picker) knows to route through the
plugin registry instead of the in-tree FAL backend.
FAL is skipped it's already exposed by the hardcoded
``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to
a plugin in a follow-up PR, the hardcoded entries go away and this
function surfaces it alongside OpenAI automatically.
"""
try:
from agent.image_gen_registry import list_providers
from hermes_cli.plugins import _ensure_plugins_discovered
_ensure_plugins_discovered()
providers = list_providers()
except Exception:
return []
rows: list[dict] = []
for provider in providers:
if getattr(provider, "name", None) == "fal":
# FAL has its own hardcoded rows today.
continue
try:
schema = provider.get_setup_schema()
except Exception:
continue
if not isinstance(schema, dict):
continue
rows.append(
{
"name": schema.get("name", provider.display_name),
"badge": schema.get("badge", ""),
"tag": schema.get("tag", ""),
"env_vars": schema.get("env_vars", []),
"image_gen_plugin_name": provider.name,
}
)
return rows
def _visible_providers(cat: dict, config: dict) -> list[dict]:
"""Return provider entries visible for the current auth/config state."""
features = get_nous_subscription_features(config)
@@ -902,12 +814,6 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]:
if provider.get("requires_nous_auth") and not features.nous_auth_present:
continue
visible.append(provider)
# Inject plugin-registered image_gen backends (OpenAI today, more
# later) so the picker lists them alongside FAL / Nous Subscription.
if cat.get("name") == "Image Generation":
visible.extend(_plugin_image_gen_providers())
return visible
@@ -927,24 +833,7 @@ def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool:
browser_cfg = config.get("browser", {})
return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg
if ts_key == "image_gen":
# Satisfied when the in-tree FAL backend is configured OR any
# plugin-registered image gen provider is available.
if fal_key_is_configured():
return False
try:
from agent.image_gen_registry import list_providers
from hermes_cli.plugins import _ensure_plugins_discovered
_ensure_plugins_discovered()
for provider in list_providers():
try:
if provider.is_available():
return False
except Exception:
continue
except Exception:
pass
return True
return not get_env_value("FAL_KEY")
return not _toolset_has_keys(ts_key, config)
@@ -1019,11 +908,6 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
def _is_provider_active(provider: dict, config: dict) -> bool:
"""Check if a provider entry matches the currently active config."""
plugin_name = provider.get("image_gen_plugin_name")
if plugin_name:
image_cfg = config.get("image_gen", {})
return isinstance(image_cfg, dict) and image_cfg.get("provider") == plugin_name
managed_feature = provider.get("managed_nous_feature")
if managed_feature:
features = get_nous_subscription_features(config)
@@ -1031,13 +915,6 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
if feature is None:
return False
if managed_feature == "image_gen":
image_cfg = config.get("image_gen", {})
if isinstance(image_cfg, dict):
configured_provider = image_cfg.get("provider")
if configured_provider not in (None, "", "fal"):
return False
if image_cfg.get("use_gateway") is False:
return False
return feature.managed_by_nous
if provider.get("tts_provider"):
return (
@@ -1060,16 +937,6 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
if provider.get("web_backend"):
current = config.get("web", {}).get("backend")
return current == provider["web_backend"]
if provider.get("imagegen_backend"):
image_cfg = config.get("image_gen", {})
if not isinstance(image_cfg, dict):
return False
configured_provider = image_cfg.get("provider")
return (
provider["imagegen_backend"] == "fal"
and configured_provider in (None, "", "fal")
and not image_cfg.get("use_gateway")
)
return False
@@ -1185,100 +1052,6 @@ def _configure_imagegen_model(backend_name: str, config: dict) -> None:
_print_success(f" Model set to: {chosen}")
def _plugin_image_gen_catalog(plugin_name: str):
"""Return ``(catalog_dict, default_model_id)`` for a plugin provider.
``catalog_dict`` is shaped like the legacy ``FAL_MODELS`` table
``{model_id: {"display", "speed", "strengths", "price", ...}}``
so the existing picker code paths work without change. Returns
``({}, None)`` if the provider isn't registered or has no models.
"""
try:
from agent.image_gen_registry import get_provider
from hermes_cli.plugins import _ensure_plugins_discovered
_ensure_plugins_discovered()
provider = get_provider(plugin_name)
except Exception:
return {}, None
if provider is None:
return {}, None
try:
models = provider.list_models() or []
default = provider.default_model()
except Exception:
return {}, None
catalog = {m["id"]: m for m in models if isinstance(m, dict) and "id" in m}
return catalog, default
def _configure_imagegen_model_for_plugin(plugin_name: str, config: dict) -> None:
"""Prompt the user to pick a model for a plugin-registered backend.
Writes selection to ``image_gen.model``. Mirrors
:func:`_configure_imagegen_model` but sources its catalog from the
plugin registry instead of :data:`IMAGEGEN_BACKENDS`.
"""
catalog, default_model = _plugin_image_gen_catalog(plugin_name)
if not catalog:
return
cur_cfg = config.setdefault("image_gen", {})
if not isinstance(cur_cfg, dict):
cur_cfg = {}
config["image_gen"] = cur_cfg
current_model = cur_cfg.get("model") or default_model
if current_model not in catalog:
current_model = default_model
model_ids = list(catalog.keys())
ordered = [current_model] + [m for m in model_ids if m != current_model]
widths = {
"model": max(len(m) for m in model_ids),
"speed": max((len(catalog[m].get("speed", "")) for m in model_ids), default=6),
"strengths": max((len(catalog[m].get("strengths", "")) for m in model_ids), default=0),
}
print()
header = (
f" {'Model':<{widths['model']}} "
f"{'Speed':<{widths['speed']}} "
f"{'Strengths':<{widths['strengths']}} "
f"Price"
)
print(color(header, Colors.CYAN))
rows = []
for mid in ordered:
row = _format_imagegen_model_row(mid, catalog[mid], widths)
if mid == current_model:
row += " ← currently in use"
rows.append(row)
idx = _prompt_choice(
f" Choose {plugin_name} model:",
rows,
default=0,
)
chosen = ordered[idx]
cur_cfg["model"] = chosen
_print_success(f" Model set to: {chosen}")
def _select_plugin_image_gen_provider(plugin_name: str, config: dict) -> None:
"""Persist a plugin-backed image generation provider selection."""
img_cfg = config.setdefault("image_gen", {})
if not isinstance(img_cfg, dict):
img_cfg = {}
config["image_gen"] = img_cfg
img_cfg["provider"] = plugin_name
img_cfg["use_gateway"] = False
_print_success(f" image_gen.provider set to: {plugin_name}")
_configure_imagegen_model_for_plugin(plugin_name, config)
def _configure_provider(provider: dict, config: dict):
"""Configure a single provider - prompt for API keys and set config."""
env_vars = provider.get("env_vars", [])
@@ -1335,22 +1108,10 @@ def _configure_provider(provider: dict, config: dict):
_print_success(f" {provider['name']} - no configuration needed!")
if managed_feature:
_print_info(" Requests for this tool will be billed to your Nous subscription.")
# Plugin-registered image_gen provider: write image_gen.provider
# and route model selection to the plugin's own catalog.
plugin_name = provider.get("image_gen_plugin_name")
if plugin_name:
_select_plugin_image_gen_provider(plugin_name, config)
return
# Imagegen backends prompt for model selection after backend pick.
backend = provider.get("imagegen_backend")
if backend:
_configure_imagegen_model(backend, config)
# In-tree FAL is the only non-plugin backend today. Keep
# image_gen.provider clear so the dispatch shim falls through
# to the legacy FAL path.
img_cfg = config.setdefault("image_gen", {})
if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"):
img_cfg["provider"] = "fal"
return
# Prompt for each required env var
@@ -1385,17 +1146,10 @@ def _configure_provider(provider: dict, config: dict):
if all_configured:
_print_success(f" {provider['name']} configured!")
plugin_name = provider.get("image_gen_plugin_name")
if plugin_name:
_select_plugin_image_gen_provider(plugin_name, config)
return
# Imagegen backends prompt for model selection after env vars are in.
backend = provider.get("imagegen_backend")
if backend:
_configure_imagegen_model(backend, config)
img_cfg = config.setdefault("image_gen", {})
if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"):
img_cfg["provider"] = "fal"
def _configure_simple_requirements(ts_key: str):
@@ -1421,17 +1175,17 @@ def _configure_simple_requirements(ts_key: str):
_print_warning(" Skipped")
elif idx == 1:
base_url = _prompt(" OPENAI_BASE_URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
is_native_openai = base_url_hostname(base_url) == "api.openai.com"
key_label = " OPENAI_API_KEY" if is_native_openai else " API key"
key_label = " OPENAI_API_KEY" if "api.openai.com" in base_url.lower() else " API key"
api_key = _prompt(key_label, password=True)
if api_key and api_key.strip():
save_env_value("OPENAI_API_KEY", api_key.strip())
# Save vision base URL to config (not .env — only secrets go there)
from hermes_cli.config import load_config, save_config
_cfg = load_config()
_aux = _cfg.setdefault("auxiliary", {}).setdefault("vision", {})
_aux["base_url"] = base_url
save_config(_cfg)
if is_native_openai:
if "api.openai.com" in base_url.lower():
save_env_value("AUXILIARY_VISION_MODEL", "gpt-4o-mini")
_print_success(" Saved")
else:
@@ -1561,39 +1315,16 @@ def _reconfigure_provider(provider: dict, config: dict):
config.setdefault("web", {})["backend"] = provider["web_backend"]
_print_success(f" Web backend set to: {provider['web_backend']}")
if managed_feature and managed_feature not in ("web", "tts", "browser"):
section = config.setdefault(managed_feature, {})
if not isinstance(section, dict):
section = {}
config[managed_feature] = section
section["use_gateway"] = True
elif not managed_feature:
for cat_key, cat in TOOL_CATEGORIES.items():
if provider in cat.get("providers", []):
section = config.get(cat_key)
if isinstance(section, dict) and section.get("use_gateway"):
section["use_gateway"] = False
break
if not env_vars:
if provider.get("post_setup"):
_run_post_setup(provider["post_setup"])
_print_success(f" {provider['name']} - no configuration needed!")
if managed_feature:
_print_info(" Requests for this tool will be billed to your Nous subscription.")
plugin_name = provider.get("image_gen_plugin_name")
if plugin_name:
_select_plugin_image_gen_provider(plugin_name, config)
return
# Imagegen backends prompt for model selection on reconfig too.
backend = provider.get("imagegen_backend")
if backend:
_configure_imagegen_model(backend, config)
if backend == "fal":
img_cfg = config.setdefault("image_gen", {})
if isinstance(img_cfg, dict):
img_cfg["provider"] = "fal"
img_cfg["use_gateway"] = False
return
for var in env_vars:
@@ -1612,19 +1343,9 @@ def _reconfigure_provider(provider: dict, config: dict):
_print_info(" Kept current")
# Imagegen backends prompt for model selection on reconfig too.
plugin_name = provider.get("image_gen_plugin_name")
if plugin_name:
_select_plugin_image_gen_provider(plugin_name, config)
return
backend = provider.get("imagegen_backend")
if backend:
_configure_imagegen_model(backend, config)
if backend == "fal":
img_cfg = config.setdefault("image_gen", {})
if isinstance(img_cfg, dict):
img_cfg["provider"] = "fal"
img_cfg["use_gateway"] = False
def _reconfigure_simple_requirements(ts_key: str):

Some files were not shown because too many files have changed in this diff Show More