Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1cd2b280fd | |||
| 2c2e32cc45 | |||
| a0701b1d5a | |||
| 3d21aee811 | |||
| 29b337bca7 |
@@ -5,9 +5,7 @@
|
||||
|
||||
# Dependencies
|
||||
node_modules
|
||||
**/node_modules
|
||||
.venv
|
||||
**/.venv
|
||||
|
||||
# CI/CD
|
||||
.github
|
||||
@@ -16,6 +14,3 @@ node_modules
|
||||
.env
|
||||
|
||||
*.md
|
||||
|
||||
# Runtime data (bind-mounted at /opt/data; must not leak into build context)
|
||||
data/
|
||||
|
||||
@@ -398,19 +398,3 @@ IMAGE_TOOLS_DEBUG=false
|
||||
# Override STT provider endpoints (for proxies or self-hosted instances)
|
||||
# GROQ_BASE_URL=https://api.groq.com/openai/v1
|
||||
# STT_OPENAI_BASE_URL=https://api.openai.com/v1
|
||||
|
||||
# =============================================================================
|
||||
# MICROSOFT TEAMS INTEGRATION
|
||||
# =============================================================================
|
||||
# Register a Bot in Azure: https://dev.botframework.com/ → "Register a bot"
|
||||
# Or use Azure Portal: Azure Active Directory → App registrations → New registration
|
||||
# Then add the bot to Teams via the Bot Framework or App Studio.
|
||||
#
|
||||
# TEAMS_CLIENT_ID= # Azure AD App (client) ID
|
||||
# TEAMS_CLIENT_SECRET= # Azure AD client secret value
|
||||
# TEAMS_TENANT_ID= # Azure AD tenant ID (or "common" for multi-tenant)
|
||||
# TEAMS_ALLOWED_USERS= # Comma-separated AAD object IDs or UPNs
|
||||
# TEAMS_ALLOW_ALL_USERS=false # Set true to skip the allowlist
|
||||
# TEAMS_HOME_CHANNEL= # Default channel/chat ID for cron delivery
|
||||
# TEAMS_HOME_CHANNEL_NAME= # Display name for the home channel
|
||||
# TEAMS_PORT=3978 # Webhook listen port (Bot Framework default)
|
||||
|
||||
@@ -1,18 +1,8 @@
|
||||
name: 'Setup Nix'
|
||||
description: 'Install Nix and configure Cachix binary cache'
|
||||
|
||||
inputs:
|
||||
cachix-auth-token:
|
||||
description: 'Cachix auth token (enables push). Omit for read-only.'
|
||||
required: false
|
||||
default: ''
|
||||
description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
|
||||
- uses: cachix/cachix-action@1eb2ef646ac0255473d23a5907ad7b04ce94065c # v17
|
||||
with:
|
||||
name: hermes-agent
|
||||
authToken: ${{ inputs.cachix-auth-token }}
|
||||
continue-on-error: true
|
||||
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
|
||||
|
||||
@@ -53,9 +53,6 @@ jobs:
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Build skills index (if not already present)
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
@@ -36,9 +36,6 @@ jobs:
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Lint docs diagrams
|
||||
run: npm run lint:diagrams
|
||||
working-directory: website
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
name: Nix Lockfile Check
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
concurrency:
|
||||
group: nix-lockfile-check-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
check:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
|
||||
- name: Resolve head SHA
|
||||
id: sha
|
||||
shell: bash
|
||||
run: |
|
||||
FULL="${{ github.event.pull_request.head.sha || github.sha }}"
|
||||
echo "full=$FULL" >> "$GITHUB_OUTPUT"
|
||||
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Check lockfile hashes
|
||||
id: check
|
||||
continue-on-error: true
|
||||
env:
|
||||
LINK_SHA: ${{ steps.sha.outputs.full }}
|
||||
run: nix run .#fix-lockfiles -- --check
|
||||
|
||||
- name: Post sticky PR comment (stale)
|
||||
if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
message: |
|
||||
### ⚠️ npm lockfile hash out of date
|
||||
|
||||
Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
|
||||
|
||||
The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
|
||||
|
||||
${{ steps.check.outputs.report }}
|
||||
|
||||
#### Apply the fix
|
||||
|
||||
- [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
|
||||
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
|
||||
- Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
|
||||
|
||||
- name: Clear sticky PR comment (resolved)
|
||||
if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
delete: true
|
||||
|
||||
- name: Fail if stale
|
||||
if: steps.check.outputs.stale == 'true'
|
||||
run: exit 1
|
||||
@@ -1,13 +1,6 @@
|
||||
name: Nix Lockfile Fix
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'ui-tui/package-lock.json'
|
||||
- 'ui-tui/package.json'
|
||||
- 'web/package-lock.json'
|
||||
- 'web/package.json'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
pr_number:
|
||||
@@ -26,105 +19,9 @@ concurrency:
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
# ── Auto-fix on main ───────────────────────────────────────────────
|
||||
# Fires when a push to main touches package.json or package-lock.json
|
||||
# in ui-tui/ or web/. Runs fix-lockfiles and pushes the hash
|
||||
# update commit directly to main so Nix builds never stay broken.
|
||||
#
|
||||
# Safety invariants:
|
||||
# 1. The fix commit only touches nix/*.nix files, which are NOT in
|
||||
# the paths filter above, so this cannot re-trigger itself.
|
||||
# 2. An explicit file-whitelist check before commit aborts if
|
||||
# fix-lockfiles ever modifies unexpected files.
|
||||
# 3. Job-level concurrency with cancel-in-progress: true ensures
|
||||
# back-to-back pushes collapse to the newest; ref: main checkout
|
||||
# always operates on the latest branch state.
|
||||
# 4. Uses a GitHub App token (not GITHUB_TOKEN) so the fix commit
|
||||
# triggers downstream nix.yml verification.
|
||||
auto-fix-main:
|
||||
if: github.event_name == 'push'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 25
|
||||
concurrency:
|
||||
group: auto-fix-main
|
||||
cancel-in-progress: true
|
||||
steps:
|
||||
- name: Generate GitHub App token
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@7bfa3a4717ef143a604ee0a99d859b8886a96d00 # v1.9.3
|
||||
with:
|
||||
app-id: ${{ secrets.APP_ID }}
|
||||
private-key: ${{ secrets.APP_PRIVATE_KEY }}
|
||||
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
ref: main
|
||||
token: ${{ steps.app-token.outputs.token }}
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Apply lockfile hashes
|
||||
id: apply
|
||||
run: nix run .#fix-lockfiles -- --apply
|
||||
|
||||
- name: Commit & push
|
||||
if: steps.apply.outputs.changed == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Ensure only nix files were modified — prevents accidental
|
||||
# self-triggering if fix-lockfiles ever touches package files.
|
||||
unexpected="$(git diff --name-only | grep -Ev '^nix/(tui|web)\.nix$' || true)"
|
||||
if [ -n "$unexpected" ]; then
|
||||
echo "::error::Unexpected modified files: $unexpected"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Record the base SHA before committing — used to detect package
|
||||
# file changes if we need to rebase after a non-fast-forward push.
|
||||
BASE_SHA="$(git rev-parse HEAD)"
|
||||
|
||||
git config user.name 'github-actions[bot]'
|
||||
git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
|
||||
git add nix/tui.nix nix/web.nix
|
||||
git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
|
||||
-m "Source: $GITHUB_SHA" \
|
||||
-m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
|
||||
|
||||
# Retry push with rebase in case main advanced with an unrelated
|
||||
# commit during the nix build. Without this, a non-fast-forward
|
||||
# rejection silently loses the fix. If package files changed during
|
||||
# the rebase, abort — a fresh auto-fix run will handle the new state.
|
||||
for attempt in 1 2 3; do
|
||||
if git push origin HEAD:main; then
|
||||
exit 0
|
||||
fi
|
||||
echo "::warning::Push attempt $attempt failed (non-fast-forward?), rebasing…"
|
||||
git fetch origin main
|
||||
|
||||
# If package files changed between our base and the new main,
|
||||
# our computed hashes are stale. Abort and let the next triggered
|
||||
# run recompute from the correct package-lock state.
|
||||
pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
|
||||
'ui-tui/package-lock.json' 'ui-tui/package.json' \
|
||||
'web/package-lock.json' 'web/package.json' || true)"
|
||||
if [ -n "$pkg_changed" ]; then
|
||||
echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
git rebase origin/main
|
||||
done
|
||||
echo "::error::Failed to push after 3 rebase attempts"
|
||||
exit 1
|
||||
|
||||
# ── PR fix (manual / checkbox) ─────────────────────────────────────
|
||||
# Existing behavior: run on manual dispatch OR when a task-list
|
||||
# checkbox in the sticky lockfile-check comment flips from [ ] to [x].
|
||||
fix:
|
||||
# Run on manual dispatch OR when a task-list checkbox in the sticky
|
||||
# lockfile-check comment flips from `[ ]` to `[x]`.
|
||||
if: |
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'issue_comment'
|
||||
@@ -202,12 +99,10 @@ jobs:
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Apply lockfile hashes
|
||||
id: apply
|
||||
run: nix run .#fix-lockfiles
|
||||
run: nix run .#fix-lockfiles -- --apply
|
||||
|
||||
- name: Commit & push
|
||||
if: steps.apply.outputs.changed == 'true'
|
||||
|
||||
@@ -7,7 +7,6 @@ on:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
concurrency:
|
||||
group: nix-${{ github.ref }}
|
||||
@@ -23,95 +22,12 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Resolve head SHA
|
||||
if: github.event_name == 'pull_request'
|
||||
id: sha
|
||||
shell: bash
|
||||
run: |
|
||||
FULL="${{ github.event.pull_request.head.sha || github.sha }}"
|
||||
echo "full=$FULL" >> "$GITHUB_OUTPUT"
|
||||
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Check flake
|
||||
id: flake
|
||||
if: runner.os == 'Linux'
|
||||
continue-on-error: true
|
||||
run: nix flake check --print-build-logs
|
||||
|
||||
- name: Build package
|
||||
id: build
|
||||
if: runner.os == 'Linux'
|
||||
continue-on-error: true
|
||||
run: nix build --print-build-logs
|
||||
|
||||
# When the real Nix build fails, run a targeted diagnostic to see if
|
||||
# the failure is specifically a stale npm lockfile hash in one of the
|
||||
# known npm subpackages (tui / web). This avoids surfacing a generic
|
||||
# "build failed" message when the fix is a single known command.
|
||||
- name: Diagnose npm lockfile hashes
|
||||
id: hash_check
|
||||
if: (steps.flake.outcome == 'failure' || steps.build.outcome == 'failure') && runner.os == 'Linux'
|
||||
continue-on-error: true
|
||||
env:
|
||||
LINK_SHA: ${{ steps.sha.outputs.full }}
|
||||
run: nix run .#fix-lockfiles -- --check
|
||||
|
||||
# If fix-lockfiles itself crashes (infrastructure blip, cache throttle,
|
||||
# etc.) it won't set stale=true/false. Treat that as a distinct failure
|
||||
# mode rather than silently ignoring it.
|
||||
- name: Fail if hash check crashed without reporting
|
||||
if: steps.hash_check.outcome == 'failure' && steps.hash_check.outputs.stale != 'true' && steps.hash_check.outputs.stale != 'false'
|
||||
run: |
|
||||
echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
|
||||
exit 1
|
||||
|
||||
- name: Post sticky PR comment (stale hashes)
|
||||
if: steps.hash_check.outputs.stale == 'true' && github.event_name == 'pull_request'
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
message: |
|
||||
### ⚠️ npm lockfile hash out of date
|
||||
|
||||
Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
|
||||
|
||||
The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
|
||||
|
||||
${{ steps.hash_check.outputs.report }}
|
||||
|
||||
#### Apply the fix
|
||||
|
||||
- [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
|
||||
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
|
||||
- Or locally: `nix run .#fix-lockfiles` and commit the diff
|
||||
|
||||
# Clear the sticky comment when either the build passed outright (no
|
||||
# hash check needed) or the hash check explicitly returned stale=false
|
||||
# (build failed for a non-hash reason).
|
||||
- name: Clear sticky PR comment (resolved)
|
||||
if: |
|
||||
github.event_name == 'pull_request' &&
|
||||
runner.os == 'Linux' &&
|
||||
(steps.hash_check.outputs.stale == 'false' ||
|
||||
(steps.flake.outcome == 'success' && steps.build.outcome == 'success'))
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
delete: true
|
||||
|
||||
- name: Final fail if build or flake failed
|
||||
if: steps.flake.outcome == 'failure' || steps.build.outcome == 'failure'
|
||||
run: |
|
||||
if [ "${{ steps.hash_check.outputs.stale }}" == "true" ]; then
|
||||
echo "::error::Nix build failed due to stale npm lockfile hash. Run: nix run .#fix-lockfiles"
|
||||
else
|
||||
echo "::error::Nix build/flake check failed. See logs above."
|
||||
fi
|
||||
exit 1
|
||||
|
||||
- name: Evaluate flake (macOS)
|
||||
if: runner.os == 'macOS'
|
||||
run: nix flake show --json > /dev/null
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
.DS_Store
|
||||
/venv/
|
||||
/_pycache/
|
||||
*.pyc*
|
||||
@@ -69,4 +68,3 @@ mini-swe-agent/
|
||||
.nix-stamps/
|
||||
result
|
||||
website/static/api/skills-index.json
|
||||
models-dev-upstream/
|
||||
|
||||
@@ -5,61 +5,78 @@ Instructions for AI coding assistants and developers working on the hermes-agent
|
||||
## Development Environment
|
||||
|
||||
```bash
|
||||
# Prefer .venv; fall back to venv if that's what your checkout has.
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
source venv/bin/activate # ALWAYS activate before running Python
|
||||
```
|
||||
|
||||
`scripts/run_tests.sh` probes `.venv` first, then `venv`, then
|
||||
`$HOME/.hermes/hermes-agent/venv` (for worktrees that share a venv with the
|
||||
main checkout).
|
||||
|
||||
## Project Structure
|
||||
|
||||
File counts shift constantly — don't treat the tree below as exhaustive.
|
||||
The canonical source is the filesystem. The notes call out the load-bearing
|
||||
entry points you'll actually edit.
|
||||
|
||||
```
|
||||
hermes-agent/
|
||||
├── run_agent.py # AIAgent class — core conversation loop (~12k LOC)
|
||||
├── run_agent.py # AIAgent class — core conversation loop
|
||||
├── model_tools.py # Tool orchestration, discover_builtin_tools(), handle_function_call()
|
||||
├── toolsets.py # Toolset definitions, _HERMES_CORE_TOOLS list
|
||||
├── cli.py # HermesCLI class — interactive CLI orchestrator (~11k LOC)
|
||||
├── cli.py # HermesCLI class — interactive CLI orchestrator
|
||||
├── hermes_state.py # SessionDB — SQLite session store (FTS5 search)
|
||||
├── hermes_constants.py # get_hermes_home(), display_hermes_home() — profile-aware paths
|
||||
├── hermes_logging.py # setup_logging() — agent.log / errors.log / gateway.log (profile-aware)
|
||||
├── batch_runner.py # Parallel batch processing
|
||||
├── agent/ # Agent internals (provider adapters, memory, caching, compression, etc.)
|
||||
├── hermes_cli/ # CLI subcommands, setup wizard, plugins loader, skin engine
|
||||
├── tools/ # Tool implementations — auto-discovered via tools/registry.py
|
||||
├── agent/ # Agent internals
|
||||
│ ├── prompt_builder.py # System prompt assembly
|
||||
│ ├── context_compressor.py # Auto context compression
|
||||
│ ├── prompt_caching.py # Anthropic prompt caching
|
||||
│ ├── auxiliary_client.py # Auxiliary LLM client (vision, summarization)
|
||||
│ ├── model_metadata.py # Model context lengths, token estimation
|
||||
│ ├── models_dev.py # models.dev registry integration (provider-aware context)
|
||||
│ ├── display.py # KawaiiSpinner, tool preview formatting
|
||||
│ ├── skill_commands.py # Skill slash commands (shared CLI/gateway)
|
||||
│ └── trajectory.py # Trajectory saving helpers
|
||||
├── hermes_cli/ # CLI subcommands and setup
|
||||
│ ├── main.py # Entry point — all `hermes` subcommands
|
||||
│ ├── config.py # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
|
||||
│ ├── commands.py # Slash command definitions + SlashCommandCompleter
|
||||
│ ├── callbacks.py # Terminal callbacks (clarify, sudo, approval)
|
||||
│ ├── setup.py # Interactive setup wizard
|
||||
│ ├── skin_engine.py # Skin/theme engine — CLI visual customization
|
||||
│ ├── skills_config.py # `hermes skills` — enable/disable skills per platform
|
||||
│ ├── tools_config.py # `hermes tools` — enable/disable tools per platform
|
||||
│ ├── skills_hub.py # `/skills` slash command (search, browse, install)
|
||||
│ ├── models.py # Model catalog, provider model lists
|
||||
│ ├── model_switch.py # Shared /model switch pipeline (CLI + gateway)
|
||||
│ └── auth.py # Provider credential resolution
|
||||
├── tools/ # Tool implementations (one file per tool)
|
||||
│ ├── registry.py # Central tool registry (schemas, handlers, dispatch)
|
||||
│ ├── approval.py # Dangerous command detection
|
||||
│ ├── terminal_tool.py # Terminal orchestration
|
||||
│ ├── process_registry.py # Background process management
|
||||
│ ├── file_tools.py # File read/write/search/patch
|
||||
│ ├── web_tools.py # Web search/extract (Parallel + Firecrawl)
|
||||
│ ├── browser_tool.py # Browserbase browser automation
|
||||
│ ├── code_execution_tool.py # execute_code sandbox
|
||||
│ ├── delegate_tool.py # Subagent delegation
|
||||
│ ├── mcp_tool.py # MCP client (~1050 lines)
|
||||
│ └── environments/ # Terminal backends (local, docker, ssh, modal, daytona, singularity)
|
||||
├── gateway/ # Messaging gateway — run.py + session.py + platforms/
|
||||
│ ├── platforms/ # Adapter per platform (telegram, discord, slack, whatsapp,
|
||||
│ │ # homeassistant, signal, matrix, mattermost, email, sms,
|
||||
│ │ # dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
|
||||
│ │ # webhook, api_server, ...). See ADDING_A_PLATFORM.md.
|
||||
│ └── builtin_hooks/ # Extension point for always-registered gateway hooks (none shipped)
|
||||
├── plugins/ # Plugin system (see "Plugins" section below)
|
||||
│ ├── memory/ # Memory-provider plugins (honcho, mem0, supermemory, ...)
|
||||
│ ├── context_engine/ # Context-engine plugins
|
||||
│ └── <others>/ # Dashboard, image-gen, disk-cleanup, examples, ...
|
||||
├── optional-skills/ # Heavier/niche skills shipped but NOT active by default
|
||||
├── skills/ # Built-in skills bundled with the repo
|
||||
├── gateway/ # Messaging platform gateway
|
||||
│ ├── run.py # Main loop, slash commands, message dispatch
|
||||
│ ├── session.py # SessionStore — conversation persistence
|
||||
│ └── platforms/ # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
|
||||
├── ui-tui/ # Ink (React) terminal UI — `hermes --tui`
|
||||
│ └── src/ # entry.tsx, app.tsx, gatewayClient.ts + app/components/hooks/lib
|
||||
│ ├── src/entry.tsx # TTY gate + render()
|
||||
│ ├── src/app.tsx # Main state machine and UI
|
||||
│ ├── src/gatewayClient.ts # Child process + JSON-RPC bridge
|
||||
│ ├── src/app/ # Decomposed app logic (event handler, slash handler, stores, hooks)
|
||||
│ ├── src/components/ # Ink components (branding, markdown, prompts, pickers, etc.)
|
||||
│ ├── src/hooks/ # useCompletion, useInputHistory, useQueue, useVirtualHistory
|
||||
│ └── src/lib/ # Pure helpers (history, osc52, text, rpc, messages)
|
||||
├── tui_gateway/ # Python JSON-RPC backend for the TUI
|
||||
│ ├── entry.py # stdio entrypoint
|
||||
│ ├── server.py # RPC handlers and session logic
|
||||
│ ├── render.py # Optional rich/ANSI bridge
|
||||
│ └── slash_worker.py # Persistent HermesCLI subprocess for slash commands
|
||||
├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration)
|
||||
├── cron/ # Scheduler — jobs.py, scheduler.py
|
||||
├── cron/ # Scheduler (jobs.py, scheduler.py)
|
||||
├── environments/ # RL training environments (Atropos)
|
||||
├── scripts/ # run_tests.sh, release.py, auxiliary scripts
|
||||
├── website/ # Docusaurus docs site
|
||||
└── tests/ # Pytest suite (~15k tests across ~700 files as of Apr 2026)
|
||||
├── tests/ # Pytest suite (~3000 tests)
|
||||
└── batch_runner.py # Parallel batch processing
|
||||
```
|
||||
|
||||
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
|
||||
**Logs:** `~/.hermes/logs/` — `agent.log` (INFO+), `errors.log` (WARNING+),
|
||||
`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
|
||||
Browse with `hermes logs [--follow] [--level ...] [--session ...]`.
|
||||
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)
|
||||
|
||||
## File Dependency Chain
|
||||
|
||||
@@ -77,30 +94,20 @@ run_agent.py, cli.py, batch_runner.py, environments/
|
||||
|
||||
## AIAgent Class (run_agent.py)
|
||||
|
||||
The real `AIAgent.__init__` takes ~60 parameters (credentials, routing, callbacks,
|
||||
session context, budget, credential pool, etc.). The signature below is the
|
||||
minimum subset you'll usually touch — read `run_agent.py` for the full list.
|
||||
|
||||
```python
|
||||
class AIAgent:
|
||||
def __init__(self,
|
||||
base_url: str = None,
|
||||
api_key: str = None,
|
||||
provider: str = None,
|
||||
api_mode: str = None, # "chat_completions" | "codex_responses" | ...
|
||||
model: str = "", # empty → resolved from config/provider later
|
||||
max_iterations: int = 90, # tool-calling iterations (shared with subagents)
|
||||
model: str = "anthropic/claude-opus-4.6",
|
||||
max_iterations: int = 90,
|
||||
enabled_toolsets: list = None,
|
||||
disabled_toolsets: list = None,
|
||||
quiet_mode: bool = False,
|
||||
save_trajectories: bool = False,
|
||||
platform: str = None, # "cli", "telegram", etc.
|
||||
platform: str = None, # "cli", "telegram", etc.
|
||||
session_id: str = None,
|
||||
skip_context_files: bool = False,
|
||||
skip_memory: bool = False,
|
||||
credential_pool=None,
|
||||
# ... plus callbacks, thread/user/chat IDs, iteration_budget, fallback_model,
|
||||
# checkpoints config, prefill_messages, service_tier, reasoning_config, etc.
|
||||
# ... plus provider, api_mode, callbacks, routing params
|
||||
): ...
|
||||
|
||||
def chat(self, message: str) -> str:
|
||||
@@ -113,13 +120,10 @@ class AIAgent:
|
||||
|
||||
### Agent Loop
|
||||
|
||||
The core loop is inside `run_conversation()` — entirely synchronous, with
|
||||
interrupt checks, budget tracking, and a one-turn grace call:
|
||||
The core loop is inside `run_conversation()` — entirely synchronous:
|
||||
|
||||
```python
|
||||
while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) \
|
||||
or self._budget_grace_call:
|
||||
if self._interrupt_requested: break
|
||||
while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
|
||||
response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas)
|
||||
if response.tool_calls:
|
||||
for tool_call in response.tool_calls:
|
||||
@@ -130,8 +134,7 @@ while (api_call_count < self.max_iterations and self.iteration_budget.remaining
|
||||
return response.content
|
||||
```
|
||||
|
||||
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`.
|
||||
Reasoning content is stored in `assistant_msg["reasoning"]`.
|
||||
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`.
|
||||
|
||||
---
|
||||
|
||||
@@ -249,9 +252,7 @@ The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes
|
||||
- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
|
||||
- Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
|
||||
|
||||
**Do not re-implement the primary chat experience in React.** The main transcript, composer/input flow (including slash-command behavior), and PTY-backed terminal belong to the embedded `hermes --tui` — anything new you add to Ink shows up in the dashboard automatically. If you find yourself rebuilding the transcript or composer for the dashboard, stop and extend Ink instead.
|
||||
|
||||
**Structured React UI around the TUI is allowed when it is not a second chat surface.** Sidebar widgets, inspectors, summaries, status panels, and similar supporting views (e.g. `ChatSidebar`, `ModelPickerDialog`, `ToolCall`) are fine when they complement the embedded TUI rather than replacing the transcript / composer / terminal. Keep their state independent of the PTY child's session and surface their failures non-destructively so the terminal pane keeps working unimpaired.
|
||||
**Never add a parallel chat surface in React.** If you catch yourself re-implementing slash popover / model picker / tool cards for the dashboard, stop — the TUI already does those, and anything new you add to Ink will appear in the dashboard automatically.
|
||||
|
||||
---
|
||||
|
||||
@@ -290,7 +291,7 @@ The registry handles schema collection, dispatch, availability checking, and err
|
||||
|
||||
**State files**: If a tool stores persistent state (caches, logs, checkpoints), use `get_hermes_home()` for the base directory — never `Path.home() / ".hermes"`. This ensures each profile gets its own state.
|
||||
|
||||
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `tools/todo_tool.py` for the pattern.
|
||||
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern.
|
||||
|
||||
---
|
||||
|
||||
@@ -298,13 +299,9 @@ The registry handles schema collection, dispatch, availability checking, and err
|
||||
|
||||
### config.yaml options:
|
||||
1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
|
||||
2. Bump `_config_version` (check the current value at the top of `DEFAULT_CONFIG`)
|
||||
ONLY if you need to actively migrate/transform existing user config
|
||||
(renaming keys, changing structure). Adding a new key to an existing
|
||||
section is handled automatically by the deep-merge and does NOT require
|
||||
a version bump.
|
||||
2. Bump `_config_version` (currently 5) to trigger migration for existing users
|
||||
|
||||
### .env variables (SECRETS ONLY — API keys, tokens, passwords):
|
||||
### .env variables:
|
||||
1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
|
||||
```python
|
||||
"NEW_API_KEY": {
|
||||
@@ -316,29 +313,13 @@ The registry handles schema collection, dispatch, availability checking, and err
|
||||
},
|
||||
```
|
||||
|
||||
Non-secret settings (timeouts, thresholds, feature flags, paths, display
|
||||
preferences) belong in `config.yaml`, not `.env`. If internal code needs an
|
||||
env var mirror for backward compatibility, bridge it from `config.yaml` to
|
||||
the env var in code (see `gateway_timeout`, `terminal.cwd` → `TERMINAL_CWD`).
|
||||
|
||||
### Config loaders (three paths — know which one you're in):
|
||||
### Config loaders (two separate systems):
|
||||
|
||||
| Loader | Used by | Location |
|
||||
|--------|---------|----------|
|
||||
| `load_cli_config()` | CLI mode | `cli.py` — merges CLI-specific defaults + user YAML |
|
||||
| `load_config()` | `hermes tools`, `hermes setup`, most CLI subcommands | `hermes_cli/config.py` — merges `DEFAULT_CONFIG` + user YAML |
|
||||
| Direct YAML load | Gateway runtime | `gateway/run.py` + `gateway/config.py` — reads user YAML raw |
|
||||
|
||||
If you add a new key and the CLI sees it but the gateway doesn't (or vice
|
||||
versa), you're on the wrong loader. Check `DEFAULT_CONFIG` coverage.
|
||||
|
||||
### Working directory:
|
||||
- **CLI** — uses the process's current directory (`os.getcwd()`).
|
||||
- **Messaging** — uses `terminal.cwd` from `config.yaml`. The gateway bridges this
|
||||
to the `TERMINAL_CWD` env var for child tools. **`MESSAGING_CWD` has been
|
||||
removed** — the config loader prints a deprecation warning if it's set in
|
||||
`.env`. Same for `TERMINAL_CWD` in `.env`; the canonical setting is
|
||||
`terminal.cwd` in `config.yaml`.
|
||||
| `load_cli_config()` | CLI mode | `cli.py` |
|
||||
| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` |
|
||||
| Direct YAML load | Gateway | `gateway/run.py` |
|
||||
|
||||
---
|
||||
|
||||
@@ -431,95 +412,7 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml.
|
||||
|
||||
---
|
||||
|
||||
## Plugins
|
||||
|
||||
Hermes has two plugin surfaces. Both live under `plugins/` in the repo so
|
||||
repo-shipped plugins can be discovered alongside user-installed ones in
|
||||
`~/.hermes/plugins/` and pip-installed entry points.
|
||||
|
||||
### General plugins (`hermes_cli/plugins.py` + `plugins/<name>/`)
|
||||
|
||||
`PluginManager` discovers plugins from `~/.hermes/plugins/`, `./.hermes/plugins/`,
|
||||
and pip entry points. Each plugin exposes a `register(ctx)` function that
|
||||
can:
|
||||
|
||||
- Register Python-callback lifecycle hooks:
|
||||
`pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`,
|
||||
`on_session_start`, `on_session_end`
|
||||
- Register new tools via `ctx.register_tool(...)`
|
||||
- Register CLI subcommands via `ctx.register_cli_command(...)` — the
|
||||
plugin's argparse tree is wired into `hermes` at startup so
|
||||
`hermes <pluginname> <subcmd>` works with no change to `main.py`
|
||||
|
||||
Hooks are invoked from `model_tools.py` (pre/post tool) and `run_agent.py`
|
||||
(lifecycle). **Discovery timing pitfall:** `discover_plugins()` only runs
|
||||
as a side effect of importing `model_tools.py`. Code paths that read plugin
|
||||
state without importing `model_tools.py` first must call `discover_plugins()`
|
||||
explicitly (it's idempotent).
|
||||
|
||||
### Memory-provider plugins (`plugins/memory/<name>/`)
|
||||
|
||||
Separate discovery system for pluggable memory backends. Current built-in
|
||||
providers include **honcho, mem0, supermemory, byterover, hindsight,
|
||||
holographic, openviking, retaindb**.
|
||||
|
||||
Each provider implements the `MemoryProvider` ABC (see `agent/memory_provider.py`)
|
||||
and is orchestrated by `agent/memory_manager.py`. Lifecycle hooks include
|
||||
`sync_turn(turn_messages)`, `prefetch(query)`, `shutdown()`, and optional
|
||||
`post_setup(hermes_home, config)` for setup-wizard integration.
|
||||
|
||||
**CLI commands via `plugins/memory/<name>/cli.py`:** if a memory plugin
|
||||
defines `register_cli(subparser)`, `discover_plugin_cli_commands()` finds
|
||||
it at argparse setup time and wires it into `hermes <plugin>`. The
|
||||
framework only exposes CLI commands for the **currently active** memory
|
||||
provider (read from `memory.provider` in config.yaml), so disabled
|
||||
providers don't clutter `hermes --help`.
|
||||
|
||||
**Rule (Teknium, May 2026):** plugins MUST NOT modify core files
|
||||
(`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.).
|
||||
If a plugin needs a capability the framework doesn't expose, expand the
|
||||
generic plugin surface (new hook, new ctx method) — never hardcode
|
||||
plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
|
||||
honcho argparse from `main.py` for exactly this reason.
|
||||
|
||||
### Dashboard / context-engine / image-gen plugin directories
|
||||
|
||||
`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
|
||||
etc. follow the same pattern (ABC + orchestrator + per-plugin directory).
|
||||
Context engines plug into `agent/context_engine.py`; image-gen providers
|
||||
into `agent/image_gen_provider.py`.
|
||||
|
||||
---
|
||||
|
||||
## Skills
|
||||
|
||||
Two parallel surfaces:
|
||||
|
||||
- **`skills/`** — built-in skills shipped and loadable by default.
|
||||
Organized by category directories (e.g. `skills/github/`, `skills/mlops/`).
|
||||
- **`optional-skills/`** — heavier or niche skills shipped with the repo but
|
||||
NOT active by default. Installed explicitly via
|
||||
`hermes skills install official/<category>/<skill>`. Adapter lives in
|
||||
`tools/skills_hub.py` (`OptionalSkillSource`). Categories include
|
||||
`autonomous-ai-agents`, `blockchain`, `communication`, `creative`,
|
||||
`devops`, `email`, `health`, `mcp`, `migration`, `mlops`, `productivity`,
|
||||
`research`, `security`, `web-development`.
|
||||
|
||||
When reviewing skill PRs, check which directory they target — heavy-dep or
|
||||
niche skills belong in `optional-skills/`.
|
||||
|
||||
### SKILL.md frontmatter
|
||||
|
||||
Standard fields: `name`, `description`, `version`, `platforms`
|
||||
(OS-gating list: `[macos]`, `[linux, macos]`, ...),
|
||||
`metadata.hermes.tags`, `metadata.hermes.category`,
|
||||
`metadata.hermes.config` (config.yaml settings the skill needs — stored
|
||||
under `skills.config.<key>`, prompted during setup, injected at load time).
|
||||
|
||||
---
|
||||
|
||||
## Important Policies
|
||||
|
||||
### Prompt Caching Must Not Break
|
||||
|
||||
Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:**
|
||||
@@ -529,10 +422,9 @@ Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT i
|
||||
|
||||
Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression.
|
||||
|
||||
Slash commands that mutate system-prompt state (skills, tools, memory, etc.)
|
||||
must be **cache-aware**: default to deferred invalidation (change takes
|
||||
effect next session), with an opt-in `--now` flag for immediate
|
||||
invalidation. See `/skills install --now` for the canonical pattern.
|
||||
### Working Directory Behavior
|
||||
- **CLI**: Uses current directory (`.` → `os.getcwd()`)
|
||||
- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory)
|
||||
|
||||
### Background Process Notifications (Gateway)
|
||||
|
||||
@@ -554,7 +446,7 @@ Hermes supports **profiles** — multiple fully isolated instances, each with it
|
||||
`HERMES_HOME` directory (config, API keys, memory, sessions, skills, gateway, etc.).
|
||||
|
||||
The core mechanism: `_apply_profile_override()` in `hermes_cli/main.py` sets
|
||||
`HERMES_HOME` before any module imports. All `get_hermes_home()` references
|
||||
`HERMES_HOME` before any module imports. All 119+ references to `get_hermes_home()`
|
||||
automatically scope to the active profile.
|
||||
|
||||
### Rules for profile-safe code
|
||||
@@ -611,12 +503,8 @@ Use `get_hermes_home()` from `hermes_constants` for code paths. Use `display_her
|
||||
for user-facing print/log messages. Hardcoding `~/.hermes` breaks profiles — each profile
|
||||
has its own `HERMES_HOME` directory. This was the source of 5 bugs fixed in PR #3575.
|
||||
|
||||
### DO NOT introduce new `simple_term_menu` usage
|
||||
Existing call sites in `hermes_cli/main.py` remain for legacy fallback only;
|
||||
the preferred UI is curses (stdlib) because `simple_term_menu` has
|
||||
ghost-duplication rendering bugs in tmux/iTerm2 with arrow keys. New
|
||||
interactive menus must use `hermes_cli/curses_ui.py` — see
|
||||
`hermes_cli/tools_config.py` for the canonical pattern.
|
||||
### DO NOT use `simple_term_menu` for interactive menus
|
||||
Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern.
|
||||
|
||||
### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
|
||||
Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`.
|
||||
@@ -627,30 +515,6 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p
|
||||
### DO NOT hardcode cross-tool references in schema descriptions
|
||||
Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.
|
||||
|
||||
### The gateway has TWO message guards — both must bypass approval/control commands
|
||||
When an agent is running, messages pass through two sequential guards:
|
||||
(1) **base adapter** (`gateway/platforms/base.py`) queues messages in
|
||||
`_pending_messages` when `session_key in self._active_sessions`, and
|
||||
(2) **gateway runner** (`gateway/run.py`) intercepts `/stop`, `/new`,
|
||||
`/queue`, `/status`, `/approve`, `/deny` before they reach
|
||||
`running_agent.interrupt()`. Any new command that must reach the runner
|
||||
while the agent is blocked (e.g. approval prompts) MUST bypass BOTH
|
||||
guards and be dispatched inline, not via `_process_message_background()`
|
||||
(which races session lifecycle).
|
||||
|
||||
### Squash merges from stale branches silently revert recent fixes
|
||||
Before squash-merging a PR, ensure the branch is up to date with `main`
|
||||
(`git fetch origin main && git reset --hard origin/main` in the worktree,
|
||||
then re-apply the PR's commits). A stale branch's version of an unrelated
|
||||
file will silently overwrite recent fixes on main when squashed. Verify
|
||||
with `git diff HEAD~1..HEAD` after merging — unexpected deletions are a
|
||||
red flag.
|
||||
|
||||
### Don't wire in dead code without E2E validation
|
||||
Unused code that was never shipped was dead for a reason. Before wiring an
|
||||
unused module into a live code path, E2E test the real resolution chain
|
||||
with actual imports (not mocks) against a temp `HERMES_HOME`.
|
||||
|
||||
### Tests must not write to `~/.hermes/`
|
||||
The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.
|
||||
|
||||
@@ -706,7 +570,7 @@ If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
|
||||
pytest directly), at minimum activate the venv and pass `-n 4`:
|
||||
|
||||
```bash
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
source venv/bin/activate
|
||||
python -m pytest tests/ -q -n 4
|
||||
```
|
||||
|
||||
|
||||
+6
-6
@@ -9,7 +9,7 @@ Thank you for contributing to Hermes Agent! This guide covers everything you nee
|
||||
We value contributions in this order:
|
||||
|
||||
1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
|
||||
2. **Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. We want Hermes to work everywhere.
|
||||
2. **Cross-platform compatibility** — Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere.
|
||||
3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
|
||||
4. **Performance and robustness** — retry logic, error handling, graceful degradation.
|
||||
5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
|
||||
@@ -55,10 +55,10 @@ If your skill is specialized, community-contributed, or niche, it's better suite
|
||||
|
||||
| Requirement | Notes |
|
||||
|-------------|-------|
|
||||
| **Git** | With `--recurse-submodules` support, and the `git-lfs` extension installed |
|
||||
| **Git** | With `--recurse-submodules` support |
|
||||
| **Python 3.11+** | uv will install it if missing |
|
||||
| **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
|
||||
| **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |
|
||||
| **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge |
|
||||
|
||||
### Clone and install
|
||||
|
||||
@@ -494,7 +494,7 @@ branding:
|
||||
agent_name: "My Agent"
|
||||
welcome: "Welcome message"
|
||||
response_label: " ⚔ Agent "
|
||||
prompt_symbol: "⚔"
|
||||
prompt_symbol: "⚔ ❯ "
|
||||
|
||||
tool_prefix: "╎" # Tool output line prefix
|
||||
```
|
||||
@@ -515,7 +515,7 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl
|
||||
|
||||
## Cross-Platform Compatibility
|
||||
|
||||
Hermes runs on Linux, macOS, and WSL2 on Windows. When writing code that touches the OS:
|
||||
Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS:
|
||||
|
||||
### Critical rules
|
||||
|
||||
@@ -597,7 +597,7 @@ refactor/description # Code restructuring
|
||||
|
||||
1. **Run tests**: `pytest tests/ -v`
|
||||
2. **Test manually**: Run `hermes` and exercise the code path you changed
|
||||
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2
|
||||
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS
|
||||
4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.
|
||||
|
||||
### PR description
|
||||
|
||||
+6
-24
@@ -10,11 +10,9 @@ ENV PYTHONUNBUFFERED=1
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
|
||||
|
||||
# Install system dependencies in one layer, clear APT cache
|
||||
# tini reaps orphaned zombie processes (MCP stdio subprocesses, git, bun, etc.)
|
||||
# that would otherwise accumulate when hermes runs as PID 1. See #15012.
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential curl nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
|
||||
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
|
||||
@@ -30,38 +28,22 @@ WORKDIR /opt/hermes
|
||||
# unless the lockfiles themselves change.
|
||||
COPY package.json package-lock.json ./
|
||||
COPY web/package.json web/package-lock.json web/
|
||||
COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
|
||||
COPY ui-tui/packages/hermes-ink/package.json ui-tui/packages/hermes-ink/package-lock.json ui-tui/packages/hermes-ink/
|
||||
|
||||
RUN npm install --prefer-offline --no-audit && \
|
||||
npx playwright install --with-deps chromium --only-shell && \
|
||||
(cd web && npm install --prefer-offline --no-audit) && \
|
||||
(cd ui-tui && npm install --prefer-offline --no-audit) && \
|
||||
npm cache clean --force
|
||||
|
||||
# ---------- Source code ----------
|
||||
# .dockerignore excludes node_modules, so the installs above survive.
|
||||
COPY --chown=hermes:hermes . .
|
||||
|
||||
# Build browser dashboard and terminal UI assets.
|
||||
RUN cd web && npm run build && \
|
||||
cd ../ui-tui && npm run build && \
|
||||
rm -rf node_modules/@hermes/ink && \
|
||||
rm -rf packages/hermes-ink/node_modules && \
|
||||
cp -R packages/hermes-ink node_modules/@hermes/ink && \
|
||||
npm install --omit=dev --prefer-offline --no-audit --prefix node_modules/@hermes/ink && \
|
||||
rm -rf node_modules/@hermes/ink/node_modules/react && \
|
||||
node --input-type=module -e "await import('@hermes/ink')"
|
||||
|
||||
# ---------- Permissions ----------
|
||||
# Make install dir world-readable so any HERMES_UID can read it at runtime.
|
||||
# The venv needs to be traversable too.
|
||||
USER root
|
||||
RUN chmod -R a+rX /opt/hermes
|
||||
# Start as root so the entrypoint can usermod/groupmod + gosu.
|
||||
# If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000).
|
||||
# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
|
||||
RUN cd web && npm run build
|
||||
|
||||
# ---------- Python virtualenv ----------
|
||||
RUN chown hermes:hermes /opt/hermes
|
||||
USER hermes
|
||||
RUN uv venv && \
|
||||
uv pip install --no-cache-dir -e ".[all]"
|
||||
|
||||
@@ -70,4 +52,4 @@ ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
|
||||
ENV HERMES_HOME=/opt/data
|
||||
ENV PATH="/opt/data/.local/bin:${PATH}"
|
||||
VOLUME [ "/opt/data" ]
|
||||
ENTRYPOINT [ "/usr/bin/tini", "-g", "--", "/opt/hermes/docker/entrypoint.sh" ]
|
||||
ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
|
||||
|
||||
@@ -76,7 +76,7 @@ Hermes has two entry points: start the terminal UI with `hermes`, or run the gat
|
||||
| Set a personality | `/personality [name]` | `/personality [name]` |
|
||||
| Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` |
|
||||
| Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` |
|
||||
| Browse skills | `/skills` or `/<skill-name>` | `/<skill-name>` |
|
||||
| Browse skills | `/skills` or `/<skill-name>` | `/skills` or `/<skill-name>` |
|
||||
| Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message |
|
||||
| Platform-specific status | `/platforms` | `/status`, `/sethome` |
|
||||
|
||||
@@ -157,10 +157,14 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv venv venv --python 3.11
|
||||
source venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
scripts/run_tests.sh
|
||||
python -m pytest tests/ -q
|
||||
```
|
||||
|
||||
> **RL Training (optional):** The RL/Atropos integration (`environments/`) ships via the `atroposlib` and `tinker` dependencies pulled in by `.[all,dev]` — no submodule setup required.
|
||||
> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
|
||||
> ```bash
|
||||
> git submodule update --init tinker-atropos
|
||||
> uv pip install -e "./tinker-atropos"
|
||||
> ```
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -1,453 +0,0 @@
|
||||
# Hermes Agent v0.11.0 (v2026.4.23)
|
||||
|
||||
**Release Date:** April 23, 2026
|
||||
**Since v0.9.0:** 1,556 commits · 761 merged PRs · 1,314 files changed · 224,174 insertions · 29 community contributors (290 including co-authors)
|
||||
|
||||
> The Interface release — a full React/Ink rewrite of the interactive CLI, a pluggable transport architecture underneath every provider, native AWS Bedrock support, five new inference paths, a 17th messaging platform (QQBot), a dramatically expanded plugin surface, and GPT-5.5 via Codex OAuth.
|
||||
|
||||
This release also folds in all the highlights deferred from v0.10.0 (which shipped only the Nous Tool Gateway) — so it covers roughly two weeks of work across the whole stack.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **New Ink-based TUI** — `hermes --tui` is now a full React/Ink rewrite of the interactive CLI, with a Python JSON-RPC backend (`tui_gateway`). Sticky composer, live streaming with OSC-52 clipboard support, stable picker keys, status bar with per-turn stopwatch and git branch, `/clear` confirm, light-theme preset, and a subagent spawn observability overlay. ~310 commits to `ui-tui/` + `tui_gateway/`. (@OutThisLife + Teknium)
|
||||
|
||||
- **Transport ABC + Native AWS Bedrock** — Format conversion and HTTP transport were extracted from `run_agent.py` into a pluggable `agent/transports/` layer. `AnthropicTransport`, `ChatCompletionsTransport`, `ResponsesApiTransport`, and `BedrockTransport` each own their own format conversion and API shape. Native AWS Bedrock support via the Converse API ships on top of the new abstraction. ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549), [#13347](https://github.com/NousResearch/hermes-agent/pull/13347), [#13366](https://github.com/NousResearch/hermes-agent/pull/13366), [#13430](https://github.com/NousResearch/hermes-agent/pull/13430), [#13805](https://github.com/NousResearch/hermes-agent/pull/13805), [#13814](https://github.com/NousResearch/hermes-agent/pull/13814) — @kshitijk4poor + Teknium)
|
||||
|
||||
- **Five new inference paths** — Native NVIDIA NIM ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774)), Arcee AI ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276)), Step Plan ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893)), Google Gemini CLI OAuth ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270)), and Vercel ai-gateway with pricing + dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223) — @jerilynzheng). Plus Gemini routed through the native AI Studio API for better performance ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674)).
|
||||
|
||||
- **GPT-5.5 over Codex OAuth** — OpenAI's new GPT-5.5 reasoning model is now available through your ChatGPT Codex OAuth, with live model discovery wired into the model picker so new OpenAI releases show up without catalog updates. ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
|
||||
|
||||
- **QQBot — 17th supported platform** — Native QQBot adapter via QQ Official API v2, with QR scan-to-configure setup wizard, streaming cursor, emoji reactions, and DM/group policy gating that matches WeCom/Weixin parity. ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
|
||||
|
||||
- **Plugin surface expanded** — Plugins can now register slash commands (`register_command`), dispatch tools directly (`dispatch_tool`), block tool execution from hooks (`pre_tool_call` can veto), rewrite tool results (`transform_tool_result`), transform terminal output (`transform_terminal_output`), ship image_gen backends, and add custom dashboard tabs. The bundled disk-cleanup plugin is opt-in by default as a reference implementation. ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377), [#10626](https://github.com/NousResearch/hermes-agent/pull/10626), [#10763](https://github.com/NousResearch/hermes-agent/pull/10763), [#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#12929](https://github.com/NousResearch/hermes-agent/pull/12929), [#12944](https://github.com/NousResearch/hermes-agent/pull/12944), [#12972](https://github.com/NousResearch/hermes-agent/pull/12972), [#13799](https://github.com/NousResearch/hermes-agent/pull/13799), [#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
|
||||
|
||||
- **`/steer` — mid-run agent nudges** — `/steer <prompt>` injects a note that the running agent sees after its next tool call, without interrupting the turn or breaking prompt cache. For when you want to course-correct an agent in-flight. ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
|
||||
|
||||
- **Shell hooks** — Wire any shell script as a Hermes lifecycle hook (pre_tool_call, post_tool_call, on_session_start, etc.) without writing a Python plugin. ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
|
||||
|
||||
- **Webhook direct-delivery mode** — Webhook subscriptions can now forward payloads straight to a platform chat without going through the agent — zero-LLM push notifications for alerting, uptime checks, and event streams. ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
|
||||
|
||||
- **Smarter delegation** — Subagents now have an explicit `orchestrator` role that can spawn their own workers, with configurable `max_spawn_depth` (default flat). Concurrent sibling subagents share filesystem state through a file-coordination layer so they don't clobber each other's edits. ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691), [#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
|
||||
- **Auxiliary models — configurable UI + main-model-first** — `hermes model` has a dedicated "Configure auxiliary models" screen for per-task overrides (compression, vision, session_search, title_generation). `auto` routing now defaults to the main model for side tasks across all users (previously aggregator users were silently routed to a cheap provider-side default). ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891), [#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
|
||||
|
||||
- **Dashboard plugin system + live theme switching** — The web dashboard is now extensible. Third-party plugins can add custom tabs, widgets, and views without forking. Paired with a live-switching theme system — themes now control colors, fonts, layout, and density — so users can hot-swap the dashboard look without a reload. Same theming discipline the CLI has, now on the web. ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#10687](https://github.com/NousResearch/hermes-agent/pull/10687), [#14725](https://github.com/NousResearch/hermes-agent/pull/14725))
|
||||
|
||||
- **Dashboard polish** — i18n (English + Chinese), react-router sidebar layout, mobile-responsive, Vercel deployment, real per-session API call tracking, and one-click update + gateway restart buttons. ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), [#9370](https://github.com/NousResearch/hermes-agent/pull/9370), [#9453](https://github.com/NousResearch/hermes-agent/pull/9453), [#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#13526](https://github.com/NousResearch/hermes-agent/pull/13526), [#14004](https://github.com/NousResearch/hermes-agent/pull/14004) — @austinpickett + @DeployFaith + Teknium)
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Core Agent & Architecture
|
||||
|
||||
### Transport Layer (NEW)
|
||||
- **Transport ABC** abstracts format conversion and HTTP transport from `run_agent.py` into `agent/transports/` ([#13347](https://github.com/NousResearch/hermes-agent/pull/13347))
|
||||
- **AnthropicTransport** — Anthropic Messages API path ([#13366](https://github.com/NousResearch/hermes-agent/pull/13366), @kshitijk4poor)
|
||||
- **ChatCompletionsTransport** — default path for OpenAI-compatible providers ([#13805](https://github.com/NousResearch/hermes-agent/pull/13805))
|
||||
- **ResponsesApiTransport** — OpenAI Responses API + Codex build_kwargs wiring ([#13430](https://github.com/NousResearch/hermes-agent/pull/13430), @kshitijk4poor)
|
||||
- **BedrockTransport** — AWS Bedrock Converse API transport ([#13814](https://github.com/NousResearch/hermes-agent/pull/13814))
|
||||
|
||||
### Provider & Model Support
|
||||
- **Native AWS Bedrock provider** via Converse API ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549))
|
||||
- **NVIDIA NIM native provider** (salvage of #11703) ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774))
|
||||
- **Arcee AI direct provider** ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276))
|
||||
- **Step Plan provider** (salvage #6005) ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893), @kshitijk4poor)
|
||||
- **Google Gemini CLI OAuth** inference provider ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270))
|
||||
- **Vercel ai-gateway** with pricing, attribution, and dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223), @jerilynzheng)
|
||||
- **GPT-5.5 over Codex OAuth** with live model discovery in the picker ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
|
||||
- **Gemini routed through native AI Studio API** ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674))
|
||||
- **xAI Grok upgraded to Responses API** ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
|
||||
- **Ollama improvements** — Cloud provider support, GLM continuation, `think=false` control, surrogate sanitization, `/v1` hint ([#10782](https://github.com/NousResearch/hermes-agent/pull/10782))
|
||||
- **Kimi K2.6** across OpenRouter, Nous Portal, native Kimi, and HuggingFace ([#13148](https://github.com/NousResearch/hermes-agent/pull/13148), [#13152](https://github.com/NousResearch/hermes-agent/pull/13152), [#13169](https://github.com/NousResearch/hermes-agent/pull/13169))
|
||||
- **Kimi K2.5** promoted to first position in all model suggestion lists ([#11745](https://github.com/NousResearch/hermes-agent/pull/11745), @kshitijk4poor)
|
||||
- **Xiaomi MiMo v2.5-pro + v2.5** on OpenRouter, Nous Portal, and native ([#14184](https://github.com/NousResearch/hermes-agent/pull/14184), [#14635](https://github.com/NousResearch/hermes-agent/pull/14635), @kshitijk4poor)
|
||||
- **GLM-5V-Turbo** for coding plan ([#9907](https://github.com/NousResearch/hermes-agent/pull/9907))
|
||||
- **Claude Opus 4.7** in Nous Portal catalog ([#11398](https://github.com/NousResearch/hermes-agent/pull/11398))
|
||||
- **OpenRouter elephant-alpha** in curated lists ([#9378](https://github.com/NousResearch/hermes-agent/pull/9378))
|
||||
- **OpenCode-Go** — Kimi K2.6 and Qwen3.5/3.6 Plus in curated catalog ([#13429](https://github.com/NousResearch/hermes-agent/pull/13429))
|
||||
- **minimax/minimax-m2.5:free** in OpenRouter catalog ([#13836](https://github.com/NousResearch/hermes-agent/pull/13836))
|
||||
- **`/model` merges models.dev entries** for lesser-loved providers ([#14221](https://github.com/NousResearch/hermes-agent/pull/14221))
|
||||
- **Per-provider + per-model `request_timeout_seconds`** config ([#12652](https://github.com/NousResearch/hermes-agent/pull/12652))
|
||||
- **Configurable API retry count** via `agent.api_max_retries` ([#14730](https://github.com/NousResearch/hermes-agent/pull/14730))
|
||||
- **ctx_size context length key** for Lemonade server (salvage #8536) ([#14215](https://github.com/NousResearch/hermes-agent/pull/14215))
|
||||
- **Custom provider display name prompt** ([#9420](https://github.com/NousResearch/hermes-agent/pull/9420))
|
||||
- **Recommendation badges** on tool provider selection ([#9929](https://github.com/NousResearch/hermes-agent/pull/9929))
|
||||
- Fix: correct GPT-5 family context lengths in fallback defaults ([#9309](https://github.com/NousResearch/hermes-agent/pull/9309))
|
||||
- Fix: clamp `minimal` reasoning effort to `low` on Responses API ([#9429](https://github.com/NousResearch/hermes-agent/pull/9429))
|
||||
- Fix: strip reasoning item IDs from Responses API input when `store=False` ([#10217](https://github.com/NousResearch/hermes-agent/pull/10217))
|
||||
- Fix: OpenViking correct account default + commit session on `/new` and compress ([#10463](https://github.com/NousResearch/hermes-agent/pull/10463))
|
||||
- Fix: Kimi `/coding` thinking block survival + empty reasoning_content + block ordering (multiple PRs)
|
||||
- Fix: don't send Anthropic thinking to api.kimi.com/coding ([#13826](https://github.com/NousResearch/hermes-agent/pull/13826))
|
||||
- Fix: send `max_tokens`, `reasoning_effort`, and `thinking` for Kimi/Moonshot
|
||||
- Fix: stream reasoning content through OpenAI-compatible providers that emit it
|
||||
|
||||
### Agent Loop & Conversation
|
||||
- **`/steer <prompt>`** — mid-run agent nudges after next tool call ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
|
||||
- **Orchestrator role + configurable spawn depth** for `delegate_task` (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
|
||||
- **Cross-agent file state coordination** for concurrent subagents ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
- **Compressor smart collapse, dedup, anti-thrashing**, template upgrade, hardening ([#10088](https://github.com/NousResearch/hermes-agent/pull/10088))
|
||||
- **Compression summaries respect the conversation's language** ([#12556](https://github.com/NousResearch/hermes-agent/pull/12556))
|
||||
- **Compression model falls back to main model** on permanent 503/404 ([#10093](https://github.com/NousResearch/hermes-agent/pull/10093))
|
||||
- **Auto-continue interrupted agent work** after gateway restart ([#9934](https://github.com/NousResearch/hermes-agent/pull/9934))
|
||||
- **Activity heartbeats** prevent false gateway inactivity timeouts ([#10501](https://github.com/NousResearch/hermes-agent/pull/10501))
|
||||
- **Auxiliary models UI** — dedicated screen for per-task overrides ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891))
|
||||
- **Auxiliary auto routing defaults to main model** for all users ([#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
|
||||
- **PLATFORM_HINTS for Matrix, Mattermost, Feishu** ([#14428](https://github.com/NousResearch/hermes-agent/pull/14428), @alt-glitch)
|
||||
- Fix: reset retry counters after compression; stop poisoning conversation history ([#10055](https://github.com/NousResearch/hermes-agent/pull/10055))
|
||||
- Fix: break compression-exhaustion infinite loop and auto-reset session ([#10063](https://github.com/NousResearch/hermes-agent/pull/10063))
|
||||
- Fix: stale agent timeout, uv venv detection, empty response after tools ([#10065](https://github.com/NousResearch/hermes-agent/pull/10065))
|
||||
- Fix: prevent premature loop exit when weak models return empty after substantive tool calls ([#10472](https://github.com/NousResearch/hermes-agent/pull/10472))
|
||||
- Fix: preserve pre-start terminal interrupts ([#10504](https://github.com/NousResearch/hermes-agent/pull/10504))
|
||||
- Fix: improve interrupt responsiveness during concurrent tool execution ([#10935](https://github.com/NousResearch/hermes-agent/pull/10935))
|
||||
- Fix: word-wrap spinner, interruptable agent join, and delegate_task interrupt ([#10940](https://github.com/NousResearch/hermes-agent/pull/10940))
|
||||
- Fix: `/stop` no longer resets the session ([#9224](https://github.com/NousResearch/hermes-agent/pull/9224))
|
||||
- Fix: honor interrupts during MCP tool waits ([#9382](https://github.com/NousResearch/hermes-agent/pull/9382), @helix4u)
|
||||
- Fix: break stuck session resume loops after repeated restarts ([#9941](https://github.com/NousResearch/hermes-agent/pull/9941))
|
||||
- Fix: empty response nudge crash + placeholder leak to cron targets ([#11021](https://github.com/NousResearch/hermes-agent/pull/11021))
|
||||
- Fix: streaming cursor sanitization to prevent message truncation (multiple PRs)
|
||||
- Fix: resolve `context_length` for plugin context engines ([#9238](https://github.com/NousResearch/hermes-agent/pull/9238))
|
||||
|
||||
### Session & Memory
|
||||
- **Auto-prune old sessions + VACUUM state.db** at startup ([#13861](https://github.com/NousResearch/hermes-agent/pull/13861))
|
||||
- **Honcho overhaul** — context injection, 5-tool surface, cost safety, session isolation ([#10619](https://github.com/NousResearch/hermes-agent/pull/10619))
|
||||
- **Hindsight richer session-scoped retain metadata** (salvage of #6290) ([#13987](https://github.com/NousResearch/hermes-agent/pull/13987))
|
||||
- Fix: deduplicate memory provider tools to prevent 400 on strict providers ([#10511](https://github.com/NousResearch/hermes-agent/pull/10511))
|
||||
- Fix: discover user-installed memory providers from `$HERMES_HOME/plugins/` ([#10529](https://github.com/NousResearch/hermes-agent/pull/10529))
|
||||
- Fix: add `on_memory_write` bridge to sequential tool execution path ([#10507](https://github.com/NousResearch/hermes-agent/pull/10507))
|
||||
- Fix: preserve `session_id` across `previous_response_id` chains in `/v1/responses` ([#10059](https://github.com/NousResearch/hermes-agent/pull/10059))
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ New Ink-based TUI
|
||||
|
||||
A full React/Ink rewrite of the interactive CLI — invoked via `hermes --tui` or `HERMES_TUI=1`. Shipped across ~310 commits to `ui-tui/` and `tui_gateway/`.
|
||||
|
||||
### TUI Foundations
|
||||
- New TUI based on Ink + Python JSON-RPC backend
|
||||
- Prettier + ESLint + vitest tooling for `ui-tui/`
|
||||
- Entry split between `src/entry.tsx` (TTY gate) and `src/app.tsx` (state machine)
|
||||
- Persistent `_SlashWorker` subprocess for slash command dispatch
|
||||
|
||||
### UX & Features
|
||||
- **Stable picker keys, /clear confirm, light-theme preset** ([#12312](https://github.com/NousResearch/hermes-agent/pull/12312), @OutThisLife)
|
||||
- **Git branch in status bar** cwd label ([#12305](https://github.com/NousResearch/hermes-agent/pull/12305), @OutThisLife)
|
||||
- **Per-turn elapsed stopwatch in FaceTicker + done-in sys line** ([#13105](https://github.com/NousResearch/hermes-agent/pull/13105), @OutThisLife)
|
||||
- **Subagent spawn observability overlay** ([#14045](https://github.com/NousResearch/hermes-agent/pull/14045), @OutThisLife)
|
||||
- **Per-prompt elapsed stopwatch in status bar** ([#12948](https://github.com/NousResearch/hermes-agent/pull/12948))
|
||||
- Sticky composer that freezes during scroll
|
||||
- OSC-52 clipboard support for copy across SSH sessions
|
||||
- Virtualized history rendering for performance
|
||||
- Slash command autocomplete via `complete.slash` RPC
|
||||
- Path autocomplete via `complete.path` RPC
|
||||
- Dozens of resize/ghosting/sticky-prompt fixes landed through the week
|
||||
|
||||
### Structural Refactors
|
||||
- Decomposed `app.tsx` into `app/event-handler`, `app/slash-handler`, `app/stores`, `app/hooks` ([#14640](https://github.com/NousResearch/hermes-agent/pull/14640) and surrounding)
|
||||
- Component split: `branding.tsx`, `markdown.tsx`, `prompts.tsx`, `sessionPicker.tsx`, `messageLine.tsx`, `thinking.tsx`, `maskedPrompt.tsx`
|
||||
- Hook split: `useCompletion`, `useInputHistory`, `useQueue`, `useVirtualHistory`
|
||||
|
||||
---
|
||||
|
||||
## 📱 Messaging Platforms (Gateway)
|
||||
|
||||
### New Platforms
|
||||
- **QQBot (17th platform)** — QQ Official API v2 adapter with QR setup, streaming, package split ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
|
||||
|
||||
### Telegram
|
||||
- **Dedicated `TELEGRAM_PROXY` env var + config.yaml proxy support** (closes #9414, #6530, #9074, #7786) ([#10681](https://github.com/NousResearch/hermes-agent/pull/10681))
|
||||
- **`ignored_threads` config** for Telegram groups ([#9530](https://github.com/NousResearch/hermes-agent/pull/9530))
|
||||
- **Config option to disable link previews** (closes #8728) ([#10610](https://github.com/NousResearch/hermes-agent/pull/10610))
|
||||
- **Auto-wrap markdown tables** in code blocks ([#11794](https://github.com/NousResearch/hermes-agent/pull/11794))
|
||||
- Fix: prevent duplicate replies when stream task is cancelled ([#9319](https://github.com/NousResearch/hermes-agent/pull/9319))
|
||||
- Fix: prevent streaming cursor (▉) from appearing as standalone messages ([#9538](https://github.com/NousResearch/hermes-agent/pull/9538))
|
||||
- Fix: retry transient tool sends + cold-boot budget ([#10947](https://github.com/NousResearch/hermes-agent/pull/10947))
|
||||
- Fix: Markdown special char escaping in `send_exec_approval`
|
||||
- Fix: parentheses in URLs during MarkdownV2 link conversion
|
||||
- Fix: Unicode dash normalization in model switch (closes iOS smart-punctuation issue)
|
||||
- Many platform hint / streaming / session-key fixes
|
||||
|
||||
### Discord
|
||||
- **Forum channel support** (salvage of #10145 + media + polish) ([#11920](https://github.com/NousResearch/hermes-agent/pull/11920))
|
||||
- **`DISCORD_ALLOWED_ROLES`** for role-based access control ([#11608](https://github.com/NousResearch/hermes-agent/pull/11608))
|
||||
- **Config option to disable slash commands** (salvage #13130) ([#14315](https://github.com/NousResearch/hermes-agent/pull/14315))
|
||||
- **Native `send_animation`** for inline GIF playback ([#10283](https://github.com/NousResearch/hermes-agent/pull/10283))
|
||||
- **`send_message` Discord media attachments** ([#10246](https://github.com/NousResearch/hermes-agent/pull/10246))
|
||||
- **`/skill` command group** with category subcommands ([#9909](https://github.com/NousResearch/hermes-agent/pull/9909))
|
||||
- **Extract reply text from message references** ([#9781](https://github.com/NousResearch/hermes-agent/pull/9781))
|
||||
|
||||
### Feishu
|
||||
- **Intelligent reply on document comments** with 3-tier access control ([#11898](https://github.com/NousResearch/hermes-agent/pull/11898))
|
||||
- **Show processing state via reactions** on user messages ([#12927](https://github.com/NousResearch/hermes-agent/pull/12927))
|
||||
- **Preserve @mention context for agent consumption** (salvage #13874) ([#14167](https://github.com/NousResearch/hermes-agent/pull/14167))
|
||||
|
||||
### DingTalk
|
||||
- **`require_mention` + `allowed_users` gating** (parity with Slack/Telegram/Discord) ([#11564](https://github.com/NousResearch/hermes-agent/pull/11564))
|
||||
- **QR-code device-flow authorization** for setup wizard ([#11574](https://github.com/NousResearch/hermes-agent/pull/11574))
|
||||
- **AI Cards streaming, emoji reactions, and media handling** (salvage of #10985) ([#11910](https://github.com/NousResearch/hermes-agent/pull/11910))
|
||||
|
||||
### WhatsApp
|
||||
- **`send_voice`** — native audio message delivery ([#13002](https://github.com/NousResearch/hermes-agent/pull/13002))
|
||||
- **`dm_policy` and `group_policy`** parity with WeCom/Weixin/QQ adapters ([#13151](https://github.com/NousResearch/hermes-agent/pull/13151))
|
||||
|
||||
### WeCom / Weixin
|
||||
- **WeCom QR-scan bot creation + interactive setup wizard** (salvage #13923) ([#13961](https://github.com/NousResearch/hermes-agent/pull/13961))
|
||||
|
||||
### Signal
|
||||
- **Media delivery support** via `send_message` ([#13178](https://github.com/NousResearch/hermes-agent/pull/13178))
|
||||
|
||||
### Slack
|
||||
- **Per-thread sessions for DMs by default** ([#10987](https://github.com/NousResearch/hermes-agent/pull/10987))
|
||||
|
||||
### BlueBubbles (iMessage)
|
||||
- Group chat session separation, webhook registration & auth fixes ([#9806](https://github.com/NousResearch/hermes-agent/pull/9806))
|
||||
|
||||
### Gateway Core
|
||||
- **Gateway proxy mode** — forward messages to a remote API server ([#9787](https://github.com/NousResearch/hermes-agent/pull/9787))
|
||||
- **Per-channel ephemeral prompts** (Discord, Telegram, Slack, Mattermost) ([#10564](https://github.com/NousResearch/hermes-agent/pull/10564))
|
||||
- **Surface plugin slash commands** natively on all platforms + decision-capable command hook ([#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
|
||||
- **Support document/archive extensions in MEDIA: tag extraction** (salvage #8255) ([#14307](https://github.com/NousResearch/hermes-agent/pull/14307))
|
||||
- **Recognize `.pdf` in MEDIA: tag extraction** ([#13683](https://github.com/NousResearch/hermes-agent/pull/13683))
|
||||
- **`--all` flag for `gateway start` and `restart`** ([#10043](https://github.com/NousResearch/hermes-agent/pull/10043))
|
||||
- **Notify active sessions on gateway shutdown** + update health check ([#9850](https://github.com/NousResearch/hermes-agent/pull/9850))
|
||||
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
|
||||
- Fix: suppress duplicate replies on interrupt and streaming flood control ([#10235](https://github.com/NousResearch/hermes-agent/pull/10235))
|
||||
- Fix: close temporary agents after one-off tasks ([#11028](https://github.com/NousResearch/hermes-agent/pull/11028), @kshitijk4poor)
|
||||
- Fix: busy-session ack when user messages during active agent run ([#10068](https://github.com/NousResearch/hermes-agent/pull/10068))
|
||||
- Fix: route watch-pattern notifications to the originating session ([#10460](https://github.com/NousResearch/hermes-agent/pull/10460))
|
||||
- Fix: preserve notify context in executor threads ([#10921](https://github.com/NousResearch/hermes-agent/pull/10921), @kshitijk4poor)
|
||||
- Fix: avoid duplicate replies after interrupted long tasks ([#11018](https://github.com/NousResearch/hermes-agent/pull/11018))
|
||||
- Fix: unlink stale PID + lock files on cleanup
|
||||
- Fix: force-unlink stale PID file after `--replace` takeover
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Tool System
|
||||
|
||||
### Plugin Surface (major expansion)
|
||||
- **`register_command()`** — plugins can now add slash commands ([#10626](https://github.com/NousResearch/hermes-agent/pull/10626))
|
||||
- **`dispatch_tool()`** — plugins can invoke tools from their code ([#10763](https://github.com/NousResearch/hermes-agent/pull/10763))
|
||||
- **`pre_tool_call` blocking** — plugins can veto tool execution ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377))
|
||||
- **`transform_tool_result`** — plugins rewrite tool results generically ([#12972](https://github.com/NousResearch/hermes-agent/pull/12972))
|
||||
- **`transform_terminal_output`** — plugins rewrite terminal tool output ([#12929](https://github.com/NousResearch/hermes-agent/pull/12929))
|
||||
- **Namespaced skill registration** for plugin skill bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
|
||||
- **Opt-in-by-default + bundled disk-cleanup plugin** (salvage #12212) ([#12944](https://github.com/NousResearch/hermes-agent/pull/12944))
|
||||
- **Pluggable `image_gen` backends + OpenAI provider** ([#13799](https://github.com/NousResearch/hermes-agent/pull/13799))
|
||||
- **`openai-codex` image_gen plugin** (gpt-image-2 via Codex OAuth) ([#14317](https://github.com/NousResearch/hermes-agent/pull/14317))
|
||||
- **Shell hooks** — wire shell scripts as hook callbacks ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
|
||||
|
||||
### Browser
|
||||
- **`browser_cdp` raw DevTools Protocol passthrough** ([#12369](https://github.com/NousResearch/hermes-agent/pull/12369))
|
||||
- Camofox hardening + connection stability across the window
|
||||
|
||||
### Execute Code
|
||||
- **Project/strict execution modes** (default: project) ([#11971](https://github.com/NousResearch/hermes-agent/pull/11971))
|
||||
|
||||
### Image Generation
|
||||
- **Multi-model FAL support** with picker in `hermes tools` ([#11265](https://github.com/NousResearch/hermes-agent/pull/11265))
|
||||
- **Recraft V3 → V4 Pro, Nano Banana → Pro upgrades** ([#11406](https://github.com/NousResearch/hermes-agent/pull/11406))
|
||||
- **GPT Image 2** in FAL catalog ([#13677](https://github.com/NousResearch/hermes-agent/pull/13677))
|
||||
- **xAI image generation provider** (grok-imagine-image) ([#14765](https://github.com/NousResearch/hermes-agent/pull/14765))
|
||||
|
||||
### TTS / STT / Voice
|
||||
- **Google Gemini TTS provider** ([#11229](https://github.com/NousResearch/hermes-agent/pull/11229))
|
||||
- **xAI Grok STT provider** ([#14473](https://github.com/NousResearch/hermes-agent/pull/14473))
|
||||
- **xAI TTS** (shipped with Responses API upgrade) ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
|
||||
- **KittenTTS local provider** (salvage of #2109) ([#13395](https://github.com/NousResearch/hermes-agent/pull/13395))
|
||||
- **CLI record beep toggle** ([#13247](https://github.com/NousResearch/hermes-agent/pull/13247), @helix4u)
|
||||
|
||||
### Webhook / Cron
|
||||
- **Webhook direct-delivery mode** — zero-LLM push notifications ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
|
||||
- **Cron `wakeAgent` gate** — scripts can skip the agent entirely ([#12373](https://github.com/NousResearch/hermes-agent/pull/12373))
|
||||
- **Cron per-job `enabled_toolsets`** — cap token overhead + cost per job ([#14767](https://github.com/NousResearch/hermes-agent/pull/14767))
|
||||
|
||||
### Delegate
|
||||
- **Orchestrator role** + configurable spawn depth (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
|
||||
- **Cross-agent file state coordination** ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
|
||||
### File / Patch
|
||||
- **`patch` — "did you mean?" feedback** when patch fails to match ([#13435](https://github.com/NousResearch/hermes-agent/pull/13435))
|
||||
|
||||
### API Server
|
||||
- **Stream `/v1/responses` SSE tool events** (salvage #9779) ([#10049](https://github.com/NousResearch/hermes-agent/pull/10049))
|
||||
- **Inline image inputs** on `/v1/chat/completions` and `/v1/responses` ([#12969](https://github.com/NousResearch/hermes-agent/pull/12969))
|
||||
|
||||
### Docker / Podman
|
||||
- **Entry-level Podman support** — `find_docker()` + rootless entrypoint ([#10066](https://github.com/NousResearch/hermes-agent/pull/10066))
|
||||
- **Add docker-cli to Docker image** (salvage #10096) ([#14232](https://github.com/NousResearch/hermes-agent/pull/14232))
|
||||
- **File-sync back to host on teardown** (salvage of #8189 + hardening) ([#11291](https://github.com/NousResearch/hermes-agent/pull/11291))
|
||||
|
||||
### MCP
|
||||
- 12 MCP improvements across the window (status, timeout handling, tool-call forwarding, etc.)
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Skills Ecosystem
|
||||
|
||||
### Skill System
|
||||
- **Namespaced skill registration** for plugin bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
|
||||
- **`hermes skills reset`** to un-stick bundled skills ([#11468](https://github.com/NousResearch/hermes-agent/pull/11468))
|
||||
- **Skills guard opt-in** — `config.skills.guard_agent_created` (default off) ([#14557](https://github.com/NousResearch/hermes-agent/pull/14557))
|
||||
- **Bundled skill scripts runnable out of the box** ([#13384](https://github.com/NousResearch/hermes-agent/pull/13384))
|
||||
- **`xitter` replaced with `xurl`** — the official X API CLI ([#12303](https://github.com/NousResearch/hermes-agent/pull/12303))
|
||||
- **MiniMax-AI/cli as default skill tap** (salvage #7501) ([#14493](https://github.com/NousResearch/hermes-agent/pull/14493))
|
||||
- **Fuzzy `@` file completions + mtime sorting** ([#9467](https://github.com/NousResearch/hermes-agent/pull/9467))
|
||||
|
||||
### New Skills
|
||||
- **concept-diagrams** (salvage of #11045, @v1k22) ([#11363](https://github.com/NousResearch/hermes-agent/pull/11363))
|
||||
- **architecture-diagram** (Cocoon AI port) ([#9906](https://github.com/NousResearch/hermes-agent/pull/9906))
|
||||
- **pixel-art** with hardware palettes and video animation ([#12663](https://github.com/NousResearch/hermes-agent/pull/12663), [#12725](https://github.com/NousResearch/hermes-agent/pull/12725))
|
||||
- **baoyu-comic** ([#13257](https://github.com/NousResearch/hermes-agent/pull/13257), @JimLiu)
|
||||
- **baoyu-infographic** — 21 layouts × 21 styles (salvage #9901) ([#12254](https://github.com/NousResearch/hermes-agent/pull/12254))
|
||||
- **page-agent** — embed Alibaba's in-page GUI agent in your webapp ([#13976](https://github.com/NousResearch/hermes-agent/pull/13976))
|
||||
- **fitness-nutrition** optional skill + optional env var support ([#9355](https://github.com/NousResearch/hermes-agent/pull/9355))
|
||||
- **drug-discovery** — ChEMBL, PubChem, OpenFDA, ADMET ([#9443](https://github.com/NousResearch/hermes-agent/pull/9443))
|
||||
- **touchdesigner-mcp** (salvage of #10081) ([#12298](https://github.com/NousResearch/hermes-agent/pull/12298))
|
||||
- **adversarial-ux-test** optional skill (salvage of #2494, @omnissiah-comelse) ([#13425](https://github.com/NousResearch/hermes-agent/pull/13425))
|
||||
- **maps** — added `guest_house`, `camp_site`, and dual-key bakery lookup ([#13398](https://github.com/NousResearch/hermes-agent/pull/13398))
|
||||
- **llm-wiki** — port provenance markers, source hashing, and quality signals ([#13700](https://github.com/NousResearch/hermes-agent/pull/13700))
|
||||
|
||||
---
|
||||
|
||||
## 📊 Web Dashboard
|
||||
|
||||
- **i18n (English + Chinese) language switcher** ([#9453](https://github.com/NousResearch/hermes-agent/pull/9453))
|
||||
- **Live-switching theme system** ([#10687](https://github.com/NousResearch/hermes-agent/pull/10687))
|
||||
- **Dashboard plugin system** — extend the web UI with custom tabs ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951))
|
||||
- **react-router, sidebar layout, sticky header, dropdown component** ([#9370](https://github.com/NousResearch/hermes-agent/pull/9370), @austinpickett)
|
||||
- **Responsive for mobile** ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), @DeployFaith)
|
||||
- **Vercel deployment** ([#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#11061](https://github.com/NousResearch/hermes-agent/pull/11061), @austinpickett)
|
||||
- **Context window config support** ([#9357](https://github.com/NousResearch/hermes-agent/pull/9357))
|
||||
- **HTTP health probe for cross-container gateway detection** ([#9894](https://github.com/NousResearch/hermes-agent/pull/9894))
|
||||
- **Update + restart gateway buttons** ([#13526](https://github.com/NousResearch/hermes-agent/pull/13526), @austinpickett)
|
||||
- **Real API call count per session** (salvages #10140) ([#14004](https://github.com/NousResearch/hermes-agent/pull/14004))
|
||||
|
||||
---
|
||||
|
||||
## 🖱️ CLI & User Experience
|
||||
|
||||
- **Dynamic shell completion for bash, zsh, and fish** ([#9785](https://github.com/NousResearch/hermes-agent/pull/9785))
|
||||
- **Light-mode skins + skin-aware completion menus** ([#9461](https://github.com/NousResearch/hermes-agent/pull/9461))
|
||||
- **Numbered keyboard shortcuts** on approval and clarify prompts ([#13416](https://github.com/NousResearch/hermes-agent/pull/13416))
|
||||
- **Markdown stripping, compact multiline previews, external editor** ([#12934](https://github.com/NousResearch/hermes-agent/pull/12934))
|
||||
- **`--ignore-user-config` and `--ignore-rules` flags** (port codex#18646) ([#14277](https://github.com/NousResearch/hermes-agent/pull/14277))
|
||||
- **Account limits section in `/usage`** ([#13428](https://github.com/NousResearch/hermes-agent/pull/13428))
|
||||
- **Doctor: Command Installation check** for `hermes` bin symlink ([#10112](https://github.com/NousResearch/hermes-agent/pull/10112))
|
||||
- **ESC cancels secret/sudo prompts**, clearer skip messaging ([#9902](https://github.com/NousResearch/hermes-agent/pull/9902))
|
||||
- Fix: agent-facing text uses `display_hermes_home()` instead of hardcoded `~/.hermes` ([#10285](https://github.com/NousResearch/hermes-agent/pull/10285))
|
||||
- Fix: enforce `config.yaml` as sole CWD source + deprecate `.env` CWD vars + add `hermes memory reset` ([#11029](https://github.com/NousResearch/hermes-agent/pull/11029))
|
||||
|
||||
---
|
||||
|
||||
## 🔒 Security & Reliability
|
||||
|
||||
- **Global toggle to allow private/internal URL resolution** ([#14166](https://github.com/NousResearch/hermes-agent/pull/14166))
|
||||
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
|
||||
- **Telegram callback authorization** on update prompts ([#10536](https://github.com/NousResearch/hermes-agent/pull/10536))
|
||||
- **SECURITY.md** added ([#10532](https://github.com/NousResearch/hermes-agent/pull/10532), @I3eg1nner)
|
||||
- **Warn about legacy hermes.service units** during `hermes update` ([#11918](https://github.com/NousResearch/hermes-agent/pull/11918))
|
||||
- **Complete ASCII-locale UnicodeEncodeError recovery** for `api_messages`/`reasoning_content` (closes #6843) ([#10537](https://github.com/NousResearch/hermes-agent/pull/10537))
|
||||
- **Prevent stale `os.environ` leak** after `clear_session_vars` ([#10527](https://github.com/NousResearch/hermes-agent/pull/10527))
|
||||
- **Prevent agent hang when backgrounding processes** via terminal tool ([#10584](https://github.com/NousResearch/hermes-agent/pull/10584))
|
||||
- Many smaller session-resume, interrupt, streaming, and memory-race fixes throughout the window
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Notable Bug Fixes
|
||||
|
||||
The `fix:` category in this window covers 482 PRs. Highlights:
|
||||
|
||||
- Streaming cursor artifacts filtered from Matrix, Telegram, WhatsApp, Discord (multiple PRs)
|
||||
- `<think>` and `<thought>` blocks filtered from gateway stream consumers ([#9408](https://github.com/NousResearch/hermes-agent/pull/9408))
|
||||
- Gateway display.streaming root-config override regression ([#9799](https://github.com/NousResearch/hermes-agent/pull/9799))
|
||||
- Context `session_search` coerces limit to int (prevents TypeError) ([#10522](https://github.com/NousResearch/hermes-agent/pull/10522))
|
||||
- Memory tool stays available when `fcntl` is unavailable (Windows) ([#9783](https://github.com/NousResearch/hermes-agent/pull/9783))
|
||||
- Trajectory compressor credentials load from `HERMES_HOME/.env` ([#9632](https://github.com/NousResearch/hermes-agent/pull/9632), @Dusk1e)
|
||||
- `@_context_completions` no longer crashes on `@` mention ([#9683](https://github.com/NousResearch/hermes-agent/pull/9683), @kshitijk4poor)
|
||||
- Group session `user_id` no longer treated as `thread_id` in shutdown notifications ([#10546](https://github.com/NousResearch/hermes-agent/pull/10546))
|
||||
- Telegram `platform_hint` — markdown is supported (closes #8261) ([#10612](https://github.com/NousResearch/hermes-agent/pull/10612))
|
||||
- Doctor checks for Kimi China credentials fixed
|
||||
- Streaming: don't suppress final response when commentary message is sent ([#10540](https://github.com/NousResearch/hermes-agent/pull/10540))
|
||||
- Rapid Telegram follow-ups no longer get cut off
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing & CI
|
||||
|
||||
- **Contributor attribution CI check** on PRs ([#9376](https://github.com/NousResearch/hermes-agent/pull/9376))
|
||||
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
|
||||
- Test count stabilized post-Transport refactor; CI matrix held green through the transport rollout
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
- Atropos + wandb links in user guide
|
||||
- ACP / VS Code / Zed / JetBrains integration docs refresh
|
||||
- Webhook subscription docs updated for direct-delivery mode
|
||||
- Plugin author guide expanded for new hooks (`register_command`, `dispatch_tool`, `transform_tool_result`)
|
||||
- Transport layer developer guide added
|
||||
- Website removed Discussions link from README
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
### Core
|
||||
- **@teknium1** (Teknium)
|
||||
|
||||
### Top Community Contributors (by merged PR count)
|
||||
- **@kshitijk4poor** — 49 PRs · Transport refactor (AnthropicTransport, ResponsesApiTransport), Step Plan provider, Xiaomi MiMo v2.5 support, numerous gateway fixes, promoted Kimi K2.5, @ mention crash fix
|
||||
- **@OutThisLife** (Brooklyn) — 31 PRs · TUI polish, git branch in status bar, per-turn stopwatch, stable picker keys, `/clear` confirm, light-theme preset, subagent spawn observability overlay
|
||||
- **@helix4u** — 11 PRs · Voice CLI record beep, MCP tool interrupt handling, assorted stability fixes
|
||||
- **@austinpickett** — 8 PRs · Dashboard react-router + sidebar + sticky header + dropdown, Vercel deployment, update + restart buttons
|
||||
- **@alt-glitch** — 8 PRs · PLATFORM_HINTS for Matrix/Mattermost/Feishu, Matrix fixes
|
||||
- **@ethernet8023** — 3 PRs
|
||||
- **@benbarclay** — 3 PRs
|
||||
- **@Aslaaen** — 2 PRs
|
||||
|
||||
### Also contributing
|
||||
@jerilynzheng (ai-gateway pricing), @JimLiu (baoyu-comic skill), @Dusk1e (trajectory compressor credentials), @DeployFaith (mobile-responsive dashboard), @LeonSGP43, @v1k22 (concept-diagrams), @omnissiah-comelse (adversarial-ux-test), @coekfung (Telegram MarkdownV2 expandable blockquotes), @liftaris (TUI provider resolution), @arihantsethia (skill analytics dashboard), @topcheer + @xing8star (QQBot foundation), @kovyrin, @I3eg1nner (SECURITY.md), @PeterBerthelsen, @lengxii, @priveperfumes, @sjz-ks, @cuyua9, @Disaster-Terminator, @leozeli, @LehaoLin, @trevthefoolish, @loongfay, @MrNiceRicee, @WideLee, @bluefishs, @malaiwah, @bobashopcashier, @dsocolobsky, @iamagenius00, @IAvecilla, @aniruddhaadak80, @Es1la, @asheriif, @walli, @jquesnelle (original Tool Gateway work).
|
||||
|
||||
### All Contributors (alphabetical)
|
||||
|
||||
@0xyg3n, @10ishq, @A-afflatus, @Abnertheforeman, @admin28980, @adybag14-cyber, @akhater, @alexzhu0,
|
||||
@AllardQuek, @alt-glitch, @aniruddhaadak80, @anna-oake, @anniesurla, @anthhub, @areu01or00, @arihantsethia,
|
||||
@arthurbr11, @asheriif, @Aslaaen, @Asunfly, @austinpickett, @AviArora02-commits, @AxDSan, @azhengbot, @Bartok9,
|
||||
@benbarclay, @bennytimz, @bernylinville, @bingo906, @binhnt92, @bkadish, @bluefishs, @bobashopcashier,
|
||||
@brantzh6, @BrennerSpear, @brianclemens, @briandevans, @brooklynnicholson, @bugkill3r, @buray, @burtenshaw,
|
||||
@cdanis, @cgarwood82, @ChimingLiu, @chongweiliu, @christopherwoodall, @coekfung, @cola-runner, @corazzione,
|
||||
@counterposition, @cresslank, @cuyua9, @cypres0099, @danieldoderlein, @davetist, @davidvv, @DeployFaith,
|
||||
@Dev-Mriganka, @devorun, @dieutx, @Disaster-Terminator, @dodo-reach, @draix, @DrStrangerUJN, @dsocolobsky,
|
||||
@Dusk1e, @dyxushuai, @elkimek, @elmatadorgh, @emozilla, @entropidelic, @Erosika, @erosika, @Es1la, @etcircle,
|
||||
@etherman-os, @ethernet8023, @fancydirty, @farion1231, @fatinghenji, @Fatty911, @fengtianyu88, @Feranmi10,
|
||||
@flobo3, @francip, @fuleinist, @g-guthrie, @GenKoKo, @gianfrancopiana, @gnanam1990, @GuyCui, @haileymarshall,
|
||||
@haimu0x, @handsdiff, @hansnow, @hedgeho9X, @helix4u, @hengm3467, @HenkDz, @heykb, @hharry11, @HiddenPuppy,
|
||||
@honghua, @houko, @houziershi, @hsy5571616, @huangke19, @hxp-plus, @Hypn0sis, @I3eg1nner, @iacker,
|
||||
@iamagenius00, @IAvecilla, @iborazzi, @Ifkellx, @ifrederico, @imink, @isaachuangGMICLOUD, @ismell0992-afk,
|
||||
@j0sephz, @Jaaneek, @jackjin1997, @JackTheGit, @jaffarkeikei, @jerilynzheng, @JiaDe-Wu, @Jiawen-lee, @JimLiu,
|
||||
@jinzheng8115, @jneeee, @jplew, @jquesnelle, @Julientalbot, @Junass1, @jvcl, @kagura-agent, @keifergu,
|
||||
@kevinskysunny, @keyuyuan, @konsisumer, @kovyrin, @kshitijk4poor, @leeyang1990, @LehaoLin, @lengxii,
|
||||
@LeonSGP43, @leozeli, @li0near, @liftaris, @Lind3ey, @Linux2010, @liujinkun2025, @LLQWQ, @Llugaes, @lmoncany,
|
||||
@longsizhuo, @lrawnsley, @Lubrsy706, @lumenradley, @luyao618, @lvnilesh, @LVT382009, @m0n5t3r, @Magaav,
|
||||
@MagicRay1217, @malaiwah, @manuelschipper, @Marvae, @MassiveMassimo, @mavrickdeveloper, @maxchernin, @memosr,
|
||||
@meng93, @mengjian-github, @MestreY0d4-Uninter, @Mibayy, @MikeFac, @mikewaters, @milkoor, @minorgod,
|
||||
@MrNiceRicee, @ms-alan, @mvanhorn, @n-WN, @N0nb0at, @Nan93, @NIDNASSER-Abdelmajid, @nish3451, @niyoh120,
|
||||
@nocoo, @nosleepcassette, @NousResearch, @ogzerber, @omnissiah-comelse, @Only-Code-A, @opriz, @OwenYWT, @pedh,
|
||||
@pefontana, @PeterBerthelsen, @phpoh, @pinion05, @plgonzalezrx8, @pradeep7127, @priveperfumes,
|
||||
@projectadmin-dev, @PStarH, @rnijhara, @Roy-oss1, @roytian1217, @RucchiZ, @Ruzzgar, @RyanLee-Dev, @Salt-555,
|
||||
@Sanjays2402, @sgaofen, @sharziki, @shenuu, @shin4, @SHL0MS, @shushuzn, @sicnuyudidi, @simon-gtcl,
|
||||
@simon-marcus, @sirEven, @Sisyphus, @sjz-ks, @snreynolds, @Societus, @Somme4096, @sontianye, @sprmn24,
|
||||
@StefanIsMe, @stephenschoettler, @Swift42, @taeng0204, @taeuk178, @tannerfokkens-maker, @TaroballzChen,
|
||||
@ten-ltw, @teyrebaz33, @Tianworld, @topcheer, @Tranquil-Flow, @trevthefoolish, @TroyMitchell911, @UNLINEARITY,
|
||||
@v1k22, @vivganes, @vominh1919, @vrinek, @VTRiot, @WadydX, @walli, @wenhao7, @WhiteWorld, @WideLee, @wujhsu,
|
||||
@WuTianyi123, @Wysie, @xandersbell, @xiaoqiang243, @xiayh0107, @xinpengdr, @Xowiek, @ycbai, @yeyitech, @ygd58,
|
||||
@youngDoo, @yudaiyan, @Yukipukii1, @yule975, @yyq4193, @yzx9, @ZaynJarvis, @zhang9w0v5, @zhanggttry,
|
||||
@zhangxicen, @zhongyueming1121, @zhouxiaoya12, @zons-zhaozhy
|
||||
|
||||
Also: @maelrx, @Marco Rutsch, @MaxsolcuCrypto, @Mind-Dragon, @Paul Bergeron, @say8hi, @whitehatjr1001.
|
||||
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.4.13...v2026.4.23](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.23)
|
||||
@@ -1,505 +0,0 @@
|
||||
# Hermes Agent v0.12.0 (v2026.4.30)
|
||||
|
||||
**Release Date:** April 30, 2026
|
||||
**Since v0.11.0:** 1,096 commits · 550 merged PRs · 1,270 files changed · 217,776 insertions · 213 community contributors (including co-authors)
|
||||
|
||||
> The Curator release — Hermes Agent now maintains itself. An autonomous background Curator grades, prunes, and consolidates your skill library on its own schedule. The self-improvement loop that reviews what to save got a substantial upgrade. Four new inference providers, a 18th messaging platform, a 19th via Teams plugin, native Spotify + Google Meet integrations, ComfyUI and TouchDesigner-MCP moved from optional to bundled-by-default, and a ~57% cut to visible TUI cold start.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **Autonomous Curator** — `hermes curator` runs as a background agent on the gateway's cron ticker (7-day cycle default). It grades your skill library, consolidates related skills, prunes dead ones, and writes per-run reports to `logs/curator/run.json` + `REPORT.md`. Archived skills are classified consolidated-vs-pruned via model + heuristic. Defense-in-depth gates protect bundled/hub skills from mutation. Unified under `auxiliary.curator` — pick the curator's model in `hermes model`, manage it from the dashboard. `hermes curator status` ranks skills by usage (most-used / least-used). ([#17277](https://github.com/NousResearch/hermes-agent/pull/17277), [#17307](https://github.com/NousResearch/hermes-agent/pull/17307), [#17941](https://github.com/NousResearch/hermes-agent/pull/17941), [#17868](https://github.com/NousResearch/hermes-agent/pull/17868), [#18033](https://github.com/NousResearch/hermes-agent/pull/18033))
|
||||
|
||||
- **Self-improvement loop — substantially upgraded** — The background review fork (the core of Hermes' self-improvement: after each turn it decides what memories/skills to save or update) is now class-first (rubric-based rather than free-form), active-update biased (prefers the skill the agent just loaded), handles `references/`/`templates/` sub-files, and properly inherits the parent's live runtime (provider, model, credentials actually propagate). Restricted to memory + skills toolsets so it can't sprawl. Memory providers shut down cleanly. Prior-turn tool messages excluded from the summary so the fork sees a clean context. ([#16026](https://github.com/NousResearch/hermes-agent/pull/16026), [#17213](https://github.com/NousResearch/hermes-agent/pull/17213), [#16099](https://github.com/NousResearch/hermes-agent/pull/16099), [#16569](https://github.com/NousResearch/hermes-agent/pull/16569), [#16204](https://github.com/NousResearch/hermes-agent/pull/16204), [#15057](https://github.com/NousResearch/hermes-agent/pull/15057))
|
||||
|
||||
- **Skill integrations — major expansion** — **ComfyUI v5** with official CLI + REST + hardware-gated local install, moved from optional to **built-in by default** ([#17610](https://github.com/NousResearch/hermes-agent/pull/17610), [#17631](https://github.com/NousResearch/hermes-agent/pull/17631), [#17734](https://github.com/NousResearch/hermes-agent/pull/17734)). **TouchDesigner-MCP** bundled by default, expanded with GLSL, post-FX, audio, geometry, and 9 new reference docs ([#16753](https://github.com/NousResearch/hermes-agent/pull/16753), [#16624](https://github.com/NousResearch/hermes-agent/pull/16624), [#16768](https://github.com/NousResearch/hermes-agent/pull/16768) — @kshitijk4poor + @SHL0MS). **Humanizer** skill ports a text-cleaner that strips AI-isms ([#16787](https://github.com/NousResearch/hermes-agent/pull/16787)). **claude-design** HTML artifact skill + design-md (Google DESIGN.md spec) + airtable salvage + `skill_manage` edits in `external_dirs` + direct-URL skill install + `/reload-skills` slash command. ([#16358](https://github.com/NousResearch/hermes-agent/pull/16358), [#14876](https://github.com/NousResearch/hermes-agent/pull/14876), [#16291](https://github.com/NousResearch/hermes-agent/pull/16291), [#17512](https://github.com/NousResearch/hermes-agent/pull/17512), [#16323](https://github.com/NousResearch/hermes-agent/pull/16323), [#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
|
||||
|
||||
- **LM Studio — first-class provider** — upgraded from a custom-endpoint alias to a full-blown native provider: dedicated auth, `hermes doctor` checks, reasoning transport, live `/models` listing. (Salvage of @kshitijk4poor's #17061.) ([#17102](https://github.com/NousResearch/hermes-agent/pull/17102))
|
||||
|
||||
- **Four more new inference providers** — **GMI Cloud** (first-class, salvage of #11955 — @isaachuangGMICLOUD), **Azure AI Foundry** with auto-detection, **MiniMax OAuth** with PKCE browser flow (salvage #15203), **Tencent Tokenhub** (salvage of #16860). ([#16663](https://github.com/NousResearch/hermes-agent/pull/16663), [#15845](https://github.com/NousResearch/hermes-agent/pull/15845), [#17524](https://github.com/NousResearch/hermes-agent/pull/17524), [#16960](https://github.com/NousResearch/hermes-agent/pull/16960))
|
||||
|
||||
- **Pluggable gateway platforms + Microsoft Teams** — the gateway is now a plugin host. Drop-in messaging adapters live outside the core, and Microsoft Teams is the first plugin-shipped platform. (Salvage of #17664.) ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751), [#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
|
||||
- **Tencent 元宝 (Yuanbao) — 18th messaging platform** — native gateway adapter with text + media delivery. ([#16298](https://github.com/NousResearch/hermes-agent/pull/16298), [#17424](https://github.com/NousResearch/hermes-agent/pull/17424))
|
||||
|
||||
- **Spotify — native tools + bundled skill + wizard** — 7 tools (play, search, queue, playlists, devices) behind PKCE OAuth, interactive setup wizard, bundled skill, surfacing in `hermes tools`, cron usage documented. ([#15121](https://github.com/NousResearch/hermes-agent/pull/15121), [#15130](https://github.com/NousResearch/hermes-agent/pull/15130), [#15154](https://github.com/NousResearch/hermes-agent/pull/15154), [#15180](https://github.com/NousResearch/hermes-agent/pull/15180))
|
||||
|
||||
- **Google Meet plugin** — join calls, transcribe, speak, follow up. Realtime OpenAI transport + Node bot server, full pipeline bundled as a plugin. ([#16364](https://github.com/NousResearch/hermes-agent/pull/16364))
|
||||
|
||||
- **`hermes -z` one-shot mode + `hermes update --check`** — non-interactive `hermes -z <prompt>` with `--model`/`--provider`/`HERMES_INFERENCE_MODEL`. `hermes update --check` preflight. Opt-in pre-update HERMES_HOME backup. ([#15702](https://github.com/NousResearch/hermes-agent/pull/15702), [#15704](https://github.com/NousResearch/hermes-agent/pull/15704), [#15841](https://github.com/NousResearch/hermes-agent/pull/15841), [#16539](https://github.com/NousResearch/hermes-agent/pull/16539), [#16566](https://github.com/NousResearch/hermes-agent/pull/16566))
|
||||
|
||||
- **Models dashboard tab + in-browser model config** — rich per-model analytics, switch main + auxiliary models from the dashboard. ([#17745](https://github.com/NousResearch/hermes-agent/pull/17745), [#17802](https://github.com/NousResearch/hermes-agent/pull/17802))
|
||||
|
||||
- **Remote model catalog manifest** — OpenRouter + Nous Portal model catalogs are now pulled from a remote manifest so new models show up without a release. ([#16033](https://github.com/NousResearch/hermes-agent/pull/16033))
|
||||
|
||||
- **Native multimodal image routing** — images now route based on the model's actual vision capability rather than provider defaults. ([#16506](https://github.com/NousResearch/hermes-agent/pull/16506))
|
||||
|
||||
- **Gateway media parity** — native multi-image sending across Telegram, Discord, Slack, Mattermost, Email, and Signal; centralized audio routing with FLAC support + Telegram document fallback. ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909), [#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
|
||||
|
||||
- **TUI catches up to (and past) the classic CLI** — LaTeX rendering (@austinpickett), `/reload` .env hot-reload, pluggable busy-indicator styles (@OutThisLife, #13610), opt-in auto-resume of last session, expanded light-terminal auto-detection, session delete from `/resume` picker with `d`, modified mouse-wheel line scroll, and a `/mouse` toggle that kills ConPTY's phantom mouse injection (@kevin-ho). ([#17175](https://github.com/NousResearch/hermes-agent/pull/17175), [#17286](https://github.com/NousResearch/hermes-agent/pull/17286), [#17150](https://github.com/NousResearch/hermes-agent/pull/17150), [#17130](https://github.com/NousResearch/hermes-agent/pull/17130), [#17113](https://github.com/NousResearch/hermes-agent/pull/17113), [#17668](https://github.com/NousResearch/hermes-agent/pull/17668), [#17669](https://github.com/NousResearch/hermes-agent/pull/17669), [#15488](https://github.com/NousResearch/hermes-agent/pull/15488))
|
||||
|
||||
- **Observability + achievements plugins** — bundled Langfuse observability plugin (salvage #16845) + bundled hermes-achievements plugin that scans full session history. ([#16917](https://github.com/NousResearch/hermes-agent/pull/16917), [#17754](https://github.com/NousResearch/hermes-agent/pull/17754))
|
||||
|
||||
- **TTS provider registry + Piper local TTS** — pluggable `tts.providers.<name>` registry; Piper ships as a native local TTS provider. (Closes #8508.) ([#17843](https://github.com/NousResearch/hermes-agent/pull/17843), [#17885](https://github.com/NousResearch/hermes-agent/pull/17885))
|
||||
|
||||
- **Vercel Sandbox backend** — Vercel sandboxes as an execute_code/terminal backend (@kshitijk4poor). ([#17445](https://github.com/NousResearch/hermes-agent/pull/17445))
|
||||
|
||||
- **Secret redaction off by default** — default flipped to off. Prevents the long-standing patch-corruption incidents where fake secret-shaped substrings mangled tool outputs. Opt in via `redaction.enabled: true` when you need it. ([#16794](https://github.com/NousResearch/hermes-agent/pull/16794))
|
||||
|
||||
- **Cold-start performance** — visible TUI cold start cut **~57%** via lazy agent init (@OutThisLife), lazy imports of OpenAI / Anthropic / Firecrawl / account_usage, mtime-cached `load_config()`, memoized `get_tool_definitions()` with TTL-cached `check_fn` results, precompiled dangerous-command patterns. ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190), [#17046](https://github.com/NousResearch/hermes-agent/pull/17046), [#17041](https://github.com/NousResearch/hermes-agent/pull/17041), [#17098](https://github.com/NousResearch/hermes-agent/pull/17098), [#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
|
||||
|
||||
- **Configurable prompt cache TTL** — `prompt_caching.cache_ttl` (5m default, 1h opt-in — cost savings for bursty sessions that keep cache warm). Salvage of #12659. ([#15065](https://github.com/NousResearch/hermes-agent/pull/15065))
|
||||
|
||||
---
|
||||
|
||||
## 🧠 Autonomous Curator & Self-Improvement Loop
|
||||
|
||||
### Curator — autonomous skill maintenance
|
||||
- **`hermes curator` as a background agent** — runs on the gateway's cron ticker, 7-day cycle by default, umbrella-first prompt, inherits parent config, unbounded iterations ([#17277](https://github.com/NousResearch/hermes-agent/pull/17277) — issue #7816)
|
||||
- **Per-run reports** — `logs/curator/run.json` + `REPORT.md` per cycle ([#17307](https://github.com/NousResearch/hermes-agent/pull/17307))
|
||||
- **Consolidated vs pruned classification** — archived skills split with model + heuristic ([#17941](https://github.com/NousResearch/hermes-agent/pull/17941))
|
||||
- **`hermes curator status`** — ranks skills by usage, shows most-used and least-used ([#18033](https://github.com/NousResearch/hermes-agent/pull/18033))
|
||||
- **Unified under `auxiliary.curator`** — pick the model in `hermes model`, configure from the dashboard ([#17868](https://github.com/NousResearch/hermes-agent/pull/17868))
|
||||
- **Documentation** — dedicated curator feature page on the docs site ([#17563](https://github.com/NousResearch/hermes-agent/pull/17563))
|
||||
- Fix: seed defaults on update, create `logs/curator/` directory, defer fire import ([#17927](https://github.com/NousResearch/hermes-agent/pull/17927))
|
||||
- Fix: scan nested archive subdirs in `restore_skill` (@0xDevNinja) ([#17951](https://github.com/NousResearch/hermes-agent/pull/17951))
|
||||
- Fix: use actual skill activity in curator status (@y0shua1ee) ([#17953](https://github.com/NousResearch/hermes-agent/pull/17953))
|
||||
- Fix: `skill_manage` refuses writes on pinned skills; pinning now blocks curator writes ([#17562](https://github.com/NousResearch/hermes-agent/pull/17562), [#17578](https://github.com/NousResearch/hermes-agent/pull/17578))
|
||||
- Fix: `bump_use()` wired into skill invocation + preload + skill_view (salvage #17782) ([#17932](https://github.com/NousResearch/hermes-agent/pull/17932))
|
||||
|
||||
### Self-improvement loop (background review fork)
|
||||
- **Class-first skill-review prompt** — rubric-based grading rather than free-form "should this update" ([#16026](https://github.com/NousResearch/hermes-agent/pull/16026))
|
||||
- **Active-update bias** — prefers updating skills the agent just loaded, handles `references/` + `templates/` sub-files ([#17213](https://github.com/NousResearch/hermes-agent/pull/17213))
|
||||
- **Fork inherits parent's live runtime** — provider, model, credentials actually propagate now ([#16099](https://github.com/NousResearch/hermes-agent/pull/16099))
|
||||
- **Scoped toolsets** — review fork restricted to memory + skills (no shell, no web) ([#16569](https://github.com/NousResearch/hermes-agent/pull/16569))
|
||||
- **Clean shutdown** — background review memory providers exit properly (salvage #15289) ([#16204](https://github.com/NousResearch/hermes-agent/pull/16204))
|
||||
- **Clean context** — prior-history tool messages excluded from review summary (salvage #14967) ([#15057](https://github.com/NousResearch/hermes-agent/pull/15057))
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Skills Ecosystem
|
||||
|
||||
### Skill integrations — newly bundled or promoted
|
||||
- **ComfyUI v5** — official CLI + REST + hardware-gated local install; **moved from optional to built-in** ([#17610](https://github.com/NousResearch/hermes-agent/pull/17610), [#17631](https://github.com/NousResearch/hermes-agent/pull/17631), [#17734](https://github.com/NousResearch/hermes-agent/pull/17734), [#17612](https://github.com/NousResearch/hermes-agent/pull/17612))
|
||||
- **TouchDesigner-MCP** — **bundled by default** ([#16753](https://github.com/NousResearch/hermes-agent/pull/16753) — @kshitijk4poor), expanded with GLSL, post-FX, audio, geometry references ([#16624](https://github.com/NousResearch/hermes-agent/pull/16624)), 9 new reference docs ([#16768](https://github.com/NousResearch/hermes-agent/pull/16768) — @SHL0MS)
|
||||
- **Humanizer** — strips AI-isms from text ([#16787](https://github.com/NousResearch/hermes-agent/pull/16787))
|
||||
- **claude-design** — HTML artifact skill with disambiguation from other design skills ([#16358](https://github.com/NousResearch/hermes-agent/pull/16358))
|
||||
- **design-md** — Google's DESIGN.md spec skill ([#14876](https://github.com/NousResearch/hermes-agent/pull/14876))
|
||||
- **airtable** — salvaged skill + skill API keys wired into `.env` (#15838) ([#16291](https://github.com/NousResearch/hermes-agent/pull/16291))
|
||||
- **pretext** — creative browser demos with @chenglou/pretext ([#17259](https://github.com/NousResearch/hermes-agent/pull/17259))
|
||||
- **spike** + **sketch** — throwaway experiments + HTML mockups, adapted from gsd-build ([#17421](https://github.com/NousResearch/hermes-agent/pull/17421))
|
||||
|
||||
### Skills UX
|
||||
- **Install skills from a direct HTTP(S) URL** — `hermes skills install <url>` ([#16323](https://github.com/NousResearch/hermes-agent/pull/16323))
|
||||
- **`/reload-skills`** slash command (salvage #17670) ([#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
|
||||
- **`hermes skills list`** shows enabled/disabled status ([#16129](https://github.com/NousResearch/hermes-agent/pull/16129))
|
||||
- **`skill_manage` refuses writes on pinned skills** ([#17562](https://github.com/NousResearch/hermes-agent/pull/17562))
|
||||
- **`skill_manage` edits external_dirs skills in place** (salvage #9966) ([#17512](https://github.com/NousResearch/hermes-agent/pull/17512), [#17289](https://github.com/NousResearch/hermes-agent/pull/17289))
|
||||
- Fix: inline-shell rendering in `skill_view` ([#15376](https://github.com/NousResearch/hermes-agent/pull/15376))
|
||||
- Fix: exclude `.archive/` from skill index walk (salvage #17639) ([#17931](https://github.com/NousResearch/hermes-agent/pull/17931))
|
||||
- Fix: dedicated docs page per bundled + optional skill ([#14929](https://github.com/NousResearch/hermes-agent/pull/14929))
|
||||
- Fix: `google-workspace` shared HERMES_HOME helper + ship deps as optional extra ([#15405](https://github.com/NousResearch/hermes-agent/pull/15405))
|
||||
- Fix: auto-wrap ASCII-art code blocks in generated skill pages ([#16497](https://github.com/NousResearch/hermes-agent/pull/16497))
|
||||
- Point agent at `hermes-agent` skill + docs site for Hermes questions ([#16535](https://github.com/NousResearch/hermes-agent/pull/16535))
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Core Agent & Architecture
|
||||
|
||||
### Provider & Model Support
|
||||
|
||||
#### New providers
|
||||
- **GMI Cloud** — first-class API-key provider on par with Arcee/Kilocode/Xiaomi (salvage of #11955 — @isaachuangGMICLOUD) ([#16663](https://github.com/NousResearch/hermes-agent/pull/16663))
|
||||
- **Azure AI Foundry** — auto-detection, full wiring ([#15845](https://github.com/NousResearch/hermes-agent/pull/15845))
|
||||
- **LM Studio** — upgraded from custom-endpoint alias to first-class provider: dedicated auth, doctor checks, reasoning transport, live `/models` (salvage of #17061 — @kshitijk4poor) ([#17102](https://github.com/NousResearch/hermes-agent/pull/17102))
|
||||
- **MiniMax OAuth** — PKCE browser flow with full OAuth integration (salvage #15203) ([#17524](https://github.com/NousResearch/hermes-agent/pull/17524))
|
||||
- **Tencent Tokenhub** — new provider (salvage of #16860) ([#16960](https://github.com/NousResearch/hermes-agent/pull/16960))
|
||||
|
||||
#### Model catalog
|
||||
- **Remote model catalog manifest** — OpenRouter + Nous Portal catalogs pulled from remote manifest so new models show up without a release ([#16033](https://github.com/NousResearch/hermes-agent/pull/16033))
|
||||
- `openai/gpt-5.5` and `gpt-5.5-pro` added to OpenRouter + Nous Portal ([#15343](https://github.com/NousResearch/hermes-agent/pull/15343))
|
||||
- `deepseek-v4-pro` and `deepseek-v4-flash` added ([#14934](https://github.com/NousResearch/hermes-agent/pull/14934))
|
||||
- `qwen3.6-plus` added to Alibaba-supported models ([#16896](https://github.com/NousResearch/hermes-agent/pull/16896))
|
||||
- Gemini free-tier keys blocked at setup with 429 guidance surfacing ([#15100](https://github.com/NousResearch/hermes-agent/pull/15100))
|
||||
|
||||
#### Model configuration
|
||||
- **Configurable `prompt_caching.cache_ttl`** — 5m default, 1h opt-in (salvage #12659) ([#15065](https://github.com/NousResearch/hermes-agent/pull/15065))
|
||||
- `/fast` whitelist broadened to all OpenAI + Anthropic models ([#16883](https://github.com/NousResearch/hermes-agent/pull/16883))
|
||||
- `auxiliary.extra_body.reasoning` translates into Codex Responses API ([#17004](https://github.com/NousResearch/hermes-agent/pull/17004))
|
||||
- `hermes fallback` command for managing fallback providers ([#16052](https://github.com/NousResearch/hermes-agent/pull/16052))
|
||||
|
||||
### Agent Loop & Conversation
|
||||
- **Native multimodal image routing** — based on model vision capability, not provider defaults ([#16506](https://github.com/NousResearch/hermes-agent/pull/16506))
|
||||
- **Delegate `child_timeout_seconds` default bumped to 600s** ([#14809](https://github.com/NousResearch/hermes-agent/pull/14809))
|
||||
- **Diagnostic dump when subagent times out with 0 API calls** ([#15105](https://github.com/NousResearch/hermes-agent/pull/15105))
|
||||
- **Gateway busts cached agent on compression/context_length config edits** ([#17008](https://github.com/NousResearch/hermes-agent/pull/17008))
|
||||
- **Opt-in runtime-metadata footer on final replies** ([#17026](https://github.com/NousResearch/hermes-agent/pull/17026))
|
||||
- `/reload-mcp` awareness — rebuild cached agents + prompt-cache cost confirmation ([#17729](https://github.com/NousResearch/hermes-agent/pull/17729))
|
||||
- Fix: repair CamelCase + `_tool` suffix tool-call emissions ([#15124](https://github.com/NousResearch/hermes-agent/pull/15124))
|
||||
- Fix: retry on `json.JSONDecodeError` instead of treating as local validation error ([#15107](https://github.com/NousResearch/hermes-agent/pull/15107))
|
||||
- Fix: handle unescaped control chars in `tool_call.arguments` ([#15356](https://github.com/NousResearch/hermes-agent/pull/15356))
|
||||
- Fix: ordering fix in `_copy_reasoning_content_for_api` — cross-provider reasoning isolation (@Zjianru) ([#15749](https://github.com/NousResearch/hermes-agent/pull/15749))
|
||||
- Fix: inject empty `reasoning_content` for DeepSeek/Kimi `tool_calls` unconditionally (@Zjianru) ([#15762](https://github.com/NousResearch/hermes-agent/pull/15762))
|
||||
- Fix: persist streamed `reasoning_content` on assistant turns (#16844) ([#16892](https://github.com/NousResearch/hermes-agent/pull/16892))
|
||||
- Fix: cancel coroutine on timeout so worker thread exits; full traceback on tool failure ([#17428](https://github.com/NousResearch/hermes-agent/pull/17428))
|
||||
- Fix: isolate `get_tool_definitions` quiet_mode cache + dedup LCM injection (#17335) ([#17889](https://github.com/NousResearch/hermes-agent/pull/17889))
|
||||
- Fix: serialize concurrent `hermes_tools` RPC calls from `execute_code` (#17770) ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
|
||||
- Fix: rename `[SYSTEM:` → `[IMPORTANT:` in all user-injected markers (dodges Azure content filter) ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
|
||||
|
||||
### Compression
|
||||
- **Retry summary on main model for unknown errors before giving up** ([#16774](https://github.com/NousResearch/hermes-agent/pull/16774))
|
||||
- **Notify users when configured aux model fails even if main-model fallback recovers** ([#16775](https://github.com/NousResearch/hermes-agent/pull/16775))
|
||||
- `/compress` wrapped in `_busy_command` to block input during compression ([#15388](https://github.com/NousResearch/hermes-agent/pull/15388))
|
||||
- Fix: reserve system + tools headroom when aux binds threshold ([#15631](https://github.com/NousResearch/hermes-agent/pull/15631))
|
||||
- Fix: use text-char sum for multimodal token estimation in `_find_tail_cut_by_tokens` ([#16369](https://github.com/NousResearch/hermes-agent/pull/16369))
|
||||
|
||||
### Session, Memory & State
|
||||
- **Trigram FTS5 index for CJK search, replace LIKE fallback** (@alt-glitch) ([#16651](https://github.com/NousResearch/hermes-agent/pull/16651))
|
||||
- **Index `tool_name` + `tool_calls` in FTS5, with repair + migration** (salvages #16866) ([#16914](https://github.com/NousResearch/hermes-agent/pull/16914))
|
||||
- **Checkpoints: auto-prune orphan and stale shadow repos at startup** ([#16303](https://github.com/NousResearch/hermes-agent/pull/16303))
|
||||
- **Memory providers notified on mid-process session_id rotation** (#6672) ([#17409](https://github.com/NousResearch/hermes-agent/pull/17409))
|
||||
- Fix: quote underscored terms in FTS5 query sanitization ([#16915](https://github.com/NousResearch/hermes-agent/pull/16915))
|
||||
- Fix: resolve viking_read 500/412 on file URIs + pseudo-summary URIs (salvage #5886) ([#17869](https://github.com/NousResearch/hermes-agent/pull/17869))
|
||||
- Fix: skip external-provider sync on interrupted turns ([#15395](https://github.com/NousResearch/hermes-agent/pull/15395))
|
||||
- Fix: close embedded Hindsight async client cleanly (salvage #14605) ([#16209](https://github.com/NousResearch/hermes-agent/pull/16209))
|
||||
- Fix: pass session transcript to `shutdown_memory_provider` on gateway + CLI (#15165) ([#16571](https://github.com/NousResearch/hermes-agent/pull/16571))
|
||||
- Fix: write-origin metadata seam ([#15346](https://github.com/NousResearch/hermes-agent/pull/15346))
|
||||
- Fix: preserve symlinks during atomic file writes ([#16980](https://github.com/NousResearch/hermes-agent/pull/16980))
|
||||
- Refactor: remove `flush_memories` entirely ([#15696](https://github.com/NousResearch/hermes-agent/pull/15696))
|
||||
|
||||
### Auxiliary models
|
||||
- Fix: surface auxiliary failures in UI (previously silent) ([#15324](https://github.com/NousResearch/hermes-agent/pull/15324))
|
||||
- Fix: surface title-gen auxiliary failures instead of silently dropping ([#16371](https://github.com/NousResearch/hermes-agent/pull/16371))
|
||||
- Fix: generalize unsupported-parameter detector and harden `max_tokens` retry ([#15633](https://github.com/NousResearch/hermes-agent/pull/15633))
|
||||
|
||||
---
|
||||
|
||||
## 📱 Messaging Platforms (Gateway)
|
||||
|
||||
### New Platforms
|
||||
- **Microsoft Teams (19th platform)** — as a plugin, + xdist collision guard ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
- **Yuanbao (Tencent 元宝, 18th platform)** — native adapter with text + media delivery ([#16298](https://github.com/NousResearch/hermes-agent/pull/16298), [#17424](https://github.com/NousResearch/hermes-agent/pull/17424), [#16880](https://github.com/NousResearch/hermes-agent/pull/16880))
|
||||
|
||||
### Pluggable Gateway Platforms
|
||||
- **Drop-in messaging adapters** — the gateway is now a plugin host for platforms (salvage of #17664) ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751))
|
||||
|
||||
### Telegram
|
||||
- **Chat allowlists for groups and forums** (@web3blind) ([#15027](https://github.com/NousResearch/hermes-agent/pull/15027))
|
||||
- **Send fresh finals for stale preview streams** (port openclaw#72038) ([#16261](https://github.com/NousResearch/hermes-agent/pull/16261))
|
||||
- **Render markdown tables as row-group bullets + prompt hint** ([#16997](https://github.com/NousResearch/hermes-agent/pull/16997))
|
||||
- Document fallback in centralized audio routing ([#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
|
||||
- Native multi-image sending ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
|
||||
### Discord
|
||||
- **Opt-in toolsets + ID injection + tool split + Feishu wiring** (salvage #15457, #15458) ([#15610](https://github.com/NousResearch/hermes-agent/pull/15610), [#15613](https://github.com/NousResearch/hermes-agent/pull/15613))
|
||||
- Fix: coerce `limit` parameter to int before `min()` call ([#16319](https://github.com/NousResearch/hermes-agent/pull/16319))
|
||||
|
||||
### Slack
|
||||
- **Register every gateway command as a native slash (Discord/Telegram parity)** ([#16164](https://github.com/NousResearch/hermes-agent/pull/16164))
|
||||
- **`strict_mention` config** — prevents thread auto-engagement ([#16193](https://github.com/NousResearch/hermes-agent/pull/16193))
|
||||
- **`channel_skill_bindings`** — bind specific skills to specific Slack channels ([#16283](https://github.com/NousResearch/hermes-agent/pull/16283))
|
||||
|
||||
### Signal
|
||||
- **Native formatting** — markdown → bodyRanges, reply quotes, reactions ([#17417](https://github.com/NousResearch/hermes-agent/pull/17417))
|
||||
- Native multi-image sending ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
|
||||
### Feishu / Mattermost / Email / Signal
|
||||
- All participate in **native multi-image sending** ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
|
||||
### Gateway Core
|
||||
- **Centralized audio routing + FLAC support + Telegram doc fallback** ([#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
|
||||
- **Native multi-image sending** across Telegram, Discord, Slack, Mattermost, Email, Signal ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
- **Make hygiene hard message limit configurable** ([#17000](https://github.com/NousResearch/hermes-agent/pull/17000))
|
||||
- **Opt-in runtime-metadata footer on final replies** ([#17026](https://github.com/NousResearch/hermes-agent/pull/17026))
|
||||
- **`pre_gateway_dispatch` hook** — plugins can intercept before dispatch ([#15050](https://github.com/NousResearch/hermes-agent/pull/15050))
|
||||
- **`pre_approval_request` / `post_approval_response` hooks** ([#16776](https://github.com/NousResearch/hermes-agent/pull/16776))
|
||||
- Fix: timeouts — guard `load_config()` call against runtime exceptions ([#16318](https://github.com/NousResearch/hermes-agent/pull/16318))
|
||||
- Fix: support passing handler tools via registry ([#15613](https://github.com/NousResearch/hermes-agent/pull/15613))
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Tool System
|
||||
|
||||
### Plugin-first architecture
|
||||
- **Pluggable gateway platforms** — platforms can ship as plugins ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751))
|
||||
- **Microsoft Teams as first plugin-shipped platform** ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
- **`pre_gateway_dispatch` hook** ([#15050](https://github.com/NousResearch/hermes-agent/pull/15050))
|
||||
- **`pre_approval_request` + `post_approval_response` hooks** ([#16776](https://github.com/NousResearch/hermes-agent/pull/16776))
|
||||
- **`duration_ms` on `post_tool_call`** (inspired by Claude Code 2.1.119) ([#15429](https://github.com/NousResearch/hermes-agent/pull/15429))
|
||||
- **Bundled plugins**: Spotify ([#15174](https://github.com/NousResearch/hermes-agent/pull/15174)), Google Meet ([#16364](https://github.com/NousResearch/hermes-agent/pull/16364)), Langfuse observability ([#16917](https://github.com/NousResearch/hermes-agent/pull/16917)), hermes-achievements ([#17754](https://github.com/NousResearch/hermes-agent/pull/17754))
|
||||
- **Page-scoped plugin slots for built-in dashboard pages** ([#15658](https://github.com/NousResearch/hermes-agent/pull/15658))
|
||||
- **Declarative plugin installation for NixOS module** (@alt-glitch) ([#15953](https://github.com/NousResearch/hermes-agent/pull/15953))
|
||||
|
||||
### Browser
|
||||
- **CDP supervisor** — dialog detection + response + cross-origin iframe eval ([#14540](https://github.com/NousResearch/hermes-agent/pull/14540))
|
||||
- **Auto-spawn local Chromium for LAN/localhost URLs** when cloud provider is configured ([#16136](https://github.com/NousResearch/hermes-agent/pull/16136))
|
||||
|
||||
### Execute code / Terminal
|
||||
- **Vercel Sandbox backend** for `execute_code` / terminal (@kshitijk4poor) ([#17445](https://github.com/NousResearch/hermes-agent/pull/17445))
|
||||
- **Collapse subagent `task_id`s to shared container** ([#16177](https://github.com/NousResearch/hermes-agent/pull/16177))
|
||||
- **Docker: run container as host user** to avoid root-owned bind mounts (@benbarclay) ([#17305](https://github.com/NousResearch/hermes-agent/pull/17305))
|
||||
- Fix: safely quote `~/` subpaths in wrapped `cd` commands ([#15394](https://github.com/NousResearch/hermes-agent/pull/15394))
|
||||
- Fix: close file descriptor in `LocalEnvironment._update_cwd` ([#17300](https://github.com/NousResearch/hermes-agent/pull/17300))
|
||||
- Fix: SSH — prevent tar from overwriting remote home dir permissions ([#17898](https://github.com/NousResearch/hermes-agent/pull/17898), [#17867](https://github.com/NousResearch/hermes-agent/pull/17867))
|
||||
|
||||
### Image generation
|
||||
- See Provider section for updates; no new image providers this window.
|
||||
|
||||
### TTS / Voice
|
||||
- **Pluggable TTS provider registry** under `tts.providers.<name>` ([#17843](https://github.com/NousResearch/hermes-agent/pull/17843))
|
||||
- **Piper** as native local TTS provider (closes #8508) ([#17885](https://github.com/NousResearch/hermes-agent/pull/17885))
|
||||
- **Voice mode CLI parity in the TUI** — VAD loop + TTS + crash forensics ([#14810](https://github.com/NousResearch/hermes-agent/pull/14810))
|
||||
- Fix: vision — use HERMES_HOME-based cache dir instead of cwd ([#17719](https://github.com/NousResearch/hermes-agent/pull/17719))
|
||||
|
||||
### Cron
|
||||
- **Honor `hermes tools` config for the cron platform** ([#14798](https://github.com/NousResearch/hermes-agent/pull/14798))
|
||||
- **Per-job `workdir`** — project-aware cron runs ([#15110](https://github.com/NousResearch/hermes-agent/pull/15110))
|
||||
- **`context_from` field** — chain cron job outputs ([#15606](https://github.com/NousResearch/hermes-agent/pull/15606))
|
||||
- Fix: promote `croniter` to a core dependency ([#17577](https://github.com/NousResearch/hermes-agent/pull/17577))
|
||||
|
||||
### Web search
|
||||
- **Expose `limit` for `web_search`** ([#16934](https://github.com/NousResearch/hermes-agent/pull/16934))
|
||||
|
||||
### Maps
|
||||
- Fix: include seconds in timezone UTC offset output ([#16300](https://github.com/NousResearch/hermes-agent/pull/16300))
|
||||
|
||||
### Approvals
|
||||
- **Hardline blocklist for unrecoverable commands** ([#15878](https://github.com/NousResearch/hermes-agent/pull/15878))
|
||||
- Perf: precompile DANGEROUS_PATTERNS and HARDLINE_PATTERNS ([#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
|
||||
|
||||
### ACP
|
||||
- **Advertise and forward image prompts** ([#18030](https://github.com/NousResearch/hermes-agent/pull/18030))
|
||||
|
||||
### API Server
|
||||
- **POST `/v1/runs/{run_id}/stop`** (salvage of #15656) ([#15842](https://github.com/NousResearch/hermes-agent/pull/15842))
|
||||
- **Expose run status for external UIs** (#17085) ([#17458](https://github.com/NousResearch/hermes-agent/pull/17458))
|
||||
|
||||
### Nix
|
||||
- **Declarative plugin installation for NixOS module** (@alt-glitch) ([#15953](https://github.com/NousResearch/hermes-agent/pull/15953))
|
||||
- Fix: use `--rebuild` in fix-lockfiles to bypass cached FOD store paths ([#15444](https://github.com/NousResearch/hermes-agent/pull/15444))
|
||||
- Fix: `extraPackages` now actually works via per-user profile ([#17047](https://github.com/NousResearch/hermes-agent/pull/17047))
|
||||
- Fix: refresh web/ npm-deps hash to unblock main builds ([#17174](https://github.com/NousResearch/hermes-agent/pull/17174))
|
||||
- Fix: replace magic-nix-cache with Cachix ([#17928](https://github.com/NousResearch/hermes-agent/pull/17928))
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ TUI
|
||||
|
||||
### New features
|
||||
- **LaTeX rendering** (@austinpickett) ([#17175](https://github.com/NousResearch/hermes-agent/pull/17175))
|
||||
- **`/reload` .env hot-reload** — ported from the classic CLI ([#17286](https://github.com/NousResearch/hermes-agent/pull/17286))
|
||||
- **Pluggable busy-indicator styles** (@OutThisLife, #13610) ([#17150](https://github.com/NousResearch/hermes-agent/pull/17150))
|
||||
- **Opt-in auto-resume of the most recent session** (@OutThisLife) ([#17130](https://github.com/NousResearch/hermes-agent/pull/17130))
|
||||
- **Expanded light-terminal auto-detection** — `HERMES_TUI_THEME` + background hex (@OutThisLife) ([#17113](https://github.com/NousResearch/hermes-agent/pull/17113))
|
||||
- **Delete sessions from `/resume` picker with `d`** (@OutThisLife) ([#17668](https://github.com/NousResearch/hermes-agent/pull/17668))
|
||||
- **Line-by-line scroll on modified mouse wheel** (@OutThisLife) ([#17669](https://github.com/NousResearch/hermes-agent/pull/17669))
|
||||
- **Delete queued message while editing with ctrl-x / cancel with esc** (@OutThisLife) ([#16707](https://github.com/NousResearch/hermes-agent/pull/16707))
|
||||
- **Per-section visibility for the details accordion** (@OutThisLife) ([#14968](https://github.com/NousResearch/hermes-agent/pull/14968))
|
||||
- **Voice mode CLI parity** — VAD loop + TTS + crash forensics ([#14810](https://github.com/NousResearch/hermes-agent/pull/14810))
|
||||
- **Contextual first-touch hints ported to TUI** — `/busy`, `/verbose` ([#16054](https://github.com/NousResearch/hermes-agent/pull/16054))
|
||||
- **Mini help menu on `?` in the input field** (@ethernet8023) ([#18043](https://github.com/NousResearch/hermes-agent/pull/18043))
|
||||
|
||||
### Fixes
|
||||
- Fix: proactive mouse disable on ConPTY + `/mouse` toggle command (@kevin-ho, WSL2 ghost-mouse fix) ([#15488](https://github.com/NousResearch/hermes-agent/pull/15488))
|
||||
- Fix: restore skills search RPC ([#15870](https://github.com/NousResearch/hermes-agent/pull/15870))
|
||||
- Perf: cache text measurements across yoga flex re-passes ([#14818](https://github.com/NousResearch/hermes-agent/pull/14818))
|
||||
- Perf: stabilize long-session scrolling ([#15926](https://github.com/NousResearch/hermes-agent/pull/15926))
|
||||
- Perf: lazily seed virtual history heights ([#16523](https://github.com/NousResearch/hermes-agent/pull/16523))
|
||||
- Perf: cut visible cold start ~57% with lazy agent init ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190))
|
||||
|
||||
---
|
||||
|
||||
## 🖱️ CLI & User Experience
|
||||
|
||||
### New commands
|
||||
- **`hermes -z <prompt>`** — non-interactive one-shot mode ([#15702](https://github.com/NousResearch/hermes-agent/pull/15702))
|
||||
- **`hermes -z` with `--model` / `--provider` / `HERMES_INFERENCE_MODEL`** ([#15704](https://github.com/NousResearch/hermes-agent/pull/15704))
|
||||
- **`hermes update --check`** preflight flag ([#15841](https://github.com/NousResearch/hermes-agent/pull/15841))
|
||||
- **`hermes fallback`** command for managing fallback providers ([#16052](https://github.com/NousResearch/hermes-agent/pull/16052))
|
||||
- **`/busy`** slash command for busy input mode ([#15382](https://github.com/NousResearch/hermes-agent/pull/15382))
|
||||
- **`/busy` input mode 'steer'** as a third option ([#16279](https://github.com/NousResearch/hermes-agent/pull/16279))
|
||||
- **`/btw` as alias for `/background`** ([#16053](https://github.com/NousResearch/hermes-agent/pull/16053))
|
||||
- **`/reload-skills`** slash command (salvage #17670) ([#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
|
||||
- **Surface `/queue`, `/bg`, `/steer` in agent-running placeholder** ([#16118](https://github.com/NousResearch/hermes-agent/pull/16118))
|
||||
|
||||
### Setup / onboarding
|
||||
- **Auto-reconfigure on existing installs** ([#15879](https://github.com/NousResearch/hermes-agent/pull/15879))
|
||||
- **Contextual first-touch hints for `/busy` and `/verbose`** ([#16046](https://github.com/NousResearch/hermes-agent/pull/16046))
|
||||
- **Cost-saving tips from the April 30 tip-of-the-day** ([#17841](https://github.com/NousResearch/hermes-agent/pull/17841))
|
||||
- **Hyperlink startup banner title to the latest GitHub Release** ([#14945](https://github.com/NousResearch/hermes-agent/pull/14945))
|
||||
|
||||
### Update / backup
|
||||
- **Snapshot pairing data before `git pull`** ([#16383](https://github.com/NousResearch/hermes-agent/pull/16383))
|
||||
- **Auto-backup HERMES_HOME before `hermes update`** (opt-in, off by default) ([#16539](https://github.com/NousResearch/hermes-agent/pull/16539), [#16566](https://github.com/NousResearch/hermes-agent/pull/16566))
|
||||
- **Exclude `checkpoints/` from backups** ([#16572](https://github.com/NousResearch/hermes-agent/pull/16572))
|
||||
- **Exclude SQLite WAL/SHM/journal sidecars from backups** ([#16576](https://github.com/NousResearch/hermes-agent/pull/16576))
|
||||
- **Installer FHS layout for root installs on Linux** ([#15608](https://github.com/NousResearch/hermes-agent/pull/15608))
|
||||
- Fix: kill stale dashboards instead of warning ([#17832](https://github.com/NousResearch/hermes-agent/pull/17832))
|
||||
- Fix: show correct update status on nix-built hermes ([#17550](https://github.com/NousResearch/hermes-agent/pull/17550))
|
||||
|
||||
### Slash-command housekeeping
|
||||
- Refactor: drop `/provider`, `/plan` handler, and clean up slash registry ([#15047](https://github.com/NousResearch/hermes-agent/pull/15047))
|
||||
- Refactor: drop `persist_session` plumbing + fix broken `/btw` mid-turn bypass ([#16075](https://github.com/NousResearch/hermes-agent/pull/16075))
|
||||
|
||||
### OpenClaw migration (for folks coming from OpenClaw)
|
||||
- **Hardened OpenClaw import** — plan-first apply, redaction, pre-migration backup ([#16911](https://github.com/NousResearch/hermes-agent/pull/16911))
|
||||
- Fix: case-preserving brand rewrite + one-time `~/.openclaw` residue banner ([#16327](https://github.com/NousResearch/hermes-agent/pull/16327))
|
||||
- Fix: resolve `openclaw` workspace files from `agents.defaults.workspace` ([#16879](https://github.com/NousResearch/hermes-agent/pull/16879))
|
||||
- Fix: resolve model aliases against real OpenClaw catalog schema (salvage #16778) ([#16977](https://github.com/NousResearch/hermes-agent/pull/16977))
|
||||
|
||||
---
|
||||
|
||||
## 📊 Web Dashboard
|
||||
|
||||
- **Models tab** — rich per-model analytics ([#17745](https://github.com/NousResearch/hermes-agent/pull/17745))
|
||||
- **Configure main + auxiliary models from the Models page** ([#17802](https://github.com/NousResearch/hermes-agent/pull/17802))
|
||||
- **Dashboard Chat tab — xterm.js + JSON-RPC sidecar** (supersedes #12710 + #13379, @OutThisLife) ([#14890](https://github.com/NousResearch/hermes-agent/pull/14890))
|
||||
- **Dashboard layout refresh** (@austinpickett) ([#14899](https://github.com/NousResearch/hermes-agent/pull/14899))
|
||||
- **`--stop` and `--status` flags** on the dashboard CLI ([#17840](https://github.com/NousResearch/hermes-agent/pull/17840))
|
||||
- **Page-scoped plugin slots for built-in pages** ([#15658](https://github.com/NousResearch/hermes-agent/pull/15658))
|
||||
- Fix: replace all buttons for design system buttons ([#17007](https://github.com/NousResearch/hermes-agent/pull/17007))
|
||||
|
||||
---
|
||||
|
||||
## ⚡ Performance
|
||||
|
||||
- **TUI visible cold start cut ~57%** via lazy agent init ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190))
|
||||
- **Lazy-import OpenAI, Anthropic, Firecrawl, account_usage** ([#17046](https://github.com/NousResearch/hermes-agent/pull/17046))
|
||||
- **mtime-cache `load_config()` and `read_raw_config()`** ([#17041](https://github.com/NousResearch/hermes-agent/pull/17041))
|
||||
- **Memoize `get_tool_definitions()` + TTL-cache `check_fn` results** ([#17098](https://github.com/NousResearch/hermes-agent/pull/17098))
|
||||
- **Precompile DANGEROUS_PATTERNS and HARDLINE_PATTERNS** ([#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
|
||||
- **Cache Ink text measurements across yoga flex re-passes** ([#14818](https://github.com/NousResearch/hermes-agent/pull/14818))
|
||||
- **Stabilize long-session scrolling** ([#15926](https://github.com/NousResearch/hermes-agent/pull/15926))
|
||||
- **Lazily seed virtual history heights** ([#16523](https://github.com/NousResearch/hermes-agent/pull/16523))
|
||||
|
||||
---
|
||||
|
||||
## 🔒 Security & Reliability
|
||||
|
||||
- **Secret redaction off by default** — stops corrupting patches / API payloads with fake-key substitutions. Opt in via `redaction.enabled: true` ([#16794](https://github.com/NousResearch/hermes-agent/pull/16794))
|
||||
- **`[SYSTEM:` → `[IMPORTANT:`** in all user-injected markers (Azure content filter dodge) ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
|
||||
- **Hardline blocklist for unrecoverable commands** ([#15878](https://github.com/NousResearch/hermes-agent/pull/15878))
|
||||
- **Canonical `mask_secret` helper; fix status.py DIM drift** ([#17207](https://github.com/NousResearch/hermes-agent/pull/17207))
|
||||
- **Sweep expired paste.rs uploads on a real timer** ([#16431](https://github.com/NousResearch/hermes-agent/pull/16431))
|
||||
- **Preserve symlinks during atomic file writes** ([#16980](https://github.com/NousResearch/hermes-agent/pull/16980))
|
||||
- **Probe `/dev/tty` by opening it, not bare existence** ([#17024](https://github.com/NousResearch/hermes-agent/pull/17024))
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Notable Bug Fixes
|
||||
|
||||
This window includes 360 `fix:` PRs. Selected highlights from across the stack:
|
||||
|
||||
- **Background review fork inherits parent's live runtime** — provider/model/creds now propagate correctly ([#16099](https://github.com/NousResearch/hermes-agent/pull/16099))
|
||||
- **Hindsight configurable `HINDSIGHT_TIMEOUT` env var** ([#15077](https://github.com/NousResearch/hermes-agent/pull/15077))
|
||||
- **Tools: normalize numeric entries + clear stale `no_mcp` in `_save_platform_tools`** ([#15607](https://github.com/NousResearch/hermes-agent/pull/15607))
|
||||
- **MCP: rewrite `definitions` refs to `$defs` in input schemas** — closes provider-side 400s
|
||||
- **Azure content filter compatibility** — renamed `[SYSTEM:` markers so Azure's content filter stops flagging them ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
|
||||
- **Vision cache uses HERMES_HOME instead of cwd** ([#17719](https://github.com/NousResearch/hermes-agent/pull/17719))
|
||||
- **FTS5 search** — tool_name + tool_calls indexing with repair + migration ([#16914](https://github.com/NousResearch/hermes-agent/pull/16914))
|
||||
- **Streaming reasoning persists on assistant turns** ([#16892](https://github.com/NousResearch/hermes-agent/pull/16892))
|
||||
- **execute_code concurrent RPC serialization** (#17770) ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
|
||||
- **Background reviewer scoped to memory + skills toolsets** — no more accidental web/shell escapes ([#16569](https://github.com/NousResearch/hermes-agent/pull/16569))
|
||||
- **Compression recovery** — retry on main before giving up; notify user when aux fails ([#16774](https://github.com/NousResearch/hermes-agent/pull/16774), [#16775](https://github.com/NousResearch/hermes-agent/pull/16775))
|
||||
- **`croniter` promoted to a core dependency** ([#17577](https://github.com/NousResearch/hermes-agent/pull/17577))
|
||||
- **Discord tool `limit` parameter coerced to int** before `min()` call ([#16319](https://github.com/NousResearch/hermes-agent/pull/16319))
|
||||
- **Yuanbao messaging platform entrance fix** ([#16880](https://github.com/NousResearch/hermes-agent/pull/16880))
|
||||
- **ACP advertise and forward image prompts** ([#18030](https://github.com/NousResearch/hermes-agent/pull/18030))
|
||||
- **DeepSeek / Kimi reasoning content isolation** across cross-provider histories (@Zjianru) ([#15749](https://github.com/NousResearch/hermes-agent/pull/15749), [#15762](https://github.com/NousResearch/hermes-agent/pull/15762))
|
||||
- **Preserve reasoning_content replay on DeepSeek v4 + Kimi/Moonshot thinking** ([#18045](https://github.com/NousResearch/hermes-agent/pull/18045))
|
||||
|
||||
The vast majority of the 360 fixes landed in the streaming/compression/tool-calling paths across all providers — DeepSeek, Kimi, Moonshot, GLM, Qwen, MiniMax, Gemini, Anthropic, OpenAI — alongside TUI polish (resize, scroll, sticky-prompt) and gateway platform-specific edge cases.
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing & CI
|
||||
|
||||
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
|
||||
- **Microsoft Teams xdist collision guard** — prevents worker collisions when Teams platform tests run in parallel ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
- Chore: remove unused imports and dead locals (ruff F401, F841) ([#17010](https://github.com/NousResearch/hermes-agent/pull/17010))
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
- **Curator feature page** added to docs site ([#17563](https://github.com/NousResearch/hermes-agent/pull/17563))
|
||||
- **Document pin also blocking `skill_manage` writes** ([#17578](https://github.com/NousResearch/hermes-agent/pull/17578))
|
||||
- **Direct-URL skill install documented** across features, reference, guide, and `hermes-agent` skill ([#16355](https://github.com/NousResearch/hermes-agent/pull/16355))
|
||||
- **Hooks tutorial — build a BOOT.md startup checklist** (replaces the removed built-in hook) ([#17202](https://github.com/NousResearch/hermes-agent/pull/17202))
|
||||
- **ComfyUI docs: ask local vs cloud FIRST before hardware check** ([#17612](https://github.com/NousResearch/hermes-agent/pull/17612))
|
||||
- **Obliteratus skill: link YouTube video guide in SKILL.md** ([#15808](https://github.com/NousResearch/hermes-agent/pull/15808))
|
||||
- Per-skill docs pages generated for bundled + optional skills; ASCII art code blocks auto-wrapped ([#14929](https://github.com/NousResearch/hermes-agent/pull/14929), [#16497](https://github.com/NousResearch/hermes-agent/pull/16497))
|
||||
|
||||
---
|
||||
|
||||
## ⚖️ Removed / Reverted
|
||||
|
||||
- **Kanban multi-profile collaboration board** — landed in #16081, reverted in ([#16098](https://github.com/NousResearch/hermes-agent/pull/16098)) while the design is reworked
|
||||
- **computer-use cua-driver** — 3 preparatory PRs landed then were reverted in ([#16927](https://github.com/NousResearch/hermes-agent/pull/16927))
|
||||
- **BOOT.md built-in hook** removed ([#17093](https://github.com/NousResearch/hermes-agent/pull/17093)); the hooks tutorial ([#17202](https://github.com/NousResearch/hermes-agent/pull/17202)) shows how to build the same workflow yourself with a shell hook
|
||||
- **`/provider` + `/plan` slash commands dropped** ([#15047](https://github.com/NousResearch/hermes-agent/pull/15047))
|
||||
- **`flush_memories` removed entirely** ([#15696](https://github.com/NousResearch/hermes-agent/pull/15696))
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
### Core
|
||||
- **@teknium1** (Teknium)
|
||||
|
||||
### Top Community Contributors (by merged PR count since v0.11.0)
|
||||
|
||||
- **@OutThisLife** (Brooklyn) — 52 PRs · TUI — light-terminal detection + pluggable busy styles + auto-resume + session-delete from /resume + mouse-wheel scrolling + xterm.js dashboard Chat tab + cold-start cut + accordion polish
|
||||
- **@kshitijk4poor** — 12 PRs · LM Studio first-class provider (salvage), Vercel Sandbox backend, GMI Cloud salvage, bundled-by-default touchdesigner-mcp, many tool-call / reasoning fixes
|
||||
- **@helix4u** — 10 PRs · MCP schema robustness, assorted stability fixes
|
||||
- **@alt-glitch** — 8 PRs · trigram FTS5 CJK search, declarative Nix plugin install, matrix/feishu hints and fixes
|
||||
- **@ethernet8023** — 4 PRs
|
||||
- **@austinpickett** — 4 PRs · LaTeX rendering in TUI, dashboard layout refresh
|
||||
- **@benbarclay** — 3 PRs · Docker run-as-host-user so bind mounts don't get root-owned
|
||||
- **@vominh1919** — 2 PRs
|
||||
- **@stephenschoettler** — 2 PRs
|
||||
- **@kevin-ho** — ConPTY mouse-injection fix (#15488)
|
||||
- **@Zjianru** — cross-provider reasoning_content isolation + DeepSeek/Kimi empty-reasoning injection (#15749, #15762)
|
||||
- **@web3blind** — Telegram chat allowlists for groups and forums (#15027)
|
||||
- **@SHL0MS** — 9 new TouchDesigner-MCP reference docs (#16768)
|
||||
- **@0xDevNinja** — curator `restore_skill` nested-archive fix (#17951)
|
||||
- **@y0shua1ee** — curator `use` activity fix (#17953)
|
||||
|
||||
### Also contributing
|
||||
Salvaged or co-authored work from **@isaachuangGMICLOUD** (GMI Cloud), earlier upstream PRs from the original author of each salvage chain, and a long tail of one-shot fixes, documentation nudges, and skill contributions from the community.
|
||||
|
||||
### All Contributors (alphabetical, excluding @teknium1)
|
||||
|
||||
@0xbyt4, @0xharryriddle, @0xDevNinja, @0z1-ghb, @5park1e, @A-FdL-Prog, @aj-nt, @akhater, @alblez, @alexg0bot,
|
||||
@alexzhu0, @AllardQuek, @alt-glitch, @amanning3390, @amanuel2, @AndreKurait, @andrewhosf, @Andy283, @andyylin,
|
||||
@angel12, @AntAISecurityLab, @ash, @austinpickett, @badgerbees, @BadTechBandit, @Bartok9, @beenherebefore,
|
||||
@beesrsj2500, @BeliefanX, @benbarclay, @benjaminsehl, @BlackishGreen33, @bloodcarter, @BlueBirdBack,
|
||||
@briandevans, @brooklynnicholson, @bsgdigital, @buray, @bwjoke, @camaragon, @cdanis, @cgarwood82,
|
||||
@charles-brooks, @chen1749144759, @chengoak, @ching-kaching, @Contentment003111, @crayfish-ai, @CruxExperts,
|
||||
@cyclingwithelephants, @dandaka, @danklynn, @ddupont808, @dhabibi, @difujia, @dimitrovi, @dlkakbs,
|
||||
@dontcallmejames, @EKKOLearnAI, @emozilla, @ericnicolaides, @Erosika, @ethernet8023, @exiao, @Feranmi10,
|
||||
@flobo3, @foxion37, @georgeglessner, @georgex8001, @ghostmfr, @H-Ali13381, @HangGlidersRule, @harryplusplus,
|
||||
@haru398801, @heathley, @hejuntt1014, @hekaru-agent, @helix4u, @Heltman, @HenkDz, @heyitsaamir, @hharry11,
|
||||
@hhhonzik, @hhuang91, @HiddenPuppy, @htsh, @iamagenius00, @in-liberty420, @innocarpe, @irispillars, @iRonin,
|
||||
@isaachuangGMICLOUD, @Ito-69, @j3ffffff, @jackjin1997, @jakubkrcmar, @Jason2031, @JayGwod, @jerome-benoit,
|
||||
@johnncenae, @Kailigithub, @keiravoss94, @kevin-ho, @knockyai, @konsisumer, @kshitijk4poor, @kunlabs, @l0hde,
|
||||
@Leihb, @leoneparise, @LeonSGP43, @liizfq, @liuhao1024, @loongzhao, @lsdsjy, @luyao618, @ma-pony, @Magaav,
|
||||
@MagicRay1217, @math0r-be, @MattMaximo, @maxims-oss, @MaxyMoos, @maymuneth, @mcndjxlefnd, @memosr,
|
||||
@MestreY0d4-Uninter, @mewwts, @Mirac1eSky, @MorAlekss, @mrhwick, @mrunmayee17, @mssteuer, @Nanako0129,
|
||||
@nazirulhafiy, @Nerijusas, @Nicecsh, @nicoloboschi, @nightq, @ningfangbin, @octo-patch, @Octopus,
|
||||
@OutThisLife, @Paperclip, @pein892, @perlowja, @prasadus92, @qike-ms, @qiyin-code, @Readon, @ReginaldasR,
|
||||
@revaraver, @rfilgueiras, @rmoen, @romanornr, @rugvedS07, @rylena, @samrusani, @Sanjays2402, @sasha-id,
|
||||
@Satoshi-agi, @scheidti, @scotttrinh, @season179, @SeeYangZhi, @sgaofen, @shamork, @shannonsands, @SHL0MS,
|
||||
@simbam99, @Societus, @socrates1024, @Sonoyunchu, @sprmn24, @stephenschoettler, @tangyuanjc, @TechPrototyper,
|
||||
@tekgnosis-net, @ThomassJonax, @tmimmanuel, @tochukwuada, @Tosko4, @Tranquil-Flow, @twozle, @txbxxx,
|
||||
@UgwujaGeorge, @Versun, @vlwkaos, @voidborne-d, @vominh1919, @Wang-tianhao, @Wangshengyang2004, @web3blind,
|
||||
@westers, @Wysie, @xandersbell, @xiahu88988, @XieNBi, @xinbenlv, @xnbi, @y0shua1ee, @yatesjalex, @yes999zc,
|
||||
@yeyitech, @Yoimex, @YueLich, @Yukipukii1, @zhiyanliu, @zicochaos, @Zjianru, @zkl2333, @zons-zhaozhy,
|
||||
@ztexydt-cqh.
|
||||
|
||||
Also: @Siddharth Balyan, @YuShu.
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.4.23...v2026.4.30](https://github.com/NousResearch/hermes-agent/compare/v2026.4.23...v2026.4.30)
|
||||
@@ -112,17 +112,6 @@ def main() -> None:
|
||||
import acp
|
||||
from .server import HermesACPAgent
|
||||
|
||||
# MCP tool discovery from config.yaml — run before asyncio.run() so
|
||||
# it's safe to use blocking waits. (ACP also registers per-session
|
||||
# MCP servers dynamically via asyncio.to_thread inside the event
|
||||
# loop; that path is unaffected.) Moved from model_tools.py module
|
||||
# scope to avoid freezing the gateway's loop on lazy import (#16856).
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
logger.debug("MCP tool discovery failed at ACP startup", exc_info=True)
|
||||
|
||||
agent = HermesACPAgent()
|
||||
try:
|
||||
asyncio.run(acp.run_agent(agent, use_unstable_protocol=True))
|
||||
|
||||
+10
-181
@@ -3,7 +3,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextvars
|
||||
import logging
|
||||
import os
|
||||
from collections import defaultdict, deque
|
||||
@@ -13,7 +12,6 @@ from typing import Any, Deque, Optional
|
||||
import acp
|
||||
from acp.schema import (
|
||||
AgentCapabilities,
|
||||
AgentMessageChunk,
|
||||
AuthenticateResponse,
|
||||
AvailableCommand,
|
||||
AvailableCommandsUpdate,
|
||||
@@ -31,7 +29,6 @@ from acp.schema import (
|
||||
McpServerStdio,
|
||||
ModelInfo,
|
||||
NewSessionResponse,
|
||||
PromptCapabilities,
|
||||
PromptResponse,
|
||||
ResumeSessionResponse,
|
||||
SetSessionConfigOptionResponse,
|
||||
@@ -47,7 +44,6 @@ from acp.schema import (
|
||||
TextContentBlock,
|
||||
UnstructuredCommandInput,
|
||||
Usage,
|
||||
UserMessageChunk,
|
||||
)
|
||||
|
||||
# AuthMethodAgent was renamed from AuthMethod in agent-client-protocol 0.9.0
|
||||
@@ -64,7 +60,7 @@ from acp_adapter.events import (
|
||||
make_tool_progress_cb,
|
||||
)
|
||||
from acp_adapter.permissions import make_approval_callback
|
||||
from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets
|
||||
from acp_adapter.session import SessionManager, SessionState
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -91,69 +87,17 @@ def _extract_text(
|
||||
| EmbeddedResourceContentBlock
|
||||
],
|
||||
) -> str:
|
||||
"""Extract plain text from ACP content blocks for display/commands."""
|
||||
"""Extract plain text from ACP content blocks."""
|
||||
parts: list[str] = []
|
||||
for block in prompt:
|
||||
if isinstance(block, TextContentBlock):
|
||||
parts.append(block.text)
|
||||
elif hasattr(block, "text"):
|
||||
parts.append(str(block.text))
|
||||
# Non-text blocks are ignored for now.
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def _image_block_to_openai_part(block: ImageContentBlock) -> dict[str, Any] | None:
|
||||
"""Convert an ACP image content block to OpenAI-style multimodal content."""
|
||||
data = str(getattr(block, "data", "") or "").strip()
|
||||
uri = str(getattr(block, "uri", "") or "").strip()
|
||||
mime_type = str(getattr(block, "mime_type", "") or "image/png").strip() or "image/png"
|
||||
|
||||
if data:
|
||||
url = data if data.startswith("data:") else f"data:{mime_type};base64,{data}"
|
||||
elif uri:
|
||||
url = uri
|
||||
else:
|
||||
return None
|
||||
|
||||
return {"type": "image_url", "image_url": {"url": url}}
|
||||
|
||||
|
||||
def _content_blocks_to_openai_user_content(
|
||||
prompt: list[
|
||||
TextContentBlock
|
||||
| ImageContentBlock
|
||||
| AudioContentBlock
|
||||
| ResourceContentBlock
|
||||
| EmbeddedResourceContentBlock
|
||||
],
|
||||
) -> str | list[dict[str, Any]]:
|
||||
"""Convert ACP prompt blocks into a Hermes/OpenAI-compatible user content payload."""
|
||||
parts: list[dict[str, Any]] = []
|
||||
text_parts: list[str] = []
|
||||
|
||||
for block in prompt:
|
||||
if isinstance(block, TextContentBlock):
|
||||
if block.text:
|
||||
parts.append({"type": "text", "text": block.text})
|
||||
text_parts.append(block.text)
|
||||
continue
|
||||
if isinstance(block, ImageContentBlock):
|
||||
image_part = _image_block_to_openai_part(block)
|
||||
if image_part is not None:
|
||||
parts.append(image_part)
|
||||
continue
|
||||
|
||||
if not parts:
|
||||
return _extract_text(prompt)
|
||||
|
||||
# Keep pure text prompts as strings so slash-command handling and text-only
|
||||
# providers keep the exact legacy path. Switch to structured content only
|
||||
# when an actual non-text block is present.
|
||||
if all(part.get("type") == "text" for part in parts):
|
||||
return "\n".join(text_parts)
|
||||
|
||||
return parts
|
||||
|
||||
|
||||
class HermesACPAgent(acp.Agent):
|
||||
"""ACP Agent implementation wrapping Hermes AIAgent."""
|
||||
|
||||
@@ -343,11 +287,7 @@ class HermesACPAgent(acp.Agent):
|
||||
try:
|
||||
from model_tools import get_tool_definitions
|
||||
|
||||
enabled_toolsets = _expand_acp_enabled_toolsets(
|
||||
getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"],
|
||||
mcp_server_names=[server.name for server in mcp_servers],
|
||||
)
|
||||
state.agent.enabled_toolsets = enabled_toolsets
|
||||
enabled_toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
|
||||
disabled_toolsets = getattr(state.agent, "disabled_toolsets", None)
|
||||
state.agent.tools = get_tool_definitions(
|
||||
enabled_toolsets=enabled_toolsets,
|
||||
@@ -407,7 +347,6 @@ class HermesACPAgent(acp.Agent):
|
||||
agent_info=Implementation(name="hermes-agent", version=HERMES_VERSION),
|
||||
agent_capabilities=AgentCapabilities(
|
||||
load_session=True,
|
||||
prompt_capabilities=PromptCapabilities(image=True),
|
||||
session_capabilities=SessionCapabilities(
|
||||
fork=SessionForkCapabilities(),
|
||||
list=SessionListCapabilities(),
|
||||
@@ -433,78 +372,6 @@ class HermesACPAgent(acp.Agent):
|
||||
|
||||
# ---- Session management -------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _history_message_text(message: dict[str, Any]) -> str:
|
||||
"""Extract displayable text from a persisted OpenAI-style message."""
|
||||
content = message.get("content")
|
||||
if isinstance(content, str):
|
||||
return content.strip()
|
||||
if isinstance(content, list):
|
||||
parts: list[str] = []
|
||||
for item in content:
|
||||
if isinstance(item, dict):
|
||||
text = item.get("text")
|
||||
if isinstance(text, str):
|
||||
parts.append(text)
|
||||
elif item.get("type") == "text" and isinstance(item.get("content"), str):
|
||||
parts.append(item["content"])
|
||||
elif isinstance(item, str):
|
||||
parts.append(item)
|
||||
return "\n".join(part.strip() for part in parts if part and part.strip()).strip()
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _history_message_update(
|
||||
*,
|
||||
role: str,
|
||||
text: str,
|
||||
) -> UserMessageChunk | AgentMessageChunk | None:
|
||||
"""Build an ACP history replay update for a user/assistant message."""
|
||||
block = TextContentBlock(type="text", text=text)
|
||||
if role == "user":
|
||||
return UserMessageChunk(
|
||||
session_update="user_message_chunk",
|
||||
content=block,
|
||||
)
|
||||
if role == "assistant":
|
||||
return AgentMessageChunk(
|
||||
session_update="agent_message_chunk",
|
||||
content=block,
|
||||
)
|
||||
return None
|
||||
|
||||
async def _replay_session_history(self, state: SessionState) -> None:
|
||||
"""Send persisted user/assistant history to clients during session/load.
|
||||
|
||||
Zed's ACP history UI calls ``session/load`` after the user picks an item
|
||||
from the Agents sidebar. The agent must then replay the full conversation
|
||||
as ``user_message_chunk`` / ``agent_message_chunk`` notifications; merely
|
||||
restoring server-side state makes Hermes remember context, but leaves the
|
||||
editor looking like a clean thread.
|
||||
"""
|
||||
if not self._conn or not state.history:
|
||||
return
|
||||
|
||||
for message in state.history:
|
||||
role = str(message.get("role") or "")
|
||||
if role not in {"user", "assistant"}:
|
||||
continue
|
||||
text = self._history_message_text(message)
|
||||
if not text:
|
||||
continue
|
||||
update = self._history_message_update(role=role, text=text)
|
||||
if update is None:
|
||||
continue
|
||||
try:
|
||||
await self._conn.session_update(session_id=state.session_id, update=update)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to replay ACP history for session %s",
|
||||
state.session_id,
|
||||
exc_info=True,
|
||||
)
|
||||
return
|
||||
|
||||
async def new_session(
|
||||
self,
|
||||
cwd: str,
|
||||
@@ -533,7 +400,6 @@ class HermesACPAgent(acp.Agent):
|
||||
return None
|
||||
await self._register_session_mcp_servers(state, mcp_servers)
|
||||
logger.info("Loaded session %s", session_id)
|
||||
await self._replay_session_history(state)
|
||||
self._schedule_available_commands_update(session_id)
|
||||
return LoadSessionResponse(models=self._build_model_state(state))
|
||||
|
||||
@@ -550,7 +416,6 @@ class HermesACPAgent(acp.Agent):
|
||||
state = self.session_manager.create_session(cwd=cwd)
|
||||
await self._register_session_mcp_servers(state, mcp_servers)
|
||||
logger.info("Resumed session %s", state.session_id)
|
||||
await self._replay_session_history(state)
|
||||
self._schedule_available_commands_update(state.session_id)
|
||||
return ResumeSessionResponse(models=self._build_model_state(state))
|
||||
|
||||
@@ -647,18 +512,11 @@ class HermesACPAgent(acp.Agent):
|
||||
return PromptResponse(stop_reason="refusal")
|
||||
|
||||
user_text = _extract_text(prompt).strip()
|
||||
user_content = _content_blocks_to_openai_user_content(prompt)
|
||||
has_content = bool(user_text) or (
|
||||
isinstance(user_content, list) and bool(user_content)
|
||||
)
|
||||
if not has_content:
|
||||
if not user_text:
|
||||
return PromptResponse(stop_reason="end_turn")
|
||||
|
||||
# Intercept slash commands — handle locally without calling the LLM.
|
||||
# Slash commands are text-only; if the client included images/resources,
|
||||
# send the whole multimodal prompt to the agent instead of treating it as
|
||||
# an ACP command.
|
||||
if isinstance(user_content, str) and user_text.startswith("/"):
|
||||
# Intercept slash commands — handle locally without calling the LLM
|
||||
if user_text.startswith("/"):
|
||||
response_text = self._handle_slash_command(user_text, state)
|
||||
if response_text is not None:
|
||||
if self._conn:
|
||||
@@ -712,22 +570,6 @@ class HermesACPAgent(acp.Agent):
|
||||
|
||||
def _run_agent() -> dict:
|
||||
nonlocal previous_approval_cb, previous_interactive
|
||||
# Bind HERMES_SESSION_KEY for this session so per-session caches
|
||||
# (e.g. the interactive sudo password cache in tools.terminal_tool)
|
||||
# scope to the ACP session rather than leaking across sessions
|
||||
# that land on the same reused executor thread. This call runs
|
||||
# inside a contextvars.copy_context() below, so the ContextVar
|
||||
# write is isolated from other concurrent ACP sessions.
|
||||
try:
|
||||
from gateway.session_context import (
|
||||
clear_session_vars,
|
||||
set_session_vars,
|
||||
)
|
||||
session_tokens = set_session_vars(session_key=session_id)
|
||||
except Exception:
|
||||
session_tokens = None
|
||||
clear_session_vars = None # type: ignore[assignment]
|
||||
logger.debug("Could not set ACP session context", exc_info=True)
|
||||
if approval_cb:
|
||||
try:
|
||||
from tools import terminal_tool as _terminal_tool
|
||||
@@ -741,10 +583,9 @@ class HermesACPAgent(acp.Agent):
|
||||
os.environ["HERMES_INTERACTIVE"] = "1"
|
||||
try:
|
||||
result = agent.run_conversation(
|
||||
user_message=user_content,
|
||||
user_message=user_text,
|
||||
conversation_history=state.history,
|
||||
task_id=session_id,
|
||||
persist_user_message=user_text or "[Image attachment]",
|
||||
)
|
||||
return result
|
||||
except Exception as e:
|
||||
@@ -762,19 +603,9 @@ class HermesACPAgent(acp.Agent):
|
||||
_terminal_tool.set_approval_callback(previous_approval_cb)
|
||||
except Exception:
|
||||
logger.debug("Could not restore approval callback", exc_info=True)
|
||||
if session_tokens is not None and clear_session_vars is not None:
|
||||
try:
|
||||
clear_session_vars(session_tokens)
|
||||
except Exception:
|
||||
logger.debug("Could not clear ACP session context", exc_info=True)
|
||||
|
||||
try:
|
||||
# Wrap the executor call in a fresh copy of the current context so
|
||||
# concurrent ACP sessions on the shared ThreadPoolExecutor don't
|
||||
# stomp on each other's ContextVar writes (HERMES_SESSION_KEY in
|
||||
# particular — used by the interactive sudo password cache scope).
|
||||
ctx = contextvars.copy_context()
|
||||
result = await loop.run_in_executor(_executor, ctx.run, _run_agent)
|
||||
result = await loop.run_in_executor(_executor, _run_agent)
|
||||
except Exception:
|
||||
logger.exception("Executor error for session %s", session_id)
|
||||
return PromptResponse(stop_reason="end_turn")
|
||||
@@ -923,9 +754,7 @@ class HermesACPAgent(acp.Agent):
|
||||
def _cmd_tools(self, args: str, state: SessionState) -> str:
|
||||
try:
|
||||
from model_tools import get_tool_definitions
|
||||
toolsets = _expand_acp_enabled_toolsets(
|
||||
getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
|
||||
)
|
||||
toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
|
||||
tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True)
|
||||
if not tools:
|
||||
return "No tools available."
|
||||
|
||||
+1
-28
@@ -106,24 +106,6 @@ def _register_task_cwd(task_id: str, cwd: str) -> None:
|
||||
logger.debug("Failed to register ACP task cwd override", exc_info=True)
|
||||
|
||||
|
||||
def _expand_acp_enabled_toolsets(
|
||||
toolsets: List[str] | None = None,
|
||||
mcp_server_names: List[str] | None = None,
|
||||
) -> List[str]:
|
||||
"""Return ACP toolsets plus explicit MCP server toolsets for this session."""
|
||||
expanded: List[str] = []
|
||||
for name in list(toolsets or ["hermes-acp"]):
|
||||
if name and name not in expanded:
|
||||
expanded.append(name)
|
||||
|
||||
for server_name in list(mcp_server_names or []):
|
||||
toolset_name = f"mcp-{server_name}"
|
||||
if server_name and toolset_name not in expanded:
|
||||
expanded.append(toolset_name)
|
||||
|
||||
return expanded
|
||||
|
||||
|
||||
def _clear_task_cwd(task_id: str) -> None:
|
||||
"""Remove task-specific cwd overrides for an ACP session."""
|
||||
if not task_id:
|
||||
@@ -555,18 +537,9 @@ class SessionManager:
|
||||
elif isinstance(model_cfg, str) and model_cfg.strip():
|
||||
default_model = model_cfg.strip()
|
||||
|
||||
configured_mcp_servers = [
|
||||
name
|
||||
for name, cfg in (config.get("mcp_servers") or {}).items()
|
||||
if not isinstance(cfg, dict) or cfg.get("enabled", True) is not False
|
||||
]
|
||||
|
||||
kwargs = {
|
||||
"platform": "acp",
|
||||
"enabled_toolsets": _expand_acp_enabled_toolsets(
|
||||
["hermes-acp"],
|
||||
mcp_server_names=configured_mcp_servers,
|
||||
),
|
||||
"enabled_toolsets": ["hermes-acp"],
|
||||
"quiet_mode": True,
|
||||
"session_id": session_id,
|
||||
"model": model or default_model,
|
||||
|
||||
+102
-355
@@ -14,33 +14,17 @@ import copy
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from utils import base_url_host_matches, normalize_proxy_env_vars
|
||||
from utils import normalize_proxy_env_vars
|
||||
|
||||
# NOTE: `import anthropic` is deliberately NOT at module top — the SDK pulls
|
||||
# ~220 ms of imports (anthropic.types, anthropic.lib.tools._beta_runner, etc.)
|
||||
# and the 3 usage sites (build_anthropic_client, build_anthropic_bedrock_client,
|
||||
# read_claude_code_credentials_from_keychain) are all on cold user-triggered
|
||||
# paths. Access via the `_get_anthropic_sdk()` accessor below, which caches
|
||||
# the module after the first call and returns None on ImportError.
|
||||
_anthropic_sdk: Any = ... # sentinel — None means "tried and missing"
|
||||
|
||||
|
||||
def _get_anthropic_sdk():
|
||||
"""Return the ``anthropic`` SDK module, importing lazily. None if not installed."""
|
||||
global _anthropic_sdk
|
||||
if _anthropic_sdk is ...:
|
||||
try:
|
||||
import anthropic as _sdk
|
||||
_anthropic_sdk = _sdk
|
||||
except ImportError:
|
||||
_anthropic_sdk = None
|
||||
return _anthropic_sdk
|
||||
try:
|
||||
import anthropic as _anthropic_sdk
|
||||
except ImportError:
|
||||
_anthropic_sdk = None # type: ignore[assignment]
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -217,33 +201,19 @@ def _forbids_sampling_params(model: str) -> bool:
|
||||
|
||||
|
||||
# Beta headers for enhanced features (sent with ALL auth types).
|
||||
# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
|
||||
# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the
|
||||
# beta headers are still accepted (harmless no-op) but not required. Kept
|
||||
# here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
|
||||
# that still gate on the headers continue to get the enhanced features.
|
||||
#
|
||||
# ``context-1m-2025-08-07`` unlocks the 1M context window on Claude Opus 4.6/4.7
|
||||
# and Sonnet 4.6 when served via AWS Bedrock or Azure AI Foundry. 1M is GA on
|
||||
# native Anthropic (api.anthropic.com) for Opus 4.6+, but Bedrock/Azure still
|
||||
# gate it behind this beta header as of 2026-04 — without it Bedrock caps Opus
|
||||
# at 200K even though model_metadata.py advertises 1M. The header is a harmless
|
||||
# no-op on endpoints where 1M is GA.
|
||||
#
|
||||
# Migration guide: remove these if you no longer support ≤4.5 models or once
|
||||
# Bedrock/Azure promote 1M to GA.
|
||||
# Migration guide: remove these if you no longer support ≤4.5 models.
|
||||
_COMMON_BETAS = [
|
||||
"interleaved-thinking-2025-05-14",
|
||||
"fine-grained-tool-streaming-2025-05-14",
|
||||
"context-1m-2025-08-07",
|
||||
]
|
||||
# MiniMax's Anthropic-compatible endpoints fail tool-use requests when
|
||||
# the fine-grained tool streaming beta is present. Omit it so tool calls
|
||||
# fall back to the provider's default response path.
|
||||
_TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
|
||||
# 1M context beta — see comment on _COMMON_BETAS above. Stripped for
|
||||
# Bearer-auth (MiniMax) endpoints since they host their own models and
|
||||
# unknown Anthropic beta headers risk request rejection.
|
||||
_CONTEXT_1M_BETA = "context-1m-2025-08-07"
|
||||
|
||||
# Fast mode beta — enables the ``speed: "fast"`` request parameter for
|
||||
# significantly higher output token throughput on Opus 4.6 (~2.5x).
|
||||
@@ -308,9 +278,8 @@ def _is_oauth_token(key: str) -> bool:
|
||||
Positively identifies Anthropic OAuth tokens by their key format:
|
||||
- ``sk-ant-`` prefix (but NOT ``sk-ant-api``) → setup tokens, managed keys
|
||||
- ``eyJ`` prefix → JWTs from the Anthropic OAuth flow
|
||||
- ``cc-`` prefix → Claude Code OAuth access tokens (from CLAUDE_CODE_OAUTH_TOKEN)
|
||||
|
||||
Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match any pattern
|
||||
Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match either pattern
|
||||
and correctly return False.
|
||||
"""
|
||||
if not key:
|
||||
@@ -324,9 +293,6 @@ def _is_oauth_token(key: str) -> bool:
|
||||
# JWTs from Anthropic OAuth flow
|
||||
if key.startswith("eyJ"):
|
||||
return True
|
||||
# Claude Code OAuth access tokens (opaque, from CLAUDE_CODE_OAUTH_TOKEN)
|
||||
if key.startswith("cc-"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
@@ -365,88 +331,6 @@ def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
|
||||
return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
|
||||
|
||||
|
||||
# Model-name prefixes that identify the Kimi / Moonshot family. Covers
|
||||
# - official slugs: ``kimi-k2.5``, ``kimi_thinking``, ``moonshot-v1-8k``
|
||||
# - common release lines: ``k1.5-...``, ``k2-thinking``, ``k25-...``, ``k2.5-...``
|
||||
# Matched case-insensitively against the post-``normalize_model_name`` form,
|
||||
# so a caller's ``provider/vendor/model`` slug is handled the same as a
|
||||
# bare name.
|
||||
_KIMI_FAMILY_MODEL_PREFIXES = (
|
||||
"kimi-", "kimi_",
|
||||
"moonshot-", "moonshot_",
|
||||
"k1.", "k1-",
|
||||
"k2.", "k2-",
|
||||
"k25", "k2.5",
|
||||
)
|
||||
|
||||
|
||||
def _model_name_is_kimi_family(model: str | None) -> bool:
|
||||
if not isinstance(model, str):
|
||||
return False
|
||||
m = model.strip().lower()
|
||||
if not m:
|
||||
return False
|
||||
# Strip vendor prefix (e.g. ``moonshotai/kimi-k2.5`` → ``kimi-k2.5``)
|
||||
if "/" in m:
|
||||
m = m.rsplit("/", 1)[-1]
|
||||
return m.startswith(_KIMI_FAMILY_MODEL_PREFIXES)
|
||||
|
||||
|
||||
def _is_kimi_family_endpoint(base_url: str | None, model: str | None = None) -> bool:
|
||||
"""Return True for any Kimi / Moonshot Anthropic-Messages-speaking endpoint.
|
||||
|
||||
Broader than ``_is_kimi_coding_endpoint`` — matches:
|
||||
|
||||
- Kimi's official ``/coding`` URL (legacy check, preserved)
|
||||
- Any ``api.kimi.com`` / ``moonshot.ai`` / ``moonshot.cn`` host
|
||||
- Custom or proxied endpoints whose *model* name is in the Kimi / Moonshot
|
||||
family (``kimi-*``, ``moonshot-*``, ``k1.*``, ``k2.*``, …). Users with
|
||||
``api_mode: anthropic_messages`` on a private gateway fronting Kimi
|
||||
fall into this branch — the upstream still enforces Kimi's thinking
|
||||
semantics (reasoning_content required on every replayed tool-call
|
||||
message) regardless of the gateway's hostname.
|
||||
|
||||
Used to decide whether to drop Anthropic's ``thinking`` kwarg and to
|
||||
preserve unsigned reasoning_content-derived thinking blocks on replay.
|
||||
See hermes-agent#13848, #17057.
|
||||
"""
|
||||
if _is_kimi_coding_endpoint(base_url):
|
||||
return True
|
||||
for _domain in ("api.kimi.com", "moonshot.ai", "moonshot.cn"):
|
||||
if base_url_host_matches(base_url or "", _domain):
|
||||
return True
|
||||
if _model_name_is_kimi_family(model):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _is_deepseek_anthropic_endpoint(base_url: str | None) -> bool:
|
||||
"""Return True for DeepSeek's Anthropic-compatible endpoint.
|
||||
|
||||
DeepSeek's ``/anthropic`` route speaks the Anthropic Messages protocol
|
||||
but, when thinking mode is enabled, requires the ``thinking`` blocks
|
||||
from prior assistant turns to round-trip on subsequent requests — the
|
||||
generic third-party path strips them and triggers HTTP 400::
|
||||
|
||||
The content[].thinking in the thinking mode must be passed back
|
||||
to the API.
|
||||
|
||||
Per DeepSeek's published compatibility matrix the blocks are unsigned
|
||||
(no Anthropic-proprietary signature, no ``redacted_thinking`` support),
|
||||
so this endpoint is handled with the same strip-signed / keep-unsigned
|
||||
policy used for Kimi's ``/coding`` endpoint. The match is pinned to
|
||||
the ``/anthropic`` path so the OpenAI-compatible ``api.deepseek.com``
|
||||
base URL (which never reaches this adapter) is not misclassified.
|
||||
See hermes-agent#16748.
|
||||
"""
|
||||
if not base_url_host_matches(base_url or "", "api.deepseek.com"):
|
||||
return False
|
||||
normalized = _normalize_base_url_text(base_url)
|
||||
if not normalized:
|
||||
return False
|
||||
return "/anthropic" in normalized.rstrip("/").lower()
|
||||
|
||||
|
||||
def _requires_bearer_auth(base_url: str | None) -> bool:
|
||||
"""Return True for Anthropic-compatible providers that require Bearer auth.
|
||||
|
||||
@@ -461,45 +345,20 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
|
||||
return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
|
||||
|
||||
|
||||
def _common_betas_for_base_url(
|
||||
base_url: str | None,
|
||||
*,
|
||||
drop_context_1m_beta: bool = False,
|
||||
) -> list[str]:
|
||||
def _common_betas_for_base_url(base_url: str | None) -> list[str]:
|
||||
"""Return the beta headers that are safe for the configured endpoint.
|
||||
|
||||
MiniMax's Anthropic-compatible endpoints (Bearer-auth) reject requests
|
||||
that include Anthropic's ``fine-grained-tool-streaming`` beta — every
|
||||
tool-use message triggers a connection error. Strip that beta for
|
||||
Bearer-auth endpoints while keeping all other betas intact.
|
||||
|
||||
The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
|
||||
endpoints — MiniMax hosts its own models, not Claude, so the header is
|
||||
irrelevant at best and risks request rejection at worst.
|
||||
|
||||
``drop_context_1m_beta=True`` additionally strips the 1M-context beta on
|
||||
otherwise-unrelated endpoints. The OAuth retry path flips this flag after
|
||||
a subscription rejects the beta with
|
||||
"The long context beta is not yet available for this subscription" so
|
||||
subsequent requests in the same session don't repeat the probe. See the
|
||||
reactive recovery loop in ``run_agent.py`` and issue-comment history on
|
||||
PR #17680 for the full rationale.
|
||||
"""
|
||||
if _requires_bearer_auth(base_url):
|
||||
_stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
|
||||
return [b for b in _COMMON_BETAS if b not in _stripped]
|
||||
if drop_context_1m_beta:
|
||||
return [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
|
||||
return [b for b in _COMMON_BETAS if b != _TOOL_STREAMING_BETA]
|
||||
return _COMMON_BETAS
|
||||
|
||||
|
||||
def build_anthropic_client(
|
||||
api_key: str,
|
||||
base_url: str = None,
|
||||
timeout: float = None,
|
||||
*,
|
||||
drop_context_1m_beta: bool = False,
|
||||
):
|
||||
def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
|
||||
"""Create an Anthropic client, auto-detecting setup-tokens vs API keys.
|
||||
|
||||
If *timeout* is provided it overrides the default 900s read timeout. The
|
||||
@@ -508,15 +367,8 @@ def build_anthropic_client(
|
||||
Anthropic-compatible providers respect the same knob as OpenAI-wire
|
||||
providers.
|
||||
|
||||
``drop_context_1m_beta=True`` strips ``context-1m-2025-08-07`` from the
|
||||
client-level ``anthropic-beta`` header. Used by the reactive OAuth retry
|
||||
path in ``run_agent.py`` when a subscription rejects the beta; leave at
|
||||
its default on fresh clients so 1M-capable subscriptions keep the
|
||||
capability.
|
||||
|
||||
Returns an anthropic.Anthropic instance.
|
||||
"""
|
||||
_anthropic_sdk = _get_anthropic_sdk()
|
||||
if _anthropic_sdk is None:
|
||||
raise ImportError(
|
||||
"The 'anthropic' package is required for the Anthropic provider. "
|
||||
@@ -533,20 +385,8 @@ def build_anthropic_client(
|
||||
"timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
|
||||
}
|
||||
if normalized_base_url:
|
||||
# Azure Anthropic endpoints require an ``api-version`` query parameter.
|
||||
# Pass it via default_query so the SDK appends it to every request URL
|
||||
# without corrupting the base_url (appending it directly produces
|
||||
# malformed paths like /anthropic?api-version=.../v1/messages).
|
||||
_is_azure_endpoint = "azure.com" in normalized_base_url.lower()
|
||||
if _is_azure_endpoint and "api-version" not in normalized_base_url:
|
||||
kwargs["base_url"] = normalized_base_url.rstrip("/")
|
||||
kwargs["default_query"] = {"api-version": "2025-04-15"}
|
||||
else:
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
common_betas = _common_betas_for_base_url(
|
||||
normalized_base_url,
|
||||
drop_context_1m_beta=drop_context_1m_beta,
|
||||
)
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
common_betas = _common_betas_for_base_url(normalized_base_url)
|
||||
|
||||
if _is_kimi_coding_endpoint(base_url):
|
||||
# Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
|
||||
@@ -602,16 +442,8 @@ def build_anthropic_bedrock_client(region: str):
|
||||
Claude feature parity: prompt caching, thinking budgets, adaptive
|
||||
thinking, fast mode — features not available via the Converse API.
|
||||
|
||||
Attaches the common Anthropic beta headers as client-level defaults so
|
||||
that Bedrock-hosted Claude models get the same enhanced features as
|
||||
native Anthropic. The ``context-1m-2025-08-07`` beta in particular
|
||||
unlocks the 1M context window for Opus 4.6/4.7 on Bedrock — without
|
||||
it, Bedrock caps these models at 200K even though the Anthropic API
|
||||
serves them with 1M natively.
|
||||
|
||||
Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
|
||||
"""
|
||||
_anthropic_sdk = _get_anthropic_sdk()
|
||||
if _anthropic_sdk is None:
|
||||
raise ImportError(
|
||||
"The 'anthropic' package is required for the Bedrock provider. "
|
||||
@@ -627,73 +459,11 @@ def build_anthropic_bedrock_client(region: str):
|
||||
return _anthropic_sdk.AnthropicBedrock(
|
||||
aws_region=region,
|
||||
timeout=Timeout(timeout=900.0, connect=10.0),
|
||||
default_headers={"anthropic-beta": ",".join(_COMMON_BETAS)},
|
||||
)
|
||||
|
||||
|
||||
def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
|
||||
"""Read Claude Code OAuth credentials from the macOS Keychain.
|
||||
|
||||
Claude Code >=2.1.114 stores credentials in the macOS Keychain under the
|
||||
service name "Claude Code-credentials" rather than (or in addition to)
|
||||
the JSON file at ~/.claude/.credentials.json.
|
||||
|
||||
The password field contains a JSON string with the same claudeAiOauth
|
||||
structure as the JSON file.
|
||||
|
||||
Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
|
||||
"""
|
||||
if platform.system() != "Darwin":
|
||||
return None
|
||||
|
||||
try:
|
||||
# Read the "Claude Code-credentials" generic password entry
|
||||
result = subprocess.run(
|
||||
["security", "find-generic-password",
|
||||
"-s", "Claude Code-credentials",
|
||||
"-w"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
except (OSError, subprocess.TimeoutExpired):
|
||||
logger.debug("Keychain: security command not available or timed out")
|
||||
return None
|
||||
|
||||
if result.returncode != 0:
|
||||
logger.debug("Keychain: no entry found for 'Claude Code-credentials'")
|
||||
return None
|
||||
|
||||
raw = result.stdout.strip()
|
||||
if not raw:
|
||||
return None
|
||||
|
||||
try:
|
||||
data = json.loads(raw)
|
||||
except json.JSONDecodeError:
|
||||
logger.debug("Keychain: credentials payload is not valid JSON")
|
||||
return None
|
||||
|
||||
oauth_data = data.get("claudeAiOauth")
|
||||
if oauth_data and isinstance(oauth_data, dict):
|
||||
access_token = oauth_data.get("accessToken", "")
|
||||
if access_token:
|
||||
return {
|
||||
"accessToken": access_token,
|
||||
"refreshToken": oauth_data.get("refreshToken", ""),
|
||||
"expiresAt": oauth_data.get("expiresAt", 0),
|
||||
"source": "macos_keychain",
|
||||
}
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
|
||||
"""Read refreshable Claude Code OAuth credentials.
|
||||
|
||||
Checks two sources in order:
|
||||
1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry
|
||||
2. ~/.claude/.credentials.json file
|
||||
"""Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.
|
||||
|
||||
This intentionally excludes ~/.claude.json primaryApiKey. Opencode's
|
||||
subscription flow is OAuth/setup-token based with refreshable credentials,
|
||||
@@ -702,12 +472,6 @@ def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
|
||||
|
||||
Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
|
||||
"""
|
||||
# Try macOS Keychain first (covers Claude Code >=2.1.114)
|
||||
kc_creds = _read_claude_code_credentials_from_keychain()
|
||||
if kc_creds:
|
||||
return kc_creds
|
||||
|
||||
# Fall back to JSON file
|
||||
cred_path = Path.home() / ".claude" / ".credentials.json"
|
||||
if cred_path.exists():
|
||||
try:
|
||||
@@ -878,9 +642,7 @@ def _write_claude_code_credentials(
|
||||
existing["claudeAiOauth"] = oauth_data
|
||||
|
||||
cred_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
_tmp_cred = cred_path.with_suffix(".tmp")
|
||||
_tmp_cred.write_text(json.dumps(existing, indent=2), encoding="utf-8")
|
||||
_tmp_cred.replace(cred_path)
|
||||
cred_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
|
||||
# Restrict permissions (credentials file)
|
||||
cred_path.chmod(0o600)
|
||||
except (OSError, IOError) as e:
|
||||
@@ -1147,26 +909,6 @@ def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _is_bedrock_model_id(model: str) -> bool:
|
||||
"""Detect AWS Bedrock model IDs that use dots as namespace separators.
|
||||
|
||||
Bedrock model IDs come in two forms:
|
||||
- Bare: ``anthropic.claude-opus-4-7``
|
||||
- Regional (inference profiles): ``us.anthropic.claude-sonnet-4-5-v1:0``
|
||||
|
||||
In both cases the dots separate namespace components, not version
|
||||
numbers, and must be preserved verbatim for the Bedrock API.
|
||||
"""
|
||||
lower = model.lower()
|
||||
# Regional inference-profile prefixes
|
||||
if any(lower.startswith(p) for p in ("global.", "us.", "eu.", "ap.", "jp.")):
|
||||
return True
|
||||
# Bare Bedrock model IDs: provider.model-family
|
||||
if lower.startswith("anthropic."):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
|
||||
"""Normalize a model name for the Anthropic API.
|
||||
|
||||
@@ -1174,25 +916,14 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
|
||||
- Converts dots to hyphens in version numbers (OpenRouter uses dots,
|
||||
Anthropic uses hyphens: claude-opus-4.6 → claude-opus-4-6), unless
|
||||
preserve_dots is True (e.g. for Alibaba/DashScope: qwen3.5-plus).
|
||||
- Preserves Bedrock model IDs (``anthropic.claude-opus-4-7``) and
|
||||
regional inference profiles (``us.anthropic.claude-*``) whose dots
|
||||
are namespace separators, not version separators.
|
||||
"""
|
||||
lower = model.lower()
|
||||
if lower.startswith("anthropic/"):
|
||||
model = model[len("anthropic/"):]
|
||||
if not preserve_dots:
|
||||
# Bedrock model IDs use dots as namespace separators
|
||||
# (e.g. "anthropic.claude-opus-4-7", "us.anthropic.claude-*").
|
||||
# These must not be converted to hyphens. See issue #12295.
|
||||
if _is_bedrock_model_id(model):
|
||||
return model
|
||||
# Only convert dots to hyphens for Anthropic/Claude models.
|
||||
# Non-Anthropic models (gpt-5.4, gemini-2.5, etc.) use dots
|
||||
# as part of their canonical names. See issue #17171.
|
||||
_lower = model.lower()
|
||||
if _lower.startswith("claude-") or _lower.startswith("anthropic/"):
|
||||
model = model.replace(".", "-")
|
||||
# OpenRouter uses dots for version separators (claude-opus-4.6),
|
||||
# Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
|
||||
model = model.replace(".", "-")
|
||||
return model
|
||||
|
||||
|
||||
@@ -1209,33 +940,6 @@ def _sanitize_tool_id(tool_id: str) -> str:
|
||||
return sanitized or "tool_0"
|
||||
|
||||
|
||||
def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
|
||||
"""Normalize tool schemas before sending them to Anthropic.
|
||||
|
||||
Anthropic's tool schema validator rejects nullable unions such as
|
||||
``anyOf: [{"type": "string"}, {"type": "null"}]`` that Pydantic/MCP
|
||||
commonly emits for optional fields. Tool optionality is represented by
|
||||
the parent ``required`` array, so we delegate to the shared
|
||||
``strip_nullable_unions`` helper to collapse nullable unions to the
|
||||
non-null branch while preserving metadata like description/default.
|
||||
|
||||
``keep_nullable_hint=False`` because the Anthropic validator does not
|
||||
recognize the OpenAPI-style ``nullable: true`` extension and strict
|
||||
schema-to-grammar converters may reject unknown keywords.
|
||||
"""
|
||||
if not schema:
|
||||
return {"type": "object", "properties": {}}
|
||||
|
||||
from tools.schema_sanitizer import strip_nullable_unions
|
||||
|
||||
normalized = strip_nullable_unions(schema, keep_nullable_hint=False)
|
||||
if not isinstance(normalized, dict):
|
||||
return {"type": "object", "properties": {}}
|
||||
if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict):
|
||||
normalized = {**normalized, "properties": {}}
|
||||
return normalized
|
||||
|
||||
|
||||
def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
|
||||
"""Convert OpenAI tool definitions to Anthropic format."""
|
||||
if not tools:
|
||||
@@ -1246,9 +950,7 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
|
||||
result.append({
|
||||
"name": fn.get("name", ""),
|
||||
"description": fn.get("description", ""),
|
||||
"input_schema": _normalize_tool_input_schema(
|
||||
fn.get("parameters", {"type": "object", "properties": {}})
|
||||
),
|
||||
"input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
|
||||
})
|
||||
return result
|
||||
|
||||
@@ -1379,7 +1081,6 @@ def _convert_content_to_anthropic(content: Any) -> Any:
|
||||
def convert_messages_to_anthropic(
|
||||
messages: List[Dict],
|
||||
base_url: str | None = None,
|
||||
model: str | None = None,
|
||||
) -> Tuple[Optional[Any], List[Dict]]:
|
||||
"""Convert OpenAI-format messages to Anthropic format.
|
||||
|
||||
@@ -1391,12 +1092,6 @@ def convert_messages_to_anthropic(
|
||||
endpoint, all thinking block signatures are stripped. Signatures are
|
||||
Anthropic-proprietary — third-party endpoints cannot validate them and will
|
||||
reject them with HTTP 400 "Invalid signature in thinking block".
|
||||
|
||||
When *model* is provided and matches the Kimi / Moonshot family (or
|
||||
*base_url* is a Kimi / Moonshot host), unsigned thinking blocks
|
||||
synthesised from ``reasoning_content`` are preserved on replayed
|
||||
assistant tool-call messages — Kimi requires the field to exist, even
|
||||
if empty.
|
||||
"""
|
||||
system = None
|
||||
result = []
|
||||
@@ -1625,16 +1320,7 @@ def convert_messages_to_anthropic(
|
||||
# cache markers can interfere with signature validation.
|
||||
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
|
||||
_is_third_party = _is_third_party_anthropic_endpoint(base_url)
|
||||
# Kimi /coding and DeepSeek /anthropic share a contract: both speak the
|
||||
# Anthropic Messages protocol upstream but require that thinking blocks
|
||||
# synthesised from reasoning_content round-trip on subsequent turns when
|
||||
# thinking is enabled. Signed Anthropic blocks still have to be stripped
|
||||
# (neither endpoint can validate Anthropic's signatures); unsigned blocks
|
||||
# are preserved. See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
|
||||
_preserve_unsigned_thinking = (
|
||||
_is_kimi_family_endpoint(base_url, model)
|
||||
or _is_deepseek_anthropic_endpoint(base_url)
|
||||
)
|
||||
_is_kimi = _is_kimi_coding_endpoint(base_url)
|
||||
|
||||
last_assistant_idx = None
|
||||
for i in range(len(result) - 1, -1, -1):
|
||||
@@ -1646,22 +1332,22 @@ def convert_messages_to_anthropic(
|
||||
if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
|
||||
continue
|
||||
|
||||
if _preserve_unsigned_thinking:
|
||||
# Kimi's /coding and DeepSeek's /anthropic endpoints both enable
|
||||
# thinking server-side and require unsigned thinking blocks on
|
||||
# replayed assistant tool-call messages. Strip signed Anthropic
|
||||
# blocks (neither upstream can validate Anthropic signatures) but
|
||||
# preserve the unsigned ones we synthesised from reasoning_content.
|
||||
if _is_kimi:
|
||||
# Kimi's /coding endpoint enables thinking server-side and
|
||||
# requires unsigned thinking blocks on replayed assistant
|
||||
# tool-call messages. Strip signed Anthropic blocks (Kimi
|
||||
# can't validate signatures) but preserve the unsigned ones
|
||||
# we synthesised from reasoning_content above.
|
||||
new_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
new_content.append(b)
|
||||
continue
|
||||
if b.get("signature") or b.get("data"):
|
||||
# Anthropic-signed block — upstream can't validate, strip
|
||||
# Anthropic-signed block — Kimi can't validate, strip
|
||||
continue
|
||||
# Unsigned thinking (synthesised from reasoning_content) —
|
||||
# keep it: the upstream needs it for message-history validation.
|
||||
# keep it: Kimi needs it for message-history validation.
|
||||
new_content.append(b)
|
||||
m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
|
||||
elif _is_third_party or idx != last_assistant_idx:
|
||||
@@ -1718,7 +1404,6 @@ def build_anthropic_kwargs(
|
||||
context_length: Optional[int] = None,
|
||||
base_url: str | None = None,
|
||||
fast_mode: bool = False,
|
||||
drop_context_1m_beta: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build kwargs for anthropic.messages.create().
|
||||
|
||||
@@ -1758,9 +1443,7 @@ def build_anthropic_kwargs(
|
||||
Currently only supported on native Anthropic endpoints (not third-party
|
||||
compatible ones).
|
||||
"""
|
||||
system, anthropic_messages = convert_messages_to_anthropic(
|
||||
messages, base_url=base_url, model=model
|
||||
)
|
||||
system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
|
||||
anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
|
||||
|
||||
model = normalize_model_name(model, preserve_dots=preserve_dots)
|
||||
@@ -1866,7 +1549,7 @@ def build_anthropic_kwargs(
|
||||
# silently hides reasoning text that Hermes surfaces in its CLI. We
|
||||
# request "summarized" so the reasoning blocks stay populated — matching
|
||||
# 4.6 behavior and preserving the activity-feed UX during long tool runs.
|
||||
_is_kimi_coding = _is_kimi_family_endpoint(base_url, model)
|
||||
_is_kimi_coding = _is_kimi_coding_endpoint(base_url)
|
||||
if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
|
||||
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
|
||||
effort = str(reasoning_config.get("effort", "medium")).lower()
|
||||
@@ -1892,9 +1575,9 @@ def build_anthropic_kwargs(
|
||||
|
||||
# ── Strip sampling params on 4.7+ ─────────────────────────────────
|
||||
# Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
|
||||
# Callers (auxiliary_client, etc.) may set these for older models;
|
||||
# drop them here as a safety net so upstream 4.6 → 4.7 migrations
|
||||
# don't require coordinated edits everywhere.
|
||||
# Callers (auxiliary_client, flush_memories, etc.) may set these for
|
||||
# older models; drop them here as a safety net so upstream 4.6 → 4.7
|
||||
# migrations don't require coordinated edits everywhere.
|
||||
if _forbids_sampling_params(model):
|
||||
for _sampling_key in ("temperature", "top_p", "top_k"):
|
||||
kwargs.pop(_sampling_key, None)
|
||||
@@ -1907,10 +1590,7 @@ def build_anthropic_kwargs(
|
||||
kwargs.setdefault("extra_body", {})["speed"] = "fast"
|
||||
# Build extra_headers with ALL applicable betas (the per-request
|
||||
# extra_headers override the client-level anthropic-beta header).
|
||||
betas = list(_common_betas_for_base_url(
|
||||
base_url,
|
||||
drop_context_1m_beta=drop_context_1m_beta,
|
||||
))
|
||||
betas = list(_common_betas_for_base_url(base_url))
|
||||
if is_oauth:
|
||||
betas.extend(_OAUTH_ONLY_BETAS)
|
||||
betas.append(_FAST_MODE_BETA)
|
||||
@@ -1919,3 +1599,70 @@ def build_anthropic_kwargs(
|
||||
return kwargs
|
||||
|
||||
|
||||
def normalize_anthropic_response(
|
||||
response,
|
||||
strip_tool_prefix: bool = False,
|
||||
) -> Tuple[SimpleNamespace, str]:
|
||||
"""Normalize Anthropic response to match the shape expected by AIAgent.
|
||||
|
||||
Returns (assistant_message, finish_reason) where assistant_message has
|
||||
.content, .tool_calls, and .reasoning attributes.
|
||||
|
||||
When *strip_tool_prefix* is True, removes the ``mcp_`` prefix that was
|
||||
added to tool names for OAuth Claude Code compatibility.
|
||||
"""
|
||||
text_parts = []
|
||||
reasoning_parts = []
|
||||
reasoning_details = []
|
||||
tool_calls = []
|
||||
|
||||
for block in response.content:
|
||||
if block.type == "text":
|
||||
text_parts.append(block.text)
|
||||
elif block.type == "thinking":
|
||||
reasoning_parts.append(block.thinking)
|
||||
block_dict = _to_plain_data(block)
|
||||
if isinstance(block_dict, dict):
|
||||
reasoning_details.append(block_dict)
|
||||
elif block.type == "tool_use":
|
||||
name = block.name
|
||||
if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
|
||||
name = name[len(_MCP_TOOL_PREFIX):]
|
||||
tool_calls.append(
|
||||
SimpleNamespace(
|
||||
id=block.id,
|
||||
type="function",
|
||||
function=SimpleNamespace(
|
||||
name=name,
|
||||
arguments=json.dumps(block.input),
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
# Map Anthropic stop_reason to OpenAI finish_reason.
|
||||
# Newer stop reasons added in Claude 4.5+ / 4.7:
|
||||
# - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
|
||||
# - model_context_window_exceeded: hit context limit (not max_tokens)
|
||||
# Both need distinct handling upstream — a refusal should surface to the
|
||||
# user with a clear message, and a context-window overflow should trigger
|
||||
# compression/truncation rather than be treated as normal end-of-turn.
|
||||
stop_reason_map = {
|
||||
"end_turn": "stop",
|
||||
"tool_use": "tool_calls",
|
||||
"max_tokens": "length",
|
||||
"stop_sequence": "stop",
|
||||
"refusal": "content_filter",
|
||||
"model_context_window_exceeded": "length",
|
||||
}
|
||||
finish_reason = stop_reason_map.get(response.stop_reason, "stop")
|
||||
|
||||
return (
|
||||
SimpleNamespace(
|
||||
content="\n".join(text_parts) if text_parts else None,
|
||||
tool_calls=tool_calls or None,
|
||||
reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
|
||||
reasoning_content=None,
|
||||
reasoning_details=reasoning_details or None,
|
||||
),
|
||||
finish_reason,
|
||||
)
|
||||
|
||||
+106
-844
File diff suppressed because it is too large
Load Diff
+5
-171
@@ -87,114 +87,6 @@ def reset_client_cache():
|
||||
_bedrock_control_client_cache.clear()
|
||||
|
||||
|
||||
def invalidate_runtime_client(region: str) -> bool:
|
||||
"""Evict the cached ``bedrock-runtime`` client for a single region.
|
||||
|
||||
Per-region counterpart to :func:`reset_client_cache`. Used by the converse
|
||||
call wrappers to discard clients whose underlying HTTP connection has
|
||||
gone stale, so the next call allocates a fresh client (with a fresh
|
||||
connection pool) instead of reusing a dead socket.
|
||||
|
||||
Returns True if a cached entry was evicted, False if the region was not
|
||||
cached.
|
||||
"""
|
||||
existed = region in _bedrock_runtime_client_cache
|
||||
_bedrock_runtime_client_cache.pop(region, None)
|
||||
return existed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stale-connection detection
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# boto3 caches its HTTPS connection pool inside the client object. When a
|
||||
# pooled connection is killed out from under us (NAT timeout, VPN flap,
|
||||
# server-side TCP RST, proxy idle cull, etc.), the next use surfaces as
|
||||
# one of a handful of low-level exceptions — most commonly
|
||||
# ``botocore.exceptions.ConnectionClosedError`` or
|
||||
# ``urllib3.exceptions.ProtocolError``. urllib3 also trips an internal
|
||||
# ``assert`` in a couple of paths (connection pool state checks, chunked
|
||||
# response readers) which bubbles up as a bare ``AssertionError`` with an
|
||||
# empty ``str(exc)``.
|
||||
#
|
||||
# In all of these cases the client is the problem, not the request: retrying
|
||||
# with the same cached client reproduces the failure until the process
|
||||
# restarts. The fix is to evict the region's cached client so the next
|
||||
# attempt builds a new one.
|
||||
|
||||
_STALE_LIB_MODULE_PREFIXES = (
|
||||
"urllib3.",
|
||||
"botocore.",
|
||||
"boto3.",
|
||||
)
|
||||
|
||||
|
||||
def _traceback_frames_modules(exc: BaseException):
|
||||
"""Yield ``__name__``-style module strings for each frame in exc's traceback."""
|
||||
tb = getattr(exc, "__traceback__", None)
|
||||
while tb is not None:
|
||||
frame = tb.tb_frame
|
||||
module = frame.f_globals.get("__name__", "")
|
||||
yield module or ""
|
||||
tb = tb.tb_next
|
||||
|
||||
|
||||
def is_stale_connection_error(exc: BaseException) -> bool:
|
||||
"""Return True if ``exc`` indicates a dead/stale Bedrock HTTP connection.
|
||||
|
||||
Matches:
|
||||
* ``botocore.exceptions.ConnectionError`` and subclasses
|
||||
(``ConnectionClosedError``, ``EndpointConnectionError``,
|
||||
``ReadTimeoutError``, ``ConnectTimeoutError``).
|
||||
* ``urllib3.exceptions.ProtocolError`` / ``NewConnectionError`` /
|
||||
``ConnectionError`` (best-effort import — urllib3 is a transitive
|
||||
dependency of botocore so it is always available in practice).
|
||||
* Bare ``AssertionError`` raised from a frame inside urllib3, botocore,
|
||||
or boto3. These are internal-invariant failures (typically triggered
|
||||
by corrupted connection-pool state after a dropped socket) and are
|
||||
recoverable by swapping the client.
|
||||
|
||||
Non-library ``AssertionError``s (from application code or tests) are
|
||||
intentionally not matched — only library-internal asserts signal stale
|
||||
connection state.
|
||||
"""
|
||||
# botocore: the canonical signal — HTTPClientError is the umbrella for
|
||||
# ConnectionClosedError, ReadTimeoutError, EndpointConnectionError,
|
||||
# ConnectTimeoutError, and ProxyConnectionError. ConnectionError covers
|
||||
# the same family via a different branch of the hierarchy.
|
||||
try:
|
||||
from botocore.exceptions import (
|
||||
ConnectionError as BotoConnectionError,
|
||||
HTTPClientError,
|
||||
)
|
||||
botocore_errors: tuple = (BotoConnectionError, HTTPClientError)
|
||||
except ImportError: # pragma: no cover — botocore always present with boto3
|
||||
botocore_errors = ()
|
||||
if botocore_errors and isinstance(exc, botocore_errors):
|
||||
return True
|
||||
|
||||
# urllib3: low-level transport failures
|
||||
try:
|
||||
from urllib3.exceptions import (
|
||||
ProtocolError,
|
||||
NewConnectionError,
|
||||
ConnectionError as Urllib3ConnectionError,
|
||||
)
|
||||
urllib3_errors = (ProtocolError, NewConnectionError, Urllib3ConnectionError)
|
||||
except ImportError: # pragma: no cover
|
||||
urllib3_errors = ()
|
||||
if urllib3_errors and isinstance(exc, urllib3_errors):
|
||||
return True
|
||||
|
||||
# Library-internal AssertionError (urllib3 / botocore / boto3)
|
||||
if isinstance(exc, AssertionError):
|
||||
for module in _traceback_frames_modules(exc):
|
||||
if any(module.startswith(prefix) for prefix in _STALE_LIB_MODULE_PREFIXES):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AWS credential detection
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -291,52 +183,14 @@ def has_aws_credentials(env: Optional[Dict[str, str]] = None) -> bool:
|
||||
def resolve_bedrock_region(env: Optional[Dict[str, str]] = None) -> str:
|
||||
"""Resolve the AWS region for Bedrock API calls.
|
||||
|
||||
Priority:
|
||||
1. AWS_REGION env var
|
||||
2. AWS_DEFAULT_REGION env var
|
||||
3. boto3/botocore configured region (from ~/.aws/config or SSO profile)
|
||||
4. us-east-1 (hard fallback)
|
||||
|
||||
The boto3 fallback is critical for EU/AP users who configure their region
|
||||
in ~/.aws/config via a named profile rather than env vars — without it,
|
||||
live model discovery would always return us.* profile IDs regardless of
|
||||
the user's actual region.
|
||||
Priority: AWS_REGION → AWS_DEFAULT_REGION → us-east-1 (fallback).
|
||||
"""
|
||||
env = env if env is not None else os.environ
|
||||
explicit = (
|
||||
return (
|
||||
env.get("AWS_REGION", "").strip()
|
||||
or env.get("AWS_DEFAULT_REGION", "").strip()
|
||||
or "us-east-1"
|
||||
)
|
||||
if explicit:
|
||||
return explicit
|
||||
try:
|
||||
import botocore.session
|
||||
region = botocore.session.get_session().get_config_variable("region")
|
||||
if region:
|
||||
return region
|
||||
except Exception:
|
||||
pass
|
||||
return "us-east-1"
|
||||
|
||||
|
||||
def bedrock_model_ids_or_none() -> Optional[List[str]]:
|
||||
"""Live-discover Bedrock model IDs for the active region.
|
||||
|
||||
Returns a list of model ID strings if discovery succeeds and yields
|
||||
at least one model, or ``None`` on failure / empty result. Callers
|
||||
should fall back to the static curated list when ``None`` is returned.
|
||||
|
||||
This helper consolidates the discover → extract-ids → fallback
|
||||
pattern that was previously duplicated across ``provider_model_ids``,
|
||||
``list_authenticated_providers`` section 2, and section 3.
|
||||
"""
|
||||
try:
|
||||
discovered = discover_bedrock_models(resolve_bedrock_region())
|
||||
if discovered:
|
||||
return [m["id"] for m in discovered]
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -933,17 +787,7 @@ def call_converse(
|
||||
guardrail_config=guardrail_config,
|
||||
)
|
||||
|
||||
try:
|
||||
response = client.converse(**kwargs)
|
||||
except Exception as exc:
|
||||
if is_stale_connection_error(exc):
|
||||
logger.warning(
|
||||
"bedrock: stale-connection error on converse(region=%s, model=%s): "
|
||||
"%s — evicting cached client so the next call reconnects.",
|
||||
region, model, type(exc).__name__,
|
||||
)
|
||||
invalidate_runtime_client(region)
|
||||
raise
|
||||
response = client.converse(**kwargs)
|
||||
return normalize_converse_response(response)
|
||||
|
||||
|
||||
@@ -975,17 +819,7 @@ def call_converse_stream(
|
||||
guardrail_config=guardrail_config,
|
||||
)
|
||||
|
||||
try:
|
||||
response = client.converse_stream(**kwargs)
|
||||
except Exception as exc:
|
||||
if is_stale_connection_error(exc):
|
||||
logger.warning(
|
||||
"bedrock: stale-connection error on converse_stream(region=%s, "
|
||||
"model=%s): %s — evicting cached client so the next call reconnects.",
|
||||
region, model, type(exc).__name__,
|
||||
)
|
||||
invalidate_runtime_client(region)
|
||||
raise
|
||||
response = client.converse_stream(**kwargs)
|
||||
return normalize_converse_stream_events(response)
|
||||
|
||||
|
||||
|
||||
@@ -23,52 +23,26 @@ from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Matches Codex/Harmony tool-call serialization that occasionally leaks into
|
||||
# assistant-message content when the model fails to emit a structured
|
||||
# ``function_call`` item. Accepts the common forms:
|
||||
#
|
||||
# to=functions.exec_command
|
||||
# assistant to=functions.exec_command
|
||||
# <|channel|>commentary to=functions.exec_command
|
||||
#
|
||||
# ``to=functions.<name>`` is the stable marker — the optional ``assistant`` or
|
||||
# Harmony channel prefix varies by degeneration mode. Case-insensitive to
|
||||
# cover lowercase/uppercase ``assistant`` variants.
|
||||
_TOOL_CALL_LEAK_PATTERN = re.compile(
|
||||
r"(?:^|[\s>|])to=functions\.[A-Za-z_][\w.]*",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Multimodal content helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> List[Dict[str, Any]]:
|
||||
def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
|
||||
"""Convert chat-style multimodal content to Responses API input parts.
|
||||
|
||||
Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
|
||||
Output: ``[{"type":"input_text"|"output_text"|"input_image", ...}]`` (Responses format)
|
||||
|
||||
The ``role`` parameter controls the text content type:
|
||||
- ``"user"`` (default) → ``"input_text"``
|
||||
- ``"assistant"`` → ``"output_text"``
|
||||
|
||||
The Responses API rejects ``input_text`` inside assistant messages and
|
||||
``output_text`` inside user messages, so callers MUST pass the correct
|
||||
role for the message being converted.
|
||||
Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
|
||||
|
||||
Returns an empty list when ``content`` is not a list or contains no
|
||||
recognized parts — callers fall back to the string path.
|
||||
"""
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
if not isinstance(content, list):
|
||||
return []
|
||||
converted: List[Dict[str, Any]] = []
|
||||
for part in content:
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
converted.append({"type": text_type, "text": part})
|
||||
converted.append({"type": "input_text", "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
@@ -76,7 +50,7 @@ def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> Lis
|
||||
if ptype in {"text", "input_text", "output_text"}:
|
||||
text = part.get("text")
|
||||
if isinstance(text, str) and text:
|
||||
converted.append({"type": text_type, "text": text})
|
||||
converted.append({"type": "input_text", "text": text})
|
||||
continue
|
||||
if ptype in {"image_url", "input_image"}:
|
||||
image_ref = part.get("image_url")
|
||||
@@ -227,23 +201,6 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
|
||||
# Message format conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_RESPONSE_MESSAGE_STATUSES = {"completed", "incomplete", "in_progress"}
|
||||
|
||||
|
||||
def _normalize_responses_message_status(value: Any, *, default: str = "completed") -> str:
|
||||
"""Normalize a Responses assistant message status for replay.
|
||||
|
||||
The API accepts completed/incomplete/in_progress on replayed assistant
|
||||
output messages. Preserve those exactly (modulo case/hyphen spelling) so
|
||||
incomplete Codex continuation turns don't get falsely marked completed.
|
||||
"""
|
||||
if isinstance(value, str):
|
||||
status = value.strip().lower().replace("-", "_").replace(" ", "_")
|
||||
if status in _RESPONSE_MESSAGE_STATUSES:
|
||||
return status
|
||||
return default
|
||||
|
||||
|
||||
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Convert internal chat-style messages to Responses input items."""
|
||||
items: List[Dict[str, Any]] = []
|
||||
@@ -259,10 +216,9 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
|
||||
if role in {"user", "assistant"}:
|
||||
content = msg.get("content", "")
|
||||
if isinstance(content, list):
|
||||
content_parts = _chat_content_to_responses_parts(content, role=role)
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
content_parts = _chat_content_to_responses_parts(content)
|
||||
content_text = "".join(
|
||||
p.get("text", "") for p in content_parts if p.get("type") == text_type
|
||||
p.get("text", "") for p in content_parts if p.get("type") == "input_text"
|
||||
)
|
||||
else:
|
||||
content_parts = []
|
||||
@@ -289,57 +245,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
|
||||
seen_item_ids.add(item_id)
|
||||
has_codex_reasoning = True
|
||||
|
||||
# Replay exact assistant message items (with id/phase) from
|
||||
# previous turns so the API can maintain prefix-cache hits.
|
||||
# OpenAI docs: "preserve and resend phase on all assistant
|
||||
# messages — dropping it can degrade performance."
|
||||
codex_message_items = msg.get("codex_message_items")
|
||||
replayed_message_items = 0
|
||||
if isinstance(codex_message_items, list):
|
||||
for raw_item in codex_message_items:
|
||||
if not isinstance(raw_item, dict):
|
||||
continue
|
||||
if raw_item.get("type") != "message" or raw_item.get("role") != "assistant":
|
||||
continue
|
||||
raw_content_parts = raw_item.get("content")
|
||||
if not isinstance(raw_content_parts, list):
|
||||
continue
|
||||
|
||||
normalized_content_parts = []
|
||||
for part in raw_content_parts:
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
part_type = str(part.get("type") or "").strip()
|
||||
if part_type not in {"output_text", "text"}:
|
||||
continue
|
||||
text = part.get("text", "")
|
||||
if text is None:
|
||||
text = ""
|
||||
if not isinstance(text, str):
|
||||
text = str(text)
|
||||
normalized_content_parts.append({"type": "output_text", "text": text})
|
||||
|
||||
if not normalized_content_parts:
|
||||
continue
|
||||
|
||||
replay_item = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(raw_item.get("status")),
|
||||
"content": normalized_content_parts,
|
||||
}
|
||||
item_id = raw_item.get("id")
|
||||
if isinstance(item_id, str) and item_id.strip():
|
||||
replay_item["id"] = item_id.strip()
|
||||
phase = raw_item.get("phase")
|
||||
if isinstance(phase, str) and phase.strip():
|
||||
replay_item["phase"] = phase.strip()
|
||||
items.append(replay_item)
|
||||
replayed_message_items += 1
|
||||
|
||||
if replayed_message_items > 0:
|
||||
pass
|
||||
elif content_parts:
|
||||
if content_parts:
|
||||
items.append({"role": "assistant", "content": content_parts})
|
||||
elif content_text.strip():
|
||||
items.append({"role": "assistant", "content": content_text})
|
||||
@@ -499,47 +405,6 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
normalized.append(reasoning_item)
|
||||
continue
|
||||
|
||||
if item_type == "message":
|
||||
role = item.get("role")
|
||||
if role != "assistant":
|
||||
raise ValueError(f"Codex Responses input[{idx}] message items must have role='assistant'.")
|
||||
content = item.get("content")
|
||||
if not isinstance(content, list):
|
||||
raise ValueError(f"Codex Responses input[{idx}] message item must have content list.")
|
||||
normalized_content = []
|
||||
for part_idx, part in enumerate(content):
|
||||
if not isinstance(part, dict):
|
||||
raise ValueError(
|
||||
f"Codex Responses input[{idx}] message content[{part_idx}] must be an object."
|
||||
)
|
||||
part_type = part.get("type")
|
||||
if part_type not in {"output_text", "text"}:
|
||||
raise ValueError(
|
||||
f"Codex Responses input[{idx}] message content[{part_idx}] has unsupported type {part_type!r}."
|
||||
)
|
||||
text = part.get("text", "")
|
||||
if text is None:
|
||||
text = ""
|
||||
if not isinstance(text, str):
|
||||
text = str(text)
|
||||
normalized_content.append({"type": "output_text", "text": text})
|
||||
if not normalized_content:
|
||||
raise ValueError(f"Codex Responses input[{idx}] message item must contain at least one text part.")
|
||||
normalized_item: Dict[str, Any] = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(item.get("status")),
|
||||
"content": normalized_content,
|
||||
}
|
||||
item_id = item.get("id")
|
||||
if isinstance(item_id, str) and item_id.strip():
|
||||
normalized_item["id"] = item_id.strip()
|
||||
phase = item.get("phase")
|
||||
if isinstance(phase, str) and phase.strip():
|
||||
normalized_item["phase"] = phase.strip()
|
||||
normalized.append(normalized_item)
|
||||
continue
|
||||
|
||||
role = item.get("role")
|
||||
if role in {"user", "assistant"}:
|
||||
content = item.get("content", "")
|
||||
@@ -547,16 +412,13 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
content = ""
|
||||
if isinstance(content, list):
|
||||
# Multimodal content from ``_chat_messages_to_responses_input``
|
||||
# is already in Responses format (``input_text`` / ``output_text``
|
||||
# / ``input_image``). Validate each part and pass through.
|
||||
# Use the correct text type for the role — ``output_text`` for
|
||||
# assistant messages, ``input_text`` for user messages.
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
# is already in Responses format (``input_text`` / ``input_image``).
|
||||
# Validate each part and pass through.
|
||||
validated: List[Dict[str, Any]] = []
|
||||
for part_idx, part in enumerate(content):
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
validated.append({"type": text_type, "text": part})
|
||||
validated.append({"type": "input_text", "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
raise ValueError(
|
||||
@@ -567,7 +429,7 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
text = part.get("text", "")
|
||||
if not isinstance(text, str):
|
||||
text = str(text or "")
|
||||
validated.append({"type": text_type, "text": text})
|
||||
validated.append({"type": "input_text", "text": text})
|
||||
elif ptype in {"input_image", "image_url"}:
|
||||
image_ref = part.get("image_url", "")
|
||||
detail = part.get("detail")
|
||||
@@ -824,7 +686,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
content_parts: List[str] = []
|
||||
reasoning_parts: List[str] = []
|
||||
reasoning_items_raw: List[Dict[str, Any]] = []
|
||||
message_items_raw: List[Dict[str, Any]] = []
|
||||
tool_calls: List[Any] = []
|
||||
has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
|
||||
saw_commentary_phase = False
|
||||
@@ -843,7 +704,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
|
||||
if item_type == "message":
|
||||
item_phase = getattr(item, "phase", None)
|
||||
normalized_phase = None
|
||||
if isinstance(item_phase, str):
|
||||
normalized_phase = item_phase.strip().lower()
|
||||
if normalized_phase in {"commentary", "analysis"}:
|
||||
@@ -853,18 +713,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
message_text = _extract_responses_message_text(item)
|
||||
if message_text:
|
||||
content_parts.append(message_text)
|
||||
raw_message_item: Dict[str, Any] = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(item_status),
|
||||
"content": [{"type": "output_text", "text": message_text}],
|
||||
}
|
||||
item_id = getattr(item, "id", None)
|
||||
if isinstance(item_id, str) and item_id:
|
||||
raw_message_item["id"] = item_id
|
||||
if normalized_phase:
|
||||
raw_message_item["phase"] = normalized_phase
|
||||
message_items_raw.append(raw_message_item)
|
||||
elif item_type == "reasoning":
|
||||
reasoning_text = _extract_responses_reasoning_text(item)
|
||||
if reasoning_text:
|
||||
@@ -939,37 +787,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
if isinstance(out_text, str):
|
||||
final_text = out_text.strip()
|
||||
|
||||
# ── Tool-call leak recovery ──────────────────────────────────
|
||||
# gpt-5.x on the Codex Responses API sometimes degenerates and emits
|
||||
# what should be a structured `function_call` item as plain assistant
|
||||
# text using the Harmony/Codex serialization (``to=functions.foo
|
||||
# {json}`` or ``assistant to=functions.foo {json}``). The model
|
||||
# intended to call a tool, but the intent never made it into
|
||||
# ``response.output`` as a ``function_call`` item, so ``tool_calls``
|
||||
# is empty here. If we pass this through, the parent sees a
|
||||
# confident-looking summary with no audit trail (empty ``tool_trace``)
|
||||
# and no tools actually ran — the Taiwan-embassy-email incident.
|
||||
#
|
||||
# Detection: leaked tokens always contain ``to=functions.<name>`` and
|
||||
# the assistant message has no real tool calls. Treat it as incomplete
|
||||
# so the existing Codex-incomplete continuation path (3 retries,
|
||||
# handled in run_agent.py) gets a chance to re-elicit a proper
|
||||
# ``function_call`` item. The existing loop already handles message
|
||||
# append, dedup, and retry budget.
|
||||
leaked_tool_call_text = False
|
||||
if final_text and not tool_calls and _TOOL_CALL_LEAK_PATTERN.search(final_text):
|
||||
leaked_tool_call_text = True
|
||||
logger.warning(
|
||||
"Codex response contains leaked tool-call text in assistant content "
|
||||
"(no structured function_call items). Treating as incomplete so the "
|
||||
"continuation path can re-elicit a proper tool call. Leaked snippet: %r",
|
||||
final_text[:300],
|
||||
)
|
||||
# Clear the text so downstream code doesn't surface the garbage as
|
||||
# a summary. The encrypted reasoning items (if any) are preserved
|
||||
# so the model keeps its chain-of-thought on the retry.
|
||||
final_text = ""
|
||||
|
||||
assistant_message = SimpleNamespace(
|
||||
content=final_text,
|
||||
tool_calls=tool_calls,
|
||||
@@ -977,13 +794,10 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
reasoning_content=None,
|
||||
reasoning_details=None,
|
||||
codex_reasoning_items=reasoning_items_raw or None,
|
||||
codex_message_items=message_items_raw or None,
|
||||
)
|
||||
|
||||
if tool_calls:
|
||||
finish_reason = "tool_calls"
|
||||
elif leaked_tool_call_text:
|
||||
finish_reason = "incomplete"
|
||||
elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
|
||||
finish_reason = "incomplete"
|
||||
elif reasoning_items_raw and not final_text:
|
||||
|
||||
+5
-143
@@ -61,52 +61,9 @@ _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]"
|
||||
|
||||
# Chars per token rough estimate
|
||||
_CHARS_PER_TOKEN = 4
|
||||
# Flat token cost per attached image part. Real cost varies by provider and
|
||||
# dimensions (Anthropic ≈ width×height/750, GPT-4o up to ~1700 for
|
||||
# high-detail 2048×2048, Gemini 258/tile), but 1600 is a realistic ceiling
|
||||
# that keeps compression budgeting honest for multi-image conversations.
|
||||
# Matches Claude Code's IMAGE_TOKEN_ESTIMATE constant.
|
||||
_IMAGE_TOKEN_ESTIMATE = 1600
|
||||
# Same figure expressed in the char-budget currency the rest of the
|
||||
# compressor speaks in. Used when accumulating message "content length"
|
||||
# for tail-cut decisions.
|
||||
_IMAGE_CHAR_EQUIVALENT = _IMAGE_TOKEN_ESTIMATE * _CHARS_PER_TOKEN
|
||||
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
|
||||
|
||||
|
||||
def _content_length_for_budget(raw_content: Any) -> int:
|
||||
"""Return the effective char-length of a message's content for token budgeting.
|
||||
|
||||
Plain strings: ``len(content)``. Multimodal lists: sum of text-part
|
||||
``len(text)`` plus a flat ``_IMAGE_CHAR_EQUIVALENT`` per image part
|
||||
(``image_url`` / ``input_image`` / Anthropic-style ``image``). This
|
||||
keeps the compressor from treating a turn with 5 attached images as
|
||||
near-zero tokens just because the text part is empty.
|
||||
"""
|
||||
if isinstance(raw_content, str):
|
||||
return len(raw_content)
|
||||
if not isinstance(raw_content, list):
|
||||
return len(str(raw_content or ""))
|
||||
|
||||
total = 0
|
||||
for p in raw_content:
|
||||
if isinstance(p, str):
|
||||
total += len(p)
|
||||
continue
|
||||
if not isinstance(p, dict):
|
||||
total += len(str(p))
|
||||
continue
|
||||
ptype = p.get("type")
|
||||
if ptype in {"image_url", "input_image", "image"}:
|
||||
total += _IMAGE_CHAR_EQUIVALENT
|
||||
else:
|
||||
# text / input_text / tool_result-with-text / anything else with
|
||||
# a text field. Ignore the raw base64 payload inside image_url
|
||||
# dicts — dimensions don't matter, only whether it's an image.
|
||||
total += len(p.get("text", "") or "")
|
||||
return total
|
||||
|
||||
|
||||
def _content_text_for_contains(content: Any) -> str:
|
||||
"""Return a best-effort text view of message content.
|
||||
|
||||
@@ -337,11 +294,6 @@ class ContextCompressor(ContextEngine):
|
||||
self._context_probed = False
|
||||
self._context_probe_persistable = False
|
||||
self._previous_summary = None
|
||||
self._last_summary_error = None
|
||||
self._last_summary_dropped_count = 0
|
||||
self._last_summary_fallback_used = False
|
||||
self._last_aux_model_failure_error = None
|
||||
self._last_aux_model_failure_model = None
|
||||
self._last_compression_savings_pct = 100.0
|
||||
self._ineffective_compression_count = 0
|
||||
|
||||
@@ -365,13 +317,6 @@ class ContextCompressor(ContextEngine):
|
||||
int(context_length * self.threshold_percent),
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
)
|
||||
# Recalculate token budgets for the new context length so the
|
||||
# compressor stays calibrated after a model switch (e.g. 200K → 32K).
|
||||
target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
|
||||
self.tail_token_budget = target_tokens
|
||||
self.max_summary_tokens = min(
|
||||
int(context_length * 0.05), _SUMMARY_TOKENS_CEILING,
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -444,18 +389,6 @@ class ContextCompressor(ContextEngine):
|
||||
self._last_compression_savings_pct: float = 100.0
|
||||
self._ineffective_compression_count: int = 0
|
||||
self._summary_failure_cooldown_until: float = 0.0
|
||||
self._last_summary_error: Optional[str] = None
|
||||
# When summary generation fails and a static fallback is inserted,
|
||||
# record how many turns were unrecoverably dropped so callers
|
||||
# (gateway hygiene, /compress) can surface a visible warning.
|
||||
self._last_summary_dropped_count: int = 0
|
||||
self._last_summary_fallback_used: bool = False
|
||||
# When a user-configured summary model fails and we recover by
|
||||
# retrying on the main model, record the failure so gateway /
|
||||
# CLI callers can still warn the user even though compression
|
||||
# succeeded. Silent recovery would hide the broken config.
|
||||
self._last_aux_model_failure_error: Optional[str] = None
|
||||
self._last_aux_model_failure_model: Optional[str] = None
|
||||
|
||||
def update_from_response(self, usage: Dict[str, Any]):
|
||||
"""Update tracked token usage from API response."""
|
||||
@@ -542,7 +475,7 @@ class ContextCompressor(ContextEngine):
|
||||
for i in range(len(result) - 1, -1, -1):
|
||||
msg = result[i]
|
||||
raw_content = msg.get("content") or ""
|
||||
content_len = _content_length_for_budget(raw_content)
|
||||
content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
|
||||
msg_tokens = content_len // _CHARS_PER_TOKEN + 10
|
||||
for tc in msg.get("tool_calls") or []:
|
||||
if isinstance(tc, dict):
|
||||
@@ -879,12 +812,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
self._previous_summary = summary
|
||||
self._summary_failure_cooldown_until = 0.0
|
||||
self._summary_model_fallen_back = False
|
||||
self._last_summary_error = None
|
||||
return self._with_summary_prefix(summary)
|
||||
except RuntimeError:
|
||||
# No provider configured — long cooldown, unlikely to self-resolve
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
|
||||
self._last_summary_error = "no auxiliary LLM provider configured"
|
||||
logging.warning("Context compression: no provider available for "
|
||||
"summary. Middle turns will be dropped without summary "
|
||||
"for %d seconds.",
|
||||
@@ -915,57 +846,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
"Falling back to main model '%s' for compression.",
|
||||
self.summary_model, e, self.model,
|
||||
)
|
||||
# Record the aux-model failure so callers can warn the user
|
||||
# even if the retry-on-main succeeds — a misconfigured aux
|
||||
# model is something the user needs to fix.
|
||||
_err_text = str(e).strip() or e.__class__.__name__
|
||||
if len(_err_text) > 220:
|
||||
_err_text = _err_text[:217].rstrip() + "..."
|
||||
self._last_aux_model_failure_error = _err_text
|
||||
self._last_aux_model_failure_model = self.summary_model
|
||||
self.summary_model = "" # empty = use main model
|
||||
self._summary_failure_cooldown_until = 0.0 # no cooldown
|
||||
return self._generate_summary(turns_to_summarize, focus_topic=focus_topic) # retry immediately
|
||||
|
||||
# Unknown-error best-effort retry on main model. Losing N turns of
|
||||
# context is almost always worse than one extra summary attempt, so
|
||||
# if we haven't already fallen back and the summary model differs
|
||||
# from the main model, try once more on main before entering
|
||||
# cooldown. Errors that DID match _is_model_not_found above are
|
||||
# already handled by the fast-path retry; this branch catches
|
||||
# everything else (400s, provider-specific "no route" strings,
|
||||
# aggregator rejections, etc.) where auto-retry is still safer
|
||||
# than dropping the turns.
|
||||
if (
|
||||
self.summary_model
|
||||
and self.summary_model != self.model
|
||||
and not getattr(self, "_summary_model_fallen_back", False)
|
||||
):
|
||||
self._summary_model_fallen_back = True
|
||||
logging.warning(
|
||||
"Summary model '%s' failed (%s). "
|
||||
"Retrying on main model '%s' before giving up.",
|
||||
self.summary_model, e, self.model,
|
||||
)
|
||||
# Record the aux-model failure (see 404 branch above) — user
|
||||
# should know their configured model is broken even if main
|
||||
# recovers the call.
|
||||
_err_text = str(e).strip() or e.__class__.__name__
|
||||
if len(_err_text) > 220:
|
||||
_err_text = _err_text[:217].rstrip() + "..."
|
||||
self._last_aux_model_failure_error = _err_text
|
||||
self._last_aux_model_failure_model = self.summary_model
|
||||
self.summary_model = "" # empty = use main model
|
||||
self._summary_failure_cooldown_until = 0.0
|
||||
return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)
|
||||
|
||||
# Transient errors (timeout, rate limit, network) — shorter cooldown
|
||||
_transient_cooldown = 60
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
|
||||
err_text = str(e).strip() or e.__class__.__name__
|
||||
if len(err_text) > 220:
|
||||
err_text = err_text[:217].rstrip() + "..."
|
||||
self._last_summary_error = err_text
|
||||
logging.warning(
|
||||
"Failed to generate context summary: %s. "
|
||||
"Further summary attempts paused for %d seconds.",
|
||||
@@ -1180,9 +1067,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
|
||||
for i in range(n - 1, head_end - 1, -1):
|
||||
msg = messages[i]
|
||||
raw_content = msg.get("content") or ""
|
||||
content_len = _content_length_for_budget(raw_content)
|
||||
msg_tokens = content_len // _CHARS_PER_TOKEN + 10 # +10 for role/metadata
|
||||
content = msg.get("content") or ""
|
||||
msg_tokens = len(content) // _CHARS_PER_TOKEN + 10 # +10 for role/metadata
|
||||
# Include tool call arguments in estimate
|
||||
for tc in msg.get("tool_calls") or []:
|
||||
if isinstance(tc, dict):
|
||||
@@ -1213,21 +1099,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
|
||||
return max(cut_idx, head_end + 1)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# ContextEngine: manual /compress preflight
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
|
||||
"""Return True if there is a non-empty middle region to compact.
|
||||
|
||||
Overrides the ABC default so the gateway ``/compress`` guard can
|
||||
skip the LLM call when the transcript is still entirely inside
|
||||
the protected head/tail.
|
||||
"""
|
||||
compress_start = self._align_boundary_forward(messages, self.protect_first_n)
|
||||
compress_end = self._find_tail_cut_by_tokens(messages, compress_start)
|
||||
return compress_start < compress_end
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Main compression entry point
|
||||
# ------------------------------------------------------------------
|
||||
@@ -1251,13 +1122,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
related to this topic and be more aggressive about compressing
|
||||
everything else. Inspired by Claude Code's ``/compact``.
|
||||
"""
|
||||
# Reset per-call summary failure state — callers inspect these fields
|
||||
# after compress() returns to decide whether to surface a warning.
|
||||
self._last_summary_dropped_count = 0
|
||||
self._last_summary_fallback_used = False
|
||||
self._last_summary_error = None
|
||||
self._last_aux_model_failure_error = None
|
||||
self._last_aux_model_failure_model = None
|
||||
n_messages = len(messages)
|
||||
# Only need head + 3 tail messages minimum (token budget decides the real tail size)
|
||||
_min_for_compress = self.protect_first_n + 3 + 1
|
||||
@@ -1336,13 +1200,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
if not self.quiet_mode:
|
||||
logger.warning("Summary generation failed — inserting static fallback context marker")
|
||||
n_dropped = compress_end - compress_start
|
||||
self._last_summary_dropped_count = n_dropped
|
||||
self._last_summary_fallback_used = True
|
||||
summary = (
|
||||
f"{SUMMARY_PREFIX}\n"
|
||||
f"Summary generation was unavailable. {n_dropped} message(s) were "
|
||||
f"Summary generation was unavailable. {n_dropped} conversation turns were "
|
||||
f"removed to free context space but could not be summarized. The removed "
|
||||
f"messages contained earlier work in this session. Continue based on the "
|
||||
f"turns contained earlier work in this session. Continue based on the "
|
||||
f"recent messages below and the current state of any files or resources."
|
||||
)
|
||||
|
||||
|
||||
@@ -78,7 +78,6 @@ class ContextEngine(ABC):
|
||||
self,
|
||||
messages: List[Dict[str, Any]],
|
||||
current_tokens: int = None,
|
||||
focus_topic: str = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Compact the message list and return the new message list.
|
||||
|
||||
@@ -87,12 +86,6 @@ class ContextEngine(ABC):
|
||||
context budget. The implementation is free to summarize, build a
|
||||
DAG, or do anything else — as long as the returned list is a valid
|
||||
OpenAI-format message sequence.
|
||||
|
||||
Args:
|
||||
focus_topic: Optional topic string from manual ``/compress <focus>``.
|
||||
Engines that support guided compression should prioritise
|
||||
preserving information related to this topic. Engines that
|
||||
don't support it may simply ignore this argument.
|
||||
"""
|
||||
|
||||
# -- Optional: pre-flight check ----------------------------------------
|
||||
@@ -105,21 +98,6 @@ class ContextEngine(ABC):
|
||||
"""
|
||||
return False
|
||||
|
||||
# -- Optional: manual /compress preflight ------------------------------
|
||||
|
||||
def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
|
||||
"""Quick check: is there anything in ``messages`` that can be compacted?
|
||||
|
||||
Used by the gateway ``/compress`` command as a preflight guard —
|
||||
returning False lets the gateway report "nothing to compress yet"
|
||||
without making an LLM call.
|
||||
|
||||
Default returns True (always attempt). Engines with a cheap way
|
||||
to introspect their own head/tail boundaries should override this
|
||||
to return False when the transcript is still entirely protected.
|
||||
"""
|
||||
return True
|
||||
|
||||
# -- Optional: session lifecycle ---------------------------------------
|
||||
|
||||
def on_session_start(self, session_id: str, **kwargs) -> None:
|
||||
|
||||
@@ -46,47 +46,6 @@ def _resolve_args() -> list[str]:
|
||||
return shlex.split(raw)
|
||||
|
||||
|
||||
def _resolve_home_dir() -> str:
|
||||
"""Return a stable HOME for child ACP processes."""
|
||||
|
||||
try:
|
||||
from hermes_constants import get_subprocess_home
|
||||
|
||||
profile_home = get_subprocess_home()
|
||||
if profile_home:
|
||||
return profile_home
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
home = os.environ.get("HOME", "").strip()
|
||||
if home:
|
||||
return home
|
||||
|
||||
expanded = os.path.expanduser("~")
|
||||
if expanded and expanded != "~":
|
||||
return expanded
|
||||
|
||||
try:
|
||||
import pwd
|
||||
|
||||
resolved = pwd.getpwuid(os.getuid()).pw_dir.strip()
|
||||
if resolved:
|
||||
return resolved
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Last resort: /tmp (writable on any POSIX system). Avoids crashing the
|
||||
# subprocess with no HOME; callers can set HERMES_HOME explicitly if they
|
||||
# need a different writable dir.
|
||||
return "/tmp"
|
||||
|
||||
|
||||
def _build_subprocess_env() -> dict[str, str]:
|
||||
env = os.environ.copy()
|
||||
env["HOME"] = _resolve_home_dir()
|
||||
return env
|
||||
|
||||
|
||||
def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
|
||||
return {
|
||||
"jsonrpc": "2.0",
|
||||
@@ -423,7 +382,6 @@ class CopilotACPClient:
|
||||
text=True,
|
||||
bufsize=1,
|
||||
cwd=self._acp_cwd,
|
||||
env=_build_subprocess_env(),
|
||||
)
|
||||
except FileNotFoundError as exc:
|
||||
raise RuntimeError(
|
||||
@@ -608,7 +566,7 @@ class CopilotACPClient:
|
||||
end = start + limit if isinstance(limit, int) and limit > 0 else None
|
||||
content = "".join(lines[start:end])
|
||||
if content:
|
||||
content = redact_sensitive_text(content, force=True)
|
||||
content = redact_sensitive_text(content)
|
||||
response = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": message_id,
|
||||
|
||||
+7
-232
@@ -7,13 +7,13 @@ import random
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, fields, replace
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from hermes_cli.config import get_env_value
|
||||
import hermes_cli.auth as auth_mod
|
||||
from hermes_cli.auth import (
|
||||
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
|
||||
@@ -455,125 +455,6 @@ class CredentialPool:
|
||||
logger.debug("Failed to sync from credentials file: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_codex_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
|
||||
"""Sync a Codex device_code pool entry from auth.json if tokens differ.
|
||||
|
||||
When a Codex OAuth access token expires (or the ChatGPT account hits
|
||||
its 5h/weekly quota), the pool entry gets marked ``STATUS_EXHAUSTED``
|
||||
with a ``last_error_reset_at`` that can be many hours in the future.
|
||||
Meanwhile the user may run ``hermes model`` / ``hermes auth`` which
|
||||
performs a fresh device-code login and writes new tokens to
|
||||
``auth.json`` under ``_auth_store_lock``. Without this sync the pool
|
||||
entry stays frozen until ``last_error_reset_at`` elapses — even
|
||||
though fresh credentials are sitting on disk — and every request
|
||||
fails with "no available entries (all exhausted or empty)".
|
||||
|
||||
Mirrors the Nous/Anthropic resync paths above. Only applies to
|
||||
device_code-sourced entries; env/API-key-sourced entries have no
|
||||
auth.json shadow to sync from.
|
||||
"""
|
||||
if self.provider != "openai-codex" or entry.source != "device_code":
|
||||
return entry
|
||||
try:
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
state = _load_provider_state(auth_store, "openai-codex")
|
||||
if not isinstance(state, dict):
|
||||
return entry
|
||||
tokens = state.get("tokens")
|
||||
if not isinstance(tokens, dict):
|
||||
return entry
|
||||
store_access = tokens.get("access_token", "")
|
||||
store_refresh = tokens.get("refresh_token", "")
|
||||
# Adopt auth.json tokens when either side differs. Codex refresh
|
||||
# tokens are single-use too, so a fresh refresh_token from
|
||||
# another process means our entry's pair is consumed/stale.
|
||||
entry_access = entry.access_token or ""
|
||||
entry_refresh = entry.refresh_token or ""
|
||||
if store_access and (
|
||||
store_access != entry_access
|
||||
or (store_refresh and store_refresh != entry_refresh)
|
||||
):
|
||||
logger.debug(
|
||||
"Pool entry %s: syncing Codex tokens from auth.json "
|
||||
"(refreshed by another process)",
|
||||
entry.id,
|
||||
)
|
||||
field_updates: Dict[str, Any] = {
|
||||
"access_token": store_access,
|
||||
"refresh_token": store_refresh or entry.refresh_token,
|
||||
"last_status": None,
|
||||
"last_status_at": None,
|
||||
"last_error_code": None,
|
||||
"last_error_reason": None,
|
||||
"last_error_message": None,
|
||||
"last_error_reset_at": None,
|
||||
}
|
||||
if state.get("last_refresh"):
|
||||
field_updates["last_refresh"] = state["last_refresh"]
|
||||
updated = replace(entry, **field_updates)
|
||||
self._replace_entry(entry, updated)
|
||||
self._persist()
|
||||
return updated
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to sync Codex entry from auth.json: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
|
||||
"""Sync a Nous pool entry from auth.json if tokens differ.
|
||||
|
||||
Nous OAuth refresh tokens are single-use. When another process
|
||||
(e.g. a concurrent cron) refreshes the token via
|
||||
``resolve_nous_runtime_credentials``, it writes fresh tokens to
|
||||
auth.json under ``_auth_store_lock``. The pool entry's tokens
|
||||
become stale. This method detects that and adopts the newer pair,
|
||||
avoiding a "refresh token reuse" revocation on the Nous Portal.
|
||||
"""
|
||||
if self.provider != "nous" or entry.source != "device_code":
|
||||
return entry
|
||||
try:
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
state = _load_provider_state(auth_store, "nous")
|
||||
if not state:
|
||||
return entry
|
||||
store_refresh = state.get("refresh_token", "")
|
||||
store_access = state.get("access_token", "")
|
||||
if store_refresh and store_refresh != entry.refresh_token:
|
||||
logger.debug(
|
||||
"Pool entry %s: syncing tokens from auth.json (Nous refresh token changed)",
|
||||
entry.id,
|
||||
)
|
||||
field_updates: Dict[str, Any] = {
|
||||
"access_token": store_access,
|
||||
"refresh_token": store_refresh,
|
||||
"last_status": None,
|
||||
"last_status_at": None,
|
||||
"last_error_code": None,
|
||||
}
|
||||
if state.get("expires_at"):
|
||||
field_updates["expires_at"] = state["expires_at"]
|
||||
if state.get("agent_key"):
|
||||
field_updates["agent_key"] = state["agent_key"]
|
||||
if state.get("agent_key_expires_at"):
|
||||
field_updates["agent_key_expires_at"] = state["agent_key_expires_at"]
|
||||
if state.get("inference_base_url"):
|
||||
field_updates["inference_base_url"] = state["inference_base_url"]
|
||||
extra_updates = dict(entry.extra)
|
||||
for extra_key in ("obtained_at", "expires_in", "agent_key_id",
|
||||
"agent_key_expires_in", "agent_key_reused",
|
||||
"agent_key_obtained_at"):
|
||||
val = state.get(extra_key)
|
||||
if val is not None:
|
||||
extra_updates[extra_key] = val
|
||||
updated = replace(entry, extra=extra_updates, **field_updates)
|
||||
self._replace_entry(entry, updated)
|
||||
self._persist()
|
||||
return updated
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to sync Nous entry from auth.json: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None:
|
||||
"""Write refreshed pool entry tokens back to auth.json providers.
|
||||
|
||||
@@ -680,9 +561,6 @@ class CredentialPool:
|
||||
last_refresh=refreshed.get("last_refresh"),
|
||||
)
|
||||
elif self.provider == "nous":
|
||||
synced = self._sync_nous_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
nous_state = {
|
||||
"access_token": entry.access_token,
|
||||
"refresh_token": entry.refresh_token,
|
||||
@@ -757,26 +635,6 @@ class CredentialPool:
|
||||
# Credentials file had a valid (non-expired) token — use it directly
|
||||
logger.debug("Credentials file has valid token, using without refresh")
|
||||
return synced
|
||||
# For nous: another process may have consumed the refresh token
|
||||
# between our proactive sync and the HTTP call. Re-sync from
|
||||
# auth.json and adopt the fresh tokens if available.
|
||||
if self.provider == "nous":
|
||||
synced = self._sync_nous_entry_from_auth_store(entry)
|
||||
if synced.refresh_token != entry.refresh_token:
|
||||
logger.debug("Nous refresh failed but auth.json has newer tokens — adopting")
|
||||
updated = replace(
|
||||
synced,
|
||||
last_status=STATUS_OK,
|
||||
last_status_at=None,
|
||||
last_error_code=None,
|
||||
last_error_reason=None,
|
||||
last_error_message=None,
|
||||
last_error_reset_at=None,
|
||||
)
|
||||
self._replace_entry(synced, updated)
|
||||
self._persist()
|
||||
self._sync_device_code_entry_to_auth_store(updated)
|
||||
return updated
|
||||
self._mark_exhausted(entry, None)
|
||||
return None
|
||||
|
||||
@@ -840,29 +698,6 @@ class CredentialPool:
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
# For nous entries, sync from auth.json before status checks.
|
||||
# Another process may have successfully refreshed via
|
||||
# resolve_nous_runtime_credentials(), making this entry's
|
||||
# exhausted status stale.
|
||||
if (self.provider == "nous"
|
||||
and entry.source == "device_code"
|
||||
and entry.last_status == STATUS_EXHAUSTED):
|
||||
synced = self._sync_nous_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
# For openai-codex entries, same pattern: the user may have
|
||||
# re-authed via `hermes model` / `hermes auth` after a 429/401,
|
||||
# leaving fresh tokens on disk while the pool entry is still
|
||||
# frozen behind last_error_reset_at (can be hours in the
|
||||
# future for ChatGPT weekly windows).
|
||||
if (self.provider == "openai-codex"
|
||||
and entry.source == "device_code"
|
||||
and entry.last_status == STATUS_EXHAUSTED):
|
||||
synced = self._sync_codex_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
if entry.last_status == STATUS_EXHAUSTED:
|
||||
exhausted_until = _exhausted_until(entry)
|
||||
if exhausted_until is not None and now < exhausted_until:
|
||||
@@ -904,11 +739,8 @@ class CredentialPool:
|
||||
|
||||
if self._strategy == STRATEGY_LEAST_USED and len(available) > 1:
|
||||
entry = min(available, key=lambda e: e.request_count)
|
||||
# Increment usage counter so subsequent selections distribute load
|
||||
updated = replace(entry, request_count=entry.request_count + 1)
|
||||
self._replace_entry(entry, updated)
|
||||
self._current_id = entry.id
|
||||
return updated
|
||||
return entry
|
||||
|
||||
if self._strategy == STRATEGY_ROUND_ROBIN and len(available) > 1:
|
||||
entry = available[0]
|
||||
@@ -1224,18 +1056,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
"inference_base_url": state.get("inference_base_url"),
|
||||
"agent_key": state.get("agent_key"),
|
||||
"agent_key_expires_at": state.get("agent_key_expires_at"),
|
||||
# Carry the mint/refresh timestamps into the pool so
|
||||
# freshness-sensitive consumers (self-heal hooks, pool
|
||||
# pruning by age) can distinguish just-minted credentials
|
||||
# from stale ones. Without these, fresh device_code
|
||||
# entries get obtained_at=None and look older than they
|
||||
# are (#15099).
|
||||
"obtained_at": state.get("obtained_at"),
|
||||
"expires_in": state.get("expires_in"),
|
||||
"agent_key_id": state.get("agent_key_id"),
|
||||
"agent_key_expires_in": state.get("agent_key_expires_in"),
|
||||
"agent_key_reused": state.get("agent_key_reused"),
|
||||
"agent_key_obtained_at": state.get("agent_key_obtained_at"),
|
||||
"tls": state.get("tls") if isinstance(state.get("tls"), dict) else None,
|
||||
"label": seeded_label,
|
||||
},
|
||||
@@ -1246,10 +1066,9 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
# env vars (COPILOT_GITHUB_TOKEN / GH_TOKEN). They don't live in
|
||||
# the auth store or credential pool, so we resolve them here.
|
||||
try:
|
||||
from hermes_cli.copilot_auth import resolve_copilot_token, get_copilot_api_token
|
||||
from hermes_cli.copilot_auth import resolve_copilot_token
|
||||
token, source = resolve_copilot_token()
|
||||
if token:
|
||||
api_token = get_copilot_api_token(token)
|
||||
source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
|
||||
if not _is_suppressed(provider, source_name):
|
||||
active_sources.add(source_name)
|
||||
@@ -1261,7 +1080,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
{
|
||||
"source": source_name,
|
||||
"auth_type": AUTH_TYPE_API_KEY,
|
||||
"access_token": api_token,
|
||||
"access_token": token,
|
||||
"base_url": pconfig.inference_base_url if pconfig else "",
|
||||
"label": source,
|
||||
},
|
||||
@@ -1299,48 +1118,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
except Exception as exc:
|
||||
logger.debug("Qwen OAuth token seed failed: %s", exc)
|
||||
|
||||
elif provider == "minimax-oauth":
|
||||
# MiniMax OAuth tokens live in ~/.hermes/auth.json providers.minimax-oauth.
|
||||
# Seed the pool so `/auth list` reflects the logged-in state and the
|
||||
# standard `hermes auth remove minimax-oauth <N>` flow works.
|
||||
# Use refresh_if_expiring=False equivalent: resolve_minimax_oauth_runtime_credentials
|
||||
# always refreshes on expiry, so instead read raw state here to avoid
|
||||
# surprise network calls during provider discovery.
|
||||
try:
|
||||
from hermes_cli.auth import get_provider_auth_state
|
||||
state = get_provider_auth_state("minimax-oauth")
|
||||
if state and state.get("access_token"):
|
||||
source_name = "oauth"
|
||||
if not _is_suppressed(provider, source_name):
|
||||
active_sources.add(source_name)
|
||||
expires_at_ms = None
|
||||
try:
|
||||
from datetime import datetime as _dt
|
||||
raw = state.get("expires_at", "")
|
||||
if raw:
|
||||
expires_at_ms = int(_dt.fromisoformat(raw).timestamp() * 1000)
|
||||
except Exception:
|
||||
expires_at_ms = None
|
||||
base_url = str(state.get("inference_base_url", "") or "").rstrip("/")
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
provider,
|
||||
source_name,
|
||||
{
|
||||
"source": source_name,
|
||||
"auth_type": AUTH_TYPE_OAUTH,
|
||||
"access_token": state["access_token"],
|
||||
"refresh_token": state.get("refresh_token"),
|
||||
"expires_at_ms": expires_at_ms,
|
||||
"base_url": base_url,
|
||||
"label": state.get("label", "") or label_from_token(
|
||||
state.get("access_token", ""), source_name
|
||||
),
|
||||
},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("MiniMax OAuth token seed failed: %s", exc)
|
||||
|
||||
elif provider == "openai-codex":
|
||||
# Respect user suppression — `hermes auth remove openai-codex` marks
|
||||
# the device_code source as suppressed so it won't be re-seeded from
|
||||
@@ -1391,8 +1168,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
def _is_source_suppressed(_p, _s): # type: ignore[misc]
|
||||
return False
|
||||
if provider == "openrouter":
|
||||
# Check both os.environ and ~/.hermes/.env file
|
||||
token = (get_env_value("OPENROUTER_API_KEY") or "").strip()
|
||||
token = os.getenv("OPENROUTER_API_KEY", "").strip()
|
||||
if token:
|
||||
source = "env:OPENROUTER_API_KEY"
|
||||
if _is_source_suppressed(provider, source):
|
||||
@@ -1418,7 +1194,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
|
||||
env_url = ""
|
||||
if pconfig.base_url_env_var:
|
||||
env_url = (get_env_value(pconfig.base_url_env_var) or "").strip().rstrip("/")
|
||||
env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
|
||||
|
||||
env_vars = list(pconfig.api_key_env_vars)
|
||||
if provider == "anthropic":
|
||||
@@ -1429,8 +1205,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
]
|
||||
|
||||
for env_var in env_vars:
|
||||
# Check both os.environ and ~/.hermes/.env file
|
||||
token = (get_env_value(env_var) or "").strip()
|
||||
token = os.getenv(env_var, "").strip()
|
||||
if not token:
|
||||
continue
|
||||
source = f"env:{env_var}"
|
||||
|
||||
@@ -47,6 +47,7 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
|
||||
@@ -252,19 +253,6 @@ def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
|
||||
return result
|
||||
|
||||
|
||||
def _remove_minimax_oauth(provider: str, removed) -> RemovalResult:
|
||||
"""MiniMax OAuth lives in auth.json providers.minimax-oauth — clear it.
|
||||
|
||||
Same pattern as Nous: single-source OAuth state with refresh tokens.
|
||||
Suppression of the `oauth` source ensures the pool reseed path
|
||||
(_seed_from_singletons) doesn't instantly undo the removal.
|
||||
"""
|
||||
result = RemovalResult()
|
||||
if _clear_auth_store_provider(provider):
|
||||
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
|
||||
return result
|
||||
|
||||
|
||||
def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
|
||||
"""Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
|
||||
|
||||
@@ -402,11 +390,6 @@ def _register_all_sources() -> None:
|
||||
remove_fn=_remove_qwen_cli,
|
||||
description="~/.qwen/oauth_creds.json",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="minimax-oauth", source_id="oauth",
|
||||
remove_fn=_remove_minimax_oauth,
|
||||
description="auth.json providers.minimax-oauth",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="*", source_id="config:",
|
||||
match_fn=lambda src: src.startswith("config:") or src == "model_config",
|
||||
|
||||
-1315
File diff suppressed because it is too large
Load Diff
@@ -42,11 +42,9 @@ class FailoverReason(enum.Enum):
|
||||
# Context / payload
|
||||
context_overflow = "context_overflow" # Context too large — compress, not failover
|
||||
payload_too_large = "payload_too_large" # 413 — compress payload
|
||||
image_too_large = "image_too_large" # Native image part exceeds provider's per-image limit — shrink and retry
|
||||
|
||||
# Model
|
||||
model_not_found = "model_not_found" # 404 or invalid model — fallback to different model
|
||||
provider_policy_blocked = "provider_policy_blocked" # Aggregator (e.g. OpenRouter) blocked the only endpoint due to account data/privacy policy
|
||||
|
||||
# Request format
|
||||
format_error = "format_error" # 400 bad request — abort or strip + retry
|
||||
@@ -54,7 +52,6 @@ class FailoverReason(enum.Enum):
|
||||
# Provider-specific
|
||||
thinking_signature = "thinking_signature" # Anthropic thinking block sig invalid
|
||||
long_context_tier = "long_context_tier" # Anthropic "extra usage" tier gate
|
||||
oauth_long_context_beta_forbidden = "oauth_long_context_beta_forbidden" # Anthropic OAuth subscription rejects 1M context beta — disable beta and retry
|
||||
|
||||
# Catch-all
|
||||
unknown = "unknown" # Unclassifiable — retry with backoff
|
||||
@@ -92,7 +89,6 @@ class ClassifiedError:
|
||||
_BILLING_PATTERNS = [
|
||||
"insufficient credits",
|
||||
"insufficient_quota",
|
||||
"insufficient balance",
|
||||
"credit balance",
|
||||
"credits have been exhausted",
|
||||
"top up your credits",
|
||||
@@ -150,20 +146,6 @@ _PAYLOAD_TOO_LARGE_PATTERNS = [
|
||||
"error code: 413",
|
||||
]
|
||||
|
||||
# Image-size patterns. Matched against 400 bodies (not 413) because most
|
||||
# providers return a 400 with a specific image-too-big message before the
|
||||
# whole request hits the 413 size limit. Anthropic's wording is the most
|
||||
# important here (hard 5 MB per image, returned as
|
||||
# "messages.N.content.K.image.source.base64: image exceeds 5 MB maximum").
|
||||
_IMAGE_TOO_LARGE_PATTERNS = [
|
||||
"image exceeds", # Anthropic: "image exceeds 5 MB maximum"
|
||||
"image too large", # generic
|
||||
"image_too_large", # error_code variant
|
||||
"image size exceeds", # variant
|
||||
# "request_too_large" on a request known to contain an image → image is
|
||||
# the likely culprit; we still try the shrink path before giving up.
|
||||
]
|
||||
|
||||
# Context overflow patterns
|
||||
_CONTEXT_OVERFLOW_PATTERNS = [
|
||||
"context length",
|
||||
@@ -212,29 +194,6 @@ _MODEL_NOT_FOUND_PATTERNS = [
|
||||
"unsupported model",
|
||||
]
|
||||
|
||||
# OpenRouter aggregator policy-block patterns.
|
||||
#
|
||||
# When a user's OpenRouter account privacy setting (or a per-request
|
||||
# `provider.data_collection: deny` preference) excludes the only endpoint
|
||||
# serving a model, OpenRouter returns 404 with a *specific* message that is
|
||||
# distinct from "model not found":
|
||||
#
|
||||
# "No endpoints available matching your guardrail restrictions and
|
||||
# data policy. Configure: https://openrouter.ai/settings/privacy"
|
||||
#
|
||||
# We classify this as `provider_policy_blocked` rather than
|
||||
# `model_not_found` because:
|
||||
# - The model *exists* — model_not_found is misleading in logs
|
||||
# - Provider fallback won't help: the account-level setting applies to
|
||||
# every call on the same OpenRouter account
|
||||
# - The error body already contains the fix URL, so the user gets
|
||||
# actionable guidance without us rewriting the message
|
||||
_PROVIDER_POLICY_BLOCKED_PATTERNS = [
|
||||
"no endpoints available matching your guardrail",
|
||||
"no endpoints available matching your data policy",
|
||||
"no endpoints found matching your data policy",
|
||||
]
|
||||
|
||||
# Auth patterns (non-status-code signals)
|
||||
_AUTH_PATTERNS = [
|
||||
"invalid api key",
|
||||
@@ -360,11 +319,6 @@ def classify_api_error(
|
||||
"""
|
||||
status_code = _extract_status_code(error)
|
||||
error_type = type(error).__name__
|
||||
# Copilot/GitHub Models RateLimitError may not set .status_code; force 429
|
||||
# so downstream rate-limit handling (classifier reason, pool rotation,
|
||||
# fallback gating) fires correctly instead of misclassifying as generic.
|
||||
if status_code is None and error_type == "RateLimitError":
|
||||
status_code = 429
|
||||
body = _extract_error_body(error)
|
||||
error_code = _extract_error_code(body)
|
||||
|
||||
@@ -451,25 +405,6 @@ def classify_api_error(
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Anthropic OAuth subscription rejects the 1M-context beta header.
|
||||
# Observed error body: "The long context beta is not yet available for
|
||||
# this subscription." Returned as HTTP 400 from native Anthropic when
|
||||
# the subscription doesn't include 1M context, even though the request
|
||||
# carries ``anthropic-beta: context-1m-2025-08-07``. The recovery path
|
||||
# in run_agent.py rebuilds the Anthropic client with the beta stripped
|
||||
# and retries once. Pattern is narrow enough that it won't collide with
|
||||
# the 429 tier-gate pattern above (different status, different phrase).
|
||||
if (
|
||||
status_code == 400
|
||||
and "long context beta" in error_msg
|
||||
and "not yet available" in error_msg
|
||||
):
|
||||
return _result(
|
||||
FailoverReason.oauth_long_context_beta_forbidden,
|
||||
retryable=True,
|
||||
should_compress=False,
|
||||
)
|
||||
|
||||
# ── 2. HTTP status code classification ──────────────────────────
|
||||
|
||||
if status_code is not None:
|
||||
@@ -588,17 +523,6 @@ def _classify_by_status(
|
||||
return _classify_402(error_msg, result_fn)
|
||||
|
||||
if status_code == 404:
|
||||
# OpenRouter policy-block 404 — distinct from "model not found".
|
||||
# The model exists; the user's account privacy setting excludes the
|
||||
# only endpoint serving it. Falling back to another provider won't
|
||||
# help (same account setting applies). The error body already
|
||||
# contains the fix URL, so just surface it.
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
@@ -707,15 +631,6 @@ def _classify_400(
|
||||
) -> ClassifiedError:
|
||||
"""Classify 400 Bad Request — context overflow, format error, or generic."""
|
||||
|
||||
# Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
|
||||
# Must be checked BEFORE context_overflow because messages can trip both
|
||||
# patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
|
||||
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.image_too_large,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Context overflow from 400
|
||||
if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
|
||||
return result_fn(
|
||||
@@ -725,12 +640,6 @@ def _classify_400(
|
||||
)
|
||||
|
||||
# Some providers return model-not-found as 400 instead of 404 (e.g. OpenRouter).
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
@@ -843,13 +752,6 @@ def _classify_by_message(
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Image-too-large patterns (from message text when no status_code)
|
||||
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.image_too_large,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Usage-limit patterns need the same disambiguation as 402: some providers
|
||||
# surface "usage limit" errors without an HTTP status code. A transient
|
||||
# signal ("try again", "resets at", …) means it's a periodic quota, not
|
||||
@@ -910,15 +812,6 @@ def _classify_by_message(
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Provider policy-block (aggregator-side guardrail) — check before
|
||||
# model_not_found so we don't mis-label as a missing model.
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
|
||||
# Model not found patterns
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
|
||||
@@ -30,6 +30,7 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from types import SimpleNamespace
|
||||
@@ -41,6 +42,7 @@ from agent import google_oauth
|
||||
from agent.gemini_schema import sanitize_gemini_tool_parameters
|
||||
from agent.google_code_assist import (
|
||||
CODE_ASSIST_ENDPOINT,
|
||||
FREE_TIER_ID,
|
||||
CodeAssistError,
|
||||
ProjectContext,
|
||||
resolve_project_context,
|
||||
|
||||
@@ -44,97 +44,6 @@ def is_native_gemini_base_url(base_url: str) -> bool:
|
||||
return not normalized.endswith("/openai")
|
||||
|
||||
|
||||
def probe_gemini_tier(
|
||||
api_key: str,
|
||||
base_url: str = DEFAULT_GEMINI_BASE_URL,
|
||||
*,
|
||||
model: str = "gemini-2.5-flash",
|
||||
timeout: float = 10.0,
|
||||
) -> str:
|
||||
"""Probe a Google AI Studio API key and return its tier.
|
||||
|
||||
Returns one of:
|
||||
|
||||
- ``"free"`` -- key is on the free tier (unusable with Hermes)
|
||||
- ``"paid"`` -- key is on a paid tier
|
||||
- ``"unknown"`` -- probe failed; callers should proceed without blocking.
|
||||
"""
|
||||
key = (api_key or "").strip()
|
||||
if not key:
|
||||
return "unknown"
|
||||
|
||||
normalized_base = str(base_url or DEFAULT_GEMINI_BASE_URL).strip().rstrip("/")
|
||||
if not normalized_base:
|
||||
normalized_base = DEFAULT_GEMINI_BASE_URL
|
||||
if normalized_base.lower().endswith("/openai"):
|
||||
normalized_base = normalized_base[: -len("/openai")]
|
||||
|
||||
url = f"{normalized_base}/models/{model}:generateContent"
|
||||
payload = {
|
||||
"contents": [{"role": "user", "parts": [{"text": "hi"}]}],
|
||||
"generationConfig": {"maxOutputTokens": 1},
|
||||
}
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=timeout) as client:
|
||||
resp = client.post(
|
||||
url,
|
||||
params={"key": key},
|
||||
json=payload,
|
||||
headers={"Content-Type": "application/json"},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("probe_gemini_tier: network error: %s", exc)
|
||||
return "unknown"
|
||||
|
||||
headers_lower = {k.lower(): v for k, v in resp.headers.items()}
|
||||
rpd_header = headers_lower.get("x-ratelimit-limit-requests-per-day")
|
||||
if rpd_header:
|
||||
try:
|
||||
rpd_val = int(rpd_header)
|
||||
except (TypeError, ValueError):
|
||||
rpd_val = None
|
||||
# Published free-tier daily caps (Dec 2025):
|
||||
# gemini-2.5-pro: 100, gemini-2.5-flash: 250, flash-lite: 1000
|
||||
# Tier 1 starts at ~1500+ for Flash. We treat <= 1000 as free.
|
||||
if rpd_val is not None and rpd_val <= 1000:
|
||||
return "free"
|
||||
if rpd_val is not None and rpd_val > 1000:
|
||||
return "paid"
|
||||
|
||||
if resp.status_code == 429:
|
||||
body_text = ""
|
||||
try:
|
||||
body_text = resp.text or ""
|
||||
except Exception:
|
||||
body_text = ""
|
||||
if "free_tier" in body_text.lower():
|
||||
return "free"
|
||||
return "paid"
|
||||
|
||||
if 200 <= resp.status_code < 300:
|
||||
return "paid"
|
||||
|
||||
return "unknown"
|
||||
|
||||
|
||||
def is_free_tier_quota_error(error_message: str) -> bool:
|
||||
"""Return True when a Gemini 429 message indicates free-tier exhaustion."""
|
||||
if not error_message:
|
||||
return False
|
||||
return "free_tier" in error_message.lower()
|
||||
|
||||
|
||||
_FREE_TIER_GUIDANCE = (
|
||||
"\n\nYour Google API key is on the free tier (<= 250 requests/day for "
|
||||
"gemini-2.5-flash). Hermes typically makes 3-10 API calls per user turn, "
|
||||
"so the free tier is exhausted in a handful of messages and cannot sustain "
|
||||
"an agent session. Enable billing on your Google Cloud project and "
|
||||
"regenerate the key in a billing-enabled project: "
|
||||
"https://aistudio.google.com/apikey"
|
||||
)
|
||||
|
||||
|
||||
class GeminiAPIError(Exception):
|
||||
"""Error shape compatible with Hermes retry/error classification."""
|
||||
|
||||
@@ -741,12 +650,6 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
|
||||
else:
|
||||
message = f"Gemini returned HTTP {status}: {body_text[:500]}"
|
||||
|
||||
# Free-tier quota exhaustion -> append actionable guidance so users who
|
||||
# bypassed the setup wizard (direct GOOGLE_API_KEY in .env) still learn
|
||||
# that the free tier cannot sustain an agent session.
|
||||
if status == 429 and is_free_tier_quota_error(err_message or body_text):
|
||||
message = message + _FREE_TIER_GUIDANCE
|
||||
|
||||
return GeminiAPIError(
|
||||
message,
|
||||
code=code,
|
||||
@@ -801,13 +704,6 @@ class GeminiNativeClient:
|
||||
http_client: Optional[httpx.Client] = None,
|
||||
**_: Any,
|
||||
) -> None:
|
||||
if not (api_key or "").strip():
|
||||
raise RuntimeError(
|
||||
"Gemini native client requires an API key, but none was provided. "
|
||||
"Set GOOGLE_API_KEY or GEMINI_API_KEY in your environment / ~/.hermes/.env "
|
||||
"(get one at https://aistudio.google.com/app/apikey), or run `hermes setup` "
|
||||
"to configure the Google provider."
|
||||
)
|
||||
self.api_key = api_key
|
||||
normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/")
|
||||
if normalized_base.endswith("/openai"):
|
||||
|
||||
+1
-15
@@ -2,7 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema``
|
||||
# object, which is only a subset of OpenAPI 3.0 / JSON Schema. Strip fields
|
||||
@@ -73,20 +73,6 @@ def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]:
|
||||
]
|
||||
continue
|
||||
cleaned[key] = value
|
||||
|
||||
# Gemini's Schema validator requires every ``enum`` entry to be a string,
|
||||
# even when the parent ``type`` is ``integer`` / ``number`` / ``boolean``.
|
||||
# OpenAI / OpenRouter / Anthropic accept typed enums (e.g. Discord's
|
||||
# ``auto_archive_duration: {type: integer, enum: [60, 1440, 4320, 10080]}``),
|
||||
# so we only drop the ``enum`` when it would collide with Gemini's rule.
|
||||
# Keeping ``type: integer`` plus the human-readable description gives the
|
||||
# model enough guidance; the tool handler still validates the value.
|
||||
enum_val = cleaned.get("enum")
|
||||
type_val = cleaned.get("type")
|
||||
if isinstance(enum_val, list) and type_val in {"integer", "number", "boolean"}:
|
||||
if any(not isinstance(item, str) for item in enum_val):
|
||||
cleaned.pop("enum", None)
|
||||
|
||||
return cleaned
|
||||
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
@@ -49,13 +49,14 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import secrets
|
||||
import socket
|
||||
import stat
|
||||
import threading
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
@@ -97,7 +98,6 @@ _DEFAULT_CLIENT_SECRET = f"GOCSPX-{_PUBLIC_CLIENT_SECRET_SUFFIX}"
|
||||
|
||||
# Regex patterns for fallback scraping from an installed gemini-cli.
|
||||
import re as _re
|
||||
from utils import atomic_replace
|
||||
_CLIENT_ID_PATTERN = _re.compile(
|
||||
r"OAUTH_CLIENT_ID\s*=\s*['\"]([0-9]+-[a-z0-9]+\.apps\.googleusercontent\.com)['\"]"
|
||||
)
|
||||
@@ -499,7 +499,7 @@ def save_credentials(creds: GoogleCredentials) -> Path:
|
||||
fh.flush()
|
||||
os.fsync(fh.fileno())
|
||||
os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR)
|
||||
atomic_replace(tmp_path, path)
|
||||
os.replace(tmp_path, path)
|
||||
finally:
|
||||
try:
|
||||
if tmp_path.exists():
|
||||
|
||||
@@ -1,236 +0,0 @@
|
||||
"""Routing helpers for inbound user-attached images.
|
||||
|
||||
Two modes:
|
||||
|
||||
native — attach images as OpenAI-style ``image_url`` content parts on the
|
||||
user turn. Provider adapters (Anthropic, Gemini, Bedrock, Codex,
|
||||
OpenAI chat.completions) already translate these into their
|
||||
vendor-specific multimodal formats.
|
||||
|
||||
text — run ``vision_analyze`` on each image up-front and prepend the
|
||||
description to the user's text. The model never sees the pixels;
|
||||
it only sees a lossy text summary. This is the pre-existing
|
||||
behaviour and still the right choice for non-vision models.
|
||||
|
||||
The decision is made once per message turn by :func:`decide_image_input_mode`.
|
||||
It reads ``agent.image_input_mode`` from config.yaml (``auto`` | ``native``
|
||||
| ``text``, default ``auto``) and the active model's capability metadata.
|
||||
|
||||
In ``auto`` mode:
|
||||
- If the user has explicitly configured ``auxiliary.vision.provider``
|
||||
(i.e. not ``auto`` and not empty), we assume they want the text pipeline
|
||||
regardless of the main model — they've opted in to a specific vision
|
||||
backend for a reason (cost, quality, local-only, etc.).
|
||||
- Otherwise, if the active model reports ``supports_vision=True`` in its
|
||||
models.dev metadata, we attach natively.
|
||||
- Otherwise (non-vision model, no explicit override), we fall back to text.
|
||||
|
||||
This keeps ``vision_analyze`` surfaced as a tool in every session — skills
|
||||
and agent flows that chain it (browser screenshots, deeper inspection of
|
||||
URL-referenced images, style-gating loops) keep working. The routing only
|
||||
affects *how user-attached images on the current turn* are presented to the
|
||||
main model.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import logging
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_VALID_MODES = frozenset({"auto", "native", "text"})
|
||||
|
||||
|
||||
def _coerce_mode(raw: Any) -> str:
|
||||
"""Normalize a config value into one of the valid modes."""
|
||||
if not isinstance(raw, str):
|
||||
return "auto"
|
||||
val = raw.strip().lower()
|
||||
if val in _VALID_MODES:
|
||||
return val
|
||||
return "auto"
|
||||
|
||||
|
||||
def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
|
||||
"""True when the user configured a specific auxiliary vision backend.
|
||||
|
||||
An explicit override means the user *wants* the text pipeline (they're
|
||||
paying for a dedicated vision model), so we don't silently bypass it.
|
||||
"""
|
||||
if not isinstance(cfg, dict):
|
||||
return False
|
||||
aux = cfg.get("auxiliary") or {}
|
||||
if not isinstance(aux, dict):
|
||||
return False
|
||||
vision = aux.get("vision") or {}
|
||||
if not isinstance(vision, dict):
|
||||
return False
|
||||
|
||||
provider = str(vision.get("provider") or "").strip().lower()
|
||||
model = str(vision.get("model") or "").strip()
|
||||
base_url = str(vision.get("base_url") or "").strip()
|
||||
|
||||
# "auto" / "" / blank = not explicit
|
||||
if provider in ("", "auto") and not model and not base_url:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _lookup_supports_vision(provider: str, model: str) -> Optional[bool]:
|
||||
"""Return True/False if we can resolve caps, None if unknown."""
|
||||
if not provider or not model:
|
||||
return None
|
||||
try:
|
||||
from agent.models_dev import get_model_capabilities
|
||||
caps = get_model_capabilities(provider, model)
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logger.debug("image_routing: caps lookup failed for %s:%s — %s", provider, model, exc)
|
||||
return None
|
||||
if caps is None:
|
||||
return None
|
||||
return bool(caps.supports_vision)
|
||||
|
||||
|
||||
def decide_image_input_mode(
|
||||
provider: str,
|
||||
model: str,
|
||||
cfg: Optional[Dict[str, Any]],
|
||||
) -> str:
|
||||
"""Return ``"native"`` or ``"text"`` for the given turn.
|
||||
|
||||
Args:
|
||||
provider: active inference provider ID (e.g. ``"anthropic"``, ``"openrouter"``).
|
||||
model: active model slug as it would be sent to the provider.
|
||||
cfg: loaded config.yaml dict, or None. When None, behaves as auto.
|
||||
"""
|
||||
mode_cfg = "auto"
|
||||
if isinstance(cfg, dict):
|
||||
agent_cfg = cfg.get("agent") or {}
|
||||
if isinstance(agent_cfg, dict):
|
||||
mode_cfg = _coerce_mode(agent_cfg.get("image_input_mode"))
|
||||
|
||||
if mode_cfg == "native":
|
||||
return "native"
|
||||
if mode_cfg == "text":
|
||||
return "text"
|
||||
|
||||
# auto
|
||||
if _explicit_aux_vision_override(cfg):
|
||||
return "text"
|
||||
|
||||
supports = _lookup_supports_vision(provider, model)
|
||||
if supports is True:
|
||||
return "native"
|
||||
return "text"
|
||||
|
||||
|
||||
# Image size handling is REACTIVE rather than proactive: we attempt native
|
||||
# attachment at full size regardless of provider, and rely on
|
||||
# ``run_agent._try_shrink_image_parts_in_messages`` to shrink + retry if
|
||||
# the provider rejects the request (e.g. Anthropic's hard 5 MB per-image
|
||||
# ceiling returned as HTTP 400 "image exceeds 5 MB maximum").
|
||||
#
|
||||
# Why reactive: our knowledge of provider ceilings is partial and evolving
|
||||
# (OpenAI accepts 49 MB+, Anthropic 5 MB, Gemini 100 MB, others unknown).
|
||||
# A proactive per-provider table would be stale the moment a provider raises
|
||||
# or lowers its limit, and silently degrading quality for users on providers
|
||||
# that would have accepted the full image is the worse failure mode.
|
||||
# The shrink-on-reject path loses 1 API call + maybe 1s of Pillow work when
|
||||
# it fires, which is cheaper than permanent quality loss.
|
||||
|
||||
|
||||
def _guess_mime(path: Path) -> str:
|
||||
mime, _ = mimetypes.guess_type(str(path))
|
||||
if mime and mime.startswith("image/"):
|
||||
return mime
|
||||
# mimetypes on some Linux distros mis-maps .jpg; default to jpeg when
|
||||
# the suffix looks imagey.
|
||||
suffix = path.suffix.lower()
|
||||
return {
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".png": "image/png",
|
||||
".gif": "image/gif",
|
||||
".webp": "image/webp",
|
||||
".bmp": "image/bmp",
|
||||
}.get(suffix, "image/jpeg")
|
||||
|
||||
|
||||
def _file_to_data_url(path: Path) -> Optional[str]:
|
||||
"""Encode a local image as a base64 data URL at its native size.
|
||||
|
||||
Size limits are NOT enforced here — the agent retry loop
|
||||
(``run_agent._try_shrink_image_parts_in_messages``) shrinks on the
|
||||
provider's first rejection. Keeping this simple means providers that
|
||||
accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
|
||||
quality tax just because one other provider is stricter.
|
||||
|
||||
Returns None only if the file can't be read (missing, permission
|
||||
denied, etc.); the caller reports those paths in ``skipped``.
|
||||
"""
|
||||
try:
|
||||
raw = path.read_bytes()
|
||||
except Exception as exc:
|
||||
logger.warning("image_routing: failed to read %s — %s", path, exc)
|
||||
return None
|
||||
mime = _guess_mime(path)
|
||||
b64 = base64.b64encode(raw).decode("ascii")
|
||||
return f"data:{mime};base64,{b64}"
|
||||
|
||||
|
||||
def build_native_content_parts(
|
||||
user_text: str,
|
||||
image_paths: List[str],
|
||||
) -> Tuple[List[Dict[str, Any]], List[str]]:
|
||||
"""Build an OpenAI-style ``content`` list for a user turn.
|
||||
|
||||
Shape:
|
||||
[{"type": "text", "text": "..."},
|
||||
{"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
|
||||
...]
|
||||
|
||||
Images are attached at their native size. If a provider rejects the
|
||||
request because an image is too large (e.g. Anthropic's 5 MB per-image
|
||||
ceiling), the agent's retry loop transparently shrinks and retries
|
||||
once — see ``run_agent._try_shrink_image_parts_in_messages``.
|
||||
|
||||
Returns (content_parts, skipped_paths). Skipped paths are files that
|
||||
couldn't be read from disk.
|
||||
"""
|
||||
parts: List[Dict[str, Any]] = []
|
||||
skipped: List[str] = []
|
||||
|
||||
text = (user_text or "").strip()
|
||||
if text:
|
||||
parts.append({"type": "text", "text": text})
|
||||
|
||||
for raw_path in image_paths:
|
||||
p = Path(raw_path)
|
||||
if not p.exists() or not p.is_file():
|
||||
skipped.append(str(raw_path))
|
||||
continue
|
||||
data_url = _file_to_data_url(p)
|
||||
if not data_url:
|
||||
skipped.append(str(raw_path))
|
||||
continue
|
||||
parts.append({
|
||||
"type": "image_url",
|
||||
"image_url": {"url": data_url},
|
||||
})
|
||||
|
||||
# If the text was empty, add a neutral prompt so the turn isn't just images.
|
||||
if not text and any(p.get("type") == "image_url" for p in parts):
|
||||
parts.insert(0, {"type": "text", "text": "What do you see in this image?"})
|
||||
|
||||
return parts, skipped
|
||||
|
||||
|
||||
__all__ = [
|
||||
"decide_image_input_mode",
|
||||
"build_native_content_parts",
|
||||
]
|
||||
@@ -1,48 +0,0 @@
|
||||
"""LM Studio reasoning-effort resolution shared by the chat-completions
|
||||
transport and run_agent's iteration-limit summary path.
|
||||
|
||||
LM Studio publishes per-model ``capabilities.reasoning.allowed_options`` (e.g.
|
||||
``["off","on"]`` for toggle-style models, ``["off","minimal","low"]`` for
|
||||
graduated models). We map the user's ``reasoning_config`` onto LM Studio's
|
||||
OpenAI-compatible vocabulary, then clamp against the model's allowed set so
|
||||
the server doesn't 400 on an unsupported effort.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import List, Optional
|
||||
|
||||
# LM Studio accepts these top-level reasoning_effort values via its
|
||||
# OpenAI-compatible chat.completions endpoint.
|
||||
_LM_VALID_EFFORTS = {"none", "minimal", "low", "medium", "high", "xhigh"}
|
||||
|
||||
# Toggle-style models publish allowed_options as ["off","on"] in /api/v1/models.
|
||||
# Map them onto the OpenAI-compatible request vocabulary.
|
||||
_LM_EFFORT_ALIASES = {"off": "none", "on": "medium"}
|
||||
|
||||
|
||||
def resolve_lmstudio_effort(
|
||||
reasoning_config: Optional[dict],
|
||||
allowed_options: Optional[List[str]],
|
||||
) -> Optional[str]:
|
||||
"""Return the ``reasoning_effort`` string to send to LM Studio, or ``None``.
|
||||
|
||||
``None`` means "omit the field": the user picked a level the model can't
|
||||
honor, so let LM Studio fall back to the model's declared default rather
|
||||
than silently substituting a different effort. When ``allowed_options`` is
|
||||
falsy (probe failed), skip clamping and send the resolved effort anyway.
|
||||
"""
|
||||
effort = "medium"
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
if reasoning_config.get("enabled") is False:
|
||||
effort = "none"
|
||||
else:
|
||||
raw = (reasoning_config.get("effort") or "").strip().lower()
|
||||
raw = _LM_EFFORT_ALIASES.get(raw, raw)
|
||||
if raw in _LM_VALID_EFFORTS:
|
||||
effort = raw
|
||||
if allowed_options:
|
||||
allowed = {_LM_EFFORT_ALIASES.get(opt, opt) for opt in allowed_options}
|
||||
if effort not in allowed:
|
||||
return None
|
||||
return effort
|
||||
+8
-192
@@ -28,9 +28,9 @@ Usage in run_agent.py:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import inspect
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.memory_provider import MemoryProvider
|
||||
@@ -62,124 +62,15 @@ def sanitize_context(text: str) -> str:
|
||||
return text
|
||||
|
||||
|
||||
class StreamingContextScrubber:
|
||||
"""Stateful scrubber for streaming text that may contain split memory-context spans.
|
||||
|
||||
The one-shot ``sanitize_context`` regex cannot survive chunk boundaries:
|
||||
a ``<memory-context>`` opened in one delta and closed in a later delta
|
||||
leaks its payload to the UI because the non-greedy block regex needs
|
||||
both tags in one string. This scrubber runs a small state machine
|
||||
across deltas, holding back partial-tag tails and discarding
|
||||
everything inside a span (including the system-note line).
|
||||
|
||||
Usage::
|
||||
|
||||
scrubber = StreamingContextScrubber()
|
||||
for delta in stream:
|
||||
visible = scrubber.feed(delta)
|
||||
if visible:
|
||||
emit(visible)
|
||||
trailing = scrubber.flush() # at end of stream
|
||||
if trailing:
|
||||
emit(trailing)
|
||||
|
||||
The scrubber is re-entrant per agent instance. Callers building new
|
||||
top-level responses (new turn) should create a fresh scrubber or call
|
||||
``reset()``.
|
||||
"""
|
||||
|
||||
_OPEN_TAG = "<memory-context>"
|
||||
_CLOSE_TAG = "</memory-context>"
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._in_span: bool = False
|
||||
self._buf: str = ""
|
||||
|
||||
def reset(self) -> None:
|
||||
self._in_span = False
|
||||
self._buf = ""
|
||||
|
||||
def feed(self, text: str) -> str:
|
||||
"""Return the visible portion of ``text`` after scrubbing.
|
||||
|
||||
Any trailing fragment that could be the start of an open/close tag
|
||||
is held back in the internal buffer and surfaced on the next
|
||||
``feed()`` call or discarded/emitted by ``flush()``.
|
||||
"""
|
||||
if not text:
|
||||
return ""
|
||||
buf = self._buf + text
|
||||
self._buf = ""
|
||||
out: list[str] = []
|
||||
|
||||
while buf:
|
||||
if self._in_span:
|
||||
idx = buf.lower().find(self._CLOSE_TAG)
|
||||
if idx == -1:
|
||||
# Hold back a potential partial close tag; drop the rest
|
||||
held = self._max_partial_suffix(buf, self._CLOSE_TAG)
|
||||
self._buf = buf[-held:] if held else ""
|
||||
return "".join(out)
|
||||
# Found close — skip span content + tag, continue
|
||||
buf = buf[idx + len(self._CLOSE_TAG):]
|
||||
self._in_span = False
|
||||
else:
|
||||
idx = buf.lower().find(self._OPEN_TAG)
|
||||
if idx == -1:
|
||||
# No open tag — hold back a potential partial open tag
|
||||
held = self._max_partial_suffix(buf, self._OPEN_TAG)
|
||||
if held:
|
||||
out.append(buf[:-held])
|
||||
self._buf = buf[-held:]
|
||||
else:
|
||||
out.append(buf)
|
||||
return "".join(out)
|
||||
# Emit text before the tag, enter span
|
||||
if idx > 0:
|
||||
out.append(buf[:idx])
|
||||
buf = buf[idx + len(self._OPEN_TAG):]
|
||||
self._in_span = True
|
||||
|
||||
return "".join(out)
|
||||
|
||||
def flush(self) -> str:
|
||||
"""Emit any held-back buffer at end-of-stream.
|
||||
|
||||
If we're still inside an unterminated span the remaining content is
|
||||
discarded (safer: leaking partial memory context is worse than a
|
||||
truncated answer). Otherwise the held-back partial-tag tail is
|
||||
emitted verbatim (it turned out not to be a real tag).
|
||||
"""
|
||||
if self._in_span:
|
||||
self._buf = ""
|
||||
self._in_span = False
|
||||
return ""
|
||||
tail = self._buf
|
||||
self._buf = ""
|
||||
return tail
|
||||
|
||||
@staticmethod
|
||||
def _max_partial_suffix(buf: str, tag: str) -> int:
|
||||
"""Return the length of the longest buf-suffix that is a tag-prefix.
|
||||
|
||||
Case-insensitive. Returns 0 if no suffix could start the tag.
|
||||
"""
|
||||
tag_lower = tag.lower()
|
||||
buf_lower = buf.lower()
|
||||
max_check = min(len(buf_lower), len(tag_lower) - 1)
|
||||
for i in range(max_check, 0, -1):
|
||||
if tag_lower.startswith(buf_lower[-i:]):
|
||||
return i
|
||||
return 0
|
||||
|
||||
|
||||
def build_memory_context_block(raw_context: str) -> str:
|
||||
"""Wrap prefetched memory in a fenced block with system note."""
|
||||
"""Wrap prefetched memory in a fenced block with system note.
|
||||
|
||||
The fence prevents the model from treating recalled context as user
|
||||
discourse. Injected at API-call time only — never persisted.
|
||||
"""
|
||||
if not raw_context or not raw_context.strip():
|
||||
return ""
|
||||
clean = sanitize_context(raw_context)
|
||||
if clean != raw_context:
|
||||
logger.warning("memory provider returned pre-wrapped context; stripped")
|
||||
return (
|
||||
"<memory-context>\n"
|
||||
"[System note: The following is recalled memory context, "
|
||||
@@ -402,41 +293,6 @@ class MemoryManager:
|
||||
provider.name, e,
|
||||
)
|
||||
|
||||
def on_session_switch(
|
||||
self,
|
||||
new_session_id: str,
|
||||
*,
|
||||
parent_session_id: str = "",
|
||||
reset: bool = False,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
"""Notify all providers that the agent's session_id has rotated.
|
||||
|
||||
Fires on ``/resume``, ``/branch``, ``/reset``, ``/new``, and
|
||||
context compression — any path that reassigns
|
||||
``AIAgent.session_id`` without tearing the provider down.
|
||||
|
||||
Providers keep running; they only need to refresh cached
|
||||
per-session state so subsequent writes land in the correct
|
||||
session's record. See ``MemoryProvider.on_session_switch`` for
|
||||
the full contract.
|
||||
"""
|
||||
if not new_session_id:
|
||||
return
|
||||
for provider in self._providers:
|
||||
try:
|
||||
provider.on_session_switch(
|
||||
new_session_id,
|
||||
parent_session_id=parent_session_id,
|
||||
reset=reset,
|
||||
**kwargs,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"Memory provider '%s' on_session_switch failed: %s",
|
||||
provider.name, e,
|
||||
)
|
||||
|
||||
def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
|
||||
"""Notify all providers before context compression.
|
||||
|
||||
@@ -456,39 +312,7 @@ class MemoryManager:
|
||||
)
|
||||
return "\n\n".join(parts)
|
||||
|
||||
@staticmethod
|
||||
def _provider_memory_write_metadata_mode(provider: MemoryProvider) -> str:
|
||||
"""Return how to pass metadata to a provider's memory-write hook."""
|
||||
try:
|
||||
signature = inspect.signature(provider.on_memory_write)
|
||||
except (TypeError, ValueError):
|
||||
return "keyword"
|
||||
|
||||
params = list(signature.parameters.values())
|
||||
if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params):
|
||||
return "keyword"
|
||||
if "metadata" in signature.parameters:
|
||||
return "keyword"
|
||||
|
||||
accepted = [
|
||||
p for p in params
|
||||
if p.kind in (
|
||||
inspect.Parameter.POSITIONAL_ONLY,
|
||||
inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
||||
inspect.Parameter.KEYWORD_ONLY,
|
||||
)
|
||||
]
|
||||
if len(accepted) >= 4:
|
||||
return "positional"
|
||||
return "legacy"
|
||||
|
||||
def on_memory_write(
|
||||
self,
|
||||
action: str,
|
||||
target: str,
|
||||
content: str,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
def on_memory_write(self, action: str, target: str, content: str) -> None:
|
||||
"""Notify external providers when the built-in memory tool writes.
|
||||
|
||||
Skips the builtin provider itself (it's the source of the write).
|
||||
@@ -497,15 +321,7 @@ class MemoryManager:
|
||||
if provider.name == "builtin":
|
||||
continue
|
||||
try:
|
||||
metadata_mode = self._provider_memory_write_metadata_mode(provider)
|
||||
if metadata_mode == "keyword":
|
||||
provider.on_memory_write(
|
||||
action, target, content, metadata=dict(metadata or {})
|
||||
)
|
||||
elif metadata_mode == "positional":
|
||||
provider.on_memory_write(action, target, content, dict(metadata or {}))
|
||||
else:
|
||||
provider.on_memory_write(action, target, content)
|
||||
provider.on_memory_write(action, target, content)
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"Memory provider '%s' on_memory_write failed: %s",
|
||||
|
||||
@@ -25,9 +25,8 @@ Lifecycle (called by MemoryManager, wired in run_agent.py):
|
||||
Optional hooks (override to opt in):
|
||||
on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
|
||||
on_session_end(messages) — end-of-session extraction
|
||||
on_session_switch(new_session_id, **kwargs) — mid-process session_id rotation
|
||||
on_pre_compress(messages) -> str — extract before context compression
|
||||
on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
|
||||
on_memory_write(action, target, content) — mirror built-in memory writes
|
||||
on_delegation(task, result, **kwargs) — parent-side observation of subagent work
|
||||
"""
|
||||
|
||||
@@ -35,7 +34,7 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -161,45 +160,6 @@ class MemoryProvider(ABC):
|
||||
(CLI exit, /reset, gateway session expiry).
|
||||
"""
|
||||
|
||||
def on_session_switch(
|
||||
self,
|
||||
new_session_id: str,
|
||||
*,
|
||||
parent_session_id: str = "",
|
||||
reset: bool = False,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
"""Called when the agent switches session_id mid-process.
|
||||
|
||||
Fires on ``/resume``, ``/branch``, ``/reset``, ``/new`` (CLI), the
|
||||
gateway equivalents, and context compression — any path that
|
||||
reassigns ``AIAgent.session_id`` without tearing the provider down.
|
||||
|
||||
Providers that cache per-session state in ``initialize()``
|
||||
(``_session_id``, ``_document_id``, accumulated turn buffers,
|
||||
counters) should update or reset that state here so subsequent
|
||||
writes land in the correct session's record.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
new_session_id:
|
||||
The session_id the agent just switched to.
|
||||
parent_session_id:
|
||||
The previous session_id, if meaningful — set for ``/branch``
|
||||
(fork lineage), context compression (continuation lineage),
|
||||
and ``/resume`` (the session we're leaving). Empty string
|
||||
when no lineage applies.
|
||||
reset:
|
||||
``True`` when this is a genuinely new conversation, not a
|
||||
resumption of an existing one. Fired by ``/reset`` / ``/new``.
|
||||
Providers should flush accumulated per-session buffers
|
||||
(``_session_turns``, ``_turn_counter``, etc.) when this is
|
||||
set. ``False`` for ``/resume`` / ``/branch`` / compression
|
||||
where the logical conversation continues under the new id.
|
||||
|
||||
Default is no-op for backward compatibility.
|
||||
"""
|
||||
|
||||
def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
|
||||
"""Called before context compression discards old messages.
|
||||
|
||||
@@ -260,21 +220,12 @@ class MemoryProvider(ABC):
|
||||
should all have ``env_var`` set and this method stays no-op).
|
||||
"""
|
||||
|
||||
def on_memory_write(
|
||||
self,
|
||||
action: str,
|
||||
target: str,
|
||||
content: str,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
def on_memory_write(self, action: str, target: str, content: str) -> None:
|
||||
"""Called when the built-in memory tool writes an entry.
|
||||
|
||||
action: 'add', 'replace', or 'remove'
|
||||
target: 'memory' or 'user'
|
||||
content: the entry content
|
||||
metadata: structured provenance for the write, when available. Common
|
||||
keys include ``write_origin``, ``execution_context``, ``session_id``,
|
||||
``parent_session_id``, ``platform``, and ``tool_name``.
|
||||
|
||||
Use to mirror built-in memory writes to your backend.
|
||||
"""
|
||||
|
||||
+57
-309
@@ -6,7 +6,6 @@ and run_agent.py for pre-flight context checks.
|
||||
|
||||
import ipaddress
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
@@ -22,37 +21,16 @@ from hermes_constants import OPENROUTER_MODELS_URL
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _resolve_requests_verify() -> bool | str:
|
||||
"""Resolve SSL verify setting for `requests` calls from env vars.
|
||||
|
||||
The `requests` library only honours REQUESTS_CA_BUNDLE / CURL_CA_BUNDLE
|
||||
by default. Hermes also honours HERMES_CA_BUNDLE (its own convention)
|
||||
and SSL_CERT_FILE (used by the stdlib `ssl` module and by httpx), so
|
||||
that a single env var can cover both `requests` and `httpx` callsites
|
||||
inside the same process.
|
||||
|
||||
Returns either a filesystem path to a CA bundle, or True to defer to
|
||||
the requests default (certifi).
|
||||
"""
|
||||
for env_var in ("HERMES_CA_BUNDLE", "REQUESTS_CA_BUNDLE", "SSL_CERT_FILE"):
|
||||
val = os.getenv(env_var)
|
||||
if val and os.path.isfile(val):
|
||||
return val
|
||||
return True
|
||||
|
||||
# Provider names that can appear as a "provider:" prefix before a model ID.
|
||||
# Only these are stripped — Ollama-style "model:tag" colons (e.g. "qwen3.5:27b")
|
||||
# are preserved so the full model name reaches cache lookups and server queries.
|
||||
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
|
||||
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-oauth", "minimax-cn", "anthropic", "deepseek",
|
||||
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
|
||||
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
|
||||
"qwen-oauth",
|
||||
"xiaomi",
|
||||
"arcee",
|
||||
"gmi",
|
||||
"tencent-tokenhub",
|
||||
"custom", "local",
|
||||
# Common aliases
|
||||
"google", "google-gemini", "google-ai-studio",
|
||||
@@ -61,9 +39,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"ollama",
|
||||
"stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
|
||||
"mimo", "xiaomi-mimo",
|
||||
"tencent", "tokenhub", "tencent-cloud", "tencentmaas",
|
||||
"arcee-ai", "arceeai",
|
||||
"gmi-cloud", "gmicloud",
|
||||
"xai", "x-ai", "x.ai", "grok",
|
||||
"nvidia", "nim", "nvidia-nim", "nemotron",
|
||||
"qwen-portal",
|
||||
@@ -110,11 +86,9 @@ _endpoint_model_metadata_cache_time: Dict[str, float] = {}
|
||||
_ENDPOINT_MODEL_CACHE_TTL = 300
|
||||
|
||||
# Descending tiers for context length probing when the model is unknown.
|
||||
# We start at 256K (covers GPT-5.x, many current large-context models) and
|
||||
# step down on context-length errors until one works. Tier[0] is also the
|
||||
# default fallback when no detection method succeeds.
|
||||
# We start at 128K (a safe default for most modern models) and step down
|
||||
# on context-length errors until one works.
|
||||
CONTEXT_PROBE_TIERS = [
|
||||
256_000,
|
||||
128_000,
|
||||
64_000,
|
||||
32_000,
|
||||
@@ -149,11 +123,6 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"claude": 200000,
|
||||
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
|
||||
# Source: https://developers.openai.com/api/docs/models
|
||||
# GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
|
||||
# ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
|
||||
# provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
|
||||
# This hardcoded value is only reached when every probe misses.
|
||||
"gpt-5.5": 1050000,
|
||||
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
|
||||
@@ -169,17 +138,7 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"gemma-4-31b": 256000,
|
||||
"gemma-3": 131072,
|
||||
"gemma": 8192, # fallback for older gemma models
|
||||
# DeepSeek — V4 family ships with a 1M context window. The legacy
|
||||
# aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
|
||||
# mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
|
||||
# and inherit the same 1M window. The ``deepseek`` substring entry
|
||||
# below remains as a 128K fallback for older / unknown DeepSeek model
|
||||
# ids (e.g. via custom endpoints).
|
||||
# https://api-docs.deepseek.com/zh-cn/quick_start/pricing
|
||||
"deepseek-v4-pro": 1_000_000,
|
||||
"deepseek-v4-flash": 1_000_000,
|
||||
"deepseek-chat": 1_000_000,
|
||||
"deepseek-reasoner": 1_000_000,
|
||||
# DeepSeek
|
||||
"deepseek": 128000,
|
||||
# Meta
|
||||
"llama": 131072,
|
||||
@@ -210,8 +169,6 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"grok": 131072, # catch-all (grok-beta, unknown grok-*)
|
||||
# Kimi
|
||||
"kimi": 262144,
|
||||
# Tencent — Hy3 Preview (Hunyuan) with 256K context window
|
||||
"hy3-preview": 256000,
|
||||
# Nemotron — NVIDIA's open-weights series (128K context across all sizes)
|
||||
"nemotron": 131072,
|
||||
# Arcee
|
||||
@@ -226,12 +183,12 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"moonshotai/Kimi-K2.6": 262144,
|
||||
"moonshotai/Kimi-K2-Thinking": 262144,
|
||||
"MiniMaxAI/MiniMax-M2.5": 204800,
|
||||
"XiaomiMiMo/MiMo-V2-Flash": 262144,
|
||||
"mimo-v2-pro": 1048576,
|
||||
"mimo-v2.5-pro": 1048576,
|
||||
"mimo-v2.5": 1048576,
|
||||
"mimo-v2-omni": 262144,
|
||||
"mimo-v2-flash": 262144,
|
||||
"XiaomiMiMo/MiMo-V2-Flash": 256000,
|
||||
"mimo-v2-pro": 1000000,
|
||||
"mimo-v2-omni": 256000,
|
||||
"mimo-v2-flash": 256000,
|
||||
"mimo-v2.5-pro": 1000000,
|
||||
"mimo-v2.5": 1000000,
|
||||
"zai-org/GLM-5": 202752,
|
||||
}
|
||||
|
||||
@@ -313,8 +270,6 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
||||
"integrate.api.nvidia.com": "nvidia",
|
||||
"api.xiaomimimo.com": "xiaomi",
|
||||
"xiaomimimo.com": "xiaomi",
|
||||
"api.gmi-serving.com": "gmi",
|
||||
"tokenhub.tencentmaas.com": "tencent-tokenhub",
|
||||
"ollama.com": "ollama-cloud",
|
||||
}
|
||||
|
||||
@@ -536,7 +491,7 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
|
||||
return _model_metadata_cache
|
||||
|
||||
try:
|
||||
response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify())
|
||||
response = requests.get(OPENROUTER_MODELS_URL, timeout=10)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
@@ -603,7 +558,6 @@ def fetch_endpoint_model_metadata(
|
||||
server_url.rstrip("/") + "/api/v1/models",
|
||||
headers=headers,
|
||||
timeout=10,
|
||||
verify=_resolve_requests_verify(),
|
||||
)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
@@ -625,6 +579,8 @@ def fetch_endpoint_model_metadata(
|
||||
if isinstance(ctx, int) and ctx > 0:
|
||||
context_length = ctx
|
||||
break
|
||||
if context_length is None:
|
||||
context_length = _extract_context_length(model)
|
||||
if context_length is not None:
|
||||
entry["context_length"] = context_length
|
||||
|
||||
@@ -650,7 +606,7 @@ def fetch_endpoint_model_metadata(
|
||||
for candidate in candidates:
|
||||
url = candidate.rstrip("/") + "/models"
|
||||
try:
|
||||
response = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify())
|
||||
response = requests.get(url, headers=headers, timeout=10)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
cache: Dict[str, Dict[str, Any]] = {}
|
||||
@@ -681,10 +637,9 @@ def fetch_endpoint_model_metadata(
|
||||
try:
|
||||
# Try /v1/props first (current llama.cpp); fall back to /props for older builds
|
||||
base = candidate.rstrip("/").replace("/v1", "")
|
||||
_verify = _resolve_requests_verify()
|
||||
props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5, verify=_verify)
|
||||
props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5)
|
||||
if not props_resp.ok:
|
||||
props_resp = requests.get(base + "/props", headers=headers, timeout=5, verify=_verify)
|
||||
props_resp = requests.get(base + "/props", headers=headers, timeout=5)
|
||||
if props_resp.ok:
|
||||
props = props_resp.json()
|
||||
gen_settings = props.get("default_generation_settings", {})
|
||||
@@ -708,29 +663,6 @@ def fetch_endpoint_model_metadata(
|
||||
return {}
|
||||
|
||||
|
||||
def _resolve_endpoint_context_length(
|
||||
model: str,
|
||||
base_url: str,
|
||||
api_key: str = "",
|
||||
) -> Optional[int]:
|
||||
"""Resolve context length from an endpoint's live ``/models`` metadata."""
|
||||
endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
|
||||
matched = endpoint_metadata.get(model)
|
||||
if not matched:
|
||||
if len(endpoint_metadata) == 1:
|
||||
matched = next(iter(endpoint_metadata.values()))
|
||||
else:
|
||||
for key, entry in endpoint_metadata.items():
|
||||
if model in key or key in model:
|
||||
matched = entry
|
||||
break
|
||||
if matched:
|
||||
context_length = matched.get("context_length")
|
||||
if isinstance(context_length, int):
|
||||
return context_length
|
||||
return None
|
||||
|
||||
|
||||
def _get_context_cache_path() -> Path:
|
||||
"""Return path to the persistent context length cache file."""
|
||||
from hermes_constants import get_hermes_home
|
||||
@@ -779,22 +711,6 @@ def get_cached_context_length(model: str, base_url: str) -> Optional[int]:
|
||||
return cache.get(key)
|
||||
|
||||
|
||||
def _invalidate_cached_context_length(model: str, base_url: str) -> None:
|
||||
"""Drop a stale cache entry so it gets re-resolved on the next lookup."""
|
||||
key = f"{model}@{base_url}"
|
||||
cache = _load_context_cache()
|
||||
if key not in cache:
|
||||
return
|
||||
del cache[key]
|
||||
path = _get_context_cache_path()
|
||||
try:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(path, "w") as f:
|
||||
yaml.dump({"context_lengths": cache}, f, default_flow_style=False)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to invalidate context length cache entry %s: %s", key, e)
|
||||
|
||||
|
||||
def get_next_probe_tier(current_length: int) -> Optional[int]:
|
||||
"""Return the next lower probe tier, or None if already at minimum."""
|
||||
for tier in CONTEXT_PROBE_TIERS:
|
||||
@@ -1014,7 +930,10 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
|
||||
ctx = cfg.get("context_length")
|
||||
if ctx and isinstance(ctx, (int, float)):
|
||||
return int(ctx)
|
||||
break
|
||||
# Fall back to max_context_length (theoretical model max)
|
||||
ctx = m.get("max_context_length") or m.get("context_length")
|
||||
if ctx and isinstance(ctx, (int, float)):
|
||||
return int(ctx)
|
||||
|
||||
# LM Studio / vLLM / llama.cpp: try /v1/models/{model}
|
||||
resp = client.get(f"{server_url}/v1/models/{model}")
|
||||
@@ -1069,7 +988,7 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
|
||||
"x-api-key": api_key,
|
||||
"anthropic-version": "2023-06-01",
|
||||
}
|
||||
resp = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify())
|
||||
resp = requests.get(url, headers=headers, timeout=10)
|
||||
if resp.status_code != 200:
|
||||
return None
|
||||
data = resp.json()
|
||||
@@ -1083,116 +1002,6 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
|
||||
return None
|
||||
|
||||
|
||||
# Known ChatGPT Codex OAuth context windows (observed via live
|
||||
# chatgpt.com/backend-api/codex/models probe, Apr 2026). These are the
|
||||
# `context_window` values, which are what Codex actually enforces — the
|
||||
# direct OpenAI API has larger limits for the same slugs, but Codex OAuth
|
||||
# caps lower (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex).
|
||||
#
|
||||
# Used as a fallback when the live probe fails (no token, network error).
|
||||
# Longest keys first so substring match picks the most specific entry.
|
||||
_CODEX_OAUTH_CONTEXT_FALLBACK: Dict[str, int] = {
|
||||
"gpt-5.1-codex-max": 272_000,
|
||||
"gpt-5.1-codex-mini": 272_000,
|
||||
"gpt-5.3-codex": 272_000,
|
||||
"gpt-5.2-codex": 272_000,
|
||||
"gpt-5.4-mini": 272_000,
|
||||
"gpt-5.5": 272_000,
|
||||
"gpt-5.4": 272_000,
|
||||
"gpt-5.2": 272_000,
|
||||
"gpt-5": 272_000,
|
||||
}
|
||||
|
||||
|
||||
_codex_oauth_context_cache: Dict[str, int] = {}
|
||||
_codex_oauth_context_cache_time: float = 0.0
|
||||
_CODEX_OAUTH_CONTEXT_CACHE_TTL = 3600 # 1 hour
|
||||
|
||||
|
||||
def _fetch_codex_oauth_context_lengths(access_token: str) -> Dict[str, int]:
|
||||
"""Probe the ChatGPT Codex /models endpoint for per-slug context windows.
|
||||
|
||||
Codex OAuth imposes its own context limits that differ from the direct
|
||||
OpenAI API (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex). The
|
||||
`context_window` field in each model entry is the authoritative source.
|
||||
|
||||
Returns a ``{slug: context_window}`` dict. Empty on failure.
|
||||
"""
|
||||
global _codex_oauth_context_cache, _codex_oauth_context_cache_time
|
||||
now = time.time()
|
||||
if (
|
||||
_codex_oauth_context_cache
|
||||
and now - _codex_oauth_context_cache_time < _CODEX_OAUTH_CONTEXT_CACHE_TTL
|
||||
):
|
||||
return _codex_oauth_context_cache
|
||||
|
||||
try:
|
||||
resp = requests.get(
|
||||
"https://chatgpt.com/backend-api/codex/models?client_version=1.0.0",
|
||||
headers={"Authorization": f"Bearer {access_token}"},
|
||||
timeout=10,
|
||||
verify=_resolve_requests_verify(),
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
logger.debug(
|
||||
"Codex /models probe returned HTTP %s; falling back to hardcoded defaults",
|
||||
resp.status_code,
|
||||
)
|
||||
return {}
|
||||
data = resp.json()
|
||||
except Exception as exc:
|
||||
logger.debug("Codex /models probe failed: %s", exc)
|
||||
return {}
|
||||
|
||||
entries = data.get("models", []) if isinstance(data, dict) else []
|
||||
result: Dict[str, int] = {}
|
||||
for item in entries:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
slug = item.get("slug")
|
||||
ctx = item.get("context_window")
|
||||
if isinstance(slug, str) and isinstance(ctx, int) and ctx > 0:
|
||||
result[slug.strip()] = ctx
|
||||
|
||||
if result:
|
||||
_codex_oauth_context_cache = result
|
||||
_codex_oauth_context_cache_time = now
|
||||
return result
|
||||
|
||||
|
||||
def _resolve_codex_oauth_context_length(
|
||||
model: str, access_token: str = ""
|
||||
) -> Optional[int]:
|
||||
"""Resolve a Codex OAuth model's real context window.
|
||||
|
||||
Prefers a live probe of chatgpt.com/backend-api/codex/models (when we
|
||||
have a bearer token), then falls back to ``_CODEX_OAUTH_CONTEXT_FALLBACK``.
|
||||
"""
|
||||
model_bare = _strip_provider_prefix(model).strip()
|
||||
if not model_bare:
|
||||
return None
|
||||
|
||||
if access_token:
|
||||
live = _fetch_codex_oauth_context_lengths(access_token)
|
||||
if model_bare in live:
|
||||
return live[model_bare]
|
||||
# Case-insensitive match in case casing drifts
|
||||
model_lower = model_bare.lower()
|
||||
for slug, ctx in live.items():
|
||||
if slug.lower() == model_lower:
|
||||
return ctx
|
||||
|
||||
# Fallback: longest-key-first substring match over hardcoded defaults.
|
||||
model_lower = model_bare.lower()
|
||||
for slug, ctx in sorted(
|
||||
_CODEX_OAUTH_CONTEXT_FALLBACK.items(), key=lambda x: len(x[0]), reverse=True
|
||||
):
|
||||
if slug in model_lower:
|
||||
return ctx
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_nous_context_length(model: str) -> Optional[int]:
|
||||
"""Resolve Nous Portal model context length via OpenRouter metadata.
|
||||
|
||||
@@ -1232,14 +1041,12 @@ def get_model_context_length(
|
||||
api_key: str = "",
|
||||
config_context_length: int | None = None,
|
||||
provider: str = "",
|
||||
custom_providers: list | None = None,
|
||||
) -> int:
|
||||
"""Get the context length for a model.
|
||||
|
||||
Resolution order:
|
||||
0. Explicit config override (model.context_length or custom_providers per-model)
|
||||
1. Persistent cache (previously discovered via probing)
|
||||
1b. AWS Bedrock static table (must precede custom-endpoint probe)
|
||||
2. Active endpoint metadata (/models for explicit custom endpoints)
|
||||
3. Local server query (for local endpoints)
|
||||
4. Anthropic /v1/models API (API-key users only, not OAuth)
|
||||
@@ -1247,76 +1054,22 @@ def get_model_context_length(
|
||||
6. Nous suffix-match via OpenRouter cache
|
||||
7. models.dev registry lookup (provider-aware)
|
||||
8. Thin hardcoded defaults (broad family patterns)
|
||||
9. Default fallback (256K)
|
||||
9. Default fallback (128K)
|
||||
"""
|
||||
# 0. Explicit config override — user knows best
|
||||
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
|
||||
return config_context_length
|
||||
|
||||
# 0b. custom_providers per-model override — check before any probe.
|
||||
# This closes the gap where /model switch and display paths used to fall
|
||||
# back to 128K despite the user having a per-model context_length set.
|
||||
# See #15779.
|
||||
if custom_providers and base_url and model:
|
||||
try:
|
||||
from hermes_cli.config import get_custom_provider_context_length
|
||||
cp_ctx = get_custom_provider_context_length(
|
||||
model=model,
|
||||
base_url=base_url,
|
||||
custom_providers=custom_providers,
|
||||
)
|
||||
if cp_ctx:
|
||||
return cp_ctx
|
||||
except Exception:
|
||||
pass # fall through to probing
|
||||
|
||||
# Normalise provider-prefixed model names (e.g. "local:model-name" →
|
||||
# "model-name") so cache lookups and server queries use the bare ID that
|
||||
# local servers actually know about. Ollama "model:tag" colons are preserved.
|
||||
model = _strip_provider_prefix(model)
|
||||
|
||||
# 1. Check persistent cache (model+provider)
|
||||
# LM Studio is excluded — its loaded context length is transient (the
|
||||
# user can reload the model with a different context_length at any time
|
||||
# via /api/v1/models/load), so a stale cached value would mask reloads.
|
||||
if base_url and provider != "lmstudio":
|
||||
if base_url:
|
||||
cached = get_cached_context_length(model, base_url)
|
||||
if cached is not None:
|
||||
# Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds
|
||||
# resolved gpt-5.x to the direct-API value (e.g. 1.05M) via
|
||||
# models.dev and persisted it. Codex OAuth caps at 272K for every
|
||||
# slug, so any cached Codex entry at or above 400K is a leftover
|
||||
# from the old resolution path. Drop it and fall through to the
|
||||
# live /models probe in step 5 below.
|
||||
if provider == "openai-codex" and cached >= 400_000:
|
||||
logger.info(
|
||||
"Dropping stale Codex cache entry %s@%s -> %s (pre-fix value); "
|
||||
"re-resolving via live /models probe",
|
||||
model, base_url, f"{cached:,}",
|
||||
)
|
||||
_invalidate_cached_context_length(model, base_url)
|
||||
else:
|
||||
return cached
|
||||
|
||||
# 1b. AWS Bedrock — use static context length table.
|
||||
# Bedrock's ListFoundationModels API doesn't expose context window sizes,
|
||||
# so we maintain a curated table in bedrock_adapter.py that reflects
|
||||
# AWS-imposed limits (e.g. 200K for Claude models vs 1M on the native
|
||||
# Anthropic API). This must run BEFORE the custom-endpoint probe at
|
||||
# step 2 — bedrock-runtime.<region>.amazonaws.com is not in
|
||||
# _URL_TO_PROVIDER, so it would otherwise be treated as a custom endpoint,
|
||||
# fail the /models probe (Bedrock doesn't expose that shape), and fall
|
||||
# back to the 128K default before reaching the original step 4b branch.
|
||||
if provider == "bedrock" or (
|
||||
base_url
|
||||
and base_url_hostname(base_url).startswith("bedrock-runtime.")
|
||||
and base_url_host_matches(base_url, "amazonaws.com")
|
||||
):
|
||||
try:
|
||||
from agent.bedrock_adapter import get_bedrock_context_length
|
||||
return get_bedrock_context_length(model)
|
||||
except ImportError:
|
||||
pass # boto3 not installed — fall through to generic resolution
|
||||
return cached
|
||||
|
||||
# 2. Active endpoint metadata for truly custom/unknown endpoints.
|
||||
# Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their
|
||||
@@ -1324,16 +1077,28 @@ def get_model_context_length(
|
||||
# returns 128k) instead of the model's full context (400k). models.dev
|
||||
# has the correct per-provider values and is checked at step 5+.
|
||||
if _is_custom_endpoint(base_url) and not _is_known_provider_base_url(base_url):
|
||||
context_length = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
|
||||
if context_length is not None:
|
||||
return context_length
|
||||
endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
|
||||
matched = endpoint_metadata.get(model)
|
||||
if not matched:
|
||||
# Single-model servers: if only one model is loaded, use it
|
||||
if len(endpoint_metadata) == 1:
|
||||
matched = next(iter(endpoint_metadata.values()))
|
||||
else:
|
||||
# Fuzzy match: substring in either direction
|
||||
for key, entry in endpoint_metadata.items():
|
||||
if model in key or key in model:
|
||||
matched = entry
|
||||
break
|
||||
if matched:
|
||||
context_length = matched.get("context_length")
|
||||
if isinstance(context_length, int):
|
||||
return context_length
|
||||
if not _is_known_provider_base_url(base_url):
|
||||
# 3. Try querying local server directly
|
||||
if is_local_endpoint(base_url):
|
||||
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
|
||||
if local_ctx and local_ctx > 0:
|
||||
if provider != "lmstudio":
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
return local_ctx
|
||||
logger.info(
|
||||
"Could not detect context length for model %r at %s — "
|
||||
@@ -1351,7 +1116,19 @@ def get_model_context_length(
|
||||
if ctx:
|
||||
return ctx
|
||||
|
||||
# 4b. (Bedrock handled earlier at step 1b — before custom-endpoint probe.)
|
||||
# 4b. AWS Bedrock — use static context length table.
|
||||
# Bedrock's ListFoundationModels doesn't expose context window sizes,
|
||||
# so we maintain a curated table in bedrock_adapter.py.
|
||||
if provider == "bedrock" or (
|
||||
base_url
|
||||
and base_url_hostname(base_url).startswith("bedrock-runtime.")
|
||||
and base_url_host_matches(base_url, "amazonaws.com")
|
||||
):
|
||||
try:
|
||||
from agent.bedrock_adapter import get_bedrock_context_length
|
||||
return get_bedrock_context_length(model)
|
||||
except ImportError:
|
||||
pass # boto3 not installed — fall through to generic resolution
|
||||
|
||||
# 5. Provider-aware lookups (before generic OpenRouter cache)
|
||||
# These are provider-specific and take priority over the generic OR cache,
|
||||
@@ -1365,38 +1142,10 @@ def get_model_context_length(
|
||||
if inferred:
|
||||
effective_provider = inferred
|
||||
|
||||
# 5a. Copilot live /models API — max_prompt_tokens from the user's account.
|
||||
# This catches account-specific models (e.g. claude-opus-4.6-1m) that
|
||||
# don't exist in models.dev. For models that ARE in models.dev, this
|
||||
# returns the provider-enforced limit which is what users can actually use.
|
||||
if effective_provider in ("copilot", "copilot-acp", "github-copilot"):
|
||||
try:
|
||||
from hermes_cli.models import get_copilot_model_context
|
||||
ctx = get_copilot_model_context(model, api_key=api_key)
|
||||
if ctx:
|
||||
return ctx
|
||||
except Exception:
|
||||
pass # Fall through to models.dev
|
||||
|
||||
if effective_provider == "nous":
|
||||
ctx = _resolve_nous_context_length(model)
|
||||
if ctx:
|
||||
return ctx
|
||||
if effective_provider == "openai-codex":
|
||||
# Codex OAuth enforces lower context limits than the direct OpenAI
|
||||
# API for the same slug (e.g. gpt-5.5 is 1.05M on the API but 272K
|
||||
# on Codex). Authoritative source is Codex's own /models endpoint.
|
||||
codex_ctx = _resolve_codex_oauth_context_length(model, access_token=api_key or "")
|
||||
if codex_ctx:
|
||||
if base_url:
|
||||
save_context_length(model, base_url, codex_ctx)
|
||||
return codex_ctx
|
||||
if effective_provider == "gmi" and base_url:
|
||||
# GMI exposes authoritative context_length via /models, but it is not
|
||||
# in models.dev yet. Preserve that higher-fidelity endpoint lookup.
|
||||
ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
|
||||
if ctx is not None:
|
||||
return ctx
|
||||
if effective_provider:
|
||||
from agent.models_dev import lookup_models_dev_context
|
||||
ctx = lookup_models_dev_context(effective_provider, model)
|
||||
@@ -1406,7 +1155,7 @@ def get_model_context_length(
|
||||
# 6. OpenRouter live API metadata (provider-unaware fallback)
|
||||
metadata = fetch_model_metadata()
|
||||
if model in metadata:
|
||||
return metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)
|
||||
return metadata[model].get("context_length", 128000)
|
||||
|
||||
# 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
|
||||
# Only check `default_model in model` (is the key a substring of the input).
|
||||
@@ -1423,11 +1172,10 @@ def get_model_context_length(
|
||||
if base_url and is_local_endpoint(base_url):
|
||||
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
|
||||
if local_ctx and local_ctx > 0:
|
||||
if provider != "lmstudio":
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
return local_ctx
|
||||
|
||||
# 10. Default fallback — 256K
|
||||
# 10. Default fallback — 128K
|
||||
return DEFAULT_FALLBACK_CONTEXT
|
||||
|
||||
|
||||
|
||||
@@ -149,7 +149,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
|
||||
"stepfun": "stepfun",
|
||||
"kimi-coding-cn": "kimi-for-coding",
|
||||
"minimax": "minimax",
|
||||
"minimax-oauth": "minimax",
|
||||
"minimax-cn": "minimax-cn",
|
||||
"deepseek": "deepseek",
|
||||
"alibaba": "alibaba",
|
||||
@@ -419,9 +418,6 @@ def list_provider_models(provider: str) -> List[str]:
|
||||
|
||||
Returns an empty list if the provider is unknown or has no data.
|
||||
"""
|
||||
from hermes_cli.models import normalize_provider
|
||||
provider = normalize_provider(provider) or provider
|
||||
|
||||
models = _get_provider_models(provider)
|
||||
if models is None:
|
||||
return []
|
||||
|
||||
@@ -1,212 +0,0 @@
|
||||
"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset.
|
||||
|
||||
Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI
|
||||
tool calling. Requests that violate it fail with HTTP 400:
|
||||
|
||||
tools.function.parameters is not a valid moonshot flavored json schema,
|
||||
details: <...>
|
||||
|
||||
Known rejection modes documented at
|
||||
https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102
|
||||
and MoonshotAI/kimi-cli#1595:
|
||||
|
||||
1. Every property schema must carry a ``type``. Standard JSON Schema allows
|
||||
type to be omitted (the value is then unconstrained); Moonshot refuses.
|
||||
2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
|
||||
the parent. Presence of both causes "type should be defined in anyOf
|
||||
items instead of the parent schema".
|
||||
3. ``$ref`` nodes may not carry sibling keywords. Moonshot expands the
|
||||
reference before validation and then rejects the node if sibling keys
|
||||
like ``description`` remain on the same node as ``$ref``. Strip every
|
||||
sibling from ``$ref`` nodes so only ``{"$ref": "..."}`` survives.
|
||||
(Ported from anomalyco/opencode#24730.)
|
||||
4. ``items`` may not be a tuple-style array (``items: [schemaA, schemaB]``
|
||||
for positional element schemas). Moonshot's schema engine requires a
|
||||
single object schema applied to every array element. Collapse tuple
|
||||
``items`` to the first element schema (or ``{}`` if the tuple is empty).
|
||||
(Ported from anomalyco/opencode#24730.)
|
||||
|
||||
The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
|
||||
handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
|
||||
applies at MCP registration time for all providers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Keys whose values are maps of name → schema (not schemas themselves).
|
||||
# When we recurse, we walk the values of these maps as schemas, but we do
|
||||
# NOT apply the missing-type repair to the map itself.
|
||||
_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"})
|
||||
|
||||
# Keys whose values are lists of schemas.
|
||||
_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"})
|
||||
|
||||
# Keys whose values are a single nested schema.
|
||||
_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"})
|
||||
|
||||
|
||||
def _repair_schema(node: Any, is_schema: bool = True) -> Any:
|
||||
"""Recursively apply Moonshot repairs to a schema node.
|
||||
|
||||
``is_schema=True`` means this dict is a JSON Schema node and gets the
|
||||
missing-type + anyOf-parent repairs applied. ``is_schema=False`` means
|
||||
it's a container map (e.g. the value of ``properties``) and we only
|
||||
recurse into its values.
|
||||
"""
|
||||
if isinstance(node, list):
|
||||
# Lists only show up under schema-list keys (anyOf/oneOf/allOf), so
|
||||
# every element is itself a schema.
|
||||
return [_repair_schema(item, is_schema=True) for item in node]
|
||||
if not isinstance(node, dict):
|
||||
return node
|
||||
|
||||
# Walk the dict, deciding per-key whether recursion is into a schema
|
||||
# node, a container map, or a scalar.
|
||||
repaired: Dict[str, Any] = {}
|
||||
for key, value in node.items():
|
||||
if key in _SCHEMA_MAP_KEYS and isinstance(value, dict):
|
||||
# Map of name → schema. Don't treat the map itself as a schema
|
||||
# (it has no type / properties of its own), but each value is.
|
||||
repaired[key] = {
|
||||
sub_key: _repair_schema(sub_val, is_schema=True)
|
||||
for sub_key, sub_val in value.items()
|
||||
}
|
||||
elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
|
||||
repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
|
||||
elif key == "items" and isinstance(value, list):
|
||||
# Rule 4: tuple-style ``items`` arrays (positional element
|
||||
# schemas) are not accepted by Moonshot. Collapse to the
|
||||
# first element schema if present, else to ``{}``. This
|
||||
# matches opencode's behaviour for moonshotai / kimi models.
|
||||
first = value[0] if value else {}
|
||||
if isinstance(first, dict):
|
||||
repaired[key] = _repair_schema(first, is_schema=True)
|
||||
else:
|
||||
repaired[key] = first
|
||||
elif key in _SCHEMA_NODE_KEYS:
|
||||
# items / not / additionalProperties: single nested schema.
|
||||
# additionalProperties can also be a bool — leave those alone.
|
||||
if isinstance(value, dict):
|
||||
repaired[key] = _repair_schema(value, is_schema=True)
|
||||
else:
|
||||
repaired[key] = value
|
||||
else:
|
||||
# Scalars (description, title, format, enum values, etc.) pass through.
|
||||
repaired[key] = value
|
||||
|
||||
if not is_schema:
|
||||
return repaired
|
||||
|
||||
# Rule 2: when anyOf is present, type belongs only on the children.
|
||||
if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
|
||||
repaired.pop("type", None)
|
||||
return repaired
|
||||
|
||||
# Rule 3: $ref nodes must not have sibling keywords. Strip everything
|
||||
# except $ref itself so Moonshot's validator (which expands the ref
|
||||
# before checking) doesn't reject the node for redundant keys like
|
||||
# ``description`` / ``type`` / ``default`` appearing alongside $ref.
|
||||
if "$ref" in repaired:
|
||||
return {"$ref": repaired["$ref"]}
|
||||
return _fill_missing_type(repaired)
|
||||
|
||||
|
||||
def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Infer a reasonable ``type`` if this schema node has none."""
|
||||
if "type" in node and node["type"] not in (None, ""):
|
||||
return node
|
||||
|
||||
# Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
|
||||
# → type of first enum value, else fall back to ``string`` (safest scalar).
|
||||
if "properties" in node or "required" in node or "additionalProperties" in node:
|
||||
inferred = "object"
|
||||
elif "items" in node or "prefixItems" in node:
|
||||
inferred = "array"
|
||||
elif "enum" in node and isinstance(node["enum"], list) and node["enum"]:
|
||||
sample = node["enum"][0]
|
||||
if isinstance(sample, bool):
|
||||
inferred = "boolean"
|
||||
elif isinstance(sample, int):
|
||||
inferred = "integer"
|
||||
elif isinstance(sample, float):
|
||||
inferred = "number"
|
||||
else:
|
||||
inferred = "string"
|
||||
else:
|
||||
inferred = "string"
|
||||
|
||||
return {**node, "type": inferred}
|
||||
|
||||
|
||||
def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]:
|
||||
"""Normalize tool parameters to a Moonshot-compatible object schema.
|
||||
|
||||
Returns a deep-copied schema with the two flavored-JSON-Schema repairs
|
||||
applied. Input is not mutated.
|
||||
"""
|
||||
if not isinstance(parameters, dict):
|
||||
return {"type": "object", "properties": {}}
|
||||
|
||||
repaired = _repair_schema(copy.deepcopy(parameters), is_schema=True)
|
||||
if not isinstance(repaired, dict):
|
||||
return {"type": "object", "properties": {}}
|
||||
|
||||
# Top-level must be an object schema
|
||||
if repaired.get("type") != "object":
|
||||
repaired["type"] = "object"
|
||||
if "properties" not in repaired:
|
||||
repaired["properties"] = {}
|
||||
|
||||
return repaired
|
||||
|
||||
|
||||
def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters."""
|
||||
if not tools:
|
||||
return tools
|
||||
|
||||
sanitized: List[Dict[str, Any]] = []
|
||||
any_change = False
|
||||
for tool in tools:
|
||||
if not isinstance(tool, dict):
|
||||
sanitized.append(tool)
|
||||
continue
|
||||
fn = tool.get("function")
|
||||
if not isinstance(fn, dict):
|
||||
sanitized.append(tool)
|
||||
continue
|
||||
params = fn.get("parameters")
|
||||
repaired = sanitize_moonshot_tool_parameters(params)
|
||||
if repaired is not params:
|
||||
any_change = True
|
||||
new_fn = {**fn, "parameters": repaired}
|
||||
sanitized.append({**tool, "function": new_fn})
|
||||
else:
|
||||
sanitized.append(tool)
|
||||
|
||||
return sanitized if any_change else tools
|
||||
|
||||
|
||||
def is_moonshot_model(model: str | None) -> bool:
|
||||
"""True for any Kimi / Moonshot model slug, regardless of aggregator prefix.
|
||||
|
||||
Matches bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) and aggregator-
|
||||
prefixed slugs (``nous/moonshotai/kimi-k2.6``, ``openrouter/moonshotai/...``).
|
||||
Detection by model name covers Nous / OpenRouter / other aggregators that
|
||||
route to Moonshot's inference, where the base URL is the aggregator's, not
|
||||
``api.moonshot.ai``.
|
||||
"""
|
||||
if not model:
|
||||
return False
|
||||
bare = model.strip().lower()
|
||||
# Last path segment (covers aggregator-prefixed slugs)
|
||||
tail = bare.rsplit("/", 1)[-1]
|
||||
if tail.startswith("kimi-") or tail == "kimi":
|
||||
return True
|
||||
# Vendor-prefixed forms commonly used on aggregators
|
||||
if "moonshot" in bare or "/kimi" in bare or bare.startswith("kimi"):
|
||||
return True
|
||||
return False
|
||||
+1
-144
@@ -18,7 +18,6 @@ import os
|
||||
import tempfile
|
||||
import time
|
||||
from typing import Any, Mapping, Optional
|
||||
from utils import atomic_replace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -119,7 +118,7 @@ def record_nous_rate_limit(
|
||||
try:
|
||||
with os.fdopen(fd, "w") as f:
|
||||
json.dump(state, f)
|
||||
atomic_replace(tmp_path, path)
|
||||
os.replace(tmp_path, path)
|
||||
except Exception:
|
||||
# Clean up temp file on failure
|
||||
try:
|
||||
@@ -181,145 +180,3 @@ def format_remaining(seconds: float) -> str:
|
||||
h, remainder = divmod(s, 3600)
|
||||
m = remainder // 60
|
||||
return f"{h}h {m}m" if m else f"{h}h"
|
||||
|
||||
|
||||
# Buckets with reset windows shorter than this are treated as transient
|
||||
# (upstream jitter, secondary throttling) rather than a genuine quota
|
||||
# exhaustion worth a cross-session breaker trip.
|
||||
_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
|
||||
|
||||
|
||||
def is_genuine_nous_rate_limit(
|
||||
*,
|
||||
headers: Optional[Mapping[str, str]] = None,
|
||||
last_known_state: Optional[Any] = None,
|
||||
) -> bool:
|
||||
"""Decide whether a 429 from Nous Portal is a real account rate limit.
|
||||
|
||||
Nous Portal multiplexes multiple upstream providers (DeepSeek, Kimi,
|
||||
MiMo, Hermes, ...) behind one endpoint. A 429 can mean either:
|
||||
|
||||
(a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is
|
||||
exhausted — a genuine rate limit that will last until the
|
||||
bucket resets.
|
||||
(b) The upstream provider is out of capacity for a specific model
|
||||
— transient, clears in seconds, and has nothing to do with
|
||||
the caller's quota on Nous.
|
||||
|
||||
Tripping the cross-session breaker on (b) blocks ALL Nous requests
|
||||
(and all models, since Nous is one provider key) for minutes even
|
||||
though the caller's account is healthy and a different model would
|
||||
have worked. That's the bug users hit when DeepSeek V4 Pro 429s
|
||||
trigger a breaker that then blocks Kimi 2.6 and MiMo V2.5 Pro.
|
||||
|
||||
We tell the two apart by looking at:
|
||||
|
||||
1. The 429 response's own ``x-ratelimit-*`` headers. Nous emits
|
||||
the full suite on every response including 429s. An exhausted
|
||||
bucket (``remaining == 0`` with a reset window >= 60s) is
|
||||
proof of (a).
|
||||
2. The last-known-good rate-limit state captured by
|
||||
``_capture_rate_limits()`` on the previous successful
|
||||
response. If any bucket there was already near-exhausted with
|
||||
a substantial reset window, the current 429 is almost
|
||||
certainly (a) continuing from that condition.
|
||||
|
||||
If neither signal fires, we treat the 429 as (b): fail the single
|
||||
request, let the retry loop or model-switch proceed, and do NOT
|
||||
write the cross-session breaker file.
|
||||
|
||||
Returns True when the evidence points at (a).
|
||||
"""
|
||||
# Signal 1: current 429 response headers.
|
||||
state = _parse_buckets_from_headers(headers)
|
||||
if _has_exhausted_bucket(state):
|
||||
return True
|
||||
|
||||
# Signal 2: last-known-good state from a recent successful response.
|
||||
# Accepts either a RateLimitState (dataclass from rate_limit_tracker)
|
||||
# or a dict of bucket snapshots.
|
||||
if last_known_state is not None and _has_exhausted_bucket_in_object(last_known_state):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _parse_buckets_from_headers(
|
||||
headers: Optional[Mapping[str, str]],
|
||||
) -> dict[str, tuple[Optional[int], Optional[float]]]:
|
||||
"""Extract (remaining, reset_seconds) per bucket from x-ratelimit-* headers.
|
||||
|
||||
Returns empty dict when no rate-limit headers are present.
|
||||
"""
|
||||
if not headers:
|
||||
return {}
|
||||
|
||||
lowered = {k.lower(): v for k, v in headers.items()}
|
||||
if not any(k.startswith("x-ratelimit-") for k in lowered):
|
||||
return {}
|
||||
|
||||
def _maybe_int(raw: Optional[str]) -> Optional[int]:
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
return int(float(raw))
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
def _maybe_float(raw: Optional[str]) -> Optional[float]:
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
return float(raw)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
result: dict[str, tuple[Optional[int], Optional[float]]] = {}
|
||||
for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
|
||||
remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
|
||||
reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
|
||||
if remaining is not None or reset is not None:
|
||||
result[tag] = (remaining, reset)
|
||||
return result
|
||||
|
||||
|
||||
def _has_exhausted_bucket(
|
||||
buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
|
||||
) -> bool:
|
||||
"""Return True when any bucket has remaining == 0 AND a meaningful reset window."""
|
||||
for remaining, reset in buckets.values():
|
||||
if remaining is None or remaining > 0:
|
||||
continue
|
||||
if reset is None:
|
||||
continue
|
||||
if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _has_exhausted_bucket_in_object(state: Any) -> bool:
|
||||
"""Check a RateLimitState-like object for an exhausted bucket.
|
||||
|
||||
Accepts the dataclass from ``agent.rate_limit_tracker`` (buckets
|
||||
exposed as attributes ``requests_min``, ``requests_hour``,
|
||||
``tokens_min``, ``tokens_hour``) and falls back gracefully for any
|
||||
object missing those attributes.
|
||||
"""
|
||||
for attr in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"):
|
||||
bucket = getattr(state, attr, None)
|
||||
if bucket is None:
|
||||
continue
|
||||
limit = getattr(bucket, "limit", 0) or 0
|
||||
remaining = getattr(bucket, "remaining", 0) or 0
|
||||
# Prefer the adjusted "remaining_seconds_now" property when present;
|
||||
# fall back to raw reset_seconds.
|
||||
reset = getattr(bucket, "remaining_seconds_now", None)
|
||||
if reset is None:
|
||||
reset = getattr(bucket, "reset_seconds", 0.0) or 0.0
|
||||
if limit <= 0:
|
||||
continue
|
||||
if remaining > 0:
|
||||
continue
|
||||
if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -1,193 +0,0 @@
|
||||
"""
|
||||
Contextual first-touch onboarding hints.
|
||||
|
||||
Instead of blocking first-run questionnaires, show a one-time hint the *first*
|
||||
time a user hits a behavior fork — message-while-running, first long-running
|
||||
tool, etc. Each hint is shown once per install (tracked in ``config.yaml`` under
|
||||
``onboarding.seen.<flag>``) and then never again.
|
||||
|
||||
Keep this module tiny and dependency-free so both the CLI and gateway can import
|
||||
it without pulling in heavy modules.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Mapping, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Flag names (stable — used as config.yaml keys under onboarding.seen)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
BUSY_INPUT_FLAG = "busy_input_prompt"
|
||||
TOOL_PROGRESS_FLAG = "tool_progress_prompt"
|
||||
OPENCLAW_RESIDUE_FLAG = "openclaw_residue_cleanup"
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Hint content
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def busy_input_hint_gateway(mode: str) -> str:
|
||||
"""Hint shown the first time a user messages while the agent is busy.
|
||||
|
||||
``mode`` is the effective busy_input_mode that was just applied, so the
|
||||
message matches reality ("I just interrupted…" vs "I just queued…").
|
||||
"""
|
||||
if mode == "queue":
|
||||
return (
|
||||
"💡 First-time tip — I queued your message instead of interrupting. "
|
||||
"Send `/busy interrupt` to make new messages stop the current task "
|
||||
"immediately, or `/busy status` to check. This notice won't appear again."
|
||||
)
|
||||
if mode == "steer":
|
||||
return (
|
||||
"💡 First-time tip — I steered your message into the current run; "
|
||||
"it will arrive after the next tool call instead of interrupting. "
|
||||
"Send `/busy interrupt` or `/busy queue` to change this, or "
|
||||
"`/busy status` to check. This notice won't appear again."
|
||||
)
|
||||
return (
|
||||
"💡 First-time tip — I just interrupted my current task to answer you. "
|
||||
"Send `/busy queue` to queue follow-ups for after the current task instead, "
|
||||
"`/busy steer` to inject them mid-run without interrupting, or "
|
||||
"`/busy status` to check. This notice won't appear again."
|
||||
)
|
||||
|
||||
|
||||
def busy_input_hint_cli(mode: str) -> str:
|
||||
"""CLI version of the busy-input hint (plain text, no markdown)."""
|
||||
if mode == "queue":
|
||||
return (
|
||||
"(tip) Your message was queued for the next turn. "
|
||||
"Use /busy interrupt to make Enter stop the current run instead, "
|
||||
"or /busy steer to inject mid-run. This tip only shows once."
|
||||
)
|
||||
if mode == "steer":
|
||||
return (
|
||||
"(tip) Your message was steered into the current run; it arrives "
|
||||
"after the next tool call. Use /busy interrupt or /busy queue to "
|
||||
"change this. This tip only shows once."
|
||||
)
|
||||
return (
|
||||
"(tip) Your message interrupted the current run. "
|
||||
"Use /busy queue to queue messages for the next turn instead, "
|
||||
"or /busy steer to inject mid-run. This tip only shows once."
|
||||
)
|
||||
|
||||
|
||||
def tool_progress_hint_gateway() -> str:
|
||||
return (
|
||||
"💡 First-time tip — that tool took a while and I'm streaming every step. "
|
||||
"If the progress messages feel noisy, send `/verbose` to cycle modes "
|
||||
"(all → new → off). This notice won't appear again."
|
||||
)
|
||||
|
||||
|
||||
def tool_progress_hint_cli() -> str:
|
||||
return (
|
||||
"(tip) That tool ran for a while. Use /verbose to cycle tool-progress "
|
||||
"display modes (all -> new -> off -> verbose). This tip only shows once."
|
||||
)
|
||||
|
||||
|
||||
def openclaw_residue_hint_cli() -> str:
|
||||
"""Banner shown the first time Hermes starts and finds ``~/.openclaw/``.
|
||||
|
||||
Points users at ``hermes claw migrate`` (non-destructive port of config,
|
||||
memory, and skills) first. ``hermes claw cleanup`` is mentioned as the
|
||||
follow-up step for users who have already migrated and want to archive
|
||||
the old directory — with a warning that archiving breaks OpenClaw.
|
||||
"""
|
||||
return (
|
||||
"A legacy OpenClaw directory was detected at ~/.openclaw/.\n"
|
||||
"To port your config, memory, and skills over to Hermes, run "
|
||||
"`hermes claw migrate`.\n"
|
||||
"If you've already migrated and want to archive the old directory, "
|
||||
"run `hermes claw cleanup` (renames it to ~/.openclaw.pre-migration — "
|
||||
"OpenClaw will stop working after this).\n"
|
||||
"This tip only shows once."
|
||||
)
|
||||
|
||||
|
||||
def detect_openclaw_residue(home: Optional[Path] = None) -> bool:
|
||||
"""Return True if an OpenClaw workspace directory is present in ``$HOME``.
|
||||
|
||||
Pure filesystem check — no side effects. ``home`` override exists for tests.
|
||||
"""
|
||||
base = home or Path.home()
|
||||
try:
|
||||
return (base / ".openclaw").is_dir()
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# State read / write
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
|
||||
onboarding = config.get("onboarding") if isinstance(config, Mapping) else None
|
||||
if not isinstance(onboarding, Mapping):
|
||||
return {}
|
||||
seen = onboarding.get("seen")
|
||||
return seen if isinstance(seen, Mapping) else {}
|
||||
|
||||
|
||||
def is_seen(config: Mapping[str, Any], flag: str) -> bool:
|
||||
"""Return True if the user has already been shown this first-touch hint."""
|
||||
return bool(_get_seen_dict(config).get(flag))
|
||||
|
||||
|
||||
def mark_seen(config_path: Path, flag: str) -> bool:
|
||||
"""Persist ``onboarding.seen.<flag> = True`` to ``config_path``.
|
||||
|
||||
Uses the atomic YAML writer so a concurrent process can't observe a
|
||||
partially-written file. Returns True on success, False on any error
|
||||
(including the config file being absent — onboarding is best-effort).
|
||||
"""
|
||||
try:
|
||||
import yaml
|
||||
from utils import atomic_yaml_write
|
||||
except Exception as e: # pragma: no cover — dependency issue
|
||||
logger.debug("onboarding: failed to import yaml/utils: %s", e)
|
||||
return False
|
||||
|
||||
try:
|
||||
cfg: dict = {}
|
||||
if config_path.exists():
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
cfg = yaml.safe_load(f) or {}
|
||||
if not isinstance(cfg.get("onboarding"), dict):
|
||||
cfg["onboarding"] = {}
|
||||
seen = cfg["onboarding"].get("seen")
|
||||
if not isinstance(seen, dict):
|
||||
seen = {}
|
||||
cfg["onboarding"]["seen"] = seen
|
||||
if seen.get(flag) is True:
|
||||
return True # already marked — nothing to do
|
||||
seen[flag] = True
|
||||
atomic_yaml_write(config_path, cfg)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.debug("onboarding: failed to mark flag %s: %s", flag, e)
|
||||
return False
|
||||
|
||||
|
||||
__all__ = [
|
||||
"BUSY_INPUT_FLAG",
|
||||
"TOOL_PROGRESS_FLAG",
|
||||
"OPENCLAW_RESIDUE_FLAG",
|
||||
"busy_input_hint_gateway",
|
||||
"busy_input_hint_cli",
|
||||
"tool_progress_hint_gateway",
|
||||
"tool_progress_hint_cli",
|
||||
"openclaw_residue_hint_cli",
|
||||
"detect_openclaw_residue",
|
||||
"is_seen",
|
||||
"mark_seen",
|
||||
]
|
||||
@@ -141,12 +141,6 @@ DEFAULT_AGENT_IDENTITY = (
|
||||
"Be targeted and efficient in your exploration and investigations."
|
||||
)
|
||||
|
||||
HERMES_AGENT_HELP_GUIDANCE = (
|
||||
"If the user asks about configuring, setting up, or using Hermes Agent "
|
||||
"itself, load the `hermes-agent` skill with skill_view(name='hermes-agent') "
|
||||
"before answering. Docs: https://hermes-agent.nousresearch.com/docs"
|
||||
)
|
||||
|
||||
MEMORY_GUIDANCE = (
|
||||
"You have persistent memory across sessions. Save durable facts using the memory "
|
||||
"tool: user preferences, environment details, tool quirks, and stable conventions. "
|
||||
@@ -182,64 +176,6 @@ SKILLS_GUIDANCE = (
|
||||
"Skills that aren't maintained become liabilities."
|
||||
)
|
||||
|
||||
KANBAN_GUIDANCE = (
|
||||
"# You are a Kanban worker\n"
|
||||
"You were spawned by the Hermes Kanban dispatcher to execute ONE task from "
|
||||
"the shared board at `~/.hermes/kanban.db`. Your task id is in "
|
||||
"`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. "
|
||||
"The `kanban_*` tools in your schema are your primary coordination surface — "
|
||||
"they write directly to the shared SQLite DB and work regardless of terminal "
|
||||
"backend (local/docker/modal/ssh).\n"
|
||||
"\n"
|
||||
"## Lifecycle\n"
|
||||
"\n"
|
||||
"1. **Orient.** Call `kanban_show()` first (no args — it defaults to your "
|
||||
"task). The response includes title, body, parent-task handoffs (summary + "
|
||||
"metadata), any prior attempts on this task if you're a retry, the full "
|
||||
"comment thread, and a pre-formatted `worker_context` you can treat as "
|
||||
"ground truth.\n"
|
||||
"2. **Work inside the workspace.** `cd $HERMES_KANBAN_WORKSPACE` before "
|
||||
"any file operations. The workspace is yours for this run. Don't modify "
|
||||
"files outside it unless the task explicitly asks.\n"
|
||||
"3. **Heartbeat on long operations.** Call `kanban_heartbeat(note=...)` "
|
||||
"every few minutes during long subprocesses (training, encoding, crawling). "
|
||||
"Skip heartbeats for short tasks.\n"
|
||||
"4. **Block on genuine ambiguity.** If you need a human decision you cannot "
|
||||
"infer (missing credentials, UX choice, paywalled source, peer output you "
|
||||
"need first), call `kanban_block(reason=\"...\")` and stop. Don't guess. "
|
||||
"The user will unblock with context and the dispatcher will respawn you.\n"
|
||||
"5. **Complete with structured handoff.** Call `kanban_complete(summary=..., "
|
||||
"metadata=...)`. `summary` is 1–3 human-readable sentences naming concrete "
|
||||
"artifacts. `metadata` is machine-readable facts "
|
||||
"(`{changed_files: [...], tests_run: N, decisions: [...]}`). Downstream "
|
||||
"workers read both via their own `kanban_show`. Never put secrets / "
|
||||
"tokens / raw PII in either field — run rows are durable forever.\n"
|
||||
"6. **If follow-up work appears, create it; don't do it.** Use "
|
||||
"`kanban_create(title=..., assignee=<right-profile>, parents=[your-task-id])` "
|
||||
"to spawn a child task for the appropriate specialist profile instead of "
|
||||
"scope-creeping into the next thing.\n"
|
||||
"\n"
|
||||
"## Orchestrator mode\n"
|
||||
"\n"
|
||||
"If your task is itself a decomposition task (e.g. a planner profile given "
|
||||
"a high-level goal), use `kanban_create` to fan out into child tasks — one "
|
||||
"per specialist, each with an explicit `assignee` and `parents=[...]` to "
|
||||
"express dependencies. Then `kanban_complete` your own task with a summary "
|
||||
"of the decomposition. Do NOT execute the work yourself; your job is "
|
||||
"routing, not implementation.\n"
|
||||
"\n"
|
||||
"## Do NOT\n"
|
||||
"\n"
|
||||
"- Do not shell out to `hermes kanban <verb>` for board operations. Use "
|
||||
"the `kanban_*` tools — they work across all terminal backends.\n"
|
||||
"- Do not complete a task you didn't actually finish. Block it.\n"
|
||||
"- Do not assign follow-up work to yourself. Assign it to the right "
|
||||
"specialist profile.\n"
|
||||
"- Do not call `delegate_task` as a board substitute. `delegate_task` is "
|
||||
"for short reasoning subtasks inside your own run; board tasks are for "
|
||||
"cross-agent handoffs that outlive one API loop."
|
||||
)
|
||||
|
||||
TOOL_USE_ENFORCEMENT_GUIDANCE = (
|
||||
"# Tool-use enforcement\n"
|
||||
"You MUST use your tools to take action — do not describe what you would do "
|
||||
@@ -368,10 +304,6 @@ PLATFORM_HINTS = {
|
||||
"Standard markdown is automatically converted to Telegram format. "
|
||||
"Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
|
||||
"`inline code`, ```code blocks```, [links](url), and ## headers. "
|
||||
"Telegram has NO table syntax — prefer bullet lists or labeled "
|
||||
"key: value pairs over pipe tables (any tables you do emit are "
|
||||
"auto-rewritten into row-group bullets, which you can produce "
|
||||
"directly for cleaner output). "
|
||||
"You can send media files natively: to deliver a file to the user, "
|
||||
"include MEDIA:/absolute/path/to/file in your response. Images "
|
||||
"(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
|
||||
@@ -438,32 +370,6 @@ PLATFORM_HINTS = {
|
||||
"MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
|
||||
".heic) appear as photos and other files arrive as attachments."
|
||||
),
|
||||
"mattermost": (
|
||||
"You are in a Mattermost workspace communicating with your user. "
|
||||
"Mattermost renders standard Markdown — headings, bold, italic, code "
|
||||
"blocks, and tables all work. "
|
||||
"You can send media files natively: include MEDIA:/absolute/path/to/file "
|
||||
"in your response. Images (.jpg, .png, .webp) are uploaded as photo "
|
||||
"attachments, audio and video as file attachments. "
|
||||
"Image URLs in markdown format  are rendered as inline previews automatically."
|
||||
),
|
||||
"matrix": (
|
||||
"You are in a Matrix room communicating with your user. "
|
||||
"Matrix renders Markdown — bold, italic, code blocks, and links work; "
|
||||
"the adapter converts your Markdown to HTML for rich display. "
|
||||
"You can send media files natively: include MEDIA:/absolute/path/to/file "
|
||||
"in your response. Images (.jpg, .png, .webp) are sent as inline photos, "
|
||||
"audio (.ogg, .mp3) as voice/audio messages, video (.mp4) inline, "
|
||||
"and other files as downloadable attachments."
|
||||
),
|
||||
"feishu": (
|
||||
"You are in a Feishu (Lark) workspace communicating with your user. "
|
||||
"Feishu renders Markdown in messages — bold, italic, code blocks, and "
|
||||
"links are supported. "
|
||||
"You can send media files natively: include MEDIA:/absolute/path/to/file "
|
||||
"in your response. Images (.jpg, .png, .webp) are uploaded and displayed "
|
||||
"inline, audio files as voice messages, and other files as attachments."
|
||||
),
|
||||
"weixin": (
|
||||
"You are on Weixin/WeChat. Markdown formatting is supported, so you may use it when "
|
||||
"it improves readability, but keep the message compact and chat-friendly. You can send media files natively: "
|
||||
@@ -490,29 +396,6 @@ PLATFORM_HINTS = {
|
||||
"your response. Images are sent as native photos, and other files arrive as downloadable "
|
||||
"documents."
|
||||
),
|
||||
"yuanbao": (
|
||||
"You are on Yuanbao (腾讯元宝), a Chinese AI assistant platform. "
|
||||
"Markdown formatting is supported (code blocks, tables, bold/italic). "
|
||||
"You CAN send media files natively — to deliver a file to the user, include "
|
||||
"MEDIA:/absolute/path/to/file in your response. The file will be sent as a native "
|
||||
"Yuanbao attachment: images (.jpg, .png, .webp, .gif) are sent as photos, "
|
||||
"and other files (.pdf, .docx, .txt, .zip, etc.) arrive as downloadable documents "
|
||||
"(max 50 MB). You can also include image URLs in markdown format  and "
|
||||
"they will be downloaded and sent as native photos. "
|
||||
"Do NOT tell the user you lack file-sending capability — use MEDIA: syntax "
|
||||
"whenever a file delivery is appropriate.\n\n"
|
||||
"Stickers (贴纸 / 表情包 / TIM face): Yuanbao has a built-in sticker catalogue. "
|
||||
"When the user sends a sticker (you see '[emoji: 名称]' in their message) or asks "
|
||||
"you to send/reply-with a 贴纸/表情/表情包, you MUST use the sticker tools:\n"
|
||||
" 1. Call yb_search_sticker with a Chinese keyword (e.g. '666', '比心', '吃瓜', "
|
||||
" '捂脸', '合十') to discover matching sticker_ids.\n"
|
||||
" 2. Call yb_send_sticker with the chosen sticker_id or name — this sends a real "
|
||||
" TIMFaceElem that renders as a native sticker in the chat.\n"
|
||||
"DO NOT draw sticker-like PNGs with execute_code/Pillow/matplotlib and then send "
|
||||
"them via MEDIA: or send_image_file. That produces a fake low-quality 'sticker' "
|
||||
"image and is the WRONG path. Bare Unicode emoji in text is also not a substitute "
|
||||
"— when a sticker is the right response, use yb_send_sticker."
|
||||
),
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -916,11 +799,6 @@ def build_skills_system_prompt(
|
||||
"Skills also encode the user's preferred approach, conventions, and quality standards "
|
||||
"for tasks like code review, planning, and testing — load them even for tasks you "
|
||||
"already know how to do, because the skill defines how it should be done here.\n"
|
||||
"Whenever the user asks you to configure, set up, install, enable, disable, modify, "
|
||||
"or troubleshoot Hermes Agent itself — its CLI, config, models, providers, tools, "
|
||||
"skills, voice, gateway, plugins, or any feature — load the `hermes-agent` skill "
|
||||
"first. It has the actual commands (e.g. `hermes config set …`, `hermes tools`, "
|
||||
"`hermes setup`) so you don't have to guess or invent workarounds.\n"
|
||||
"If a skill has issues, fix it with skill_manage(action='patch').\n"
|
||||
"After difficult/iterative tasks, offer to save as a skill. "
|
||||
"If a skill you loaded was missing steps, had wrong commands, or needed "
|
||||
|
||||
+8
-62
@@ -56,12 +56,8 @@ _SENSITIVE_BODY_KEYS = frozenset({
|
||||
})
|
||||
|
||||
# Snapshot at import time so runtime env mutations (e.g. LLM-generated
|
||||
# `export HERMES_REDACT_SECRETS=true`) cannot enable/disable redaction
|
||||
# mid-session. OFF by default — user must opt in via
|
||||
# `security.redact_secrets: true` in config.yaml (bridged to this env var
|
||||
# in hermes_cli/main.py and gateway/run.py) or `HERMES_REDACT_SECRETS=true`
|
||||
# in ~/.hermes/.env.
|
||||
_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("1", "true", "yes", "on")
|
||||
# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
|
||||
_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
|
||||
|
||||
# Known API key prefixes -- match the prefix + contiguous token chars
|
||||
_PREFIX_PATTERNS = [
|
||||
@@ -184,59 +180,11 @@ _PREFIX_RE = re.compile(
|
||||
)
|
||||
|
||||
|
||||
def mask_secret(
|
||||
value: str,
|
||||
*,
|
||||
head: int = 4,
|
||||
tail: int = 4,
|
||||
floor: int = 12,
|
||||
placeholder: str = "***",
|
||||
empty: str = "",
|
||||
) -> str:
|
||||
"""Mask a secret for display, preserving ``head`` and ``tail`` characters.
|
||||
|
||||
Canonical helper for display-time redaction across Hermes — used by
|
||||
``hermes config``, ``hermes status``, ``hermes dump``, and anywhere
|
||||
a secret needs to be shown truncated for debuggability while still
|
||||
keeping the bulk hidden.
|
||||
|
||||
Args:
|
||||
value: The secret to mask. ``None``/empty returns ``empty``.
|
||||
head: Leading characters to preserve. Default 4.
|
||||
tail: Trailing characters to preserve. Default 4.
|
||||
floor: Values shorter than ``head + tail + floor_margin`` are
|
||||
fully masked (returns ``placeholder``). Default 12 —
|
||||
matches the existing config/status/dump convention.
|
||||
placeholder: Value returned for too-short inputs. Default ``"***"``.
|
||||
empty: Value returned when ``value`` is falsy (None, ""). The
|
||||
caller can override this to e.g. ``color("(not set)",
|
||||
Colors.DIM)`` for user-facing display.
|
||||
|
||||
Examples:
|
||||
>>> mask_secret("sk-proj-abcdef1234567890")
|
||||
'sk-p...7890'
|
||||
>>> mask_secret("short") # fully masked
|
||||
'***'
|
||||
>>> mask_secret("") # empty default
|
||||
''
|
||||
>>> mask_secret("", empty="(not set)") # empty override
|
||||
'(not set)'
|
||||
>>> mask_secret("long-token", head=6, tail=4, floor=18)
|
||||
'***'
|
||||
"""
|
||||
if not value:
|
||||
return empty
|
||||
if len(value) < floor:
|
||||
return placeholder
|
||||
return f"{value[:head]}...{value[-tail:]}"
|
||||
|
||||
|
||||
def _mask_token(token: str) -> str:
|
||||
"""Mask a log token — conservative 18-char floor, preserves 6 prefix / 4 suffix."""
|
||||
# Empty input: historically this returned "***" rather than "". Preserve.
|
||||
if not token:
|
||||
"""Mask a token, preserving prefix for long tokens."""
|
||||
if len(token) < 18:
|
||||
return "***"
|
||||
return mask_secret(token, head=6, tail=4, floor=18)
|
||||
return f"{token[:6]}...{token[-4:]}"
|
||||
|
||||
|
||||
def _redact_query_string(query: str) -> str:
|
||||
@@ -305,13 +253,11 @@ def _redact_form_body(text: str) -> str:
|
||||
return _redact_query_string(text.strip())
|
||||
|
||||
|
||||
def redact_sensitive_text(text: str, *, force: bool = False) -> str:
|
||||
def redact_sensitive_text(text: str) -> str:
|
||||
"""Apply all redaction patterns to a block of text.
|
||||
|
||||
Safe to call on any string -- non-matching text passes through unchanged.
|
||||
Disabled by default — enable via security.redact_secrets: true in config.yaml.
|
||||
Set force=True for safety boundaries that must never return raw secrets
|
||||
regardless of the user's global logging redaction preference.
|
||||
Disabled when security.redact_secrets is false in config.yaml.
|
||||
"""
|
||||
if text is None:
|
||||
return None
|
||||
@@ -319,7 +265,7 @@ def redact_sensitive_text(text: str, *, force: bool = False) -> str:
|
||||
text = str(text)
|
||||
if not text:
|
||||
return text
|
||||
if not (force or _REDACT_ENABLED):
|
||||
if not _REDACT_ENABLED:
|
||||
return text
|
||||
|
||||
# Known prefixes (sk-, ghp_, etc.)
|
||||
|
||||
@@ -76,7 +76,6 @@ except ImportError: # pragma: no cover
|
||||
fcntl = None # type: ignore[assignment]
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from utils import atomic_replace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -569,7 +568,7 @@ def save_allowlist(data: Dict[str, Any]) -> None:
|
||||
try:
|
||||
with os.fdopen(fd, "w") as fh:
|
||||
fh.write(json.dumps(data, indent=2, sort_keys=True))
|
||||
atomic_replace(tmp_path, p)
|
||||
os.replace(tmp_path, p)
|
||||
except Exception:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
@@ -755,11 +754,7 @@ def _resolve_effective_accept(
|
||||
if env in ("1", "true", "yes", "on"):
|
||||
return True
|
||||
cfg_val = cfg.get("hooks_auto_accept", False)
|
||||
if isinstance(cfg_val, bool):
|
||||
return cfg_val
|
||||
if isinstance(cfg_val, str):
|
||||
return cfg_val.strip().lower() in ("1", "true", "yes", "on")
|
||||
return False
|
||||
return bool(cfg_val)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
+138
-96
@@ -1,29 +1,154 @@
|
||||
"""Shared slash command helpers for skills.
|
||||
"""Shared slash command helpers for skills and built-in prompt-style modes.
|
||||
|
||||
Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
|
||||
can invoke skills via /skill-name commands.
|
||||
can invoke skills via /skill-name commands and prompt-only built-ins like
|
||||
/plan.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from hermes_constants import display_hermes_home
|
||||
from agent.skill_preprocessing import (
|
||||
expand_inline_shell as _expand_inline_shell,
|
||||
load_skills_config as _load_skills_config,
|
||||
substitute_template_vars as _substitute_template_vars,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_skill_commands: Dict[str, Dict[str, Any]] = {}
|
||||
_PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
|
||||
# Patterns for sanitizing skill names into clean hyphen-separated slugs.
|
||||
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
|
||||
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
|
||||
|
||||
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
|
||||
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
|
||||
# left as-is so the user can debug them.
|
||||
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
|
||||
|
||||
# Matches inline shell snippets like: !`date +%Y-%m-%d`
|
||||
# Non-greedy, single-line only — no newlines inside the backticks.
|
||||
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
|
||||
|
||||
# Cap inline-shell output so a runaway command can't blow out the context.
|
||||
_INLINE_SHELL_MAX_OUTPUT = 4000
|
||||
|
||||
|
||||
def _load_skills_config() -> dict:
|
||||
"""Load the ``skills`` section of config.yaml (best-effort)."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
cfg = load_config() or {}
|
||||
skills_cfg = cfg.get("skills")
|
||||
if isinstance(skills_cfg, dict):
|
||||
return skills_cfg
|
||||
except Exception:
|
||||
logger.debug("Could not read skills config", exc_info=True)
|
||||
return {}
|
||||
|
||||
|
||||
def _substitute_template_vars(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
session_id: str | None,
|
||||
) -> str:
|
||||
"""Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
|
||||
|
||||
Only substitutes tokens for which a concrete value is available —
|
||||
unresolved tokens are left in place so the author can spot them.
|
||||
"""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
skill_dir_str = str(skill_dir) if skill_dir else None
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
token = match.group(1)
|
||||
if token == "HERMES_SKILL_DIR" and skill_dir_str:
|
||||
return skill_dir_str
|
||||
if token == "HERMES_SESSION_ID" and session_id:
|
||||
return str(session_id)
|
||||
return match.group(0)
|
||||
|
||||
return _SKILL_TEMPLATE_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
|
||||
"""Execute a single inline-shell snippet and return its stdout (trimmed).
|
||||
|
||||
Failures return a short ``[inline-shell error: ...]`` marker instead of
|
||||
raising, so one bad snippet can't wreck the whole skill message.
|
||||
"""
|
||||
try:
|
||||
completed = subprocess.run(
|
||||
["bash", "-c", command],
|
||||
cwd=str(cwd) if cwd else None,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=max(1, int(timeout)),
|
||||
check=False,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
return f"[inline-shell timeout after {timeout}s: {command}]"
|
||||
except FileNotFoundError:
|
||||
return f"[inline-shell error: bash not found]"
|
||||
except Exception as exc:
|
||||
return f"[inline-shell error: {exc}]"
|
||||
|
||||
output = (completed.stdout or "").rstrip("\n")
|
||||
if not output and completed.stderr:
|
||||
output = completed.stderr.rstrip("\n")
|
||||
if len(output) > _INLINE_SHELL_MAX_OUTPUT:
|
||||
output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
|
||||
return output
|
||||
|
||||
|
||||
def _expand_inline_shell(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
timeout: int,
|
||||
) -> str:
|
||||
"""Replace every !`cmd` snippet in ``content`` with its stdout.
|
||||
|
||||
Runs each snippet with the skill directory as CWD so relative paths in
|
||||
the snippet work the way the author expects.
|
||||
"""
|
||||
if "!`" not in content:
|
||||
return content
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
cmd = match.group(1).strip()
|
||||
if not cmd:
|
||||
return ""
|
||||
return _run_inline_shell(cmd, skill_dir, timeout)
|
||||
|
||||
return _INLINE_SHELL_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def build_plan_path(
|
||||
user_instruction: str = "",
|
||||
*,
|
||||
now: datetime | None = None,
|
||||
) -> Path:
|
||||
"""Return the default workspace-relative markdown path for a /plan invocation.
|
||||
|
||||
Relative paths are intentional: file tools are task/backend-aware and resolve
|
||||
them against the active working directory for local, docker, ssh, modal,
|
||||
daytona, and similar terminal backends. That keeps the plan with the active
|
||||
workspace instead of the Hermes host's global home directory.
|
||||
"""
|
||||
slug_source = (user_instruction or "").strip().splitlines()[0] if user_instruction else ""
|
||||
slug = _PLAN_SLUG_RE.sub("-", slug_source.lower()).strip("-")
|
||||
if slug:
|
||||
slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-")
|
||||
slug = slug or "conversation-plan"
|
||||
timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S")
|
||||
return Path(".hermes") / "plans" / f"{timestamp}-{slug}.md"
|
||||
|
||||
|
||||
def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
|
||||
"""Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
|
||||
raw_identifier = (skill_identifier or "").strip()
|
||||
@@ -42,9 +167,7 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
|
||||
else:
|
||||
normalized = raw_identifier.lstrip("/")
|
||||
|
||||
loaded_skill = json.loads(
|
||||
skill_view(normalized, task_id=task_id, preprocess=False)
|
||||
)
|
||||
loaded_skill = json.loads(skill_view(normalized, task_id=task_id))
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
@@ -222,7 +345,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
_skill_commands = {}
|
||||
try:
|
||||
from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
|
||||
from agent.skill_utils import get_external_skills_dirs, iter_skill_index_files
|
||||
from agent.skill_utils import get_external_skills_dirs
|
||||
disabled = _get_disabled_skill_names()
|
||||
seen_names: set = set()
|
||||
|
||||
@@ -233,8 +356,8 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
dirs_to_scan.extend(get_external_skills_dirs())
|
||||
|
||||
for scan_dir in dirs_to_scan:
|
||||
for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
|
||||
if any(part in ('.git', '.github', '.hub', '.archive') for part in skill_md.parts):
|
||||
for skill_md in scan_dir.rglob("SKILL.md"):
|
||||
if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
|
||||
continue
|
||||
try:
|
||||
content = skill_md.read_text(encoding='utf-8')
|
||||
@@ -284,71 +407,6 @@ def get_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
return _skill_commands
|
||||
|
||||
|
||||
def reload_skills() -> Dict[str, Any]:
|
||||
"""Re-scan the skills directory and return a diff of what changed.
|
||||
|
||||
Rescans ``~/.hermes/skills/`` and any ``skills.external_dirs`` so the
|
||||
slash-command map (``agent.skill_commands._skill_commands``) reflects
|
||||
skills added or removed on disk.
|
||||
|
||||
This does NOT invalidate the skills system-prompt cache. Skills are
|
||||
called by name via ``/skill-name``, ``skills_list``, or ``skill_view``
|
||||
— they don't need to be in the system prompt for the model to use them.
|
||||
Keeping the prompt cache intact preserves prefix caching across the
|
||||
reload, so a user invoking ``/reload-skills`` pays no cache-reset cost.
|
||||
|
||||
Returns:
|
||||
Dict with keys::
|
||||
|
||||
{
|
||||
"added": [{"name": str, "description": str}, ...],
|
||||
"removed": [{"name": str, "description": str}, ...],
|
||||
"unchanged": [skill names present before and after],
|
||||
"total": total skill count after rescan,
|
||||
"commands": total /slash-skill count after rescan,
|
||||
}
|
||||
|
||||
``description`` is the skill's full SKILL.md frontmatter
|
||||
``description:`` field — the same string the system prompt renders
|
||||
as `` - name: description`` for pre-existing skills.
|
||||
"""
|
||||
# Snapshot pre-reload state (name -> description) from the current
|
||||
# slash-command cache. Using dicts lets the post-rescan diff carry
|
||||
# descriptions for newly-visible or just-removed skills without a
|
||||
# second disk walk.
|
||||
def _snapshot(cmds: Dict[str, Dict[str, Any]]) -> Dict[str, str]:
|
||||
out: Dict[str, str] = {}
|
||||
for slash_key, info in cmds.items():
|
||||
bare = slash_key.lstrip("/")
|
||||
out[bare] = (info or {}).get("description") or ""
|
||||
return out
|
||||
|
||||
before = _snapshot(_skill_commands)
|
||||
|
||||
# Rescan the skills dir. ``scan_skill_commands`` resets
|
||||
# ``_skill_commands = {}`` internally and repopulates it.
|
||||
new_commands = scan_skill_commands()
|
||||
|
||||
after = _snapshot(new_commands)
|
||||
|
||||
added_names = sorted(set(after) - set(before))
|
||||
removed_names = sorted(set(before) - set(after))
|
||||
unchanged = sorted(set(after) & set(before))
|
||||
|
||||
added = [{"name": n, "description": after[n]} for n in added_names]
|
||||
# For removed skills, use the description we had cached pre-rescan
|
||||
# (the skill file is gone so we can't re-read it).
|
||||
removed = [{"name": n, "description": before[n]} for n in removed_names]
|
||||
|
||||
return {
|
||||
"added": added,
|
||||
"removed": removed,
|
||||
"unchanged": unchanged,
|
||||
"total": len(after),
|
||||
"commands": len(new_commands),
|
||||
}
|
||||
|
||||
|
||||
def resolve_skill_command_key(command: str) -> Optional[str]:
|
||||
"""Resolve a user-typed /command to its canonical skill_cmds key.
|
||||
|
||||
@@ -393,16 +451,8 @@ def build_skill_invocation_message(
|
||||
return f"[Failed to load skill: {skill_info['name']}]"
|
||||
|
||||
loaded_skill, skill_dir, skill_name = loaded
|
||||
|
||||
# Track active usage for Curator lifecycle management (#17782)
|
||||
try:
|
||||
from tools.skill_usage import bump_use
|
||||
bump_use(skill_name)
|
||||
except Exception:
|
||||
pass # Non-critical — skill invocation proceeds regardless
|
||||
|
||||
activation_note = (
|
||||
f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want '
|
||||
f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want '
|
||||
"you to follow its instructions. The full skill content is loaded below.]"
|
||||
)
|
||||
return _build_skill_message(
|
||||
@@ -440,16 +490,8 @@ def build_preloaded_skills_prompt(
|
||||
continue
|
||||
|
||||
loaded_skill, skill_dir, skill_name = loaded
|
||||
|
||||
# Track active usage for Curator lifecycle management (#17782)
|
||||
try:
|
||||
from tools.skill_usage import bump_use
|
||||
bump_use(skill_name)
|
||||
except Exception:
|
||||
pass # Non-critical
|
||||
|
||||
activation_note = (
|
||||
f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill '
|
||||
f'[SYSTEM: The user launched this CLI session with the "{skill_name}" skill '
|
||||
"preloaded. Treat its instructions as active guidance for the duration of this "
|
||||
"session unless the user overrides them.]"
|
||||
)
|
||||
|
||||
@@ -1,131 +0,0 @@
|
||||
"""Shared SKILL.md preprocessing helpers."""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
|
||||
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
|
||||
# left as-is so the user can debug them.
|
||||
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
|
||||
|
||||
# Matches inline shell snippets like: !`date +%Y-%m-%d`
|
||||
# Non-greedy, single-line only -- no newlines inside the backticks.
|
||||
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
|
||||
|
||||
# Cap inline-shell output so a runaway command can't blow out the context.
|
||||
_INLINE_SHELL_MAX_OUTPUT = 4000
|
||||
|
||||
|
||||
def load_skills_config() -> dict:
|
||||
"""Load the ``skills`` section of config.yaml (best-effort)."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
cfg = load_config() or {}
|
||||
skills_cfg = cfg.get("skills")
|
||||
if isinstance(skills_cfg, dict):
|
||||
return skills_cfg
|
||||
except Exception:
|
||||
logger.debug("Could not read skills config", exc_info=True)
|
||||
return {}
|
||||
|
||||
|
||||
def substitute_template_vars(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
session_id: str | None,
|
||||
) -> str:
|
||||
"""Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
|
||||
|
||||
Only substitutes tokens for which a concrete value is available --
|
||||
unresolved tokens are left in place so the author can spot them.
|
||||
"""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
skill_dir_str = str(skill_dir) if skill_dir else None
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
token = match.group(1)
|
||||
if token == "HERMES_SKILL_DIR" and skill_dir_str:
|
||||
return skill_dir_str
|
||||
if token == "HERMES_SESSION_ID" and session_id:
|
||||
return str(session_id)
|
||||
return match.group(0)
|
||||
|
||||
return _SKILL_TEMPLATE_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
|
||||
"""Execute a single inline-shell snippet and return its stdout (trimmed).
|
||||
|
||||
Failures return a short ``[inline-shell error: ...]`` marker instead of
|
||||
raising, so one bad snippet can't wreck the whole skill message.
|
||||
"""
|
||||
try:
|
||||
completed = subprocess.run(
|
||||
["bash", "-c", command],
|
||||
cwd=str(cwd) if cwd else None,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=max(1, int(timeout)),
|
||||
check=False,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
return f"[inline-shell timeout after {timeout}s: {command}]"
|
||||
except FileNotFoundError:
|
||||
return "[inline-shell error: bash not found]"
|
||||
except Exception as exc:
|
||||
return f"[inline-shell error: {exc}]"
|
||||
|
||||
output = (completed.stdout or "").rstrip("\n")
|
||||
if not output and completed.stderr:
|
||||
output = completed.stderr.rstrip("\n")
|
||||
if len(output) > _INLINE_SHELL_MAX_OUTPUT:
|
||||
output = output[:_INLINE_SHELL_MAX_OUTPUT] + "...[truncated]"
|
||||
return output
|
||||
|
||||
|
||||
def expand_inline_shell(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
timeout: int,
|
||||
) -> str:
|
||||
"""Replace every !`cmd` snippet in ``content`` with its stdout.
|
||||
|
||||
Runs each snippet with the skill directory as CWD so relative paths in
|
||||
the snippet work the way the author expects.
|
||||
"""
|
||||
if "!`" not in content:
|
||||
return content
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
cmd = match.group(1).strip()
|
||||
if not cmd:
|
||||
return ""
|
||||
return run_inline_shell(cmd, skill_dir, timeout)
|
||||
|
||||
return _INLINE_SHELL_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def preprocess_skill_content(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
session_id: str | None = None,
|
||||
skills_cfg: dict | None = None,
|
||||
) -> str:
|
||||
"""Apply configured SKILL.md template and inline-shell preprocessing."""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
cfg = skills_cfg if isinstance(skills_cfg, dict) else load_skills_config()
|
||||
if cfg.get("template_vars", True):
|
||||
content = substitute_template_vars(content, skill_dir, session_id)
|
||||
if cfg.get("inline_shell", False):
|
||||
timeout = int(cfg.get("inline_shell_timeout", 10) or 10)
|
||||
content = expand_inline_shell(content, skill_dir, timeout)
|
||||
return content
|
||||
+3
-11
@@ -24,7 +24,7 @@ PLATFORM_MAP = {
|
||||
"windows": "win32",
|
||||
}
|
||||
|
||||
EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))
|
||||
EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub"))
|
||||
|
||||
# ── Lazy YAML loader ─────────────────────────────────────────────────────
|
||||
|
||||
@@ -200,9 +200,6 @@ def get_external_skills_dirs() -> List[Path]:
|
||||
if not isinstance(raw_dirs, list):
|
||||
return []
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
hermes_home = get_hermes_home()
|
||||
local_skills = get_skills_dir().resolve()
|
||||
seen: Set[Path] = set()
|
||||
result: List[Path] = []
|
||||
@@ -213,12 +210,7 @@ def get_external_skills_dirs() -> List[Path]:
|
||||
continue
|
||||
# Expand ~ and environment variables
|
||||
expanded = os.path.expanduser(os.path.expandvars(entry))
|
||||
p = Path(expanded)
|
||||
# Resolve relative paths against HERMES_HOME, not cwd
|
||||
if not p.is_absolute():
|
||||
p = (hermes_home / p).resolve()
|
||||
else:
|
||||
p = p.resolve()
|
||||
p = Path(expanded).resolve()
|
||||
if p == local_skills:
|
||||
continue
|
||||
if p in seen:
|
||||
@@ -440,7 +432,7 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
|
||||
def iter_skill_index_files(skills_dir: Path, filename: str):
|
||||
"""Walk skills_dir yielding sorted paths matching *filename*.
|
||||
|
||||
Excludes ``.git``, ``.github``, ``.hub``, ``.archive`` directories.
|
||||
Excludes ``.git``, ``.github``, ``.hub`` directories.
|
||||
"""
|
||||
matches = []
|
||||
for root, dirs, files in os.walk(skills_dir, followlinks=True):
|
||||
|
||||
@@ -6,18 +6,12 @@ adds latency to the user-facing reply.
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import Callable, Optional
|
||||
from typing import Optional
|
||||
|
||||
from agent.auxiliary_client import call_llm
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Callback signature: (task_name, exception) -> None. Used to surface
|
||||
# auxiliary failures to the user through AIAgent._emit_auxiliary_failure
|
||||
# so silent-drops (e.g. OpenRouter 402 exhausting the fallback chain)
|
||||
# become visible instead of piling up as NULL session titles.
|
||||
FailureCallback = Callable[[str, BaseException], None]
|
||||
|
||||
_TITLE_PROMPT = (
|
||||
"Generate a short, descriptive title (3-7 words) for a conversation that starts with the "
|
||||
"following exchange. The title should capture the main topic or intent. "
|
||||
@@ -25,23 +19,11 @@ _TITLE_PROMPT = (
|
||||
)
|
||||
|
||||
|
||||
def generate_title(
|
||||
user_message: str,
|
||||
assistant_response: str,
|
||||
timeout: float = 30.0,
|
||||
failure_callback: Optional[FailureCallback] = None,
|
||||
main_runtime: dict = None,
|
||||
) -> Optional[str]:
|
||||
def generate_title(user_message: str, assistant_response: str, timeout: float = 30.0) -> Optional[str]:
|
||||
"""Generate a session title from the first exchange.
|
||||
|
||||
Uses the main runtime's model when available, falling back to the
|
||||
auxiliary LLM client (cheapest/fastest available model).
|
||||
Uses the auxiliary LLM client (cheapest/fastest available model).
|
||||
Returns the title string or None on failure.
|
||||
|
||||
``failure_callback`` is invoked with ``(task, exception)`` when the
|
||||
auxiliary call raises — the caller typically wires this to
|
||||
``AIAgent._emit_auxiliary_failure`` so the user sees a warning instead
|
||||
of silently accumulating untitled sessions.
|
||||
"""
|
||||
# Truncate long messages to keep the request small
|
||||
user_snippet = user_message[:500] if user_message else ""
|
||||
@@ -56,10 +38,9 @@ def generate_title(
|
||||
response = call_llm(
|
||||
task="title_generation",
|
||||
messages=messages,
|
||||
max_tokens=500,
|
||||
max_tokens=30,
|
||||
temperature=0.3,
|
||||
timeout=timeout,
|
||||
main_runtime=main_runtime,
|
||||
)
|
||||
title = (response.choices[0].message.content or "").strip()
|
||||
# Clean up: remove quotes, trailing punctuation, prefixes like "Title: "
|
||||
@@ -71,15 +52,7 @@ def generate_title(
|
||||
title = title[:77] + "..."
|
||||
return title if title else None
|
||||
except Exception as e:
|
||||
# Log at WARNING so this shows up in agent.log without debug mode.
|
||||
# Full detail at debug level for operators who need the stack.
|
||||
logger.warning("Title generation failed: %s", e)
|
||||
logger.debug("Title generation traceback", exc_info=True)
|
||||
if failure_callback is not None:
|
||||
try:
|
||||
failure_callback("title generation", e)
|
||||
except Exception:
|
||||
logger.debug("Title generation failure_callback raised", exc_info=True)
|
||||
logger.debug("Title generation failed: %s", e)
|
||||
return None
|
||||
|
||||
|
||||
@@ -88,8 +61,6 @@ def auto_title_session(
|
||||
session_id: str,
|
||||
user_message: str,
|
||||
assistant_response: str,
|
||||
failure_callback: Optional[FailureCallback] = None,
|
||||
main_runtime: dict = None,
|
||||
) -> None:
|
||||
"""Generate and set a session title if one doesn't already exist.
|
||||
|
||||
@@ -110,9 +81,7 @@ def auto_title_session(
|
||||
except Exception:
|
||||
return
|
||||
|
||||
title = generate_title(
|
||||
user_message, assistant_response, failure_callback=failure_callback, main_runtime=main_runtime
|
||||
)
|
||||
title = generate_title(user_message, assistant_response)
|
||||
if not title:
|
||||
return
|
||||
|
||||
@@ -129,8 +98,6 @@ def maybe_auto_title(
|
||||
user_message: str,
|
||||
assistant_response: str,
|
||||
conversation_history: list,
|
||||
failure_callback: Optional[FailureCallback] = None,
|
||||
main_runtime: dict = None,
|
||||
) -> None:
|
||||
"""Fire-and-forget title generation after the first exchange.
|
||||
|
||||
@@ -152,7 +119,6 @@ def maybe_auto_title(
|
||||
thread = threading.Thread(
|
||||
target=auto_title_session,
|
||||
args=(session_db, session_id, user_message, assistant_response),
|
||||
kwargs={"failure_callback": failure_callback, "main_runtime": main_runtime},
|
||||
daemon=True,
|
||||
name="auto-title",
|
||||
)
|
||||
|
||||
@@ -23,14 +23,9 @@ def get_transport(api_mode: str):
|
||||
This allows gradual migration — call sites can check for None
|
||||
and fall back to the legacy code path.
|
||||
"""
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
if cls is None:
|
||||
# The registry can be partially populated when a specific transport
|
||||
# module was imported directly (for example chat_completions before
|
||||
# codex). Discover on misses, not only when the registry is empty, so
|
||||
# test/order-dependent imports do not make valid api_modes unavailable.
|
||||
if not _REGISTRY:
|
||||
_discover_transports()
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
if cls is None:
|
||||
return None
|
||||
return cls()
|
||||
|
||||
@@ -58,7 +58,6 @@ class AnthropicTransport(ProviderTransport):
|
||||
context_length: int | None
|
||||
base_url: str | None
|
||||
fast_mode: bool
|
||||
drop_context_1m_beta: bool
|
||||
"""
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
|
||||
@@ -74,58 +73,36 @@ class AnthropicTransport(ProviderTransport):
|
||||
context_length=params.get("context_length"),
|
||||
base_url=params.get("base_url"),
|
||||
fast_mode=params.get("fast_mode", False),
|
||||
drop_context_1m_beta=params.get("drop_context_1m_beta", False),
|
||||
)
|
||||
|
||||
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
|
||||
"""Normalize Anthropic response to NormalizedResponse.
|
||||
|
||||
Parses content blocks (text, thinking, tool_use), maps stop_reason
|
||||
to OpenAI finish_reason, and collects reasoning_details in provider_data.
|
||||
Calls the adapter's v1 normalize and maps the (SimpleNamespace, finish_reason)
|
||||
tuple to the shared NormalizedResponse type.
|
||||
"""
|
||||
import json
|
||||
from agent.anthropic_adapter import _to_plain_data
|
||||
from agent.transports.types import ToolCall
|
||||
from agent.anthropic_adapter import normalize_anthropic_response
|
||||
from agent.transports.types import build_tool_call
|
||||
|
||||
strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
|
||||
_MCP_PREFIX = "mcp_"
|
||||
assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)
|
||||
|
||||
text_parts = []
|
||||
reasoning_parts = []
|
||||
reasoning_details = []
|
||||
tool_calls = []
|
||||
|
||||
for block in response.content:
|
||||
if block.type == "text":
|
||||
text_parts.append(block.text)
|
||||
elif block.type == "thinking":
|
||||
reasoning_parts.append(block.thinking)
|
||||
block_dict = _to_plain_data(block)
|
||||
if isinstance(block_dict, dict):
|
||||
reasoning_details.append(block_dict)
|
||||
elif block.type == "tool_use":
|
||||
name = block.name
|
||||
if strip_tool_prefix and name.startswith(_MCP_PREFIX):
|
||||
name = name[len(_MCP_PREFIX):]
|
||||
tool_calls.append(
|
||||
ToolCall(
|
||||
id=block.id,
|
||||
name=name,
|
||||
arguments=json.dumps(block.input),
|
||||
)
|
||||
)
|
||||
|
||||
finish_reason = self._STOP_REASON_MAP.get(response.stop_reason, "stop")
|
||||
tool_calls = None
|
||||
if assistant_msg.tool_calls:
|
||||
tool_calls = [
|
||||
build_tool_call(id=tc.id, name=tc.function.name, arguments=tc.function.arguments)
|
||||
for tc in assistant_msg.tool_calls
|
||||
]
|
||||
|
||||
provider_data = {}
|
||||
if reasoning_details:
|
||||
provider_data["reasoning_details"] = reasoning_details
|
||||
if getattr(assistant_msg, "reasoning_details", None):
|
||||
provider_data["reasoning_details"] = assistant_msg.reasoning_details
|
||||
|
||||
return NormalizedResponse(
|
||||
content="\n".join(text_parts) if text_parts else None,
|
||||
tool_calls=tool_calls or None,
|
||||
content=assistant_msg.content,
|
||||
tool_calls=tool_calls,
|
||||
finish_reason=finish_reason,
|
||||
reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
|
||||
reasoning=getattr(assistant_msg, "reasoning", None),
|
||||
usage=None,
|
||||
provider_data=provider_data or None,
|
||||
)
|
||||
|
||||
@@ -12,93 +12,11 @@ reasoning configuration, temperature handling, and extra_body assembly.
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.lmstudio_reasoning import resolve_lmstudio_effort
|
||||
from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
|
||||
from agent.prompt_builder import DEVELOPER_ROLE_MODELS
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
|
||||
|
||||
def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
|
||||
"""Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig."""
|
||||
if reasoning_config is None or not isinstance(reasoning_config, dict):
|
||||
return None
|
||||
|
||||
normalized_model = (model or "").strip().lower()
|
||||
if normalized_model.startswith("google/"):
|
||||
normalized_model = normalized_model.split("/", 1)[1]
|
||||
|
||||
# ``thinking_config`` is a Gemini-only request parameter. The same
|
||||
# ``gemini`` provider also serves Gemma (and historically PaLM/Bard);
|
||||
# those reject the field with HTTP 400 "Unknown name 'thinking_config':
|
||||
# Cannot find field" — including the polite ``{"includeThoughts": False}``
|
||||
# form. Omit the field entirely on non-Gemini models. (#17426)
|
||||
if not normalized_model.startswith("gemini"):
|
||||
return None
|
||||
|
||||
if reasoning_config.get("enabled") is False:
|
||||
# Gemini can hide thought parts even when internal thinking still
|
||||
# happens; omit thinkingLevel to avoid model-specific validation quirks.
|
||||
return {"includeThoughts": False}
|
||||
|
||||
effort = str(reasoning_config.get("effort", "medium") or "medium").strip().lower()
|
||||
if effort == "none":
|
||||
return {"includeThoughts": False}
|
||||
|
||||
thinking_config: Dict[str, Any] = {"includeThoughts": True}
|
||||
|
||||
# Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes'
|
||||
# coarse effort levels. ``includeThoughts`` alone is enough to surface
|
||||
# thought parts without risking request validation errors.
|
||||
if normalized_model.startswith("gemini-2.5-"):
|
||||
return thinking_config
|
||||
|
||||
if effort not in {"minimal", "low", "medium", "high", "xhigh"}:
|
||||
effort = "medium"
|
||||
|
||||
# Gemini 3 Flash documents low/medium/high thinking levels; Gemini 3 Pro
|
||||
# is stricter (low/high). Clamp Hermes' wider effort set to what each
|
||||
# family accepts so we never forward an undocumented level verbatim.
|
||||
if normalized_model.startswith(("gemini-3", "gemini-3.1")):
|
||||
if "flash" in normalized_model:
|
||||
if effort in {"minimal", "low"}:
|
||||
thinking_config["thinkingLevel"] = "low"
|
||||
elif effort in {"high", "xhigh"}:
|
||||
thinking_config["thinkingLevel"] = "high"
|
||||
else:
|
||||
thinking_config["thinkingLevel"] = "medium"
|
||||
elif "pro" in normalized_model:
|
||||
thinking_config["thinkingLevel"] = (
|
||||
"high" if effort in {"high", "xhigh"} else "low"
|
||||
)
|
||||
|
||||
return thinking_config
|
||||
|
||||
|
||||
def _snake_case_gemini_thinking_config(config: dict | None) -> dict | None:
|
||||
"""Convert Gemini thinking config keys to the OpenAI-compat field names."""
|
||||
if not isinstance(config, dict) or not config:
|
||||
return None
|
||||
|
||||
translated: Dict[str, Any] = {}
|
||||
if isinstance(config.get("includeThoughts"), bool):
|
||||
translated["include_thoughts"] = config["includeThoughts"]
|
||||
if isinstance(config.get("thinkingLevel"), str) and config["thinkingLevel"].strip():
|
||||
translated["thinking_level"] = config["thinkingLevel"].strip().lower()
|
||||
if isinstance(config.get("thinkingBudget"), (int, float)):
|
||||
translated["thinking_budget"] = int(config["thinkingBudget"])
|
||||
return translated or None
|
||||
|
||||
|
||||
def _is_gemini_openai_compat_base_url(base_url: Any) -> bool:
|
||||
normalized = str(base_url or "").strip().rstrip("/").lower()
|
||||
if not normalized:
|
||||
return False
|
||||
if "generativelanguage.googleapis.com" not in normalized:
|
||||
return False
|
||||
return normalized.endswith("/openai")
|
||||
|
||||
|
||||
class ChatCompletionsTransport(ProviderTransport):
|
||||
"""Transport for api_mode='chat_completions'.
|
||||
|
||||
@@ -112,15 +30,15 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
|
||||
"""Messages are already in OpenAI format — sanitize Codex leaks only.
|
||||
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` /
|
||||
``codex_message_items`` on the message, ``call_id``/``response_item_id``
|
||||
on tool_calls) that strict chat-completions providers reject with 400/422.
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` on the
|
||||
message, ``call_id``/``response_item_id`` on tool_calls) that strict
|
||||
chat-completions providers reject with 400/422.
|
||||
"""
|
||||
needs_sanitize = False
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
if "codex_reasoning_items" in msg or "codex_message_items" in msg:
|
||||
if "codex_reasoning_items" in msg:
|
||||
needs_sanitize = True
|
||||
break
|
||||
tool_calls = msg.get("tool_calls")
|
||||
@@ -140,7 +58,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
msg.pop("codex_reasoning_items", None)
|
||||
msg.pop("codex_message_items", None)
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
@@ -182,7 +99,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
is_github_models: bool
|
||||
is_nvidia_nim: bool
|
||||
is_kimi: bool
|
||||
is_lmstudio: bool
|
||||
is_custom_provider: bool
|
||||
ollama_num_ctx: int | None
|
||||
# Provider routing
|
||||
@@ -196,7 +112,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
# Reasoning
|
||||
supports_reasoning: bool
|
||||
github_reasoning_extra: dict | None
|
||||
lmstudio_reasoning_options: list[str] | None # raw allowed_options from /api/v1/models
|
||||
# Claude on OpenRouter/Nous max output
|
||||
anthropic_max_output: int | None
|
||||
# Extra
|
||||
@@ -257,11 +172,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
|
||||
# Tools
|
||||
if tools:
|
||||
# Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting
|
||||
# tool parameters here keeps aggregator routes (Nous, OpenRouter,
|
||||
# etc.) compatible, in addition to direct moonshot.ai endpoints.
|
||||
if is_moonshot_model(model):
|
||||
tools = sanitize_moonshot_tools(tools)
|
||||
api_kwargs["tools"] = tools
|
||||
|
||||
# max_tokens resolution — priority: ephemeral > user > provider default
|
||||
@@ -271,7 +181,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
anthropic_max_out = params.get("anthropic_max_output")
|
||||
is_nvidia_nim = params.get("is_nvidia_nim", False)
|
||||
is_kimi = params.get("is_kimi", False)
|
||||
is_tokenhub = params.get("is_tokenhub", False)
|
||||
reasoning_config = params.get("reasoning_config")
|
||||
|
||||
if ephemeral is not None and max_tokens_fn:
|
||||
@@ -303,41 +212,12 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
_kimi_effort = _e
|
||||
api_kwargs["reasoning_effort"] = _kimi_effort
|
||||
|
||||
# Tencent TokenHub: top-level reasoning_effort (unless thinking disabled)
|
||||
if is_tokenhub:
|
||||
_tokenhub_thinking_off = bool(
|
||||
reasoning_config
|
||||
and isinstance(reasoning_config, dict)
|
||||
and reasoning_config.get("enabled") is False
|
||||
)
|
||||
if not _tokenhub_thinking_off:
|
||||
_tokenhub_effort = "high"
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
_e = (reasoning_config.get("effort") or "").strip().lower()
|
||||
if _e in ("low", "medium", "high"):
|
||||
_tokenhub_effort = _e
|
||||
api_kwargs["reasoning_effort"] = _tokenhub_effort
|
||||
|
||||
# LM Studio: top-level reasoning_effort. Only emit when the model
|
||||
# declares reasoning support via /api/v1/models capabilities (gated
|
||||
# upstream by params["supports_reasoning"]). resolve_lmstudio_effort
|
||||
# is shared with run_agent's summary path so both stay in sync.
|
||||
if params.get("is_lmstudio", False) and params.get("supports_reasoning", False):
|
||||
_lm_effort = resolve_lmstudio_effort(
|
||||
reasoning_config,
|
||||
params.get("lmstudio_reasoning_options"),
|
||||
)
|
||||
if _lm_effort is not None:
|
||||
api_kwargs["reasoning_effort"] = _lm_effort
|
||||
|
||||
# extra_body assembly
|
||||
extra_body: Dict[str, Any] = {}
|
||||
|
||||
is_openrouter = params.get("is_openrouter", False)
|
||||
is_nous = params.get("is_nous", False)
|
||||
is_github_models = params.get("is_github_models", False)
|
||||
provider_name = str(params.get("provider_name") or "").strip().lower()
|
||||
base_url = params.get("base_url")
|
||||
|
||||
provider_prefs = params.get("provider_preferences")
|
||||
if provider_prefs and is_openrouter:
|
||||
@@ -353,9 +233,8 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
"type": "enabled" if _kimi_thinking_enabled else "disabled",
|
||||
}
|
||||
|
||||
# Reasoning. LM Studio is handled above via top-level reasoning_effort,
|
||||
# so skip emitting extra_body.reasoning for it.
|
||||
if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False):
|
||||
# Reasoning
|
||||
if params.get("supports_reasoning", False):
|
||||
if is_github_models:
|
||||
gh_reasoning = params.get("github_reasoning_extra")
|
||||
if gh_reasoning is not None:
|
||||
@@ -391,23 +270,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
if is_qwen:
|
||||
extra_body["vl_high_resolution_images"] = True
|
||||
|
||||
if provider_name == "gemini":
|
||||
raw_thinking_config = _build_gemini_thinking_config(model, reasoning_config)
|
||||
if _is_gemini_openai_compat_base_url(base_url):
|
||||
thinking_config = _snake_case_gemini_thinking_config(raw_thinking_config)
|
||||
if thinking_config:
|
||||
openai_compat_extra = extra_body.get("extra_body", {})
|
||||
google_extra = openai_compat_extra.get("google", {})
|
||||
google_extra["thinking_config"] = thinking_config
|
||||
openai_compat_extra["google"] = google_extra
|
||||
extra_body["extra_body"] = openai_compat_extra
|
||||
elif raw_thinking_config:
|
||||
extra_body["thinking_config"] = raw_thinking_config
|
||||
elif provider_name == "google-gemini-cli":
|
||||
thinking_config = _build_gemini_thinking_config(model, reasoning_config)
|
||||
if thinking_config:
|
||||
extra_body["thinking_config"] = thinking_config
|
||||
|
||||
# Merge any pre-built extra_body additions
|
||||
additions = params.get("extra_body_additions")
|
||||
if additions:
|
||||
@@ -477,13 +339,9 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
# so keep them apart in provider_data rather than merging.
|
||||
reasoning = getattr(msg, "reasoning", None)
|
||||
reasoning_content = getattr(msg, "reasoning_content", None)
|
||||
if reasoning_content is None and hasattr(msg, "model_extra"):
|
||||
model_extra = getattr(msg, "model_extra", None) or {}
|
||||
if isinstance(model_extra, dict) and "reasoning_content" in model_extra:
|
||||
reasoning_content = model_extra["reasoning_content"]
|
||||
|
||||
provider_data: Dict[str, Any] = {}
|
||||
if reasoning_content is not None:
|
||||
if reasoning_content:
|
||||
provider_data["reasoning_content"] = reasoning_content
|
||||
rd = getattr(msg, "reasoning_details", None)
|
||||
if rd:
|
||||
|
||||
@@ -8,7 +8,7 @@ streaming, or the _run_codex_stream() call path.
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
|
||||
|
||||
class ResponsesApiTransport(ProviderTransport):
|
||||
@@ -120,24 +120,6 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
if request_overrides:
|
||||
kwargs.update(request_overrides)
|
||||
|
||||
if is_codex_backend:
|
||||
prompt_cache_key = kwargs.get("prompt_cache_key")
|
||||
cache_scope_id = str(prompt_cache_key or session_id or "").strip()
|
||||
if cache_scope_id:
|
||||
existing_extra_headers = kwargs.get("extra_headers")
|
||||
merged_extra_headers: Dict[str, str] = {}
|
||||
if isinstance(existing_extra_headers, dict):
|
||||
merged_extra_headers.update(
|
||||
{
|
||||
str(key): str(value)
|
||||
for key, value in existing_extra_headers.items()
|
||||
if key and value is not None
|
||||
}
|
||||
)
|
||||
merged_extra_headers["session_id"] = cache_scope_id
|
||||
merged_extra_headers["x-client-request-id"] = cache_scope_id
|
||||
kwargs["extra_headers"] = merged_extra_headers
|
||||
|
||||
max_tokens = params.get("max_tokens")
|
||||
if max_tokens is not None and not is_codex_backend:
|
||||
kwargs["max_output_tokens"] = max_tokens
|
||||
@@ -151,6 +133,8 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
"""Normalize Codex Responses API response to NormalizedResponse."""
|
||||
from agent.codex_responses_adapter import (
|
||||
_normalize_codex_response,
|
||||
_extract_responses_message_text,
|
||||
_extract_responses_reasoning_text,
|
||||
)
|
||||
|
||||
# _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
|
||||
@@ -176,8 +160,6 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
provider_data = {}
|
||||
if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
|
||||
provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
|
||||
if msg and hasattr(msg, "codex_message_items") and msg.codex_message_items:
|
||||
provider_data["codex_message_items"] = msg.codex_message_items
|
||||
if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
|
||||
provider_data["reasoning_details"] = msg.reasoning_details
|
||||
|
||||
|
||||
@@ -37,44 +37,6 @@ class ToolCall:
|
||||
arguments: str # JSON string
|
||||
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
|
||||
|
||||
# ── Backward compatibility ──────────────────────────────────
|
||||
# The agent loop reads tc.function.name / tc.function.arguments
|
||||
# throughout run_agent.py (45+ sites). These properties let
|
||||
# NormalizedResponse pass through without the _nr_to_assistant_message
|
||||
# shim, while keeping ToolCall's canonical fields flat.
|
||||
@property
|
||||
def type(self) -> str:
|
||||
return "function"
|
||||
|
||||
@property
|
||||
def function(self) -> "ToolCall":
|
||||
"""Return self so tc.function.name / tc.function.arguments work."""
|
||||
return self
|
||||
|
||||
@property
|
||||
def call_id(self) -> Optional[str]:
|
||||
"""Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
|
||||
return (self.provider_data or {}).get("call_id")
|
||||
|
||||
@property
|
||||
def response_item_id(self) -> Optional[str]:
|
||||
"""Codex response_item_id from provider_data."""
|
||||
return (self.provider_data or {}).get("response_item_id")
|
||||
|
||||
@property
|
||||
def extra_content(self) -> Optional[Dict[str, Any]]:
|
||||
"""Gemini extra_content (thought_signature) from provider_data.
|
||||
|
||||
Gemini 3 thinking models attach ``extra_content`` with a
|
||||
``thought_signature`` to each tool call. This signature must be
|
||||
replayed on subsequent API calls — without it the API rejects the
|
||||
request with HTTP 400. The chat_completions transport stores this
|
||||
in ``provider_data["extra_content"]``; this property exposes it so
|
||||
``_build_assistant_message`` can ``getattr(tc, "extra_content")``
|
||||
uniformly.
|
||||
"""
|
||||
return (self.provider_data or {}).get("extra_content")
|
||||
|
||||
|
||||
@dataclass
|
||||
class Usage:
|
||||
@@ -97,7 +59,7 @@ class NormalizedResponse:
|
||||
Response-level ``provider_data`` examples:
|
||||
|
||||
* Anthropic: ``{"reasoning_details": [...]}``
|
||||
* Codex: ``{"codex_reasoning_items": [...], "codex_message_items": [...]}``
|
||||
* Codex: ``{"codex_reasoning_items": [...]}``
|
||||
* Others: ``None``
|
||||
"""
|
||||
|
||||
@@ -108,29 +70,6 @@ class NormalizedResponse:
|
||||
usage: Optional[Usage] = None
|
||||
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
|
||||
|
||||
# ── Backward compatibility ──────────────────────────────────
|
||||
# The shim _nr_to_assistant_message() mapped these from provider_data.
|
||||
# These properties let NormalizedResponse pass through directly.
|
||||
@property
|
||||
def reasoning_content(self) -> Optional[str]:
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("reasoning_content")
|
||||
|
||||
@property
|
||||
def reasoning_details(self):
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("reasoning_details")
|
||||
|
||||
@property
|
||||
def codex_reasoning_items(self):
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("codex_reasoning_items")
|
||||
|
||||
@property
|
||||
def codex_message_items(self):
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("codex_message_items")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Factory helpers
|
||||
|
||||
@@ -359,25 +359,6 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
|
||||
source_url="https://aws.amazon.com/bedrock/pricing/",
|
||||
pricing_version="bedrock-pricing-2026-04",
|
||||
),
|
||||
# MiniMax
|
||||
(
|
||||
"minimax",
|
||||
"minimax-m2.7",
|
||||
): PricingEntry(
|
||||
input_cost_per_million=Decimal("0.30"),
|
||||
output_cost_per_million=Decimal("1.20"),
|
||||
source="official_docs_snapshot",
|
||||
pricing_version="minimax-pricing-2026-04",
|
||||
),
|
||||
(
|
||||
"minimax-cn",
|
||||
"minimax-m2.7",
|
||||
): PricingEntry(
|
||||
input_cost_per_million=Decimal("0.30"),
|
||||
output_cost_per_million=Decimal("1.20"),
|
||||
source="official_docs_snapshot",
|
||||
pricing_version="minimax-pricing-2026-04",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -419,8 +400,6 @@ def resolve_billing_route(
|
||||
return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
|
||||
if provider_name == "openai":
|
||||
return BillingRoute(provider="openai", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
|
||||
if provider_name in {"minimax", "minimax-cn"}:
|
||||
return BillingRoute(provider=provider_name, model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
|
||||
if provider_name in {"custom", "local"} or (base and "localhost" in base):
|
||||
return BillingRoute(provider=provider_name or "custom", model=model, base_url=base_url or "", billing_mode="unknown")
|
||||
return BillingRoute(provider=provider_name or "unknown", model=model.split("/")[-1] if model else "", base_url=base_url or "", billing_mode="unknown")
|
||||
|
||||
+6
-2
@@ -951,9 +951,13 @@ class BatchRunner:
|
||||
root_logger.setLevel(original_level)
|
||||
|
||||
# Aggregate all batch statistics and update checkpoint
|
||||
all_completed_prompts = list(completed_prompts_set)
|
||||
total_reasoning_stats = {"total_assistant_turns": 0, "turns_with_reasoning": 0, "turns_without_reasoning": 0}
|
||||
|
||||
|
||||
for batch_result in results:
|
||||
# Add newly completed prompts
|
||||
all_completed_prompts.extend(batch_result.get("completed_prompts", []))
|
||||
|
||||
# Aggregate tool stats
|
||||
for tool_name, stats in batch_result.get("tool_stats", {}).items():
|
||||
if tool_name not in total_tool_stats:
|
||||
@@ -973,7 +977,7 @@ class BatchRunner:
|
||||
|
||||
# Save final checkpoint (best-effort; incremental writes already happened)
|
||||
try:
|
||||
checkpoint_data["completed_prompts"] = sorted(completed_prompts_set)
|
||||
checkpoint_data["completed_prompts"] = all_completed_prompts
|
||||
self._save_checkpoint(checkpoint_data, lock=checkpoint_lock)
|
||||
except Exception as ckpt_err:
|
||||
print(f"âš ï¸ Warning: Failed to save final checkpoint: {ckpt_err}")
|
||||
|
||||
+18
-68
@@ -30,13 +30,14 @@ model:
|
||||
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
|
||||
# "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY)
|
||||
# "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
|
||||
# "lmstudio" - LM Studio local server (optional: LM_API_KEY, defaults to http://127.0.0.1:1234/v1)
|
||||
#
|
||||
# Local servers (LM Studio, Ollama, vLLM, llama.cpp):
|
||||
# "custom" - Any other OpenAI-compatible endpoint. Set base_url below.
|
||||
# Aliases: "ollama", "vllm", "llamacpp" all map to "custom".
|
||||
# LM Studio is first-class and uses provider: "lmstudio".
|
||||
# It works with both no-auth and auth-enabled server modes.
|
||||
# "custom" - Any OpenAI-compatible endpoint. Set base_url below.
|
||||
# Aliases: "lmstudio", "ollama", "vllm", "llamacpp" all map to "custom".
|
||||
# Example for LM Studio:
|
||||
# provider: "lmstudio"
|
||||
# base_url: "http://localhost:1234/v1"
|
||||
# No API key needed — local servers typically ignore auth.
|
||||
#
|
||||
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
|
||||
provider: "auto"
|
||||
@@ -180,11 +181,6 @@ terminal:
|
||||
# lifetime_seconds: 300
|
||||
# docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
# docker_mount_cwd_to_workspace: true # Explicit opt-in: mount your launch cwd into /workspace
|
||||
# # Optional: run the container as your host user's uid:gid so files written
|
||||
# # into bind-mounted dirs are owned by you, not root. Drops SETUID/SETGID
|
||||
# # caps too since no gosu privilege drop is needed. Leave off if your
|
||||
# # chosen docker_image expects to start as root.
|
||||
# docker_run_as_host_user: true
|
||||
# # Optional: explicitly forward selected env vars into Docker.
|
||||
# # These values come from your current shell first, then ~/.hermes/.env.
|
||||
# # Warning: anything forwarded here is visible to commands run in the container.
|
||||
@@ -330,16 +326,6 @@ compression:
|
||||
# To pin a specific model/provider for compression summaries, use the
|
||||
# auxiliary section below (auxiliary.compression.provider / model).
|
||||
|
||||
# =============================================================================
|
||||
# Anthropic prompt caching TTL
|
||||
# =============================================================================
|
||||
# When prompt caching is active (Claude via OpenRouter or native Anthropic),
|
||||
# Anthropic supports two TTL tiers for cached prefixes: "5m" (default) and
|
||||
# "1h". Other values are ignored and "5m" is used.
|
||||
#
|
||||
prompt_caching:
|
||||
cache_ttl: "5m" # use "1h" for long sessions with pauses between turns
|
||||
|
||||
# =============================================================================
|
||||
# Auxiliary Models (Advanced — Experimental)
|
||||
# =============================================================================
|
||||
@@ -521,13 +507,6 @@ agent:
|
||||
# finish, then interrupts anything still running after this timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
# restart_drain_timeout: 60
|
||||
|
||||
# Max app-level retry attempts for API errors (connection drops, provider
|
||||
# timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this
|
||||
# to 1 if you use fallback providers and want fast failover on flaky
|
||||
# primaries (default 3). The OpenAI SDK does its own low-level retries
|
||||
# underneath this wrapper — this is the Hermes-level loop.
|
||||
# api_max_retries: 3
|
||||
|
||||
# Enable verbose logging
|
||||
verbose: false
|
||||
@@ -570,7 +549,7 @@ agent:
|
||||
# - A preset like "hermes-cli" or "hermes-telegram" (curated tool set)
|
||||
# - A list of individual toolsets to compose your own (see list below)
|
||||
#
|
||||
# Supported platform keys: cli, telegram, discord, whatsapp, slack, qqbot, teams
|
||||
# Supported platform keys: cli, telegram, discord, whatsapp, slack, qqbot
|
||||
#
|
||||
# Examples:
|
||||
#
|
||||
@@ -600,7 +579,6 @@ agent:
|
||||
# signal: hermes-signal (same as telegram)
|
||||
# homeassistant: hermes-homeassistant (same as telegram)
|
||||
# qqbot: hermes-qqbot (same as telegram)
|
||||
# teams: hermes-teams (same as telegram)
|
||||
#
|
||||
platform_toolsets:
|
||||
cli: [hermes-cli]
|
||||
@@ -611,8 +589,6 @@ platform_toolsets:
|
||||
signal: [hermes-signal]
|
||||
homeassistant: [hermes-homeassistant]
|
||||
qqbot: [hermes-qqbot]
|
||||
yuanbao: [hermes-yuanbao]
|
||||
teams: [hermes-teams]
|
||||
|
||||
# =============================================================================
|
||||
# Gateway Platform Settings
|
||||
@@ -797,16 +773,9 @@ code_execution:
|
||||
# Supports single tasks and batch mode (default 3 parallel, configurable).
|
||||
delegation:
|
||||
max_iterations: 50 # Max tool-calling turns per child (default: 50)
|
||||
# max_concurrent_children: 3 # Max parallel child agents per batch (default: 3, floor: 1, no ceiling).
|
||||
# WARNING: values above 10 multiply API cost linearly.
|
||||
# max_spawn_depth: 1 # Delegation tree depth cap (range: 1-3, default: 1 = flat).
|
||||
# Raise to 2 to allow workers to spawn their own subagents.
|
||||
# Requires role="orchestrator" on intermediate agents.
|
||||
# max_concurrent_children: 3 # Max parallel child agents (default: 3)
|
||||
# max_spawn_depth: 1 # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
|
||||
# orchestrator_enabled: true # Kill switch for role="orchestrator" children (default: true).
|
||||
# subagent_auto_approve: false # When a subagent hits a dangerous-command approval prompt, auto-deny (default: false)
|
||||
# or auto-approve "once" (true) instead of blocking on stdin.
|
||||
# The parent TUI owns stdin, so blocking would deadlock; non-interactive resolution is required.
|
||||
# Both choices emit a logger.warning audit line. Flip to true only for cron/batch pipelines.
|
||||
# inherit_mcp_toolsets: true # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
|
||||
# model: "google/gemini-3-flash-preview" # Override model for subagents (empty = inherit parent)
|
||||
# provider: "openrouter" # Override provider for subagents (empty = inherit parent)
|
||||
@@ -831,9 +800,7 @@ delegation:
|
||||
# Display
|
||||
# =============================================================================
|
||||
display:
|
||||
# Use compact banner mode (hides the ASCII-art banner, shows a single line).
|
||||
# true: Compact single-line banner
|
||||
# false: Full ASCII banner with tool/skill summary (default)
|
||||
# Use compact banner mode
|
||||
compact: false
|
||||
|
||||
# Tool progress display level (CLI and gateway)
|
||||
@@ -847,19 +814,12 @@ display:
|
||||
# Gateway-only natural mid-turn assistant updates.
|
||||
# When true, completed assistant status messages are sent as separate chat
|
||||
# messages. This is independent of tool_progress and gateway streaming.
|
||||
# true: Send mid-turn assistant updates as separate messages (default)
|
||||
# false: Only send the final response
|
||||
interim_assistant_messages: true
|
||||
|
||||
# What Enter does when Hermes is already busy (CLI and gateway platforms).
|
||||
# What Enter does when Hermes is already busy in the CLI.
|
||||
# interrupt: Interrupt the current run and redirect Hermes (default)
|
||||
# queue: Queue your message for the next turn
|
||||
# steer: Inject your message mid-run via /steer, arriving at the agent
|
||||
# after the next tool call — no interrupt, no role violation.
|
||||
# Falls back to 'queue' if the agent isn't running yet or if
|
||||
# images are attached (steer only carries text).
|
||||
# Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
|
||||
# Toggle at runtime with /busy <interrupt|queue|steer>.
|
||||
# Ctrl+C always interrupts regardless of this setting.
|
||||
busy_input_mode: interrupt
|
||||
|
||||
# Background process notifications (gateway/messaging only).
|
||||
@@ -875,22 +835,17 @@ display:
|
||||
# Play terminal bell when agent finishes a response.
|
||||
# Useful for long-running tasks — your terminal will ding when the agent is done.
|
||||
# Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
|
||||
# true: Ring the terminal bell on each response
|
||||
# false: Silent (default)
|
||||
bell_on_complete: false
|
||||
|
||||
# Show model reasoning/thinking before each response.
|
||||
# When enabled, a dim box shows the model's thought process above the response.
|
||||
# Toggle at runtime with /reasoning show or /reasoning hide.
|
||||
# true: Show the reasoning box
|
||||
# false: Hide reasoning (default)
|
||||
show_reasoning: false
|
||||
|
||||
# Stream tokens to the terminal as they arrive instead of waiting for the
|
||||
# full response. The response box opens on first token and text appears
|
||||
# line-by-line. Tool calls are still captured silently.
|
||||
# true: Stream tokens as they arrive (default)
|
||||
# false: Wait for the full response before rendering
|
||||
# Stream tokens to the terminal in real-time. Disable to wait for full responses.
|
||||
streaming: true
|
||||
|
||||
# ───────────────────────────────────────────────────────────────────────────
|
||||
@@ -900,15 +855,10 @@ display:
|
||||
# response box label, and branding text. Change at runtime with /skin <name>.
|
||||
#
|
||||
# Built-in skins:
|
||||
# default — Classic Hermes gold/kawaii
|
||||
# ares — Crimson/bronze war-god theme with spinner wings
|
||||
# mono — Clean grayscale monochrome
|
||||
# slate — Cool blue developer-focused
|
||||
# daylight — Bright light-mode theme
|
||||
# warm-lightmode — Warm paper-tone light-mode theme
|
||||
# poseidon — Sea-green/teal Olympian theme
|
||||
# sisyphus — Earthy stone-and-moss theme
|
||||
# charizard — Fiery orange dragon theme
|
||||
# default — Classic Hermes gold/kawaii
|
||||
# ares — Crimson/bronze war-god theme with spinner wings
|
||||
# mono — Clean grayscale monochrome
|
||||
# slate — Cool blue developer-focused
|
||||
#
|
||||
# Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
|
||||
# Schema (all fields optional, missing values inherit from default):
|
||||
@@ -934,7 +884,7 @@ display:
|
||||
# agent_name: "My Agent" # Banner title and branding
|
||||
# welcome: "Welcome message" # Shown at CLI startup
|
||||
# response_label: " ⚔ Agent " # Response box header label
|
||||
# prompt_symbol: "⚔" # Prompt symbol (bare token; renderers add trailing space)
|
||||
# prompt_symbol: "⚔ ❯ " # Prompt symbol
|
||||
# tool_prefix: "╎" # Tool output line prefix (default: ┊)
|
||||
#
|
||||
skin: default
|
||||
|
||||
+7
-115
@@ -16,12 +16,11 @@ import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from hermes_constants import get_hermes_home
|
||||
from typing import Optional, Dict, List, Any, Union
|
||||
from typing import Optional, Dict, List, Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from hermes_time import now as _hermes_now
|
||||
from utils import atomic_replace
|
||||
|
||||
try:
|
||||
from croniter import croniter
|
||||
@@ -312,22 +311,8 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None
|
||||
|
||||
elif schedule["kind"] == "cron":
|
||||
if not HAS_CRONITER:
|
||||
logger.warning(
|
||||
"Cannot compute next run for cron schedule %r: 'croniter' is "
|
||||
"not installed. croniter is a core dependency as of v0.9.x; "
|
||||
"reinstall hermes-agent or run 'pip install croniter' in your "
|
||||
"runtime env.",
|
||||
schedule.get("expr"),
|
||||
)
|
||||
return None
|
||||
# Use last_run_at as the croniter base when available, consistent
|
||||
# with interval jobs. This ensures that after a crash/restart,
|
||||
# the next run is anchored to the actual last execution time
|
||||
# rather than to an arbitrary restart time.
|
||||
base_time = now
|
||||
if last_run_at:
|
||||
base_time = _ensure_aware(datetime.fromisoformat(last_run_at))
|
||||
cron = croniter(schedule["expr"], base_time)
|
||||
cron = croniter(schedule["expr"], now)
|
||||
next_run = cron.get_next(datetime)
|
||||
return next_run.isoformat()
|
||||
|
||||
@@ -376,7 +361,7 @@ def save_jobs(jobs: List[Dict[str, Any]]):
|
||||
json.dump({"jobs": jobs, "updated_at": _hermes_now().isoformat()}, f, indent=2)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
atomic_replace(tmp_path, JOBS_FILE)
|
||||
os.replace(tmp_path, JOBS_FILE)
|
||||
_secure_file(JOBS_FILE)
|
||||
except BaseException:
|
||||
try:
|
||||
@@ -386,39 +371,6 @@ def save_jobs(jobs: List[Dict[str, Any]]):
|
||||
raise
|
||||
|
||||
|
||||
def _normalize_workdir(workdir: Optional[str]) -> Optional[str]:
|
||||
"""Normalize and validate a cron job workdir.
|
||||
|
||||
Rules:
|
||||
- Empty / None → None (feature off, preserves old behaviour).
|
||||
- ``~`` is expanded. Relative paths are rejected — cron jobs run detached
|
||||
from any shell cwd, so relative paths have no stable meaning.
|
||||
- The path must exist and be a directory at create/update time. We do
|
||||
NOT re-check at run time (a user might briefly unmount the dir; the
|
||||
scheduler will just fall back to old behaviour with a logged warning).
|
||||
|
||||
Returns the absolute path string, or None when disabled.
|
||||
Raises ValueError on invalid input.
|
||||
"""
|
||||
if workdir is None:
|
||||
return None
|
||||
raw = str(workdir).strip()
|
||||
if not raw:
|
||||
return None
|
||||
expanded = Path(raw).expanduser()
|
||||
if not expanded.is_absolute():
|
||||
raise ValueError(
|
||||
f"Cron workdir must be an absolute path (got {raw!r}). "
|
||||
f"Cron jobs run detached from any shell cwd, so relative paths are ambiguous."
|
||||
)
|
||||
resolved = expanded.resolve()
|
||||
if not resolved.exists():
|
||||
raise ValueError(f"Cron workdir does not exist: {resolved}")
|
||||
if not resolved.is_dir():
|
||||
raise ValueError(f"Cron workdir is not a directory: {resolved}")
|
||||
return str(resolved)
|
||||
|
||||
|
||||
def create_job(
|
||||
prompt: str,
|
||||
schedule: str,
|
||||
@@ -432,9 +384,6 @@ def create_job(
|
||||
provider: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
script: Optional[str] = None,
|
||||
context_from: Optional[Union[str, List[str]]] = None,
|
||||
enabled_toolsets: Optional[List[str]] = None,
|
||||
workdir: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Create a new cron job.
|
||||
@@ -454,18 +403,6 @@ def create_job(
|
||||
script: Optional path to a Python script whose stdout is injected into the
|
||||
prompt each run. The script runs before the agent turn, and its output
|
||||
is prepended as context. Useful for data collection / change detection.
|
||||
context_from: Optional job ID (or list of job IDs) whose most recent output
|
||||
is injected into the prompt as context before each run.
|
||||
Useful for chaining cron jobs: job A finds data, job B processes it.
|
||||
enabled_toolsets: Optional list of toolset names to restrict the agent to.
|
||||
When set, only tools from these toolsets are loaded, reducing
|
||||
token overhead. When omitted, all default tools are loaded.
|
||||
workdir: Optional absolute path. When set, the job runs as if launched
|
||||
from that directory: AGENTS.md / CLAUDE.md / .cursorrules from
|
||||
that directory are injected into the system prompt, and the
|
||||
terminal/file/code_exec tools use it as their working directory
|
||||
(via TERMINAL_CWD). When unset, the old behaviour is preserved
|
||||
(no context files injected, tools use the scheduler's cwd).
|
||||
|
||||
Returns:
|
||||
The created job dict
|
||||
@@ -496,17 +433,6 @@ def create_job(
|
||||
normalized_base_url = normalized_base_url or None
|
||||
normalized_script = str(script).strip() if isinstance(script, str) else None
|
||||
normalized_script = normalized_script or None
|
||||
normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None
|
||||
normalized_toolsets = normalized_toolsets or None
|
||||
normalized_workdir = _normalize_workdir(workdir)
|
||||
|
||||
# Normalize context_from: accept str or list of str, store as list or None
|
||||
if isinstance(context_from, str):
|
||||
context_from = [context_from.strip()] if context_from.strip() else None
|
||||
elif isinstance(context_from, list):
|
||||
context_from = [str(j).strip() for j in context_from if str(j).strip()] or None
|
||||
else:
|
||||
context_from = None
|
||||
|
||||
label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
|
||||
job = {
|
||||
@@ -519,7 +445,6 @@ def create_job(
|
||||
"provider": normalized_provider,
|
||||
"base_url": normalized_base_url,
|
||||
"script": normalized_script,
|
||||
"context_from": context_from,
|
||||
"schedule": parsed_schedule,
|
||||
"schedule_display": parsed_schedule.get("display", schedule),
|
||||
"repeat": {
|
||||
@@ -539,8 +464,6 @@ def create_job(
|
||||
# Delivery configuration
|
||||
"deliver": deliver,
|
||||
"origin": origin, # Tracks where job was created for "origin" delivery
|
||||
"enabled_toolsets": normalized_toolsets,
|
||||
"workdir": normalized_workdir,
|
||||
}
|
||||
|
||||
jobs = load_jobs()
|
||||
@@ -574,15 +497,6 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]
|
||||
if job["id"] != job_id:
|
||||
continue
|
||||
|
||||
# Validate / normalize workdir if present in updates. Empty string or
|
||||
# None both mean "clear the field" (restore old behaviour).
|
||||
if "workdir" in updates:
|
||||
_wd = updates["workdir"]
|
||||
if _wd in (None, "", False):
|
||||
updates["workdir"] = None
|
||||
else:
|
||||
updates["workdir"] = _normalize_workdir(_wd)
|
||||
|
||||
updated = _apply_skill_fields({**job, **updates})
|
||||
schedule_changed = "schedule" in updates
|
||||
|
||||
@@ -713,32 +627,10 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
|
||||
# Compute next run
|
||||
job["next_run_at"] = compute_next_run(job["schedule"], now)
|
||||
|
||||
# If no next run, decide whether this is terminal completion
|
||||
# (one-shot) or a transient failure (recurring schedule couldn't
|
||||
# compute — e.g. 'croniter' missing from the runtime env).
|
||||
# Recurring jobs must NEVER be silently disabled: that turns a
|
||||
# missing runtime dep into "job completed" and the user's
|
||||
# schedule quietly goes off. See issue #16265.
|
||||
# If no next run (one-shot completed), disable
|
||||
if job["next_run_at"] is None:
|
||||
kind = job.get("schedule", {}).get("kind")
|
||||
if kind in ("cron", "interval"):
|
||||
job["state"] = "error"
|
||||
if not job.get("last_error"):
|
||||
job["last_error"] = (
|
||||
"Failed to compute next run for recurring "
|
||||
"schedule (is the 'croniter' package "
|
||||
"installed in the gateway's Python env?)"
|
||||
)
|
||||
logger.error(
|
||||
"Job '%s' (%s) could not compute next_run_at; "
|
||||
"leaving enabled and marking state=error so the "
|
||||
"job is not silently disabled.",
|
||||
job.get("name", job["id"]),
|
||||
kind,
|
||||
)
|
||||
else:
|
||||
job["enabled"] = False
|
||||
job["state"] = "completed"
|
||||
job["enabled"] = False
|
||||
job["state"] = "completed"
|
||||
elif job.get("state") != "paused":
|
||||
job["state"] = "scheduled"
|
||||
|
||||
@@ -872,7 +764,7 @@ def save_job_output(job_id: str, output: str):
|
||||
f.write(output)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
atomic_replace(tmp_path, output_file)
|
||||
os.replace(tmp_path, output_file)
|
||||
_secure_file(output_file)
|
||||
except BaseException:
|
||||
try:
|
||||
|
||||
+53
-299
@@ -40,44 +40,13 @@ from hermes_time import now as _hermes_now
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None:
|
||||
"""Resolve the toolset list for a cron job.
|
||||
|
||||
Precedence:
|
||||
1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update).
|
||||
Keeps the agent's job-scoped toolset override intact — #6130.
|
||||
2. Per-platform ``hermes tools`` config for the ``cron`` platform.
|
||||
Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``)
|
||||
so users can gate cron toolsets globally without recreating every job.
|
||||
3. ``None`` on any lookup failure — AIAgent loads the full default set
|
||||
(legacy behavior before this change, preserved as the safety net).
|
||||
|
||||
_DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by
|
||||
``_get_platform_tools`` for unconfigured platforms, so fresh installs
|
||||
get cron WITHOUT ``moa`` by default (issue reported by Norbert —
|
||||
surprise $4.63 run).
|
||||
"""
|
||||
per_job = job.get("enabled_toolsets")
|
||||
if per_job:
|
||||
return per_job
|
||||
try:
|
||||
from hermes_cli.tools_config import _get_platform_tools # lazy: avoid heavy import at cron module load
|
||||
return sorted(_get_platform_tools(cfg or {}, "cron"))
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Cron toolset resolution failed, falling back to full default toolset: %s",
|
||||
exc,
|
||||
)
|
||||
return None
|
||||
|
||||
# Valid delivery platforms — used to validate user-supplied platform names
|
||||
# in cron delivery targets, preventing env var enumeration via crafted names.
|
||||
_KNOWN_DELIVERY_PLATFORMS = frozenset({
|
||||
"telegram", "discord", "slack", "whatsapp", "signal",
|
||||
"matrix", "mattermost", "homeassistant", "dingtalk", "feishu",
|
||||
"wecom", "wecom_callback", "weixin", "sms", "email", "webhook", "bluebubbles",
|
||||
"qqbot", "yuanbao",
|
||||
"qqbot",
|
||||
})
|
||||
|
||||
# Platforms that support a configured cron/notification home target, mapped to
|
||||
@@ -198,9 +167,7 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
|
||||
if resolved:
|
||||
parsed_chat_id, parsed_thread_id, resolved_is_explicit = _parse_target_ref(platform_key, resolved)
|
||||
if resolved_is_explicit:
|
||||
chat_id = parsed_chat_id
|
||||
if parsed_thread_id is not None:
|
||||
thread_id = parsed_thread_id
|
||||
chat_id, thread_id = parsed_chat_id, parsed_thread_id
|
||||
else:
|
||||
chat_id = resolved
|
||||
except Exception:
|
||||
@@ -233,32 +200,12 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
|
||||
}
|
||||
|
||||
|
||||
def _normalize_deliver_value(deliver) -> str:
|
||||
"""Normalize a stored/submitted ``deliver`` value to its canonical string form.
|
||||
|
||||
The contract is that ``deliver`` is a string (``"local"``, ``"origin"``,
|
||||
``"telegram"``, ``"telegram:-1001:17"``, or comma-separated combinations).
|
||||
Historically some callers — MCP clients passing an array, direct edits of
|
||||
``jobs.json``, or stale code paths — have stored a list/tuple like
|
||||
``["telegram"]``. ``str(["telegram"])`` would serialize to the literal
|
||||
string ``"['telegram']"``, which is not a known platform and fails
|
||||
resolution silently. Flatten lists/tuples into a comma-separated string
|
||||
so both forms work. Returns ``"local"`` for anything falsy.
|
||||
"""
|
||||
if deliver is None or deliver == "":
|
||||
return "local"
|
||||
if isinstance(deliver, (list, tuple)):
|
||||
parts = [str(p).strip() for p in deliver if str(p).strip()]
|
||||
return ",".join(parts) if parts else "local"
|
||||
return str(deliver)
|
||||
|
||||
|
||||
def _resolve_delivery_targets(job: dict) -> List[dict]:
|
||||
"""Resolve all concrete auto-delivery targets for a cron job (supports comma-separated deliver)."""
|
||||
deliver = _normalize_deliver_value(job.get("deliver", "local"))
|
||||
deliver = job.get("deliver", "local")
|
||||
if deliver == "local":
|
||||
return []
|
||||
parts = [p.strip() for p in deliver.split(",") if p.strip()]
|
||||
parts = [p.strip() for p in str(deliver).split(",") if p.strip()]
|
||||
seen = set()
|
||||
targets = []
|
||||
for part in parts:
|
||||
@@ -277,21 +224,13 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
|
||||
return targets[0] if targets else None
|
||||
|
||||
|
||||
# Media extension sets — audio routing is centralized in gateway.platforms.base
|
||||
# via should_send_media_as_audio() so Telegram-specific rules stay in one place.
|
||||
# Media extension sets — keep in sync with gateway/platforms/base.py:_process_message_background
|
||||
_AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a'})
|
||||
_VIDEO_EXTS = frozenset({'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'})
|
||||
_IMAGE_EXTS = frozenset({'.jpg', '.jpeg', '.png', '.webp', '.gif'})
|
||||
|
||||
|
||||
def _send_media_via_adapter(
|
||||
adapter,
|
||||
chat_id: str,
|
||||
media_files: list,
|
||||
metadata: dict | None,
|
||||
loop,
|
||||
job: dict,
|
||||
platform=None,
|
||||
) -> None:
|
||||
def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata: dict | None, loop, job: dict) -> None:
|
||||
"""Send extracted MEDIA files as native platform attachments via a live adapter.
|
||||
|
||||
Routes each file to the appropriate adapter method (send_voice, send_image_file,
|
||||
@@ -300,13 +239,10 @@ def _send_media_via_adapter(
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
from gateway.platforms.base import should_send_media_as_audio
|
||||
|
||||
for media_path, _is_voice in media_files:
|
||||
try:
|
||||
ext = Path(media_path).suffix.lower()
|
||||
route_platform = platform if platform is not None else getattr(adapter, "platform", None)
|
||||
if should_send_media_as_audio(route_platform, ext, is_voice=_is_voice):
|
||||
if ext in _AUDIO_EXTS:
|
||||
coro = adapter.send_voice(chat_id=chat_id, audio_path=media_path, metadata=metadata)
|
||||
elif ext in _VIDEO_EXTS:
|
||||
coro = adapter.send_video(chat_id=chat_id, video_path=media_path, metadata=metadata)
|
||||
@@ -352,6 +288,26 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
from tools.send_message_tool import _send_to_platform
|
||||
from gateway.config import load_gateway_config, Platform
|
||||
|
||||
platform_map = {
|
||||
"telegram": Platform.TELEGRAM,
|
||||
"discord": Platform.DISCORD,
|
||||
"slack": Platform.SLACK,
|
||||
"whatsapp": Platform.WHATSAPP,
|
||||
"signal": Platform.SIGNAL,
|
||||
"matrix": Platform.MATRIX,
|
||||
"mattermost": Platform.MATTERMOST,
|
||||
"homeassistant": Platform.HOMEASSISTANT,
|
||||
"dingtalk": Platform.DINGTALK,
|
||||
"feishu": Platform.FEISHU,
|
||||
"wecom": Platform.WECOM,
|
||||
"wecom_callback": Platform.WECOM_CALLBACK,
|
||||
"weixin": Platform.WEIXIN,
|
||||
"email": Platform.EMAIL,
|
||||
"sms": Platform.SMS,
|
||||
"bluebubbles": Platform.BLUEBUBBLES,
|
||||
"qqbot": Platform.QQBOT,
|
||||
}
|
||||
|
||||
# Optionally wrap the content with a header/footer so the user knows this
|
||||
# is a cron delivery. Wrapping is on by default; set cron.wrap_response: false
|
||||
# in config.yaml for clean output.
|
||||
@@ -408,23 +364,13 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
job["id"], platform_name, chat_id, thread_id,
|
||||
)
|
||||
|
||||
# Built-in names resolve to their enum member; plugin platform names
|
||||
# create dynamic members via Platform._missing_().
|
||||
try:
|
||||
platform = Platform(platform_name.lower())
|
||||
except (ValueError, KeyError):
|
||||
platform = platform_map.get(platform_name.lower())
|
||||
if not platform:
|
||||
msg = f"unknown platform '{platform_name}'"
|
||||
logger.warning("Job '%s': %s", job["id"], msg)
|
||||
delivery_errors.append(msg)
|
||||
continue
|
||||
|
||||
pconfig = config.platforms.get(platform)
|
||||
if not pconfig or not pconfig.enabled:
|
||||
msg = f"platform '{platform_name}' not configured/enabled"
|
||||
logger.warning("Job '%s': %s", job["id"], msg)
|
||||
delivery_errors.append(msg)
|
||||
continue
|
||||
|
||||
# Prefer the live adapter when the gateway is running — this supports E2EE
|
||||
# rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt.
|
||||
runtime_adapter = (adapters or {}).get(platform)
|
||||
@@ -455,15 +401,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
|
||||
# Send extracted media files as native attachments via the live adapter
|
||||
if adapter_ok and media_files:
|
||||
_send_media_via_adapter(
|
||||
runtime_adapter,
|
||||
chat_id,
|
||||
media_files,
|
||||
send_metadata,
|
||||
loop,
|
||||
job,
|
||||
platform=platform,
|
||||
)
|
||||
_send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job)
|
||||
|
||||
if adapter_ok:
|
||||
logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id)
|
||||
@@ -475,6 +413,13 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
)
|
||||
|
||||
if not delivered:
|
||||
pconfig = config.platforms.get(platform)
|
||||
if not pconfig or not pconfig.enabled:
|
||||
msg = f"platform '{platform_name}' not configured/enabled"
|
||||
logger.warning("Job '%s': %s", job["id"], msg)
|
||||
delivery_errors.append(msg)
|
||||
continue
|
||||
|
||||
# Standalone path: run the async send in a fresh event loop (safe from any thread)
|
||||
coro = _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)
|
||||
try:
|
||||
@@ -695,51 +640,10 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
|
||||
f"{prompt}"
|
||||
)
|
||||
|
||||
# Inject output from referenced cron jobs as context.
|
||||
context_from = job.get("context_from")
|
||||
if context_from:
|
||||
from cron.jobs import OUTPUT_DIR
|
||||
if isinstance(context_from, str):
|
||||
context_from = [context_from]
|
||||
for source_job_id in context_from:
|
||||
# Guard against path traversal — valid job IDs are 12-char hex strings
|
||||
if not source_job_id or not all(c in "0123456789abcdef" for c in source_job_id):
|
||||
logger.warning("context_from: skipping invalid job_id %r", source_job_id)
|
||||
continue
|
||||
try:
|
||||
job_output_dir = OUTPUT_DIR / source_job_id
|
||||
if not job_output_dir.exists():
|
||||
continue # silent skip — no output yet
|
||||
output_files = sorted(
|
||||
job_output_dir.glob("*.md"),
|
||||
key=lambda f: f.stat().st_mtime,
|
||||
reverse=True,
|
||||
)
|
||||
if not output_files:
|
||||
continue # silent skip — no output yet
|
||||
latest_output = output_files[0].read_text(encoding="utf-8").strip()
|
||||
# Truncate to 8K characters to avoid prompt bloat
|
||||
_MAX_CONTEXT_CHARS = 8000
|
||||
if len(latest_output) > _MAX_CONTEXT_CHARS:
|
||||
latest_output = latest_output[:_MAX_CONTEXT_CHARS] + "\n\n[... output truncated ...]"
|
||||
if latest_output:
|
||||
prompt = (
|
||||
f"## Output from job '{source_job_id}'\n"
|
||||
"The following is the most recent output from a preceding "
|
||||
"cron job. Use it as context for your analysis.\n\n"
|
||||
f"```\n{latest_output}\n```\n\n"
|
||||
f"{prompt}"
|
||||
)
|
||||
else:
|
||||
continue # silent skip — empty output
|
||||
except (OSError, PermissionError) as e:
|
||||
logger.warning("context_from: failed to read output for job %r: %s", source_job_id, e)
|
||||
# silent skip — do not pollute the prompt with error messages
|
||||
|
||||
# Always prepend cron execution guidance so the agent knows how
|
||||
# delivery works and can suppress delivery when appropriate.
|
||||
cron_hint = (
|
||||
"[IMPORTANT: You are running as a scheduled cron job. "
|
||||
"[SYSTEM: You are running as a scheduled cron job. "
|
||||
"DELIVERY: Your final response will be automatically delivered "
|
||||
"to the user — do NOT use send_message or try to deliver "
|
||||
"the output yourself. Just produce your report/output as your "
|
||||
@@ -775,7 +679,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
|
||||
parts.append("")
|
||||
parts.extend(
|
||||
[
|
||||
f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
|
||||
f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
|
||||
"",
|
||||
content,
|
||||
]
|
||||
@@ -783,7 +687,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
|
||||
|
||||
if skipped:
|
||||
notice = (
|
||||
f"[IMPORTANT: The following skill(s) were listed for this job but could not be found "
|
||||
f"[SYSTEM: The following skill(s) were listed for this job but could not be found "
|
||||
f"and were skipped: {', '.join(skipped)}. "
|
||||
f"Start your response with a brief notice so the user is aware, e.g.: "
|
||||
f"'⚠️ Skill(s) not found and skipped: {', '.join(skipped)}']"
|
||||
@@ -845,8 +749,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
logger.info("Running job '%s' (ID: %s)", job_name, job_id)
|
||||
logger.info("Prompt: %s", prompt[:100])
|
||||
|
||||
agent = None
|
||||
|
||||
# Mark this as a cron session so the approval system can apply cron_mode.
|
||||
# This env var is process-wide and persists for the lifetime of the
|
||||
# scheduler process — every job this process runs is a cron job.
|
||||
@@ -861,37 +763,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
chat_id=str(origin["chat_id"]) if origin else "",
|
||||
chat_name=origin.get("chat_name", "") if origin else "",
|
||||
)
|
||||
_cron_delivery_vars = (
|
||||
"HERMES_CRON_AUTO_DELIVER_PLATFORM",
|
||||
"HERMES_CRON_AUTO_DELIVER_CHAT_ID",
|
||||
"HERMES_CRON_AUTO_DELIVER_THREAD_ID",
|
||||
)
|
||||
for _var_name in _cron_delivery_vars:
|
||||
_VAR_MAP[_var_name].set("")
|
||||
|
||||
# Per-job working directory. When set (and validated at create/update
|
||||
# time), we point TERMINAL_CWD at it so:
|
||||
# - build_context_files_prompt() picks up AGENTS.md / CLAUDE.md /
|
||||
# .cursorrules from the job's project dir, AND
|
||||
# - the terminal, file, and code-exec tools run commands from there.
|
||||
#
|
||||
# tick() serializes workdir-jobs outside the parallel pool, so mutating
|
||||
# os.environ["TERMINAL_CWD"] here is safe for those jobs. For workdir-less
|
||||
# jobs we leave TERMINAL_CWD untouched — preserves the original behaviour
|
||||
# (skip_context_files=True, tools use whatever cwd the scheduler has).
|
||||
_job_workdir = (job.get("workdir") or "").strip() or None
|
||||
if _job_workdir and not Path(_job_workdir).is_dir():
|
||||
# Directory was removed between create-time validation and now. Log
|
||||
# and drop back to old behaviour rather than crashing the job.
|
||||
logger.warning(
|
||||
"Job '%s': configured workdir %r no longer exists — running without it",
|
||||
job_id, _job_workdir,
|
||||
)
|
||||
_job_workdir = None
|
||||
_prior_terminal_cwd = os.environ.get("TERMINAL_CWD", "_UNSET_")
|
||||
if _job_workdir:
|
||||
os.environ["TERMINAL_CWD"] = _job_workdir
|
||||
logger.info("Job '%s': using workdir %s", job_id, _job_workdir)
|
||||
|
||||
try:
|
||||
# Re-read .env and config.yaml fresh every run so provider/key
|
||||
@@ -906,11 +777,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
if delivery_target:
|
||||
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"])
|
||||
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"]))
|
||||
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(
|
||||
""
|
||||
if delivery_target.get("thread_id") is None
|
||||
else str(delivery_target["thread_id"])
|
||||
)
|
||||
if delivery_target.get("thread_id") is not None:
|
||||
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"]))
|
||||
|
||||
model = job.get("model") or os.getenv("HERMES_MODEL") or ""
|
||||
|
||||
@@ -972,7 +840,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
resolve_runtime_provider,
|
||||
format_runtime_provider_error,
|
||||
)
|
||||
from hermes_cli.auth import AuthError
|
||||
try:
|
||||
runtime_kwargs = {
|
||||
"requested": job.get("provider") or os.getenv("HERMES_INFERENCE_PROVIDER"),
|
||||
@@ -980,28 +847,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
if job.get("base_url"):
|
||||
runtime_kwargs["explicit_base_url"] = job.get("base_url")
|
||||
runtime = resolve_runtime_provider(**runtime_kwargs)
|
||||
except AuthError as auth_exc:
|
||||
# Primary provider auth failed — try fallback chain before giving up.
|
||||
logger.warning("Job '%s': primary auth failed (%s), trying fallback", job_id, auth_exc)
|
||||
fb = _cfg.get("fallback_providers") or _cfg.get("fallback_model")
|
||||
fb_list = (fb if isinstance(fb, list) else [fb]) if fb else []
|
||||
runtime = None
|
||||
for entry in fb_list:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
try:
|
||||
fb_kwargs = {"requested": entry.get("provider")}
|
||||
if entry.get("base_url"):
|
||||
fb_kwargs["explicit_base_url"] = entry["base_url"]
|
||||
if entry.get("api_key"):
|
||||
fb_kwargs["explicit_api_key"] = entry["api_key"]
|
||||
runtime = resolve_runtime_provider(**fb_kwargs)
|
||||
logger.info("Job '%s': fallback resolved to %s", job_id, runtime.get("provider"))
|
||||
break
|
||||
except Exception as fb_exc:
|
||||
logger.debug("Job '%s': fallback %s failed: %s", job_id, entry.get("provider"), fb_exc)
|
||||
if runtime is None:
|
||||
raise RuntimeError(format_runtime_provider_error(auth_exc)) from auth_exc
|
||||
except Exception as exc:
|
||||
message = format_runtime_provider_error(exc)
|
||||
raise RuntimeError(message) from exc
|
||||
@@ -1041,15 +886,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
providers_ignored=pr.get("ignore"),
|
||||
providers_order=pr.get("order"),
|
||||
provider_sort=pr.get("sort"),
|
||||
enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
|
||||
disabled_toolsets=["cronjob", "messaging", "clarify"],
|
||||
quiet_mode=True,
|
||||
# Cron jobs should always inherit the user's SOUL.md identity from
|
||||
# HERMES_HOME. When a workdir is configured, also inject project
|
||||
# context files (AGENTS.md / CLAUDE.md / .cursorrules) from there.
|
||||
# Without a workdir, keep cwd context discovery disabled.
|
||||
skip_context_files=not bool(_job_workdir),
|
||||
load_soul_identity=True,
|
||||
skip_context_files=True, # Don't inject SOUL.md/AGENTS.md from scheduler cwd
|
||||
skip_memory=True, # Cron system prompts would corrupt user representations
|
||||
platform="cron",
|
||||
session_id=_cron_session_id,
|
||||
@@ -1064,18 +903,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
#
|
||||
# Uses the agent's built-in activity tracker (updated by
|
||||
# _touch_activity() on every tool call, API call, and stream delta).
|
||||
_raw_cron_timeout = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
|
||||
if _raw_cron_timeout:
|
||||
try:
|
||||
_cron_timeout = float(_raw_cron_timeout)
|
||||
except (ValueError, TypeError):
|
||||
logger.warning(
|
||||
"Invalid HERMES_CRON_TIMEOUT=%r; using default 600s",
|
||||
_raw_cron_timeout,
|
||||
)
|
||||
_cron_timeout = 600.0
|
||||
else:
|
||||
_cron_timeout = 600.0
|
||||
_cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
|
||||
_cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
|
||||
_POLL_INTERVAL = 5.0
|
||||
_cron_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
|
||||
@@ -1144,27 +972,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
f"— last activity: {_last_desc}"
|
||||
)
|
||||
|
||||
# Guard against non-dict returns from run_conversation under error conditions
|
||||
if not isinstance(result, dict):
|
||||
raise RuntimeError(
|
||||
f"agent.run_conversation returned {type(result).__name__} instead of dict: {result!r}"
|
||||
)
|
||||
|
||||
# If the agent itself reported failure (e.g. all retries exhausted on
|
||||
# API errors, model abort, mid-run interrupt), do not silently mark the
|
||||
# job as successful. run_agent populates `failed=True`/`completed=False`
|
||||
# on these paths and may put the error into `final_response`, which
|
||||
# would otherwise be delivered as if it were the agent's reply and the
|
||||
# job's `last_status` set to "ok". Raise so the except handler below
|
||||
# builds the proper failure tuple. (issue #17855)
|
||||
if result.get("failed") is True or result.get("completed") is False:
|
||||
_err_text = (
|
||||
result.get("error")
|
||||
or (result.get("final_response") or "").strip()
|
||||
or "agent reported failure"
|
||||
)
|
||||
raise RuntimeError(_err_text)
|
||||
|
||||
final_response = result.get("final_response", "") or ""
|
||||
# Strip leaked placeholder text that upstream may inject on empty completions.
|
||||
if final_response.strip() == "(No response generated)":
|
||||
@@ -1214,18 +1021,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
return False, output, "", error_msg
|
||||
|
||||
finally:
|
||||
# Restore TERMINAL_CWD to whatever it was before this job ran. We
|
||||
# only ever mutate it when the job has a workdir; see the setup block
|
||||
# at the top of run_job for the serialization guarantee.
|
||||
if _job_workdir:
|
||||
if _prior_terminal_cwd == "_UNSET_":
|
||||
os.environ.pop("TERMINAL_CWD", None)
|
||||
else:
|
||||
os.environ["TERMINAL_CWD"] = _prior_terminal_cwd
|
||||
# Clean up ContextVar session/delivery state for this job.
|
||||
clear_session_vars(_ctx_tokens)
|
||||
for _var_name in _cron_delivery_vars:
|
||||
_VAR_MAP[_var_name].set("")
|
||||
if _session_db:
|
||||
try:
|
||||
_session_db.end_session(_cron_session_id, "cron_complete")
|
||||
@@ -1235,24 +1032,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
_session_db.close()
|
||||
except (Exception, KeyboardInterrupt) as e:
|
||||
logger.debug("Job '%s': failed to close SQLite session store: %s", job_id, e)
|
||||
# Release subprocesses, terminal sandboxes, browser daemons, and the
|
||||
# main OpenAI/httpx client held by this ephemeral cron agent. Without
|
||||
# this, a gateway that ticks cron every N minutes leaks fds per job
|
||||
# until it hits EMFILE (#10200 / "too many open files").
|
||||
try:
|
||||
if agent is not None:
|
||||
agent.close()
|
||||
except (Exception, KeyboardInterrupt) as e:
|
||||
logger.debug("Job '%s': failed to close agent resources: %s", job_id, e)
|
||||
# Each cron run spins up a short-lived worker thread whose event loop
|
||||
# dies as soon as the ``ThreadPoolExecutor`` shuts down. Any async
|
||||
# httpx clients cached under that loop are now unusable — reap them
|
||||
# so their transports don't accumulate in the process-global cache.
|
||||
try:
|
||||
from agent.auxiliary_client import cleanup_stale_async_clients
|
||||
cleanup_stale_async_clients()
|
||||
except Exception as e:
|
||||
logger.debug("Job '%s': failed to reap stale auxiliary clients: %s", job_id, e)
|
||||
|
||||
|
||||
def tick(verbose: bool = True, adapters=None, loop=None) -> int:
|
||||
@@ -1369,39 +1148,14 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
|
||||
mark_job_run(job["id"], False, str(e))
|
||||
return False
|
||||
|
||||
# Partition due jobs: those with a per-job workdir mutate
|
||||
# os.environ["TERMINAL_CWD"] inside run_job, which is process-global —
|
||||
# so they MUST run sequentially to avoid corrupting each other. Jobs
|
||||
# without a workdir leave env untouched and stay parallel-safe.
|
||||
workdir_jobs = [j for j in due_jobs if (j.get("workdir") or "").strip()]
|
||||
parallel_jobs = [j for j in due_jobs if not (j.get("workdir") or "").strip()]
|
||||
|
||||
_results: list = []
|
||||
|
||||
# Sequential pass for workdir jobs.
|
||||
for job in workdir_jobs:
|
||||
_ctx = contextvars.copy_context()
|
||||
_results.append(_ctx.run(_process_job, job))
|
||||
|
||||
# Parallel pass for the rest — same behaviour as before.
|
||||
if parallel_jobs:
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
|
||||
_futures = []
|
||||
for job in parallel_jobs:
|
||||
_ctx = contextvars.copy_context()
|
||||
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
|
||||
_results.extend(f.result() for f in _futures)
|
||||
|
||||
# Best-effort sweep of MCP stdio subprocesses that survived their
|
||||
# session teardown during this tick. Runs AFTER every job has
|
||||
# finished so active sessions (including live user chats) are
|
||||
# never touched — only PIDs explicitly detected as orphans in
|
||||
# tools.mcp_tool._run_stdio's finally block are reaped.
|
||||
try:
|
||||
from tools.mcp_tool import _kill_orphaned_mcp_children
|
||||
_kill_orphaned_mcp_children()
|
||||
except Exception as _e:
|
||||
logger.debug("Post-tick MCP orphan cleanup failed: %s", _e)
|
||||
# Run all due jobs concurrently, each in its own ContextVar copy
|
||||
# so session/delivery state stays isolated per-thread.
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
|
||||
_futures = []
|
||||
for job in due_jobs:
|
||||
_ctx = contextvars.copy_context()
|
||||
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
|
||||
_results = [f.result() for f in _futures]
|
||||
|
||||
return sum(_results)
|
||||
finally:
|
||||
|
||||
@@ -1,59 +0,0 @@
|
||||
#
|
||||
# docker-compose.yml for Hermes Agent
|
||||
#
|
||||
# Usage:
|
||||
# HERMES_UID=$(id -u) HERMES_GID=$(id -g) docker compose up -d
|
||||
#
|
||||
# Set HERMES_UID / HERMES_GID to the host user that owns ~/.hermes so
|
||||
# files created inside the container stay readable/writable on the host.
|
||||
# The entrypoint remaps the internal `hermes` user to these values via
|
||||
# usermod/groupmod + gosu.
|
||||
#
|
||||
# Security notes:
|
||||
# - The dashboard service binds to 127.0.0.1 by default. It stores API
|
||||
# keys; exposing it on LAN without auth is unsafe. If you want remote
|
||||
# access, use an SSH tunnel or put it behind a reverse proxy that
|
||||
# adds authentication — do NOT pass --insecure --host 0.0.0.0.
|
||||
# - The gateway's API server is off unless you uncomment API_SERVER_KEY
|
||||
# and API_SERVER_HOST. See docs/user-guide/api-server.md before doing
|
||||
# this on an internet-facing host.
|
||||
#
|
||||
services:
|
||||
gateway:
|
||||
build: .
|
||||
image: hermes-agent
|
||||
container_name: hermes
|
||||
restart: unless-stopped
|
||||
network_mode: host
|
||||
volumes:
|
||||
- ~/.hermes:/opt/data
|
||||
environment:
|
||||
- HERMES_UID=${HERMES_UID:-10000}
|
||||
- HERMES_GID=${HERMES_GID:-10000}
|
||||
# To expose the OpenAI-compatible API server beyond localhost,
|
||||
# uncomment BOTH lines (API_SERVER_KEY is mandatory for auth):
|
||||
# - API_SERVER_HOST=0.0.0.0
|
||||
# - API_SERVER_KEY=${API_SERVER_KEY}
|
||||
# Microsoft Teams — uncomment and fill in to enable Teams gateway.
|
||||
# Register your bot at https://dev.botframework.com/ to get these values.
|
||||
# - TEAMS_CLIENT_ID=${TEAMS_CLIENT_ID}
|
||||
# - TEAMS_CLIENT_SECRET=${TEAMS_CLIENT_SECRET}
|
||||
# - TEAMS_TENANT_ID=${TEAMS_TENANT_ID}
|
||||
# - TEAMS_ALLOWED_USERS=${TEAMS_ALLOWED_USERS}
|
||||
# - TEAMS_PORT=3978
|
||||
command: ["gateway", "run"]
|
||||
|
||||
dashboard:
|
||||
image: hermes-agent
|
||||
container_name: hermes-dashboard
|
||||
restart: unless-stopped
|
||||
network_mode: host
|
||||
depends_on:
|
||||
- gateway
|
||||
volumes:
|
||||
- ~/.hermes:/opt/data
|
||||
environment:
|
||||
- HERMES_UID=${HERMES_UID:-10000}
|
||||
- HERMES_GID=${HERMES_GID:-10000}
|
||||
# Localhost-only. For remote access, tunnel via `ssh -L 9119:localhost:9119`.
|
||||
command: ["dashboard", "--host", "127.0.0.1", "--no-open"]
|
||||
+2
-20
@@ -22,18 +22,9 @@ if [ "$(id -u)" = "0" ]; then
|
||||
groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Fix ownership of the data volume. When HERMES_UID remaps the hermes user,
|
||||
# files created by previous runs (under the old UID) become inaccessible.
|
||||
# Always chown -R when UID was remapped; otherwise only if top-level is wrong.
|
||||
actual_hermes_uid=$(id -u hermes)
|
||||
needs_chown=false
|
||||
if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "10000" ]; then
|
||||
needs_chown=true
|
||||
elif [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
|
||||
needs_chown=true
|
||||
fi
|
||||
if [ "$needs_chown" = true ]; then
|
||||
echo "Fixing ownership of $HERMES_HOME to hermes ($actual_hermes_uid)"
|
||||
if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
|
||||
echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing"
|
||||
# In rootless Podman the container's "root" is mapped to an unprivileged
|
||||
# host UID — chown will fail. That's fine: the volume is already owned
|
||||
# by the mapped user on the host side.
|
||||
@@ -41,15 +32,6 @@ if [ "$(id -u)" = "0" ]; then
|
||||
echo "Warning: chown failed (rootless container?) — continuing anyway"
|
||||
fi
|
||||
|
||||
# Ensure config.yaml is readable by the hermes runtime user even if it was
|
||||
# edited on the host after initial ownership setup. Must run here (as root)
|
||||
# rather than after the gosu drop, otherwise a non-root caller like
|
||||
# `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
|
||||
if [ -f "$HERMES_HOME/config.yaml" ]; then
|
||||
chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
echo "Dropping root privileges"
|
||||
exec gosu hermes "$0" "$@"
|
||||
fi
|
||||
|
||||
Binary file not shown.
@@ -36,7 +36,6 @@
|
||||
|
||||
imports = [
|
||||
./nix/packages.nix
|
||||
./nix/overlays.nix
|
||||
./nix/nixosModules.nix
|
||||
./nix/checks.nix
|
||||
./nix/devShell.nix
|
||||
|
||||
@@ -0,0 +1,85 @@
|
||||
"""Built-in boot-md hook — run ~/.hermes/BOOT.md on gateway startup.
|
||||
|
||||
This hook is always registered. It silently skips if no BOOT.md exists.
|
||||
To activate, create ``~/.hermes/BOOT.md`` with instructions for the
|
||||
agent to execute on every gateway restart.
|
||||
|
||||
Example BOOT.md::
|
||||
|
||||
# Startup Checklist
|
||||
|
||||
1. Check if any cron jobs failed overnight
|
||||
2. Send a status update to Discord #general
|
||||
3. If there are errors in /opt/app/deploy.log, summarize them
|
||||
|
||||
The agent runs in a background thread so it doesn't block gateway
|
||||
startup. If nothing needs attention, it replies with [SILENT] to
|
||||
suppress delivery.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import threading
|
||||
|
||||
logger = logging.getLogger("hooks.boot-md")
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
HERMES_HOME = get_hermes_home()
|
||||
BOOT_FILE = HERMES_HOME / "BOOT.md"
|
||||
|
||||
|
||||
def _build_boot_prompt(content: str) -> str:
|
||||
"""Wrap BOOT.md content in a system-level instruction."""
|
||||
return (
|
||||
"You are running a startup boot checklist. Follow the BOOT.md "
|
||||
"instructions below exactly.\n\n"
|
||||
"---\n"
|
||||
f"{content}\n"
|
||||
"---\n\n"
|
||||
"Execute each instruction. If you need to send a message to a "
|
||||
"platform, use the send_message tool.\n"
|
||||
"If nothing needs attention and there is nothing to report, "
|
||||
"reply with ONLY: [SILENT]"
|
||||
)
|
||||
|
||||
|
||||
def _run_boot_agent(content: str) -> None:
|
||||
"""Spawn a one-shot agent session to execute the boot instructions."""
|
||||
try:
|
||||
from run_agent import AIAgent
|
||||
|
||||
prompt = _build_boot_prompt(content)
|
||||
agent = AIAgent(
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
max_iterations=20,
|
||||
)
|
||||
result = agent.run_conversation(prompt)
|
||||
response = result.get("final_response", "")
|
||||
if response and "[SILENT]" not in response:
|
||||
logger.info("boot-md completed: %s", response[:200])
|
||||
else:
|
||||
logger.info("boot-md completed (nothing to report)")
|
||||
except Exception as e:
|
||||
logger.error("boot-md agent failed: %s", e)
|
||||
|
||||
|
||||
async def handle(event_type: str, context: dict) -> None:
|
||||
"""Gateway startup handler — run BOOT.md if it exists."""
|
||||
if not BOOT_FILE.exists():
|
||||
return
|
||||
|
||||
content = BOOT_FILE.read_text(encoding="utf-8").strip()
|
||||
if not content:
|
||||
return
|
||||
|
||||
logger.info("Running BOOT.md (%d chars)", len(content))
|
||||
|
||||
# Run in a background thread so we don't block gateway startup.
|
||||
thread = threading.Thread(
|
||||
target=_run_boot_agent,
|
||||
args=(content,),
|
||||
name="boot-md",
|
||||
daemon=True,
|
||||
)
|
||||
thread.start()
|
||||
@@ -57,7 +57,7 @@ def _session_entry_name(origin: Dict[str, Any]) -> str:
|
||||
# Build / refresh
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
|
||||
def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Build a channel directory from connected platform adapters and session data.
|
||||
|
||||
@@ -72,7 +72,7 @@ async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
|
||||
if platform == Platform.DISCORD:
|
||||
platforms["discord"] = _build_discord(adapter)
|
||||
elif platform == Platform.SLACK:
|
||||
platforms["slack"] = await _build_slack(adapter)
|
||||
platforms["slack"] = _build_slack(adapter)
|
||||
except Exception as e:
|
||||
logger.warning("Channel directory: failed to build %s: %s", platform.value, e)
|
||||
|
||||
@@ -86,16 +86,6 @@ async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
|
||||
continue
|
||||
platforms[plat_name] = _build_from_sessions(plat_name)
|
||||
|
||||
# Include plugin-registered platforms (dynamic enum members aren't in
|
||||
# Platform.__members__, so the loop above misses them).
|
||||
try:
|
||||
from gateway.platform_registry import platform_registry
|
||||
for entry in platform_registry.plugin_entries():
|
||||
if entry.name not in _SKIP_SESSION_DISCOVERY and entry.name not in platforms:
|
||||
platforms[entry.name] = _build_from_sessions(entry.name)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
directory = {
|
||||
"updated_at": datetime.now().isoformat(),
|
||||
"platforms": platforms,
|
||||
@@ -146,66 +136,21 @@ def _build_discord(adapter) -> List[Dict[str, str]]:
|
||||
return channels
|
||||
|
||||
|
||||
async def _build_slack(adapter) -> List[Dict[str, Any]]:
|
||||
"""List Slack channels the bot has joined across all workspaces.
|
||||
|
||||
Uses ``users.conversations`` against each workspace's web client. Pulls
|
||||
public + private channels the bot is a member of, then merges in DMs
|
||||
discovered from session history (IMs aren't useful to enumerate
|
||||
proactively).
|
||||
"""
|
||||
team_clients = getattr(adapter, "_team_clients", None) or {}
|
||||
if not team_clients:
|
||||
def _build_slack(adapter) -> List[Dict[str, str]]:
|
||||
"""List Slack channels the bot has joined."""
|
||||
# Slack adapter may expose a web client
|
||||
client = getattr(adapter, "_app", None) or getattr(adapter, "_client", None)
|
||||
if not client:
|
||||
return _build_from_sessions("slack")
|
||||
|
||||
channels: List[Dict[str, Any]] = []
|
||||
seen_ids: set = set()
|
||||
try:
|
||||
from tools.send_message_tool import _send_slack # noqa: F401
|
||||
# Use the Slack Web API directly if available
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
for team_id, client in team_clients.items():
|
||||
try:
|
||||
cursor: Optional[str] = None
|
||||
for _page in range(20): # safety cap on pagination
|
||||
response = await client.users_conversations(
|
||||
types="public_channel,private_channel",
|
||||
exclude_archived=True,
|
||||
limit=200,
|
||||
cursor=cursor,
|
||||
)
|
||||
if not response.get("ok"):
|
||||
logger.warning(
|
||||
"Channel directory: users.conversations not ok for team %s: %s",
|
||||
team_id,
|
||||
response.get("error", "unknown"),
|
||||
)
|
||||
break
|
||||
for ch in response.get("channels", []):
|
||||
cid = ch.get("id")
|
||||
name = ch.get("name")
|
||||
if not cid or not name or cid in seen_ids:
|
||||
continue
|
||||
seen_ids.add(cid)
|
||||
channels.append({
|
||||
"id": cid,
|
||||
"name": name,
|
||||
"type": "private" if ch.get("is_private") else "channel",
|
||||
})
|
||||
cursor = (response.get("response_metadata") or {}).get("next_cursor")
|
||||
if not cursor:
|
||||
break
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Channel directory: failed to list Slack channels for team %s: %s",
|
||||
team_id, e,
|
||||
)
|
||||
continue
|
||||
|
||||
# Merge in DM/group entries discovered from session history.
|
||||
for entry in _build_from_sessions("slack"):
|
||||
if entry.get("id") not in seen_ids:
|
||||
channels.append(entry)
|
||||
seen_ids.add(entry.get("id"))
|
||||
|
||||
return channels
|
||||
# Fallback to session data
|
||||
return _build_from_sessions("slack")
|
||||
|
||||
|
||||
def _build_from_sessions(platform_name: str) -> List[Dict[str, str]]:
|
||||
@@ -278,14 +223,6 @@ def resolve_channel_name(platform_name: str, name: str) -> Optional[str]:
|
||||
if not channels:
|
||||
return None
|
||||
|
||||
# 0. Exact ID match — case-sensitive, no normalization. Lets callers pass
|
||||
# raw platform IDs (e.g. Slack "C0B0QV5434G") even when the format guard
|
||||
# in _parse_target_ref hasn't recognized them as explicit.
|
||||
raw = name.strip()
|
||||
for ch in channels:
|
||||
if ch.get("id") == raw:
|
||||
return ch["id"]
|
||||
|
||||
query = _normalize_channel_query(name)
|
||||
|
||||
# 1. Exact name match, including the display labels shown by send_message(action="list")
|
||||
|
||||
+58
-277
@@ -13,7 +13,7 @@ import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Optional, Any, Callable
|
||||
from typing import Dict, List, Optional, Any
|
||||
from enum import Enum
|
||||
|
||||
from hermes_cli.config import get_hermes_home
|
||||
@@ -45,19 +45,8 @@ def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> st
|
||||
return default
|
||||
|
||||
|
||||
# Module-level cache for bundled platform plugin names (lives outside the
|
||||
# enum so it doesn't become an accidental enum member).
|
||||
_Platform__bundled_plugin_names: Optional[set] = None
|
||||
|
||||
|
||||
class Platform(Enum):
|
||||
"""Supported messaging platforms.
|
||||
|
||||
Built-in platforms have explicit members. Plugin platforms use dynamic
|
||||
members created on-demand by ``_missing_()`` so that
|
||||
``Platform("irc")`` works without modifying this enum. Dynamic members
|
||||
are cached in ``_value2member_map_`` for identity-stable comparisons.
|
||||
"""
|
||||
"""Supported messaging platforms."""
|
||||
LOCAL = "local"
|
||||
TELEGRAM = "telegram"
|
||||
DISCORD = "discord"
|
||||
@@ -78,77 +67,6 @@ class Platform(Enum):
|
||||
WEIXIN = "weixin"
|
||||
BLUEBUBBLES = "bluebubbles"
|
||||
QQBOT = "qqbot"
|
||||
YUANBAO = "yuanbao"
|
||||
@classmethod
|
||||
def _missing_(cls, value):
|
||||
"""Accept unknown platform names only for known plugin adapters.
|
||||
|
||||
Creates a pseudo-member cached in ``_value2member_map_`` so that
|
||||
``Platform("irc") is Platform("irc")`` holds True (identity-stable).
|
||||
Arbitrary strings are rejected to prevent enum pollution.
|
||||
"""
|
||||
if not isinstance(value, str) or not value.strip():
|
||||
return None
|
||||
# Normalise to lowercase to avoid case mismatches in config
|
||||
value = value.strip().lower()
|
||||
# Check cache first (another call may have created it already)
|
||||
if value in cls._value2member_map_:
|
||||
return cls._value2member_map_[value]
|
||||
|
||||
# Only create pseudo-members for bundled plugin platforms (discovered
|
||||
# via filesystem scan) or runtime-registered plugin platforms.
|
||||
global _Platform__bundled_plugin_names
|
||||
if _Platform__bundled_plugin_names is None:
|
||||
_Platform__bundled_plugin_names = cls._scan_bundled_plugin_platforms()
|
||||
if value in _Platform__bundled_plugin_names:
|
||||
pseudo = object.__new__(cls)
|
||||
pseudo._value_ = value
|
||||
pseudo._name_ = value.upper().replace("-", "_").replace(" ", "_")
|
||||
cls._value2member_map_[value] = pseudo
|
||||
cls._member_map_[pseudo._name_] = pseudo
|
||||
return pseudo
|
||||
|
||||
# Runtime-registered plugins (e.g. user-installed, discovered after
|
||||
# the enum was defined).
|
||||
try:
|
||||
from gateway.platform_registry import platform_registry
|
||||
if platform_registry.is_registered(value):
|
||||
pseudo = object.__new__(cls)
|
||||
pseudo._value_ = value
|
||||
pseudo._name_ = value.upper().replace("-", "_").replace(" ", "_")
|
||||
cls._value2member_map_[value] = pseudo
|
||||
cls._member_map_[pseudo._name_] = pseudo
|
||||
return pseudo
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def _scan_bundled_plugin_platforms(cls) -> set:
|
||||
"""Return names of bundled platform plugins under ``plugins/platforms/``."""
|
||||
names: set = set()
|
||||
try:
|
||||
platforms_dir = Path(__file__).parent.parent / "plugins" / "platforms"
|
||||
if platforms_dir.is_dir():
|
||||
for child in platforms_dir.iterdir():
|
||||
if (
|
||||
child.is_dir()
|
||||
and (child / "__init__.py").exists()
|
||||
and (
|
||||
(child / "plugin.yaml").exists()
|
||||
or (child / "plugin.yml").exists()
|
||||
)
|
||||
):
|
||||
names.add(child.name.lower())
|
||||
except Exception:
|
||||
pass
|
||||
return names
|
||||
|
||||
|
||||
# Snapshot of built-in platform values before any dynamic _missing_ lookups.
|
||||
# Used to distinguish real platforms from arbitrary strings.
|
||||
_BUILTIN_PLATFORM_VALUES = frozenset(m.value for m in Platform.__members__.values())
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -217,7 +135,7 @@ class SessionResetPolicy:
|
||||
mode=mode if mode is not None else "both",
|
||||
at_hour=at_hour if at_hour is not None else 4,
|
||||
idle_minutes=idle_minutes if idle_minutes is not None else 1440,
|
||||
notify=_coerce_bool(notify, True),
|
||||
notify=notify if notify is not None else True,
|
||||
notify_exclude_platforms=tuple(exclude) if exclude is not None else ("api_server", "webhook"),
|
||||
)
|
||||
|
||||
@@ -260,7 +178,7 @@ class PlatformConfig:
|
||||
home_channel = HomeChannel.from_dict(data["home_channel"])
|
||||
|
||||
return cls(
|
||||
enabled=_coerce_bool(data.get("enabled"), False),
|
||||
enabled=data.get("enabled", False),
|
||||
token=data.get("token"),
|
||||
api_key=data.get("api_key"),
|
||||
home_channel=home_channel,
|
||||
@@ -277,14 +195,6 @@ class StreamingConfig:
|
||||
edit_interval: float = 1.0 # Seconds between message edits (Telegram rate-limits at ~1/s)
|
||||
buffer_threshold: int = 40 # Chars before forcing an edit
|
||||
cursor: str = " ▉" # Cursor shown during streaming
|
||||
# Ported from openclaw/openclaw#72038. When >0, the final edit for
|
||||
# a long-running streamed response is delivered as a fresh message
|
||||
# if the original preview has been visible for at least this many
|
||||
# seconds, so the platform's visible timestamp reflects completion
|
||||
# time instead of the preview creation time. Currently applied to
|
||||
# Telegram only (other platforms ignore the setting). Default 60s
|
||||
# matches the OpenClaw rollout. Set to 0 to disable.
|
||||
fresh_final_after_seconds: float = 60.0
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
@@ -293,7 +203,6 @@ class StreamingConfig:
|
||||
"edit_interval": self.edit_interval,
|
||||
"buffer_threshold": self.buffer_threshold,
|
||||
"cursor": self.cursor,
|
||||
"fresh_final_after_seconds": self.fresh_final_after_seconds,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
@@ -306,50 +215,9 @@ class StreamingConfig:
|
||||
edit_interval=float(data.get("edit_interval", 1.0)),
|
||||
buffer_threshold=int(data.get("buffer_threshold", 40)),
|
||||
cursor=data.get("cursor", " ▉"),
|
||||
fresh_final_after_seconds=float(
|
||||
data.get("fresh_final_after_seconds", 60.0)
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Built-in platform connection checkers
|
||||
# -----------------------------------------------------------------------------
|
||||
# Each callable receives a ``PlatformConfig`` and returns ``True`` when the
|
||||
# platform is sufficiently configured to be considered "connected". Platforms
|
||||
# that rely on the generic ``token or api_key`` check (Telegram, Discord,
|
||||
# Slack, Matrix, Mattermost, HomeAssistant) do not need an entry here.
|
||||
_PLATFORM_CONNECTED_CHECKERS: dict[Platform, Callable[[PlatformConfig], bool]] = {
|
||||
Platform.WEIXIN: lambda cfg: bool(
|
||||
cfg.extra.get("account_id") and (cfg.token or cfg.extra.get("token"))
|
||||
),
|
||||
Platform.WHATSAPP: lambda cfg: True, # bridge handles auth
|
||||
Platform.SIGNAL: lambda cfg: bool(cfg.extra.get("http_url")),
|
||||
Platform.EMAIL: lambda cfg: bool(cfg.extra.get("address")),
|
||||
Platform.SMS: lambda cfg: bool(os.getenv("TWILIO_ACCOUNT_SID")),
|
||||
Platform.API_SERVER: lambda cfg: True,
|
||||
Platform.WEBHOOK: lambda cfg: True,
|
||||
Platform.FEISHU: lambda cfg: bool(cfg.extra.get("app_id")),
|
||||
Platform.WECOM: lambda cfg: bool(cfg.extra.get("bot_id")),
|
||||
Platform.WECOM_CALLBACK: lambda cfg: bool(
|
||||
cfg.extra.get("corp_id") or cfg.extra.get("apps")
|
||||
),
|
||||
Platform.BLUEBUBBLES: lambda cfg: bool(
|
||||
cfg.extra.get("server_url") and cfg.extra.get("password")
|
||||
),
|
||||
Platform.QQBOT: lambda cfg: bool(
|
||||
cfg.extra.get("app_id") and cfg.extra.get("client_secret")
|
||||
),
|
||||
Platform.YUANBAO: lambda cfg: bool(
|
||||
cfg.extra.get("app_id") and cfg.extra.get("app_secret")
|
||||
),
|
||||
Platform.DINGTALK: lambda cfg: bool(
|
||||
(cfg.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID"))
|
||||
and (cfg.extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET"))
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class GatewayConfig:
|
||||
"""
|
||||
@@ -403,43 +271,58 @@ class GatewayConfig:
|
||||
for platform, config in self.platforms.items():
|
||||
if not config.enabled:
|
||||
continue
|
||||
if self._is_platform_connected(platform, config):
|
||||
# Weixin requires both a token and an account_id
|
||||
if platform == Platform.WEIXIN:
|
||||
if config.extra.get("account_id") and (config.token or config.extra.get("token")):
|
||||
connected.append(platform)
|
||||
continue
|
||||
# Platforms that use token/api_key auth
|
||||
if config.token or config.api_key:
|
||||
connected.append(platform)
|
||||
# WhatsApp uses enabled flag only (bridge handles auth)
|
||||
elif platform == Platform.WHATSAPP:
|
||||
connected.append(platform)
|
||||
# Signal uses extra dict for config (http_url + account)
|
||||
elif platform == Platform.SIGNAL and config.extra.get("http_url"):
|
||||
connected.append(platform)
|
||||
# Email uses extra dict for config (address + imap_host + smtp_host)
|
||||
elif platform == Platform.EMAIL and config.extra.get("address"):
|
||||
connected.append(platform)
|
||||
# SMS uses api_key (Twilio auth token) — SID checked via env
|
||||
elif platform == Platform.SMS and os.getenv("TWILIO_ACCOUNT_SID"):
|
||||
connected.append(platform)
|
||||
# API Server uses enabled flag only (no token needed)
|
||||
elif platform == Platform.API_SERVER:
|
||||
connected.append(platform)
|
||||
# Webhook uses enabled flag only (secrets are per-route)
|
||||
elif platform == Platform.WEBHOOK:
|
||||
connected.append(platform)
|
||||
# Feishu uses extra dict for app credentials
|
||||
elif platform == Platform.FEISHU and config.extra.get("app_id"):
|
||||
connected.append(platform)
|
||||
# WeCom bot mode uses extra dict for bot credentials
|
||||
elif platform == Platform.WECOM and config.extra.get("bot_id"):
|
||||
connected.append(platform)
|
||||
# WeCom callback mode uses corp_id or apps list
|
||||
elif platform == Platform.WECOM_CALLBACK and (
|
||||
config.extra.get("corp_id") or config.extra.get("apps")
|
||||
):
|
||||
connected.append(platform)
|
||||
# BlueBubbles uses extra dict for local server config
|
||||
elif platform == Platform.BLUEBUBBLES and config.extra.get("server_url") and config.extra.get("password"):
|
||||
connected.append(platform)
|
||||
# QQBot uses extra dict for app credentials
|
||||
elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"):
|
||||
connected.append(platform)
|
||||
# DingTalk uses client_id/client_secret from config.extra or env vars
|
||||
elif platform == Platform.DINGTALK and (
|
||||
config.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID")
|
||||
) and (
|
||||
config.extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET")
|
||||
):
|
||||
connected.append(platform)
|
||||
|
||||
return connected
|
||||
|
||||
def _is_platform_connected(self, platform: Platform, config: PlatformConfig) -> bool:
|
||||
"""Check whether a single platform is sufficiently configured."""
|
||||
# Weixin requires both a token and an account_id (checked first so
|
||||
# the generic token branch doesn't let it through without account_id).
|
||||
if platform == Platform.WEIXIN:
|
||||
return bool(
|
||||
config.extra.get("account_id")
|
||||
and (config.token or config.extra.get("token"))
|
||||
)
|
||||
|
||||
# Generic token/api_key auth covers Telegram, Discord, Slack, etc.
|
||||
if config.token or config.api_key:
|
||||
return True
|
||||
|
||||
# Platform-specific check
|
||||
checker = _PLATFORM_CONNECTED_CHECKERS.get(platform)
|
||||
if checker is not None:
|
||||
return checker(config)
|
||||
|
||||
# Plugin-registered platforms
|
||||
try:
|
||||
from gateway.platform_registry import platform_registry
|
||||
entry = platform_registry.get(platform.value)
|
||||
if entry:
|
||||
if entry.is_connected is not None:
|
||||
return entry.is_connected(config)
|
||||
if entry.validate_config is not None:
|
||||
return entry.validate_config(config)
|
||||
return True
|
||||
except Exception:
|
||||
pass # Registry not yet initialised during early import
|
||||
|
||||
return False
|
||||
|
||||
def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
|
||||
"""Get the home channel for a platform."""
|
||||
@@ -552,7 +435,7 @@ class GatewayConfig:
|
||||
reset_triggers=data.get("reset_triggers", ["/new", "/reset"]),
|
||||
quick_commands=quick_commands,
|
||||
sessions_dir=sessions_dir,
|
||||
always_log_local=_coerce_bool(data.get("always_log_local"), True),
|
||||
always_log_local=data.get("always_log_local", True),
|
||||
stt_enabled=_coerce_bool(stt_enabled, True),
|
||||
group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
|
||||
thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
|
||||
@@ -667,8 +550,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
existing = {}
|
||||
# Deep-merge extra dicts so gateway.json defaults survive
|
||||
merged_extra = {**existing.get("extra", {}), **plat_block.get("extra", {})}
|
||||
if plat_name == Platform.SLACK.value and "enabled" in plat_block:
|
||||
merged_extra["_enabled_explicit"] = True
|
||||
merged = {**existing, **plat_block}
|
||||
if merged_extra:
|
||||
merged["extra"] = merged_extra
|
||||
@@ -689,8 +570,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
)
|
||||
if "reply_prefix" in platform_cfg:
|
||||
bridged["reply_prefix"] = platform_cfg["reply_prefix"]
|
||||
if "reply_in_thread" in platform_cfg:
|
||||
bridged["reply_in_thread"] = platform_cfg["reply_in_thread"]
|
||||
if "require_mention" in platform_cfg:
|
||||
bridged["require_mention"] = platform_cfg["require_mention"]
|
||||
if "free_response_channels" in platform_cfg:
|
||||
@@ -705,7 +584,7 @@ def load_gateway_config() -> GatewayConfig:
|
||||
bridged["group_policy"] = platform_cfg["group_policy"]
|
||||
if "group_allow_from" in platform_cfg:
|
||||
bridged["group_allow_from"] = platform_cfg["group_allow_from"]
|
||||
if plat in (Platform.DISCORD, Platform.SLACK) and "channel_skill_bindings" in platform_cfg:
|
||||
if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
|
||||
bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
|
||||
if "channel_prompts" in platform_cfg:
|
||||
channel_prompts = platform_cfg["channel_prompts"]
|
||||
@@ -713,21 +592,16 @@ def load_gateway_config() -> GatewayConfig:
|
||||
bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
|
||||
else:
|
||||
bridged["channel_prompts"] = channel_prompts
|
||||
enabled_was_explicit = "enabled" in platform_cfg
|
||||
if not bridged and not enabled_was_explicit:
|
||||
if not bridged:
|
||||
continue
|
||||
plat_data = platforms_data.setdefault(plat.value, {})
|
||||
if not isinstance(plat_data, dict):
|
||||
plat_data = {}
|
||||
platforms_data[plat.value] = plat_data
|
||||
if enabled_was_explicit:
|
||||
plat_data["enabled"] = platform_cfg["enabled"]
|
||||
extra = plat_data.setdefault("extra", {})
|
||||
if not isinstance(extra, dict):
|
||||
extra = {}
|
||||
plat_data["extra"] = extra
|
||||
if plat == Platform.SLACK and enabled_was_explicit:
|
||||
extra["_enabled_explicit"] = True
|
||||
extra.update(bridged)
|
||||
|
||||
# Slack settings → env vars (env vars take precedence)
|
||||
@@ -735,8 +609,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
if isinstance(slack_cfg, dict):
|
||||
if "require_mention" in slack_cfg and not os.getenv("SLACK_REQUIRE_MENTION"):
|
||||
os.environ["SLACK_REQUIRE_MENTION"] = str(slack_cfg["require_mention"]).lower()
|
||||
if "strict_mention" in slack_cfg and not os.getenv("SLACK_STRICT_MENTION"):
|
||||
os.environ["SLACK_STRICT_MENTION"] = str(slack_cfg["strict_mention"]).lower()
|
||||
if "allow_bots" in slack_cfg and not os.getenv("SLACK_ALLOW_BOTS"):
|
||||
os.environ["SLACK_ALLOW_BOTS"] = str(slack_cfg["allow_bots"]).lower()
|
||||
frc = slack_cfg.get("free_response_channels")
|
||||
@@ -815,21 +687,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
|
||||
if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
|
||||
os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
|
||||
allowed_users = telegram_cfg.get("allow_from")
|
||||
if allowed_users is not None and not os.getenv("TELEGRAM_ALLOWED_USERS"):
|
||||
if isinstance(allowed_users, list):
|
||||
allowed_users = ",".join(str(v) for v in allowed_users)
|
||||
os.environ["TELEGRAM_ALLOWED_USERS"] = str(allowed_users)
|
||||
group_allowed_users = telegram_cfg.get("group_allow_from")
|
||||
if group_allowed_users is not None and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
|
||||
if isinstance(group_allowed_users, list):
|
||||
group_allowed_users = ",".join(str(v) for v in group_allowed_users)
|
||||
os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(group_allowed_users)
|
||||
group_allowed_chats = telegram_cfg.get("group_allowed_chats")
|
||||
if group_allowed_chats is not None and not os.getenv("TELEGRAM_GROUP_ALLOWED_CHATS"):
|
||||
if isinstance(group_allowed_chats, list):
|
||||
group_allowed_chats = ",".join(str(v) for v in group_allowed_chats)
|
||||
os.environ["TELEGRAM_GROUP_ALLOWED_CHATS"] = str(group_allowed_chats)
|
||||
if "disable_link_previews" in telegram_cfg:
|
||||
plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
|
||||
if not isinstance(plat_data, dict):
|
||||
@@ -1056,20 +913,8 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
slack_token = os.getenv("SLACK_BOT_TOKEN")
|
||||
if slack_token:
|
||||
if Platform.SLACK not in config.platforms:
|
||||
# No yaml config for Slack — env-only setup, enable it
|
||||
config.platforms[Platform.SLACK] = PlatformConfig()
|
||||
config.platforms[Platform.SLACK].enabled = True
|
||||
else:
|
||||
slack_config = config.platforms[Platform.SLACK]
|
||||
enabled_was_explicit = bool(slack_config.extra.pop("_enabled_explicit", False))
|
||||
if not slack_config.enabled and not enabled_was_explicit:
|
||||
# Top-level Slack settings such as channel prompts should not
|
||||
# turn an env-token setup into a disabled platform. Only an
|
||||
# explicit slack.enabled/platforms.slack.enabled false should.
|
||||
slack_config.enabled = True
|
||||
# If yaml config exists, respect its enabled flag (don't override
|
||||
# explicit enabled: false). Token is still stored so skills that
|
||||
# send Slack messages can use it without activating the gateway adapter.
|
||||
config.platforms[Platform.SLACK].enabled = True
|
||||
config.platforms[Platform.SLACK].token = slack_token
|
||||
slack_home = os.getenv("SLACK_HOME_CHANNEL")
|
||||
if slack_home and Platform.SLACK in config.platforms:
|
||||
@@ -1426,48 +1271,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"),
|
||||
)
|
||||
|
||||
# Yuanbao — YUANBAO_APP_ID preferred
|
||||
yuanbao_app_id = os.getenv("YUANBAO_APP_ID") or os.getenv("YUANBAO_APP_KEY")
|
||||
yuanbao_app_secret = os.getenv("YUANBAO_APP_SECRET")
|
||||
if yuanbao_app_id and yuanbao_app_secret:
|
||||
if Platform.YUANBAO not in config.platforms:
|
||||
config.platforms[Platform.YUANBAO] = PlatformConfig()
|
||||
config.platforms[Platform.YUANBAO].enabled = True
|
||||
extra = config.platforms[Platform.YUANBAO].extra
|
||||
extra["app_id"] = yuanbao_app_id
|
||||
extra["app_secret"] = yuanbao_app_secret
|
||||
yuanbao_bot_id = os.getenv("YUANBAO_BOT_ID")
|
||||
if yuanbao_bot_id:
|
||||
extra["bot_id"] = yuanbao_bot_id
|
||||
yuanbao_ws_url = os.getenv("YUANBAO_WS_URL")
|
||||
if yuanbao_ws_url:
|
||||
extra["ws_url"] = yuanbao_ws_url
|
||||
yuanbao_api_domain = os.getenv("YUANBAO_API_DOMAIN")
|
||||
if yuanbao_api_domain:
|
||||
extra["api_domain"] = yuanbao_api_domain
|
||||
yuanbao_route_env = os.getenv("YUANBAO_ROUTE_ENV")
|
||||
if yuanbao_route_env:
|
||||
extra["route_env"] = yuanbao_route_env
|
||||
yuanbao_home = os.getenv("YUANBAO_HOME_CHANNEL")
|
||||
if yuanbao_home:
|
||||
config.platforms[Platform.YUANBAO].home_channel = HomeChannel(
|
||||
platform=Platform.YUANBAO,
|
||||
chat_id=yuanbao_home,
|
||||
name=os.getenv("YUANBAO_HOME_CHANNEL_NAME", "Home"),
|
||||
)
|
||||
yuanbao_dm_policy = os.getenv("YUANBAO_DM_POLICY")
|
||||
if yuanbao_dm_policy:
|
||||
extra["dm_policy"] = yuanbao_dm_policy.strip().lower()
|
||||
yuanbao_dm_allow_from = os.getenv("YUANBAO_DM_ALLOW_FROM")
|
||||
if yuanbao_dm_allow_from:
|
||||
extra["dm_allow_from"] = yuanbao_dm_allow_from
|
||||
yuanbao_group_policy = os.getenv("YUANBAO_GROUP_POLICY")
|
||||
if yuanbao_group_policy:
|
||||
extra["group_policy"] = yuanbao_group_policy.strip().lower()
|
||||
yuanbao_group_allow_from = os.getenv("YUANBAO_GROUP_ALLOW_FROM")
|
||||
if yuanbao_group_allow_from:
|
||||
extra["group_allow_from"] = yuanbao_group_allow_from
|
||||
|
||||
# Session settings
|
||||
idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
|
||||
if idle_minutes:
|
||||
@@ -1482,25 +1285,3 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
config.default_reset_policy.at_hour = int(reset_hour)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
# Registry-driven enable for plugin platforms. Built-ins have explicit
|
||||
# blocks above; plugins expose check_fn() which is the single source of
|
||||
# truth for "are my env vars set?". When it returns True, ensure the
|
||||
# platform is enabled so start() will create its adapter.
|
||||
try:
|
||||
from hermes_cli.plugins import discover_plugins
|
||||
discover_plugins() # idempotent
|
||||
from gateway.platform_registry import platform_registry
|
||||
for entry in platform_registry.plugin_entries():
|
||||
try:
|
||||
if not entry.check_fn():
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.debug("check_fn for %s raised: %s", entry.name, e)
|
||||
continue
|
||||
platform = Platform(entry.name)
|
||||
if platform not in config.platforms:
|
||||
config.platforms[platform] = PlatformConfig()
|
||||
config.platforms[platform].enabled = True
|
||||
except Exception as e:
|
||||
logger.debug("Plugin platform enable pass failed: %s", e)
|
||||
|
||||
@@ -79,9 +79,7 @@ _PLATFORM_DEFAULTS: dict[str, dict[str, Any]] = {
|
||||
"discord": _TIER_HIGH,
|
||||
|
||||
# Tier 2 — edit support, often customer/workspace channels
|
||||
# Slack: tool_progress off by default — Bolt posts cannot be edited like CLI;
|
||||
# "new"/"all" spam permanent lines in channels (hermes-agent#14663).
|
||||
"slack": {**_TIER_MEDIUM, "tool_progress": "off"},
|
||||
"slack": _TIER_MEDIUM,
|
||||
"mattermost": _TIER_MEDIUM,
|
||||
"matrix": _TIER_MEDIUM,
|
||||
"feishu": _TIER_MEDIUM,
|
||||
|
||||
+15
-22
@@ -21,7 +21,6 @@ Errors in hooks are caught and logged but never block the main pipeline.
|
||||
|
||||
import asyncio
|
||||
import importlib.util
|
||||
import sys
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
import yaml
|
||||
@@ -53,13 +52,19 @@ class HookRegistry:
|
||||
return list(self._loaded_hooks)
|
||||
|
||||
def _register_builtin_hooks(self) -> None:
|
||||
"""Register built-in hooks that are always active.
|
||||
"""Register built-in hooks that are always active."""
|
||||
try:
|
||||
from gateway.builtin_hooks.boot_md import handle as boot_md_handle
|
||||
|
||||
Currently empty — no shipped built-in hooks. Kept as the extension
|
||||
point for future always-on gateway hooks so they drop in without
|
||||
re-plumbing discover_and_load().
|
||||
"""
|
||||
return
|
||||
self._handlers.setdefault("gateway:startup", []).append(boot_md_handle)
|
||||
self._loaded_hooks.append({
|
||||
"name": "boot-md",
|
||||
"description": "Run ~/.hermes/BOOT.md on gateway startup",
|
||||
"events": ["gateway:startup"],
|
||||
"path": "(builtin)",
|
||||
})
|
||||
except Exception as e:
|
||||
print(f"[hooks] Could not load built-in boot-md hook: {e}", flush=True)
|
||||
|
||||
def discover_and_load(self) -> None:
|
||||
"""
|
||||
@@ -98,28 +103,16 @@ class HookRegistry:
|
||||
print(f"[hooks] Skipping {hook_name}: no events declared", flush=True)
|
||||
continue
|
||||
|
||||
# Dynamically load the handler module.
|
||||
# Register in sys.modules BEFORE exec_module so Pydantic /
|
||||
# dataclasses / typing introspection can resolve forward
|
||||
# references (triggered by `from __future__ import annotations`
|
||||
# in the handler). Without this, a handler that declares a
|
||||
# Pydantic BaseModel for webhook/event payloads fails at first
|
||||
# dispatch with "TypeAdapter ... is not fully defined".
|
||||
module_name = f"hermes_hook_{hook_name}"
|
||||
# Dynamically load the handler module
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
module_name, handler_path
|
||||
f"hermes_hook_{hook_name}", handler_path
|
||||
)
|
||||
if spec is None or spec.loader is None:
|
||||
print(f"[hooks] Skipping {hook_name}: could not load handler.py", flush=True)
|
||||
continue
|
||||
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
sys.modules[module_name] = module
|
||||
try:
|
||||
spec.loader.exec_module(module)
|
||||
except Exception:
|
||||
sys.modules.pop(module_name, None)
|
||||
raise
|
||||
spec.loader.exec_module(module)
|
||||
|
||||
handle_fn = getattr(module, "handle", None)
|
||||
if handle_fn is None:
|
||||
|
||||
+11
-57
@@ -28,7 +28,6 @@ def mirror_to_session(
|
||||
message_text: str,
|
||||
source_label: str = "cli",
|
||||
thread_id: Optional[str] = None,
|
||||
user_id: Optional[str] = None,
|
||||
) -> bool:
|
||||
"""
|
||||
Append a delivery-mirror message to the target session's transcript.
|
||||
@@ -40,20 +39,9 @@ def mirror_to_session(
|
||||
All errors are caught -- this is never fatal.
|
||||
"""
|
||||
try:
|
||||
session_id = _find_session_id(
|
||||
platform,
|
||||
str(chat_id),
|
||||
thread_id=thread_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
session_id = _find_session_id(platform, str(chat_id), thread_id=thread_id)
|
||||
if not session_id:
|
||||
logger.debug(
|
||||
"Mirror: no session found for %s:%s:%s:%s",
|
||||
platform,
|
||||
chat_id,
|
||||
thread_id,
|
||||
user_id,
|
||||
)
|
||||
logger.debug("Mirror: no session found for %s:%s:%s", platform, chat_id, thread_id)
|
||||
return False
|
||||
|
||||
mirror_msg = {
|
||||
@@ -71,33 +59,17 @@ def mirror_to_session(
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"Mirror failed for %s:%s:%s:%s: %s",
|
||||
platform,
|
||||
chat_id,
|
||||
thread_id,
|
||||
user_id,
|
||||
e,
|
||||
)
|
||||
logger.debug("Mirror failed for %s:%s:%s: %s", platform, chat_id, thread_id, e)
|
||||
return False
|
||||
|
||||
|
||||
def _find_session_id(
|
||||
platform: str,
|
||||
chat_id: str,
|
||||
thread_id: Optional[str] = None,
|
||||
user_id: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
def _find_session_id(platform: str, chat_id: str, thread_id: Optional[str] = None) -> Optional[str]:
|
||||
"""
|
||||
Find the active session_id for a platform + chat_id pair.
|
||||
|
||||
Scans sessions.json entries and matches where origin.chat_id == chat_id
|
||||
on the right platform. DM session keys don't embed the chat_id
|
||||
(e.g. "agent:main:telegram:dm"), so we check the origin dict.
|
||||
|
||||
When *user_id* is provided, prefer exact sender matches. If multiple
|
||||
same-chat candidates exist and none matches the user, return None instead
|
||||
of guessing and contaminating another participant's session.
|
||||
"""
|
||||
if not _SESSIONS_INDEX.exists():
|
||||
return None
|
||||
@@ -109,7 +81,8 @@ def _find_session_id(
|
||||
return None
|
||||
|
||||
platform_lower = platform.lower()
|
||||
candidates = []
|
||||
best_match = None
|
||||
best_updated = ""
|
||||
|
||||
for _key, entry in data.items():
|
||||
origin = entry.get("origin") or {}
|
||||
@@ -123,31 +96,12 @@ def _find_session_id(
|
||||
origin_thread_id = origin.get("thread_id")
|
||||
if thread_id is not None and str(origin_thread_id or "") != str(thread_id):
|
||||
continue
|
||||
candidates.append(entry)
|
||||
updated = entry.get("updated_at", "")
|
||||
if updated > best_updated:
|
||||
best_updated = updated
|
||||
best_match = entry.get("session_id")
|
||||
|
||||
if not candidates:
|
||||
return None
|
||||
|
||||
if user_id:
|
||||
exact_user_matches = [
|
||||
entry for entry in candidates
|
||||
if str((entry.get("origin") or {}).get("user_id") or "") == str(user_id)
|
||||
]
|
||||
if exact_user_matches:
|
||||
candidates = exact_user_matches
|
||||
elif len(candidates) > 1:
|
||||
return None
|
||||
elif len(candidates) > 1:
|
||||
distinct_user_ids = {
|
||||
str((entry.get("origin") or {}).get("user_id") or "").strip()
|
||||
for entry in candidates
|
||||
if str((entry.get("origin") or {}).get("user_id") or "").strip()
|
||||
}
|
||||
if len(distinct_user_ids) > 1:
|
||||
return None
|
||||
|
||||
best_entry = max(candidates, key=lambda entry: entry.get("updated_at", ""))
|
||||
return best_entry.get("session_id")
|
||||
return best_match
|
||||
|
||||
|
||||
def _append_to_jsonl(session_id: str, message: dict) -> None:
|
||||
|
||||
+1
-2
@@ -28,7 +28,6 @@ from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from hermes_constants import get_hermes_dir
|
||||
from utils import atomic_replace
|
||||
|
||||
|
||||
# Unambiguous alphabet -- excludes 0/O, 1/I to prevent confusion
|
||||
@@ -60,7 +59,7 @@ def _secure_write(path: Path, data: str) -> None:
|
||||
f.write(data)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
atomic_replace(tmp_path, path)
|
||||
os.replace(tmp_path, str(path))
|
||||
try:
|
||||
os.chmod(path, 0o600)
|
||||
except OSError:
|
||||
|
||||
@@ -1,212 +0,0 @@
|
||||
"""
|
||||
Platform Adapter Registry
|
||||
|
||||
Allows platform adapters (built-in and plugin) to self-register so the gateway
|
||||
can discover and instantiate them without hardcoded if/elif chains.
|
||||
|
||||
Built-in adapters continue to use the existing if/elif in _create_adapter()
|
||||
for now. Plugin adapters register here via PluginContext.register_platform()
|
||||
and are looked up first -- if nothing is found the gateway falls through to
|
||||
the legacy code path.
|
||||
|
||||
Usage (plugin side):
|
||||
|
||||
from gateway.platform_registry import platform_registry, PlatformEntry
|
||||
|
||||
platform_registry.register(PlatformEntry(
|
||||
name="irc",
|
||||
label="IRC",
|
||||
adapter_factory=lambda cfg: IRCAdapter(cfg),
|
||||
check_fn=check_requirements,
|
||||
validate_config=lambda cfg: bool(cfg.extra.get("server")),
|
||||
required_env=["IRC_SERVER"],
|
||||
install_hint="pip install irc",
|
||||
))
|
||||
|
||||
Usage (gateway side):
|
||||
|
||||
adapter = platform_registry.create_adapter("irc", platform_config)
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class PlatformEntry:
|
||||
"""Metadata and factory for a single platform adapter."""
|
||||
|
||||
# Identifier used in config.yaml (e.g. "irc", "viber").
|
||||
name: str
|
||||
|
||||
# Human-readable label (e.g. "IRC", "Viber").
|
||||
label: str
|
||||
|
||||
# Factory callable: receives a PlatformConfig, returns an adapter instance.
|
||||
# Using a factory instead of a bare class lets plugins do custom init
|
||||
# (e.g. passing extra kwargs, wrapping in try/except).
|
||||
adapter_factory: Callable[[Any], Any]
|
||||
|
||||
# Returns True when the platform's dependencies are available.
|
||||
check_fn: Callable[[], bool]
|
||||
|
||||
# Optional: given a PlatformConfig, is it properly configured?
|
||||
# If None, the registry skips config validation and lets the adapter
|
||||
# fail at connect() time with a descriptive error.
|
||||
validate_config: Optional[Callable[[Any], bool]] = None
|
||||
|
||||
# Optional: given a PlatformConfig, is the platform connected/enabled?
|
||||
# Used by ``GatewayConfig.get_connected_platforms()`` and setup UI status.
|
||||
# If None, falls back to ``validate_config`` or ``check_fn``.
|
||||
is_connected: Optional[Callable[[Any], bool]] = None
|
||||
|
||||
# Env vars this platform needs (for ``hermes setup`` display).
|
||||
required_env: list = field(default_factory=list)
|
||||
|
||||
# Hint shown when check_fn returns False.
|
||||
install_hint: str = ""
|
||||
|
||||
# Optional setup function for interactive configuration.
|
||||
# Signature: () -> None (prompts user, saves env vars).
|
||||
# If None, falls back to _setup_standard_platform (needs token_var + vars)
|
||||
# or a generic "set these env vars" display.
|
||||
setup_fn: Optional[Callable[[], None]] = None
|
||||
|
||||
# "builtin" or "plugin"
|
||||
source: str = "plugin"
|
||||
|
||||
# Name of the plugin manifest that registered this entry (empty for
|
||||
# built-ins). Used by ``hermes gateway setup`` to auto-enable the
|
||||
# owning plugin when the user configures its platform.
|
||||
plugin_name: str = ""
|
||||
|
||||
# ── Auth env var names (for _is_user_authorized integration) ──
|
||||
# E.g. "IRC_ALLOWED_USERS" — checked for comma-separated user IDs.
|
||||
allowed_users_env: str = ""
|
||||
# E.g. "IRC_ALLOW_ALL_USERS" — if truthy, all users authorized.
|
||||
allow_all_env: str = ""
|
||||
|
||||
# ── Message limits ──
|
||||
# Max message length for smart-chunking. 0 = no limit.
|
||||
max_message_length: int = 0
|
||||
|
||||
# ── Privacy ──
|
||||
# If True, session descriptions redact PII (phone numbers, etc.)
|
||||
pii_safe: bool = False
|
||||
|
||||
# ── Display ──
|
||||
# Emoji for CLI/gateway display (e.g. "💬")
|
||||
emoji: str = "🔌"
|
||||
|
||||
# Whether this platform should appear in _UPDATE_ALLOWED_PLATFORMS
|
||||
# (allows /update command from this platform).
|
||||
allow_update_command: bool = True
|
||||
|
||||
# ── LLM guidance ──
|
||||
# Platform hint injected into the system prompt (e.g. "You are on IRC.
|
||||
# Do not use markdown."). Empty string = no hint.
|
||||
platform_hint: str = ""
|
||||
|
||||
|
||||
class PlatformRegistry:
|
||||
"""Central registry of platform adapters.
|
||||
|
||||
Thread-safe for reads (dict lookups are atomic under GIL).
|
||||
Writes happen at startup during sequential discovery.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._entries: dict[str, PlatformEntry] = {}
|
||||
|
||||
def register(self, entry: PlatformEntry) -> None:
|
||||
"""Register a platform adapter entry.
|
||||
|
||||
If an entry with the same name exists, it is replaced (last writer
|
||||
wins -- this lets plugins override built-in adapters if desired).
|
||||
"""
|
||||
if entry.name in self._entries:
|
||||
prev = self._entries[entry.name]
|
||||
logger.info(
|
||||
"Platform '%s' re-registered (was %s, now %s)",
|
||||
entry.name,
|
||||
prev.source,
|
||||
entry.source,
|
||||
)
|
||||
self._entries[entry.name] = entry
|
||||
logger.debug("Registered platform adapter: %s (%s)", entry.name, entry.source)
|
||||
|
||||
def unregister(self, name: str) -> bool:
|
||||
"""Remove a platform entry. Returns True if it existed."""
|
||||
return self._entries.pop(name, None) is not None
|
||||
|
||||
def get(self, name: str) -> Optional[PlatformEntry]:
|
||||
"""Look up a platform entry by name."""
|
||||
return self._entries.get(name)
|
||||
|
||||
def all_entries(self) -> list[PlatformEntry]:
|
||||
"""Return all registered platform entries."""
|
||||
return list(self._entries.values())
|
||||
|
||||
def plugin_entries(self) -> list[PlatformEntry]:
|
||||
"""Return only plugin-registered platform entries."""
|
||||
return [e for e in self._entries.values() if e.source == "plugin"]
|
||||
|
||||
def is_registered(self, name: str) -> bool:
|
||||
return name in self._entries
|
||||
|
||||
def create_adapter(self, name: str, config: Any) -> Optional[Any]:
|
||||
"""Create an adapter instance for the given platform name.
|
||||
|
||||
Returns None if:
|
||||
- No entry registered for *name*
|
||||
- check_fn() returns False (missing deps)
|
||||
- validate_config() returns False (misconfigured)
|
||||
- The factory raises an exception
|
||||
"""
|
||||
entry = self._entries.get(name)
|
||||
if entry is None:
|
||||
return None
|
||||
|
||||
if not entry.check_fn():
|
||||
hint = f" ({entry.install_hint})" if entry.install_hint else ""
|
||||
logger.warning(
|
||||
"Platform '%s' requirements not met%s",
|
||||
entry.label,
|
||||
hint,
|
||||
)
|
||||
return None
|
||||
|
||||
if entry.validate_config is not None:
|
||||
try:
|
||||
if not entry.validate_config(config):
|
||||
logger.warning(
|
||||
"Platform '%s' config validation failed",
|
||||
entry.label,
|
||||
)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Platform '%s' config validation error: %s",
|
||||
entry.label,
|
||||
e,
|
||||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
adapter = entry.adapter_factory(config)
|
||||
return adapter
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to create adapter for platform '%s': %s",
|
||||
entry.label,
|
||||
e,
|
||||
exc_info=True,
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
# Module-level singleton
|
||||
platform_registry = PlatformRegistry()
|
||||
@@ -1,30 +1,9 @@
|
||||
# Adding a New Messaging Platform
|
||||
|
||||
There are two ways to add a platform to the Hermes gateway:
|
||||
|
||||
## Plugin Path (Recommended for Community/Third-Party)
|
||||
|
||||
Create a plugin directory in `~/.hermes/plugins/` with a `PLUGIN.yaml` and
|
||||
`adapter.py`. The adapter inherits from `BasePlatformAdapter` and registers
|
||||
via `ctx.register_platform()` in the `register(ctx)` entry point. This
|
||||
requires **zero changes to core Hermes code**.
|
||||
|
||||
The plugin system automatically handles: adapter creation, config parsing,
|
||||
user authorization, cron delivery, send_message routing, system prompt hints,
|
||||
status display, gateway setup, and more.
|
||||
|
||||
See `plugins/platforms/irc/` for a complete reference implementation, and
|
||||
`website/docs/developer-guide/adding-platform-adapters.md` for the full
|
||||
plugin guide with code examples.
|
||||
|
||||
---
|
||||
|
||||
## Built-in Path (Core Contributors Only)
|
||||
|
||||
Checklist for integrating a platform directly into the Hermes core.
|
||||
Use this as a reference when building a built-in adapter — every item here
|
||||
is a real integration point. Missing any of them will cause broken
|
||||
functionality, missing features, or inconsistent behavior.
|
||||
Checklist for integrating a new messaging platform into the Hermes gateway.
|
||||
Use this as a reference when building a new adapter — every item here is a
|
||||
real integration point that exists in the codebase. Missing any of them will
|
||||
cause broken functionality, missing features, or inconsistent behavior.
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -10,12 +10,10 @@ Each adapter handles:
|
||||
|
||||
from .base import BasePlatformAdapter, MessageEvent, SendResult
|
||||
from .qqbot import QQAdapter
|
||||
from .yuanbao import YuanbaoAdapter
|
||||
|
||||
__all__ = [
|
||||
"BasePlatformAdapter",
|
||||
"MessageEvent",
|
||||
"SendResult",
|
||||
"QQAdapter",
|
||||
"YuanbaoAdapter",
|
||||
]
|
||||
|
||||
+69
-357
@@ -7,11 +7,8 @@ Exposes an HTTP server with endpoints:
|
||||
- GET /v1/responses/{response_id} — Retrieve a stored response
|
||||
- DELETE /v1/responses/{response_id} — Delete a stored response
|
||||
- GET /v1/models — lists hermes-agent as an available model
|
||||
- GET /v1/capabilities — machine-readable API capabilities for external UIs
|
||||
- POST /v1/runs — start a run, returns run_id immediately (202)
|
||||
- GET /v1/runs/{run_id} — retrieve current run status
|
||||
- GET /v1/runs/{run_id}/events — SSE stream of structured lifecycle events
|
||||
- POST /v1/runs/{run_id}/stop — interrupt a running agent
|
||||
- GET /health — health check
|
||||
- GET /health/detailed — rich status for cross-container dashboard probing
|
||||
|
||||
@@ -589,11 +586,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
self._run_streams: Dict[str, "asyncio.Queue[Optional[Dict]]"] = {}
|
||||
# Creation timestamps for orphaned-run TTL sweep
|
||||
self._run_streams_created: Dict[str, float] = {}
|
||||
# Active run agent/task references for stop support
|
||||
self._active_run_agents: Dict[str, Any] = {}
|
||||
self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
|
||||
# Pollable run status for dashboards and external control-plane UIs.
|
||||
self._run_statuses: Dict[str, Dict[str, Any]] = {}
|
||||
self._session_db: Optional[Any] = None # Lazy-init SessionDB for session continuity
|
||||
|
||||
@staticmethod
|
||||
@@ -812,51 +804,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
],
|
||||
})
|
||||
|
||||
async def _handle_capabilities(self, request: "web.Request") -> "web.Response":
|
||||
"""GET /v1/capabilities — advertise the stable API surface.
|
||||
|
||||
External UIs and orchestrators use this endpoint to discover the API
|
||||
server's plugin-safe contract without scraping docs or assuming that
|
||||
every Hermes version exposes the same endpoints.
|
||||
"""
|
||||
auth_err = self._check_auth(request)
|
||||
if auth_err:
|
||||
return auth_err
|
||||
|
||||
return web.json_response({
|
||||
"object": "hermes.api_server.capabilities",
|
||||
"platform": "hermes-agent",
|
||||
"model": self._model_name,
|
||||
"auth": {
|
||||
"type": "bearer",
|
||||
"required": bool(self._api_key),
|
||||
},
|
||||
"features": {
|
||||
"chat_completions": True,
|
||||
"chat_completions_streaming": True,
|
||||
"responses_api": True,
|
||||
"responses_streaming": True,
|
||||
"run_submission": True,
|
||||
"run_status": True,
|
||||
"run_events_sse": True,
|
||||
"run_stop": True,
|
||||
"tool_progress_events": True,
|
||||
"session_continuity_header": "X-Hermes-Session-Id",
|
||||
"cors": bool(self._cors_origins),
|
||||
},
|
||||
"endpoints": {
|
||||
"health": {"method": "GET", "path": "/health"},
|
||||
"health_detailed": {"method": "GET", "path": "/health/detailed"},
|
||||
"models": {"method": "GET", "path": "/v1/models"},
|
||||
"chat_completions": {"method": "POST", "path": "/v1/chat/completions"},
|
||||
"responses": {"method": "POST", "path": "/v1/responses"},
|
||||
"runs": {"method": "POST", "path": "/v1/runs"},
|
||||
"run_status": {"method": "GET", "path": "/v1/runs/{run_id}"},
|
||||
"run_events": {"method": "GET", "path": "/v1/runs/{run_id}/events"},
|
||||
"run_stop": {"method": "POST", "path": "/v1/runs/{run_id}/stop"},
|
||||
},
|
||||
})
|
||||
|
||||
async def _handle_chat_completions(self, request: "web.Request") -> "web.Response":
|
||||
"""POST /v1/chat/completions — OpenAI Chat Completions format."""
|
||||
auth_err = self._check_auth(request)
|
||||
@@ -981,62 +928,39 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if delta is not None:
|
||||
_stream_q.put(delta)
|
||||
|
||||
# Track which tool_call_ids we've emitted a "running" lifecycle
|
||||
# event for, so a "completed" event without a matching "running"
|
||||
# (e.g. internal/filtered tools) is silently dropped instead of
|
||||
# producing an orphaned event clients can't correlate.
|
||||
_started_tool_call_ids: set[str] = set()
|
||||
def _on_tool_progress(event_type, name, preview, args, **kwargs):
|
||||
"""Send tool progress as a separate SSE event.
|
||||
|
||||
def _on_tool_start(tool_call_id, function_name, function_args):
|
||||
"""Emit ``hermes.tool.progress`` with ``status: running``.
|
||||
Previously, progress markers like ``⏰ list`` were injected
|
||||
directly into ``delta.content``. OpenAI-compatible frontends
|
||||
(Open WebUI, LobeChat, …) store ``delta.content`` verbatim as
|
||||
the assistant message and send it back on subsequent requests.
|
||||
After enough turns the model learns to *emit* the markers as
|
||||
plain text instead of issuing real tool calls — silently
|
||||
hallucinating tool results. See #6972.
|
||||
|
||||
Replaces the old ``tool_progress_callback("tool.started",
|
||||
...)`` emit so SSE consumers receive a single event per
|
||||
tool start, carrying both the legacy ``tool``/``emoji``/
|
||||
``label`` payload (for #6972 frontends) and the new
|
||||
``toolCallId``/``status`` correlation fields (#16588).
|
||||
|
||||
Skips tools whose names start with ``_`` so internal
|
||||
events (``_thinking``, …) stay off the wire — matching
|
||||
the prior ``_on_tool_progress`` filter exactly.
|
||||
The fix: push a tagged tuple ``("__tool_progress__", payload)``
|
||||
onto the stream queue. The SSE writer emits it as a custom
|
||||
``event: hermes.tool.progress`` line that compliant frontends
|
||||
can render for UX but will *not* persist into conversation
|
||||
history. Clients that don't understand the custom event type
|
||||
silently ignore it per the SSE specification.
|
||||
"""
|
||||
if not tool_call_id or function_name.startswith("_"):
|
||||
if event_type != "tool.started":
|
||||
return
|
||||
_started_tool_call_ids.add(tool_call_id)
|
||||
from agent.display import build_tool_preview, get_tool_emoji
|
||||
label = build_tool_preview(function_name, function_args) or function_name
|
||||
if name.startswith("_"):
|
||||
return
|
||||
from agent.display import get_tool_emoji
|
||||
emoji = get_tool_emoji(name)
|
||||
label = preview or name
|
||||
_stream_q.put(("__tool_progress__", {
|
||||
"tool": function_name,
|
||||
"emoji": get_tool_emoji(function_name),
|
||||
"tool": name,
|
||||
"emoji": emoji,
|
||||
"label": label,
|
||||
"toolCallId": tool_call_id,
|
||||
"status": "running",
|
||||
}))
|
||||
|
||||
def _on_tool_complete(tool_call_id, function_name, function_args, function_result):
|
||||
"""Emit the matching ``status: completed`` event.
|
||||
|
||||
Dropped if the start was filtered (internal tool, missing
|
||||
id, or never seen) so clients never get an orphaned
|
||||
``completed`` they can't correlate to a prior ``running``.
|
||||
"""
|
||||
if not tool_call_id or tool_call_id not in _started_tool_call_ids:
|
||||
return
|
||||
_started_tool_call_ids.discard(tool_call_id)
|
||||
_stream_q.put(("__tool_progress__", {
|
||||
"tool": function_name,
|
||||
"toolCallId": tool_call_id,
|
||||
"status": "completed",
|
||||
}))
|
||||
|
||||
# Start agent in background. agent_ref is a mutable container
|
||||
# so the SSE writer can interrupt the agent on client disconnect.
|
||||
#
|
||||
# ``tool_progress_callback`` is intentionally not wired here:
|
||||
# it would duplicate every emit because ``run_agent`` fires it
|
||||
# side-by-side with ``tool_start_callback``/``tool_complete_callback``.
|
||||
# The structured callbacks are strictly richer (they carry the
|
||||
# tool_call id), so they own the chat-completions SSE channel.
|
||||
agent_ref = [None]
|
||||
agent_task = asyncio.ensure_future(self._run_agent(
|
||||
user_message=user_message,
|
||||
@@ -1044,8 +968,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
ephemeral_system_prompt=system_prompt,
|
||||
session_id=session_id,
|
||||
stream_delta_callback=_on_delta,
|
||||
tool_start_callback=_on_tool_start,
|
||||
tool_complete_callback=_on_tool_complete,
|
||||
tool_progress_callback=_on_tool_progress,
|
||||
agent_ref=agent_ref,
|
||||
))
|
||||
|
||||
@@ -1160,8 +1083,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
Tagged tuples ``("__tool_progress__", payload)`` are sent
|
||||
as a custom ``event: hermes.tool.progress`` SSE event so
|
||||
frontends can display them without storing the markers in
|
||||
conversation history. See #6972 for the original event,
|
||||
#16588 for the ``toolCallId``/``status`` lifecycle fields.
|
||||
conversation history. See #6972.
|
||||
"""
|
||||
if isinstance(item, tuple) and len(item) == 2 and item[0] == "__tool_progress__":
|
||||
event_data = json.dumps(item[1])
|
||||
@@ -1282,12 +1204,10 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
If the client disconnects mid-stream, ``agent.interrupt()`` is
|
||||
called so the agent stops issuing upstream LLM calls, then the
|
||||
asyncio task is cancelled. When ``store=True`` an initial
|
||||
``in_progress`` snapshot is persisted immediately after
|
||||
``response.created`` and disconnects update it to an
|
||||
``incomplete`` snapshot so GET /v1/responses/{id} and
|
||||
``previous_response_id`` chaining still have something to
|
||||
recover from.
|
||||
asyncio task is cancelled. When ``store=True`` the full response
|
||||
is persisted to the ResponseStore in a ``finally`` block so GET
|
||||
/v1/responses/{id} and ``previous_response_id`` chaining work the
|
||||
same as the batch path.
|
||||
"""
|
||||
import queue as _q
|
||||
|
||||
@@ -1349,60 +1269,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
final_response_text = ""
|
||||
agent_error: Optional[str] = None
|
||||
usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
|
||||
terminal_snapshot_persisted = False
|
||||
|
||||
def _persist_response_snapshot(
|
||||
response_env: Dict[str, Any],
|
||||
*,
|
||||
conversation_history_snapshot: Optional[List[Dict[str, Any]]] = None,
|
||||
) -> None:
|
||||
if not store:
|
||||
return
|
||||
if conversation_history_snapshot is None:
|
||||
conversation_history_snapshot = list(conversation_history)
|
||||
conversation_history_snapshot.append({"role": "user", "content": user_message})
|
||||
self._response_store.put(response_id, {
|
||||
"response": response_env,
|
||||
"conversation_history": conversation_history_snapshot,
|
||||
"instructions": instructions,
|
||||
"session_id": session_id,
|
||||
})
|
||||
if conversation:
|
||||
self._response_store.set_conversation(conversation, response_id)
|
||||
|
||||
def _persist_incomplete_if_needed() -> None:
|
||||
"""Persist an ``incomplete`` snapshot if no terminal one was written.
|
||||
|
||||
Called from both the client-disconnect (``ConnectionResetError``)
|
||||
and server-cancellation (``asyncio.CancelledError``) paths so
|
||||
GET /v1/responses/{id} and ``previous_response_id`` chaining keep
|
||||
working after abrupt stream termination.
|
||||
"""
|
||||
if not store or terminal_snapshot_persisted:
|
||||
return
|
||||
incomplete_text = "".join(final_text_parts) or final_response_text
|
||||
incomplete_items: List[Dict[str, Any]] = list(emitted_items)
|
||||
if incomplete_text:
|
||||
incomplete_items.append({
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "output_text", "text": incomplete_text}],
|
||||
})
|
||||
incomplete_env = _envelope("incomplete")
|
||||
incomplete_env["output"] = incomplete_items
|
||||
incomplete_env["usage"] = {
|
||||
"input_tokens": usage.get("input_tokens", 0),
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
incomplete_history = list(conversation_history)
|
||||
incomplete_history.append({"role": "user", "content": user_message})
|
||||
if incomplete_text:
|
||||
incomplete_history.append({"role": "assistant", "content": incomplete_text})
|
||||
_persist_response_snapshot(
|
||||
incomplete_env,
|
||||
conversation_history_snapshot=incomplete_history,
|
||||
)
|
||||
|
||||
try:
|
||||
# response.created — initial envelope, status=in_progress
|
||||
@@ -1412,7 +1278,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"type": "response.created",
|
||||
"response": created_env,
|
||||
})
|
||||
_persist_response_snapshot(created_env)
|
||||
last_activity = time.monotonic()
|
||||
|
||||
async def _open_message_item() -> None:
|
||||
@@ -1669,18 +1534,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
_failed_history = list(conversation_history)
|
||||
_failed_history.append({"role": "user", "content": user_message})
|
||||
if final_response_text or agent_error:
|
||||
_failed_history.append({
|
||||
"role": "assistant",
|
||||
"content": final_response_text or agent_error,
|
||||
})
|
||||
_persist_response_snapshot(
|
||||
failed_env,
|
||||
conversation_history_snapshot=_failed_history,
|
||||
)
|
||||
terminal_snapshot_persisted = True
|
||||
await _write_event("response.failed", {
|
||||
"type": "response.failed",
|
||||
"response": failed_env,
|
||||
@@ -1693,24 +1546,30 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
full_history = list(conversation_history)
|
||||
full_history.append({"role": "user", "content": user_message})
|
||||
if isinstance(result, dict) and result.get("messages"):
|
||||
full_history.extend(result["messages"])
|
||||
else:
|
||||
full_history.append({"role": "assistant", "content": final_response_text})
|
||||
_persist_response_snapshot(
|
||||
completed_env,
|
||||
conversation_history_snapshot=full_history,
|
||||
)
|
||||
terminal_snapshot_persisted = True
|
||||
await _write_event("response.completed", {
|
||||
"type": "response.completed",
|
||||
"response": completed_env,
|
||||
})
|
||||
|
||||
# Persist for future chaining / GET retrieval, mirroring
|
||||
# the batch path behavior.
|
||||
if store:
|
||||
full_history = list(conversation_history)
|
||||
full_history.append({"role": "user", "content": user_message})
|
||||
if isinstance(result, dict) and result.get("messages"):
|
||||
full_history.extend(result["messages"])
|
||||
else:
|
||||
full_history.append({"role": "assistant", "content": final_response_text})
|
||||
self._response_store.put(response_id, {
|
||||
"response": completed_env,
|
||||
"conversation_history": full_history,
|
||||
"instructions": instructions,
|
||||
"session_id": session_id,
|
||||
})
|
||||
if conversation:
|
||||
self._response_store.set_conversation(conversation, response_id)
|
||||
|
||||
except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError):
|
||||
_persist_incomplete_if_needed()
|
||||
# Client disconnected — interrupt the agent so it stops
|
||||
# making upstream LLM calls, then cancel the task.
|
||||
agent = agent_ref[0] if agent_ref else None
|
||||
@@ -1726,22 +1585,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
except (asyncio.CancelledError, Exception):
|
||||
pass
|
||||
logger.info("SSE client disconnected; interrupted agent task %s", response_id)
|
||||
except asyncio.CancelledError:
|
||||
# Server-side cancellation (e.g. shutdown, request timeout) —
|
||||
# persist an incomplete snapshot so GET /v1/responses/{id} and
|
||||
# previous_response_id chaining still work, then re-raise so the
|
||||
# runtime's cancellation semantics are respected.
|
||||
_persist_incomplete_if_needed()
|
||||
agent = agent_ref[0] if agent_ref else None
|
||||
if agent is not None:
|
||||
try:
|
||||
agent.interrupt("SSE task cancelled")
|
||||
except Exception:
|
||||
pass
|
||||
if not agent_task.done():
|
||||
agent_task.cancel()
|
||||
logger.info("SSE task cancelled; persisted incomplete snapshot for %s", response_id)
|
||||
raise
|
||||
|
||||
return response
|
||||
|
||||
@@ -2371,31 +2214,10 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
_MAX_CONCURRENT_RUNS = 10 # Prevent unbounded resource allocation
|
||||
_RUN_STREAM_TTL = 300 # seconds before orphaned runs are swept
|
||||
_RUN_STATUS_TTL = 3600 # seconds to retain terminal run status for polling
|
||||
|
||||
def _set_run_status(self, run_id: str, status: str, **fields: Any) -> Dict[str, Any]:
|
||||
"""Update pollable run status without exposing private agent objects."""
|
||||
now = time.time()
|
||||
current = self._run_statuses.get(run_id, {})
|
||||
current.update({
|
||||
"object": "hermes.run",
|
||||
"run_id": run_id,
|
||||
"status": status,
|
||||
"updated_at": now,
|
||||
})
|
||||
current.setdefault("created_at", fields.pop("created_at", now))
|
||||
current.update(fields)
|
||||
self._run_statuses[run_id] = current
|
||||
return current
|
||||
|
||||
def _make_run_event_callback(self, run_id: str, loop: "asyncio.AbstractEventLoop"):
|
||||
"""Return a tool_progress_callback that pushes structured events to the run's SSE queue."""
|
||||
def _push(event: Dict[str, Any]) -> None:
|
||||
self._set_run_status(
|
||||
run_id,
|
||||
self._run_statuses.get(run_id, {}).get("status", "running"),
|
||||
last_event=event.get("event"),
|
||||
)
|
||||
q = self._run_streams.get(run_id)
|
||||
if q is None:
|
||||
return
|
||||
@@ -2460,6 +2282,28 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if not user_message:
|
||||
return web.json_response(_openai_error("No user message found in input"), status=400)
|
||||
|
||||
run_id = f"run_{uuid.uuid4().hex}"
|
||||
loop = asyncio.get_running_loop()
|
||||
q: "asyncio.Queue[Optional[Dict]]" = asyncio.Queue()
|
||||
self._run_streams[run_id] = q
|
||||
self._run_streams_created[run_id] = time.time()
|
||||
|
||||
event_cb = self._make_run_event_callback(run_id, loop)
|
||||
|
||||
# Also wire stream_delta_callback so message.delta events flow through
|
||||
def _text_cb(delta: Optional[str]) -> None:
|
||||
if delta is None:
|
||||
return
|
||||
try:
|
||||
loop.call_soon_threadsafe(q.put_nowait, {
|
||||
"event": "message.delta",
|
||||
"run_id": run_id,
|
||||
"timestamp": time.time(),
|
||||
"delta": delta,
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
instructions = body.get("instructions")
|
||||
previous_response_id = body.get("previous_response_id")
|
||||
|
||||
@@ -2507,49 +2351,17 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
)
|
||||
conversation_history.append({"role": msg["role"], "content": str(content)})
|
||||
|
||||
run_id = f"run_{uuid.uuid4().hex}"
|
||||
session_id = body.get("session_id") or stored_session_id or run_id
|
||||
ephemeral_system_prompt = instructions
|
||||
loop = asyncio.get_running_loop()
|
||||
q: "asyncio.Queue[Optional[Dict]]" = asyncio.Queue()
|
||||
created_at = time.time()
|
||||
self._run_streams[run_id] = q
|
||||
self._run_streams_created[run_id] = created_at
|
||||
|
||||
event_cb = self._make_run_event_callback(run_id, loop)
|
||||
|
||||
# Also wire stream_delta_callback so message.delta events flow through.
|
||||
def _text_cb(delta: Optional[str]) -> None:
|
||||
if delta is None:
|
||||
return
|
||||
try:
|
||||
loop.call_soon_threadsafe(q.put_nowait, {
|
||||
"event": "message.delta",
|
||||
"run_id": run_id,
|
||||
"timestamp": time.time(),
|
||||
"delta": delta,
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
self._set_run_status(
|
||||
run_id,
|
||||
"queued",
|
||||
created_at=created_at,
|
||||
session_id=session_id,
|
||||
model=body.get("model", self._model_name),
|
||||
)
|
||||
|
||||
async def _run_and_close():
|
||||
try:
|
||||
self._set_run_status(run_id, "running")
|
||||
agent = self._create_agent(
|
||||
ephemeral_system_prompt=ephemeral_system_prompt,
|
||||
session_id=session_id,
|
||||
stream_delta_callback=_text_cb,
|
||||
tool_progress_callback=event_cb,
|
||||
)
|
||||
self._active_run_agents[run_id] = agent
|
||||
def _run_sync():
|
||||
r = agent.run_conversation(
|
||||
user_message=user_message,
|
||||
@@ -2572,36 +2384,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"output": final_response,
|
||||
"usage": usage,
|
||||
})
|
||||
self._set_run_status(
|
||||
run_id,
|
||||
"completed",
|
||||
output=final_response,
|
||||
usage=usage,
|
||||
last_event="run.completed",
|
||||
)
|
||||
except asyncio.CancelledError:
|
||||
self._set_run_status(
|
||||
run_id,
|
||||
"cancelled",
|
||||
last_event="run.cancelled",
|
||||
)
|
||||
try:
|
||||
q.put_nowait({
|
||||
"event": "run.cancelled",
|
||||
"run_id": run_id,
|
||||
"timestamp": time.time(),
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.exception("[api_server] run %s failed", run_id)
|
||||
self._set_run_status(
|
||||
run_id,
|
||||
"failed",
|
||||
error=str(exc),
|
||||
last_event="run.failed",
|
||||
)
|
||||
try:
|
||||
q.put_nowait({
|
||||
"event": "run.failed",
|
||||
@@ -2617,11 +2401,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
q.put_nowait(None)
|
||||
except Exception:
|
||||
pass
|
||||
self._active_run_agents.pop(run_id, None)
|
||||
self._active_run_tasks.pop(run_id, None)
|
||||
|
||||
task = asyncio.create_task(_run_and_close())
|
||||
self._active_run_tasks[run_id] = task
|
||||
try:
|
||||
self._background_tasks.add(task)
|
||||
except TypeError:
|
||||
@@ -2631,21 +2412,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
return web.json_response({"run_id": run_id, "status": "started"}, status=202)
|
||||
|
||||
async def _handle_get_run(self, request: "web.Request") -> "web.Response":
|
||||
"""GET /v1/runs/{run_id} — return pollable run status for external UIs."""
|
||||
auth_err = self._check_auth(request)
|
||||
if auth_err:
|
||||
return auth_err
|
||||
|
||||
run_id = request.match_info["run_id"]
|
||||
status = self._run_statuses.get(run_id)
|
||||
if status is None:
|
||||
return web.json_response(
|
||||
_openai_error(f"Run not found: {run_id}", code="run_not_found"),
|
||||
status=404,
|
||||
)
|
||||
return web.json_response(status)
|
||||
|
||||
async def _handle_run_events(self, request: "web.Request") -> "web.StreamResponse":
|
||||
"""GET /v1/runs/{run_id}/events — SSE stream of structured agent lifecycle events."""
|
||||
auth_err = self._check_auth(request)
|
||||
@@ -2695,46 +2461,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
return response
|
||||
|
||||
async def _handle_stop_run(self, request: "web.Request") -> "web.Response":
|
||||
"""POST /v1/runs/{run_id}/stop — interrupt a running agent."""
|
||||
auth_err = self._check_auth(request)
|
||||
if auth_err:
|
||||
return auth_err
|
||||
|
||||
run_id = request.match_info["run_id"]
|
||||
agent = self._active_run_agents.get(run_id)
|
||||
task = self._active_run_tasks.get(run_id)
|
||||
|
||||
if agent is None and task is None:
|
||||
return web.json_response(_openai_error(f"Run not found: {run_id}", code="run_not_found"), status=404)
|
||||
|
||||
self._set_run_status(run_id, "stopping", last_event="run.stopping")
|
||||
|
||||
if agent is not None:
|
||||
try:
|
||||
agent.interrupt("Stop requested via API")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if task is not None and not task.done():
|
||||
task.cancel()
|
||||
# Bounded wait: run_conversation() executes in the default
|
||||
# executor thread which task.cancel() cannot preempt — we rely on
|
||||
# agent.interrupt() above to break the loop. Cap the wait so a
|
||||
# slow/unresponsive interrupt can't hang this handler.
|
||||
try:
|
||||
await asyncio.wait_for(asyncio.shield(task), timeout=5.0)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(
|
||||
"[api_server] stop for run %s timed out after 5s; "
|
||||
"agent may still be finishing the current step",
|
||||
run_id,
|
||||
)
|
||||
except (asyncio.CancelledError, Exception):
|
||||
pass
|
||||
|
||||
return web.json_response({"run_id": run_id, "status": "stopping"})
|
||||
|
||||
async def _sweep_orphaned_runs(self) -> None:
|
||||
"""Periodically clean up run streams that were never consumed."""
|
||||
while True:
|
||||
@@ -2749,17 +2475,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
logger.debug("[api_server] sweeping orphaned run %s", run_id)
|
||||
self._run_streams.pop(run_id, None)
|
||||
self._run_streams_created.pop(run_id, None)
|
||||
self._active_run_agents.pop(run_id, None)
|
||||
self._active_run_tasks.pop(run_id, None)
|
||||
|
||||
stale_statuses = [
|
||||
run_id
|
||||
for run_id, status in list(self._run_statuses.items())
|
||||
if status.get("status") in {"completed", "failed", "cancelled"}
|
||||
and now - float(status.get("updated_at", 0) or 0) > self._RUN_STATUS_TTL
|
||||
]
|
||||
for run_id in stale_statuses:
|
||||
self._run_statuses.pop(run_id, None)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# BasePlatformAdapter interface
|
||||
@@ -2779,7 +2494,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
self._app.router.add_get("/health/detailed", self._handle_health_detailed)
|
||||
self._app.router.add_get("/v1/health", self._handle_health)
|
||||
self._app.router.add_get("/v1/models", self._handle_models)
|
||||
self._app.router.add_get("/v1/capabilities", self._handle_capabilities)
|
||||
self._app.router.add_post("/v1/chat/completions", self._handle_chat_completions)
|
||||
self._app.router.add_post("/v1/responses", self._handle_responses)
|
||||
self._app.router.add_get("/v1/responses/{response_id}", self._handle_get_response)
|
||||
@@ -2795,9 +2509,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
self._app.router.add_post("/api/jobs/{job_id}/run", self._handle_run_job)
|
||||
# Structured event streaming
|
||||
self._app.router.add_post("/v1/runs", self._handle_runs)
|
||||
self._app.router.add_get("/v1/runs/{run_id}", self._handle_get_run)
|
||||
self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
|
||||
self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
|
||||
# Start background sweep to clean up orphaned (unconsumed) run streams
|
||||
sweep_task = asyncio.create_task(self._sweep_orphaned_runs())
|
||||
try:
|
||||
|
||||
+118
-910
File diff suppressed because it is too large
Load Diff
@@ -99,7 +99,6 @@ def _normalize_server_url(raw: str) -> str:
|
||||
|
||||
class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
platform = Platform.BLUEBUBBLES
|
||||
SUPPORTS_MESSAGE_EDITING = False
|
||||
MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
@@ -392,13 +391,6 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
# Text sending
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def truncate_message(content: str, max_length: int = MAX_TEXT_LENGTH) -> List[str]:
|
||||
# Use the base splitter but skip pagination indicators — iMessage
|
||||
# bubbles flow naturally without "(1/3)" suffixes.
|
||||
chunks = BasePlatformAdapter.truncate_message(content, max_length)
|
||||
return [re.sub(r"\s*\(\d+/\d+\)$", "", c) for c in chunks]
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -406,19 +398,10 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
text = self.format_message(content)
|
||||
text = strip_markdown(content or "")
|
||||
if not text:
|
||||
return SendResult(success=False, error="BlueBubbles send requires text")
|
||||
# Split on paragraph breaks first (double newlines) so each thought
|
||||
# becomes its own iMessage bubble, then truncate any that are still
|
||||
# too long.
|
||||
paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()]
|
||||
chunks: List[str] = []
|
||||
for para in (paragraphs or [text]):
|
||||
if len(para) <= self.MAX_MESSAGE_LENGTH:
|
||||
chunks.append(para)
|
||||
else:
|
||||
chunks.extend(self.truncate_message(para, max_length=self.MAX_MESSAGE_LENGTH))
|
||||
chunks = self.truncate_message(text, max_length=self.MAX_MESSAGE_LENGTH)
|
||||
last = SendResult(success=True)
|
||||
for chunk in chunks:
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
|
||||
+37
-513
@@ -18,12 +18,11 @@ import tempfile
|
||||
import threading
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from typing import Callable, Dict, List, Optional, Any, Tuple
|
||||
from typing import Callable, Dict, Optional, Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
|
||||
_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}
|
||||
|
||||
try:
|
||||
import discord
|
||||
@@ -305,7 +304,7 @@ class VoiceReceiver:
|
||||
encrypted = bytes(payload_with_nonce[:-4])
|
||||
|
||||
try:
|
||||
import nacl.secret # noqa: E402 — delayed import, only in voice path
|
||||
import nacl.secret # noqa: delayed import – only in voice path
|
||||
box = nacl.secret.Aead(self._secret_key)
|
||||
decrypted = box.decrypt(encrypted, header, bytes(nonce))
|
||||
except Exception as e:
|
||||
@@ -528,7 +527,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
# Reply threading mode: "off" (no replies), "first" (reply on first
|
||||
# chunk only, default), "all" (reply-reference on every chunk).
|
||||
self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
|
||||
self._slash_commands: bool = self.config.extra.get("slash_commands", True)
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Discord and start receiving events."""
|
||||
@@ -746,8 +744,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
)
|
||||
|
||||
# Register slash commands
|
||||
if self._slash_commands:
|
||||
self._register_slash_commands()
|
||||
self._register_slash_commands()
|
||||
|
||||
# Start the bot in background
|
||||
self._bot_task = asyncio.create_task(self._client.start(self.config.token))
|
||||
@@ -803,222 +800,15 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if not self._client:
|
||||
return
|
||||
try:
|
||||
sync_policy = self._get_discord_command_sync_policy()
|
||||
if sync_policy == "off":
|
||||
logger.info("[%s] Skipping Discord slash command sync (policy=off)", self.name)
|
||||
return
|
||||
|
||||
if sync_policy == "bulk":
|
||||
synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
|
||||
logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
|
||||
return
|
||||
|
||||
# Discord's per-app command-management bucket is ~5 writes / 20 s,
|
||||
# so a mass-prune-plus-upsert reconcile (e.g. 77 orphans + 30
|
||||
# desired = 107 writes) takes several minutes of forced waits.
|
||||
# A flat 30 s budget blew up reliably under bucket pressure and
|
||||
# left slash commands broken for ~60 min until the bucket fully
|
||||
# recovered. Use a wide ceiling; the cap still guards against a
|
||||
# true hang. (#16713)
|
||||
summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=600)
|
||||
logger.info(
|
||||
"[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
|
||||
self.name,
|
||||
summary["total"],
|
||||
summary["unchanged"],
|
||||
summary["updated"],
|
||||
summary["recreated"],
|
||||
summary["created"],
|
||||
summary["deleted"],
|
||||
)
|
||||
synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
|
||||
logger.info("[%s] Synced %d slash command(s)", self.name, len(synced))
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(
|
||||
"[%s] Slash command sync timed out — Discord rate-limit bucket "
|
||||
"may be saturated; will retry on next reconnect",
|
||||
self.name,
|
||||
)
|
||||
logger.warning("[%s] Slash command sync timed out after 30s", self.name)
|
||||
except asyncio.CancelledError:
|
||||
raise
|
||||
except Exception as e: # pragma: no cover - defensive logging
|
||||
logger.warning("[%s] Slash command sync failed: %s", self.name, e, exc_info=True)
|
||||
|
||||
def _get_discord_command_sync_policy(self) -> str:
|
||||
raw = str(os.getenv("DISCORD_COMMAND_SYNC_POLICY", "safe") or "").strip().lower()
|
||||
if raw in _DISCORD_COMMAND_SYNC_POLICIES:
|
||||
return raw
|
||||
if raw:
|
||||
logger.warning(
|
||||
"[%s] Invalid DISCORD_COMMAND_SYNC_POLICY=%r; falling back to 'safe'",
|
||||
self.name,
|
||||
raw,
|
||||
)
|
||||
return "safe"
|
||||
|
||||
def _canonicalize_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Reduce command payloads to the semantic fields Hermes manages."""
|
||||
contexts = payload.get("contexts")
|
||||
integration_types = payload.get("integration_types")
|
||||
return {
|
||||
"type": int(payload.get("type", 1) or 1),
|
||||
"name": str(payload.get("name", "") or ""),
|
||||
"description": str(payload.get("description", "") or ""),
|
||||
"default_member_permissions": self._normalize_permissions(
|
||||
payload.get("default_member_permissions")
|
||||
),
|
||||
"dm_permission": bool(payload.get("dm_permission", True)),
|
||||
"nsfw": bool(payload.get("nsfw", False)),
|
||||
"contexts": sorted(int(c) for c in contexts) if contexts else None,
|
||||
"integration_types": (
|
||||
sorted(int(i) for i in integration_types) if integration_types else None
|
||||
),
|
||||
"options": [
|
||||
self._canonicalize_app_command_option(item)
|
||||
for item in payload.get("options", []) or []
|
||||
if isinstance(item, dict)
|
||||
],
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _normalize_permissions(value: Any) -> Optional[str]:
|
||||
"""Discord emits default_member_permissions as str server-side but discord.py
|
||||
sets it as int locally. Normalize to str-or-None so the comparison is stable."""
|
||||
if value is None:
|
||||
return None
|
||||
return str(value)
|
||||
|
||||
def _existing_command_to_payload(self, command: Any) -> Dict[str, Any]:
|
||||
"""Build a canonical-ready dict from an AppCommand.
|
||||
|
||||
discord.py's AppCommand.to_dict() does NOT include nsfw,
|
||||
dm_permission, or default_member_permissions (they live only on the
|
||||
attributes). Pull them from the attributes so the canonicalizer sees
|
||||
the real server-side values instead of defaults — otherwise any
|
||||
command using non-default permissions would diff on every startup.
|
||||
"""
|
||||
payload = dict(command.to_dict())
|
||||
nsfw = getattr(command, "nsfw", None)
|
||||
if nsfw is not None:
|
||||
payload["nsfw"] = bool(nsfw)
|
||||
guild_only = getattr(command, "guild_only", None)
|
||||
if guild_only is not None:
|
||||
payload["dm_permission"] = not bool(guild_only)
|
||||
default_permissions = getattr(command, "default_member_permissions", None)
|
||||
if default_permissions is not None:
|
||||
payload["default_member_permissions"] = getattr(
|
||||
default_permissions, "value", default_permissions
|
||||
)
|
||||
return payload
|
||||
|
||||
def _canonicalize_app_command_option(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return {
|
||||
"type": int(payload.get("type", 0) or 0),
|
||||
"name": str(payload.get("name", "") or ""),
|
||||
"description": str(payload.get("description", "") or ""),
|
||||
"required": bool(payload.get("required", False)),
|
||||
"autocomplete": bool(payload.get("autocomplete", False)),
|
||||
"choices": [
|
||||
{
|
||||
"name": str(choice.get("name", "") or ""),
|
||||
"value": choice.get("value"),
|
||||
}
|
||||
for choice in payload.get("choices", []) or []
|
||||
if isinstance(choice, dict)
|
||||
],
|
||||
"channel_types": list(payload.get("channel_types", []) or []),
|
||||
"min_value": payload.get("min_value"),
|
||||
"max_value": payload.get("max_value"),
|
||||
"min_length": payload.get("min_length"),
|
||||
"max_length": payload.get("max_length"),
|
||||
"options": [
|
||||
self._canonicalize_app_command_option(item)
|
||||
for item in payload.get("options", []) or []
|
||||
if isinstance(item, dict)
|
||||
],
|
||||
}
|
||||
|
||||
def _patchable_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Fields supported by discord.py's edit_global_command route."""
|
||||
canonical = self._canonicalize_app_command_payload(payload)
|
||||
return {
|
||||
"name": canonical["name"],
|
||||
"description": canonical["description"],
|
||||
"options": canonical["options"],
|
||||
}
|
||||
|
||||
async def _safe_sync_slash_commands(self) -> Dict[str, int]:
|
||||
"""Diff existing global commands and only mutate the commands that changed."""
|
||||
if not self._client:
|
||||
return {
|
||||
"total": 0,
|
||||
"unchanged": 0,
|
||||
"updated": 0,
|
||||
"recreated": 0,
|
||||
"created": 0,
|
||||
"deleted": 0,
|
||||
}
|
||||
|
||||
tree = self._client.tree
|
||||
app_id = getattr(self._client, "application_id", None) or getattr(getattr(self._client, "user", None), "id", None)
|
||||
if not app_id:
|
||||
raise RuntimeError("Discord application ID is unavailable for slash command sync")
|
||||
|
||||
desired_payloads = [command.to_dict(tree) for command in tree.get_commands()]
|
||||
desired_by_key = {
|
||||
(int(payload.get("type", 1) or 1), str(payload.get("name", "") or "").lower()): payload
|
||||
for payload in desired_payloads
|
||||
}
|
||||
existing_commands = await tree.fetch_commands()
|
||||
existing_by_key = {
|
||||
(
|
||||
int(getattr(getattr(command, "type", None), "value", getattr(command, "type", 1)) or 1),
|
||||
str(command.name or "").lower(),
|
||||
): command
|
||||
for command in existing_commands
|
||||
}
|
||||
|
||||
unchanged = 0
|
||||
updated = 0
|
||||
recreated = 0
|
||||
created = 0
|
||||
deleted = 0
|
||||
http = self._client.http
|
||||
|
||||
for key, desired in desired_by_key.items():
|
||||
current = existing_by_key.pop(key, None)
|
||||
if current is None:
|
||||
await http.upsert_global_command(app_id, desired)
|
||||
created += 1
|
||||
continue
|
||||
|
||||
current_existing_payload = self._existing_command_to_payload(current)
|
||||
current_payload = self._canonicalize_app_command_payload(current_existing_payload)
|
||||
desired_payload = self._canonicalize_app_command_payload(desired)
|
||||
if current_payload == desired_payload:
|
||||
unchanged += 1
|
||||
continue
|
||||
|
||||
if self._patchable_app_command_payload(current_existing_payload) == self._patchable_app_command_payload(desired):
|
||||
await http.delete_global_command(app_id, current.id)
|
||||
await http.upsert_global_command(app_id, desired)
|
||||
recreated += 1
|
||||
continue
|
||||
|
||||
await http.edit_global_command(app_id, current.id, desired)
|
||||
updated += 1
|
||||
|
||||
for current in existing_by_key.values():
|
||||
await http.delete_global_command(app_id, current.id)
|
||||
deleted += 1
|
||||
|
||||
return {
|
||||
"total": len(desired_payloads),
|
||||
"unchanged": unchanged,
|
||||
"updated": updated,
|
||||
"recreated": recreated,
|
||||
"created": created,
|
||||
"deleted": deleted,
|
||||
}
|
||||
|
||||
async def _add_reaction(self, message: Any, emoji: str) -> bool:
|
||||
"""Add an emoji reaction to a Discord message."""
|
||||
if not message or not hasattr(message, "add_reaction"):
|
||||
@@ -1343,134 +1133,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
msg = await channel.send(content=caption if caption else None, file=file)
|
||||
return SendResult(success=True, message_id=str(msg.id))
|
||||
|
||||
async def send_multiple_images(
|
||||
self,
|
||||
chat_id: str,
|
||||
images: List[Tuple[str, str]],
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
human_delay: float = 0.0,
|
||||
) -> None:
|
||||
"""Send a batch of images as a single Discord message with multiple attachments.
|
||||
|
||||
Discord permits up to 10 file attachments per message. Batches are
|
||||
chunked accordingly. URL images are downloaded into memory and
|
||||
uploaded as inline attachments (same pattern as ``send_image`` so
|
||||
they render inline, not as bare links). Local files are opened
|
||||
directly. On per-chunk failure the remaining images in that chunk
|
||||
fall back to the base per-image loop.
|
||||
"""
|
||||
if not self._client:
|
||||
return
|
||||
if not images:
|
||||
return
|
||||
|
||||
try:
|
||||
import discord as _discord_mod
|
||||
import io as _io
|
||||
from urllib.parse import unquote as _unquote
|
||||
except Exception: # pragma: no cover
|
||||
await super().send_multiple_images(chat_id, images, metadata, human_delay)
|
||||
return
|
||||
|
||||
try:
|
||||
channel = self._client.get_channel(int(chat_id))
|
||||
if not channel:
|
||||
channel = await self._client.fetch_channel(int(chat_id))
|
||||
if not channel:
|
||||
logger.warning("[%s] Channel %s not found for multi-image send", self.name, chat_id)
|
||||
return
|
||||
except Exception as e:
|
||||
logger.warning("[%s] Failed to resolve channel for multi-image send: %s", self.name, e)
|
||||
await super().send_multiple_images(chat_id, images, metadata, human_delay)
|
||||
return
|
||||
|
||||
CHUNK = 10
|
||||
chunks = [images[i:i + CHUNK] for i in range(0, len(images), CHUNK)]
|
||||
|
||||
for chunk_idx, chunk in enumerate(chunks):
|
||||
if human_delay > 0 and chunk_idx > 0:
|
||||
await asyncio.sleep(human_delay)
|
||||
|
||||
files: List[Any] = []
|
||||
captions: List[str] = []
|
||||
aiohttp_session = None
|
||||
try:
|
||||
for image_url, alt_text in chunk:
|
||||
if alt_text:
|
||||
captions.append(alt_text)
|
||||
if image_url.startswith("file://"):
|
||||
local_path = _unquote(image_url[7:])
|
||||
if not os.path.exists(local_path):
|
||||
logger.warning("[%s] Skipping missing image: %s", self.name, local_path)
|
||||
continue
|
||||
files.append(_discord_mod.File(local_path, filename=os.path.basename(local_path)))
|
||||
else:
|
||||
if not is_safe_url(image_url):
|
||||
logger.warning("[%s] Blocked unsafe image URL in batch", self.name)
|
||||
continue
|
||||
# Download to BytesIO so it renders inline
|
||||
try:
|
||||
import aiohttp as _aiohttp
|
||||
from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
|
||||
_proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
|
||||
_sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy)
|
||||
if aiohttp_session is None:
|
||||
aiohttp_session = _aiohttp.ClientSession(**_sess_kw)
|
||||
async with aiohttp_session.get(
|
||||
image_url, timeout=_aiohttp.ClientTimeout(total=30), **_req_kw,
|
||||
) as resp:
|
||||
if resp.status != 200:
|
||||
logger.warning(
|
||||
"[%s] Failed to download image (HTTP %d) in batch: %s",
|
||||
self.name, resp.status, image_url[:80],
|
||||
)
|
||||
continue
|
||||
data = await resp.read()
|
||||
ct = resp.headers.get("content-type", "image/png")
|
||||
ext = "png"
|
||||
if "jpeg" in ct or "jpg" in ct:
|
||||
ext = "jpg"
|
||||
elif "gif" in ct:
|
||||
ext = "gif"
|
||||
elif "webp" in ct:
|
||||
ext = "webp"
|
||||
files.append(_discord_mod.File(_io.BytesIO(data), filename=f"image_{len(files)}.{ext}"))
|
||||
except Exception as dl_err:
|
||||
logger.warning("[%s] Download failed for %s: %s", self.name, image_url[:80], dl_err)
|
||||
continue
|
||||
|
||||
if not files:
|
||||
continue
|
||||
|
||||
# Use the first caption if any (Discord only has one message body for the group)
|
||||
content = captions[0] if captions else None
|
||||
logger.info(
|
||||
"[%s] Sending %d image(s) as single Discord message (chunk %d/%d)",
|
||||
self.name, len(files), chunk_idx + 1, len(chunks),
|
||||
)
|
||||
|
||||
if self._is_forum_parent(channel):
|
||||
await self._forum_post_file(
|
||||
channel,
|
||||
content=(content or "").strip(),
|
||||
files=files,
|
||||
)
|
||||
else:
|
||||
await channel.send(content=content, files=files)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"[%s] Multi-image Discord send failed (chunk %d/%d), falling back to per-image: %s",
|
||||
self.name, chunk_idx + 1, len(chunks), e,
|
||||
exc_info=True,
|
||||
)
|
||||
await super().send_multiple_images(chat_id, chunk, metadata, human_delay=human_delay)
|
||||
finally:
|
||||
if aiohttp_session is not None:
|
||||
try:
|
||||
await aiohttp_session.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
async def play_tts(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -2385,6 +2047,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
async def slash_usage(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, "/usage")
|
||||
|
||||
@tree.command(name="provider", description="Show available providers")
|
||||
async def slash_provider(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, "/provider")
|
||||
|
||||
@tree.command(name="help", description="Show available commands")
|
||||
async def slash_help(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, "/help")
|
||||
@@ -2398,10 +2064,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
async def slash_reload_mcp(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, "/reload-mcp")
|
||||
|
||||
@tree.command(name="reload-skills", description="Re-scan ~/.hermes/skills/ for new or removed skills")
|
||||
async def slash_reload_skills(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, "/reload-skills")
|
||||
|
||||
@tree.command(name="voice", description="Toggle voice reply mode")
|
||||
@discord.app_commands.describe(mode="Voice mode: on, off, tts, channel, leave, or status")
|
||||
@discord.app_commands.choices(mode=[
|
||||
@@ -2458,6 +2120,11 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
async def slash_background(interaction: discord.Interaction, prompt: str):
|
||||
await self._run_simple_slash(interaction, f"/background {prompt}", "Background task started~")
|
||||
|
||||
@tree.command(name="btw", description="Ephemeral side question using session context")
|
||||
@discord.app_commands.describe(question="Your side question (no tools, not persisted)")
|
||||
async def slash_btw(interaction: discord.Interaction, question: str):
|
||||
await self._run_simple_slash(interaction, f"/btw {question}")
|
||||
|
||||
# ── Auto-register any gateway-available commands not yet on the tree ──
|
||||
# This ensures new commands added to COMMAND_REGISTRY in
|
||||
# hermes_cli/commands.py automatically appear as Discord slash
|
||||
@@ -2822,8 +2489,21 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
skills: ["skill-a", "skill-b"]
|
||||
Also checks parent_id so forum threads inherit the forum's bindings.
|
||||
"""
|
||||
from gateway.platforms.base import resolve_channel_skills
|
||||
return resolve_channel_skills(self.config.extra, channel_id, parent_id)
|
||||
bindings = self.config.extra.get("channel_skill_bindings", [])
|
||||
if not bindings:
|
||||
return None
|
||||
ids_to_check = {channel_id}
|
||||
if parent_id:
|
||||
ids_to_check.add(parent_id)
|
||||
for entry in bindings:
|
||||
entry_id = str(entry.get("id", ""))
|
||||
if entry_id in ids_to_check:
|
||||
skills = entry.get("skills") or entry.get("skill")
|
||||
if isinstance(skills, str):
|
||||
return [skills]
|
||||
if isinstance(skills, list) and skills:
|
||||
return list(dict.fromkeys(skills)) # dedup, preserve order
|
||||
return None
|
||||
|
||||
def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
|
||||
"""Resolve a Discord per-channel prompt, preferring the exact channel over its parent."""
|
||||
@@ -2840,12 +2520,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no", "off")
|
||||
|
||||
def _discord_free_response_channels(self) -> set:
|
||||
"""Return Discord channel IDs where no bot mention is required.
|
||||
|
||||
A single ``"*"`` entry (either from a list or a comma-separated
|
||||
string) is preserved in the returned set so callers can short-circuit
|
||||
on wildcard membership, consistent with ``allowed_channels``.
|
||||
"""
|
||||
"""Return Discord channel IDs where no bot mention is required."""
|
||||
raw = self.config.extra.get("free_response_channels")
|
||||
if raw is None:
|
||||
raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "")
|
||||
@@ -3038,43 +2713,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_slash_confirm(
|
||||
self, chat_id: str, title: str, message: str, session_key: str,
|
||||
confirm_id: str, metadata: Optional[dict] = None,
|
||||
) -> SendResult:
|
||||
"""Send a three-button slash-command confirmation prompt."""
|
||||
if not self._client or not DISCORD_AVAILABLE:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
target_id = chat_id
|
||||
if metadata and metadata.get("thread_id"):
|
||||
target_id = metadata["thread_id"]
|
||||
|
||||
channel = self._client.get_channel(int(target_id))
|
||||
if not channel:
|
||||
channel = await self._client.fetch_channel(int(target_id))
|
||||
|
||||
# Embed description limit is 4096; message usually fits easily.
|
||||
max_desc = 4088
|
||||
body = message if len(message) <= max_desc else message[: max_desc - 3] + "..."
|
||||
embed = discord.Embed(
|
||||
title=title or "Confirm",
|
||||
description=body,
|
||||
color=discord.Color.orange(),
|
||||
)
|
||||
|
||||
view = SlashConfirmView(
|
||||
session_key=session_key,
|
||||
confirm_id=confirm_id,
|
||||
allowed_user_ids=self._allowed_user_ids,
|
||||
)
|
||||
|
||||
msg = await channel.send(embed=embed, view=view)
|
||||
return SendResult(success=True, message_id=str(msg.id))
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_update_prompt(
|
||||
self, chat_id: str, prompt: str, default: str = "",
|
||||
session_key: str = "",
|
||||
@@ -3375,14 +3013,14 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
allowed_channels_raw = os.getenv("DISCORD_ALLOWED_CHANNELS", "")
|
||||
if allowed_channels_raw:
|
||||
allowed_channels = {ch.strip() for ch in allowed_channels_raw.split(",") if ch.strip()}
|
||||
if "*" not in allowed_channels and not (channel_ids & allowed_channels):
|
||||
if not (channel_ids & allowed_channels):
|
||||
logger.debug("[%s] Ignoring message in non-allowed channel: %s", self.name, channel_ids)
|
||||
return
|
||||
|
||||
# Check ignored channels - never respond even when mentioned
|
||||
ignored_channels_raw = os.getenv("DISCORD_IGNORED_CHANNELS", "")
|
||||
ignored_channels = {ch.strip() for ch in ignored_channels_raw.split(",") if ch.strip()}
|
||||
if "*" in ignored_channels or (channel_ids & ignored_channels):
|
||||
if channel_ids & ignored_channels:
|
||||
logger.debug("[%s] Ignoring message in ignored channel: %s", self.name, channel_ids)
|
||||
return
|
||||
|
||||
@@ -3396,11 +3034,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
voice_linked_ids = {str(ch_id) for ch_id in self._voice_text_channels.values()}
|
||||
current_channel_id = str(message.channel.id)
|
||||
is_voice_linked_channel = current_channel_id in voice_linked_ids
|
||||
is_free_channel = (
|
||||
"*" in free_channels
|
||||
or bool(channel_ids & free_channels)
|
||||
or is_voice_linked_channel
|
||||
)
|
||||
is_free_channel = bool(channel_ids & free_channels) or is_voice_linked_channel
|
||||
|
||||
# Skip the mention check if the message is in a thread where
|
||||
# the bot has previously participated (auto-created or replied in).
|
||||
@@ -3423,7 +3057,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
|
||||
thread = await self._auto_create_thread(message)
|
||||
if thread:
|
||||
parent_channel_id = str(message.channel.id)
|
||||
is_thread = True
|
||||
thread_id = str(thread.id)
|
||||
auto_threaded_channel = thread
|
||||
@@ -3474,7 +3107,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
chat_topic = self._get_effective_topic(message.channel, is_thread=is_thread)
|
||||
|
||||
# Build source
|
||||
guild = getattr(message, "guild", None)
|
||||
source = self.build_source(
|
||||
chat_id=str(effective_channel.id),
|
||||
chat_name=chat_name,
|
||||
@@ -3484,9 +3116,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
thread_id=thread_id,
|
||||
chat_topic=chat_topic,
|
||||
is_bot=getattr(message.author, "bot", False),
|
||||
guild_id=str(guild.id) if guild else None,
|
||||
parent_chat_id=parent_channel_id,
|
||||
message_id=str(message.id),
|
||||
)
|
||||
|
||||
# Build media URLs -- download image attachments to local cache so the
|
||||
@@ -3808,103 +3437,6 @@ if DISCORD_AVAILABLE:
|
||||
for child in self.children:
|
||||
child.disabled = True
|
||||
|
||||
class SlashConfirmView(discord.ui.View):
|
||||
"""Three-button view for generic slash-command confirmations.
|
||||
|
||||
Used by ``/reload-mcp`` and any future slash command routed through
|
||||
``GatewayRunner._request_slash_confirm``. Buttons map to the
|
||||
gateway's three choices:
|
||||
|
||||
* "Approve Once" → ``choice="once"``
|
||||
* "Always Approve" → ``choice="always"``
|
||||
* "Cancel" → ``choice="cancel"``
|
||||
|
||||
Clicking calls the module-level
|
||||
``tools.slash_confirm.resolve(session_key, confirm_id, choice)``
|
||||
which runs the handler the runner stored for this ``session_key``.
|
||||
Only users in the adapter's allowlist can click. Times out after
|
||||
5 minutes (matches the gateway primitive's timeout).
|
||||
"""
|
||||
|
||||
def __init__(self, session_key: str, confirm_id: str, allowed_user_ids: set):
|
||||
super().__init__(timeout=300)
|
||||
self.session_key = session_key
|
||||
self.confirm_id = confirm_id
|
||||
self.allowed_user_ids = allowed_user_ids
|
||||
self.resolved = False
|
||||
|
||||
def _check_auth(self, interaction: discord.Interaction) -> bool:
|
||||
if not self.allowed_user_ids:
|
||||
return True
|
||||
return str(interaction.user.id) in self.allowed_user_ids
|
||||
|
||||
async def _resolve(
|
||||
self, interaction: discord.Interaction, choice: str,
|
||||
color: discord.Color, label: str,
|
||||
):
|
||||
if self.resolved:
|
||||
await interaction.response.send_message(
|
||||
"This prompt has already been resolved~", ephemeral=True,
|
||||
)
|
||||
return
|
||||
if not self._check_auth(interaction):
|
||||
await interaction.response.send_message(
|
||||
"You're not authorized to answer this prompt~", ephemeral=True,
|
||||
)
|
||||
return
|
||||
|
||||
self.resolved = True
|
||||
|
||||
embed = interaction.message.embeds[0] if interaction.message.embeds else None
|
||||
if embed:
|
||||
embed.color = color
|
||||
embed.set_footer(text=f"{label} by {interaction.user.display_name}")
|
||||
|
||||
for child in self.children:
|
||||
child.disabled = True
|
||||
|
||||
await interaction.response.edit_message(embed=embed, view=self)
|
||||
|
||||
# Resolve via the module-level primitive. If the handler
|
||||
# returns a follow-up message, post it in the same channel.
|
||||
try:
|
||||
from tools import slash_confirm as _slash_confirm_mod
|
||||
result_text = await _slash_confirm_mod.resolve(
|
||||
self.session_key, self.confirm_id, choice,
|
||||
)
|
||||
if result_text:
|
||||
await interaction.followup.send(result_text)
|
||||
logger.info(
|
||||
"Discord button resolved slash-confirm for session %s "
|
||||
"(choice=%s, user=%s)",
|
||||
self.session_key, choice, interaction.user.display_name,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.error("Discord slash-confirm resolve failed: %s", exc, exc_info=True)
|
||||
|
||||
@discord.ui.button(label="Approve Once", style=discord.ButtonStyle.green)
|
||||
async def approve_once(
|
||||
self, interaction: discord.Interaction, button: discord.ui.Button,
|
||||
):
|
||||
await self._resolve(interaction, "once", discord.Color.green(), "Approved once")
|
||||
|
||||
@discord.ui.button(label="Always Approve", style=discord.ButtonStyle.blurple)
|
||||
async def approve_always(
|
||||
self, interaction: discord.Interaction, button: discord.ui.Button,
|
||||
):
|
||||
await self._resolve(interaction, "always", discord.Color.purple(), "Always approved")
|
||||
|
||||
@discord.ui.button(label="Cancel", style=discord.ButtonStyle.red)
|
||||
async def cancel(
|
||||
self, interaction: discord.Interaction, button: discord.ui.Button,
|
||||
):
|
||||
await self._resolve(interaction, "cancel", discord.Color.greyple(), "Cancelled")
|
||||
|
||||
async def on_timeout(self):
|
||||
self.resolved = True
|
||||
for child in self.children:
|
||||
child.disabled = True
|
||||
|
||||
class UpdatePromptView(discord.ui.View):
|
||||
"""Interactive Yes/No buttons for ``hermes update`` prompts.
|
||||
|
||||
@@ -4135,15 +3667,6 @@ if DISCORD_AVAILABLE:
|
||||
|
||||
self.resolved = True
|
||||
model_id = interaction.data["values"][0]
|
||||
self.clear_items()
|
||||
await interaction.response.edit_message(
|
||||
embed=discord.Embed(
|
||||
title="⚙ Switching Model",
|
||||
description=f"Switching to `{model_id}`...",
|
||||
color=discord.Color.blue(),
|
||||
),
|
||||
view=None,
|
||||
)
|
||||
|
||||
try:
|
||||
result_text = await self.on_model_selected(
|
||||
@@ -4154,13 +3677,14 @@ if DISCORD_AVAILABLE:
|
||||
except Exception as exc:
|
||||
result_text = f"Error switching model: {exc}"
|
||||
|
||||
await interaction.edit_original_response(
|
||||
self.clear_items()
|
||||
await interaction.response.edit_message(
|
||||
embed=discord.Embed(
|
||||
title="⚙ Model Switched",
|
||||
description=result_text,
|
||||
color=discord.Color.green(),
|
||||
),
|
||||
view=None,
|
||||
view=self,
|
||||
)
|
||||
|
||||
async def _on_back(self, interaction: discord.Interaction):
|
||||
|
||||
+1
-111
@@ -28,10 +28,9 @@ from email.header import decode_header
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
from email.mime.text import MIMEText
|
||||
from email.mime.base import MIMEBase
|
||||
from email.utils import formatdate
|
||||
from email import encoders
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
@@ -505,7 +504,6 @@ class EmailAdapter(BasePlatformAdapter):
|
||||
msg["In-Reply-To"] = original_msg_id
|
||||
msg["References"] = original_msg_id
|
||||
|
||||
msg["Date"] = formatdate(localtime=True)
|
||||
msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
|
||||
msg["Message-ID"] = msg_id
|
||||
|
||||
@@ -540,113 +538,6 @@ class EmailAdapter(BasePlatformAdapter):
|
||||
text += f"\n\nImage: {image_url}"
|
||||
return await self.send(chat_id, text.strip(), reply_to)
|
||||
|
||||
async def send_multiple_images(
|
||||
self,
|
||||
chat_id: str,
|
||||
images: List[Tuple[str, str]],
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
human_delay: float = 0.0,
|
||||
) -> None:
|
||||
"""Send a batch of images as a single email with multiple MIME attachments.
|
||||
|
||||
Local files are attached directly. URL images have their URL
|
||||
appended to the body (email adapter does not download remote
|
||||
images). No hard cap — email clients handle dozens of
|
||||
attachments fine, subject to SMTP message size limits.
|
||||
"""
|
||||
if not images:
|
||||
return
|
||||
|
||||
from urllib.parse import unquote as _unquote
|
||||
|
||||
body_parts: List[str] = []
|
||||
local_paths: List[str] = []
|
||||
for image_url, alt_text in images:
|
||||
if alt_text:
|
||||
body_parts.append(alt_text)
|
||||
if image_url.startswith("file://"):
|
||||
local_path = _unquote(image_url[7:])
|
||||
if Path(local_path).exists():
|
||||
local_paths.append(local_path)
|
||||
else:
|
||||
logger.warning("[Email] Skipping missing image: %s", local_path)
|
||||
else:
|
||||
# Remote URLs just get linked in the body (parity with send_image)
|
||||
body_parts.append(f"Image: {image_url}")
|
||||
|
||||
if not local_paths and not body_parts:
|
||||
return
|
||||
|
||||
body = "\n\n".join(body_parts)
|
||||
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
await loop.run_in_executor(
|
||||
None,
|
||||
self._send_email_with_attachments,
|
||||
chat_id,
|
||||
body,
|
||||
local_paths,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("[Email] Multi-image send failed, falling back: %s", e, exc_info=True)
|
||||
await super().send_multiple_images(chat_id, images, metadata, human_delay)
|
||||
|
||||
def _send_email_with_attachments(
|
||||
self,
|
||||
to_addr: str,
|
||||
body: str,
|
||||
file_paths: List[str],
|
||||
) -> str:
|
||||
"""Send an email with multiple file attachments via SMTP."""
|
||||
msg = MIMEMultipart()
|
||||
msg["From"] = self._address
|
||||
msg["To"] = to_addr
|
||||
|
||||
ctx = self._thread_context.get(to_addr, {})
|
||||
subject = ctx.get("subject", "Hermes Agent")
|
||||
if not subject.startswith("Re:"):
|
||||
subject = f"Re: {subject}"
|
||||
msg["Subject"] = subject
|
||||
|
||||
original_msg_id = ctx.get("message_id")
|
||||
if original_msg_id:
|
||||
msg["In-Reply-To"] = original_msg_id
|
||||
msg["References"] = original_msg_id
|
||||
|
||||
msg["Date"] = formatdate(localtime=True)
|
||||
msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
|
||||
msg["Message-ID"] = msg_id
|
||||
|
||||
if body:
|
||||
msg.attach(MIMEText(body, "plain", "utf-8"))
|
||||
|
||||
for file_path in file_paths:
|
||||
p = Path(file_path)
|
||||
try:
|
||||
with open(p, "rb") as f:
|
||||
part = MIMEBase("application", "octet-stream")
|
||||
part.set_payload(f.read())
|
||||
encoders.encode_base64(part)
|
||||
part.add_header("Content-Disposition", f"attachment; filename={p.name}")
|
||||
msg.attach(part)
|
||||
except Exception as e:
|
||||
logger.warning("[Email] Failed to attach %s: %s", file_path, e)
|
||||
|
||||
smtp = smtplib.SMTP(self._smtp_host, self._smtp_port, timeout=30)
|
||||
try:
|
||||
smtp.starttls(context=ssl.create_default_context())
|
||||
smtp.login(self._address, self._password)
|
||||
smtp.send_message(msg)
|
||||
finally:
|
||||
try:
|
||||
smtp.quit()
|
||||
except Exception:
|
||||
smtp.close()
|
||||
|
||||
logger.info("[Email] Sent multi-attachment email to %s (%d files)", to_addr, len(file_paths))
|
||||
return msg_id
|
||||
|
||||
async def send_document(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -695,7 +586,6 @@ class EmailAdapter(BasePlatformAdapter):
|
||||
msg["In-Reply-To"] = original_msg_id
|
||||
msg["References"] = original_msg_id
|
||||
|
||||
msg["Date"] = formatdate(localtime=True)
|
||||
msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
|
||||
msg["Message-ID"] = msg_id
|
||||
|
||||
|
||||
@@ -1700,7 +1700,6 @@ class FeishuAdapter(BasePlatformAdapter):
|
||||
if not self._client:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
content = self.format_message(content)
|
||||
try:
|
||||
msg_type, payload = self._build_outbound_payload(content)
|
||||
body = self._build_update_message_body(msg_type=msg_type, content=payload)
|
||||
|
||||
@@ -974,6 +974,7 @@ def build_whole_comment_prompt(
|
||||
|
||||
def _resolve_model_and_runtime() -> Tuple[str, dict]:
|
||||
"""Resolve model and provider credentials, same as gateway message handling."""
|
||||
import os
|
||||
from gateway.run import _load_gateway_config, _resolve_gateway_model
|
||||
|
||||
user_config = _load_gateway_config()
|
||||
|
||||
@@ -11,10 +11,10 @@ import logging
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Dict
|
||||
from typing import TYPE_CHECKING, Dict, Optional
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from gateway.platforms.base import MessageEvent
|
||||
from gateway.platforms.base import BasePlatformAdapter, MessageEvent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -57,15 +57,6 @@ class MessageDeduplicator:
|
||||
if len(self._seen) > self._max_size:
|
||||
cutoff = now - self._ttl
|
||||
self._seen = {k: v for k, v in self._seen.items() if v > cutoff}
|
||||
if len(self._seen) > self._max_size:
|
||||
# TTL pruning alone does not cap the cache when every entry is
|
||||
# still fresh. Keep the newest entries so the helper's
|
||||
# max_size bound is enforced under sustained traffic.
|
||||
newest = sorted(
|
||||
self._seen.items(),
|
||||
key=lambda item: item[1],
|
||||
)[-self._max_size:]
|
||||
self._seen = dict(newest)
|
||||
return False
|
||||
|
||||
def clear(self):
|
||||
|
||||
+47
-507
@@ -11,7 +11,6 @@ Environment variables:
|
||||
MATRIX_PASSWORD Password (alternative to access token)
|
||||
MATRIX_ENCRYPTION Set "true" to enable E2EE
|
||||
MATRIX_DEVICE_ID Stable device ID for E2EE persistence across restarts
|
||||
MATRIX_PROXY HTTP(S) or SOCKS proxy URL for Matrix traffic
|
||||
MATRIX_ALLOWED_USERS Comma-separated Matrix user IDs (@user:server)
|
||||
MATRIX_HOME_ROOM Room ID for cron/notification delivery
|
||||
MATRIX_REACTIONS Set "false" to disable processing lifecycle reactions
|
||||
@@ -19,7 +18,6 @@ Environment variables:
|
||||
MATRIX_REQUIRE_MENTION Require @mention in rooms (default: true)
|
||||
MATRIX_FREE_RESPONSE_ROOMS Comma-separated room IDs exempt from mention requirement
|
||||
MATRIX_AUTO_THREAD Auto-create threads for room messages (default: true)
|
||||
MATRIX_DM_AUTO_THREAD Auto-create threads for DM messages (default: false)
|
||||
MATRIX_RECOVERY_KEY Recovery key for cross-signing verification after device key rotation
|
||||
MATRIX_DM_MENTION_THREADS Create a thread when bot is @mentioned in a DM (default: false)
|
||||
"""
|
||||
@@ -32,8 +30,6 @@ import mimetypes
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
from html import escape as _html_escape
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Set
|
||||
@@ -99,25 +95,11 @@ from gateway.platforms.base import (
|
||||
MessageType,
|
||||
ProcessingOutcome,
|
||||
SendResult,
|
||||
resolve_proxy_url,
|
||||
proxy_kwargs_for_aiohttp,
|
||||
)
|
||||
from gateway.platforms.helpers import ThreadParticipationTracker
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class _MatrixApprovalPrompt:
|
||||
"""Tracks a pending Matrix reaction-based exec approval prompt."""
|
||||
|
||||
def __init__(self, session_key: str, chat_id: str, message_id: str, resolved: bool = False):
|
||||
self.session_key = session_key
|
||||
self.chat_id = chat_id
|
||||
self.message_id = message_id
|
||||
self.resolved = resolved
|
||||
self.bot_reaction_events: dict[str, str] = {} # emoji -> event_id
|
||||
|
||||
# Matrix message size limit (4000 chars practical, spec has no hard limit
|
||||
# but clients render poorly above this).
|
||||
MAX_MESSAGE_LENGTH = 4000
|
||||
@@ -132,85 +114,11 @@ _CRYPTO_DB_PATH = _STORE_DIR / "crypto.db"
|
||||
# Grace period: ignore messages older than this many seconds before startup.
|
||||
_STARTUP_GRACE_SECONDS = 5
|
||||
|
||||
_OUTBOUND_MENTION_RE = re.compile(
|
||||
r"(?<![\w/])(@[0-9A-Za-z._=/-]+:[0-9A-Za-z.-]+(?::\d+)?)"
|
||||
)
|
||||
|
||||
_E2EE_INSTALL_HINT = (
|
||||
"Install with: pip install 'mautrix[encryption]' (requires libolm C library)"
|
||||
)
|
||||
|
||||
_MATRIX_IMAGE_FILENAME_EXTS = frozenset({
|
||||
".jpg",
|
||||
".jpeg",
|
||||
".png",
|
||||
".gif",
|
||||
".webp",
|
||||
".bmp",
|
||||
".svg",
|
||||
".heic",
|
||||
".heif",
|
||||
".avif",
|
||||
})
|
||||
|
||||
|
||||
def _looks_like_matrix_image_filename(text: str) -> bool:
|
||||
"""Return True when Matrix image body text is probably just a transport filename.
|
||||
|
||||
Matrix ``m.image`` events commonly populate ``content.body`` with the uploaded
|
||||
filename when the user did not add a caption. Treating that raw filename as
|
||||
user-authored text confuses downstream vision enrichment.
|
||||
"""
|
||||
candidate = str(text or "").strip()
|
||||
if not candidate or "\n" in candidate or candidate.endswith("/"):
|
||||
return False
|
||||
|
||||
name = Path(candidate).name
|
||||
if not name or name != candidate:
|
||||
return False
|
||||
|
||||
suffix = Path(name).suffix.lower()
|
||||
if not suffix:
|
||||
return False
|
||||
|
||||
guessed_type, _ = mimetypes.guess_type(name)
|
||||
if guessed_type and guessed_type.startswith("image/"):
|
||||
return True
|
||||
return suffix in _MATRIX_IMAGE_FILENAME_EXTS
|
||||
|
||||
|
||||
def _create_matrix_session(proxy_url: str | None):
|
||||
"""Create an ``aiohttp.ClientSession`` whose proxy applies to *all* requests.
|
||||
|
||||
mautrix's ``HTTPAPI._send()`` calls ``session.request()`` without forwarding
|
||||
per-request ``proxy=`` kwargs. For HTTP(S) proxies we use aiohttp's native
|
||||
``proxy=`` session parameter which sets a default for every request. For SOCKS
|
||||
we use ``aiohttp_socks.ProxyConnector`` (connector-level).
|
||||
When no proxy is configured we enable ``trust_env`` so standard env vars
|
||||
(``HTTP_PROXY`` / ``HTTPS_PROXY``) are honoured automatically.
|
||||
"""
|
||||
import aiohttp
|
||||
|
||||
if not proxy_url:
|
||||
return aiohttp.ClientSession(trust_env=True)
|
||||
|
||||
if proxy_url.split("://")[0].lower().startswith("socks"):
|
||||
try:
|
||||
from aiohttp_socks import ProxyConnector
|
||||
|
||||
return aiohttp.ClientSession(
|
||||
connector=ProxyConnector.from_url(proxy_url, rdns=True),
|
||||
)
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"aiohttp_socks not installed — SOCKS proxy %s ignored. "
|
||||
"Run: pip install aiohttp-socks",
|
||||
proxy_url,
|
||||
)
|
||||
return aiohttp.ClientSession(trust_env=True)
|
||||
|
||||
return aiohttp.ClientSession(proxy=proxy_url)
|
||||
|
||||
|
||||
def _check_e2ee_deps() -> bool:
|
||||
"""Return True if mautrix E2EE dependencies (python-olm) are available."""
|
||||
@@ -352,9 +260,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
"1",
|
||||
"yes",
|
||||
)
|
||||
self._dm_auto_thread: bool = os.getenv(
|
||||
"MATRIX_DM_AUTO_THREAD", "false"
|
||||
).lower() in ("true", "1", "yes")
|
||||
self._dm_mention_threads: bool = os.getenv(
|
||||
"MATRIX_DM_MENTION_THREADS", "false"
|
||||
).lower() in ("true", "1", "yes")
|
||||
@@ -365,11 +270,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
).lower() not in ("false", "0", "no")
|
||||
self._pending_reactions: dict[tuple[str, str], str] = {}
|
||||
|
||||
# Proxy support — resolve once at init, reuse for all HTTP traffic.
|
||||
self._proxy_url: str | None = resolve_proxy_url(platform_env_var="MATRIX_PROXY")
|
||||
if self._proxy_url:
|
||||
logger.info("Matrix: proxy configured — %s", self._proxy_url)
|
||||
|
||||
# Text batching: merge rapid successive messages (Telegram-style).
|
||||
# Matrix clients split long messages around 4000 chars.
|
||||
self._text_batch_delay_seconds = float(
|
||||
@@ -381,18 +281,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
self._pending_text_batches: Dict[str, MessageEvent] = {}
|
||||
self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
|
||||
|
||||
# Matrix reaction-based dangerous command approvals.
|
||||
self._approval_reaction_map = {
|
||||
"✅": "once",
|
||||
"❎": "deny",
|
||||
}
|
||||
self._approval_prompts_by_event: Dict[str, _MatrixApprovalPrompt] = {}
|
||||
self._approval_prompt_by_session: Dict[str, str] = {}
|
||||
allowed_users_raw = os.getenv("MATRIX_ALLOWED_USERS", "")
|
||||
self._allowed_user_ids: Set[str] = {
|
||||
u.strip() for u in allowed_users_raw.split(",") if u.strip()
|
||||
}
|
||||
|
||||
def _is_duplicate_event(self, event_id) -> bool:
|
||||
"""Return True if this event was already processed. Tracks the ID otherwise."""
|
||||
if not event_id:
|
||||
@@ -438,7 +326,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
)
|
||||
return False
|
||||
except Exception as exc:
|
||||
logger.error("Matrix: post-upload key verification failed: %s", exc, exc_info=True)
|
||||
logger.error("Matrix: post-upload key verification failed: %s", exc)
|
||||
return False
|
||||
return True
|
||||
|
||||
@@ -454,7 +342,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
logger.error(
|
||||
"Matrix: cannot verify device keys on server: %s — refusing E2EE",
|
||||
exc,
|
||||
exc_info=True,
|
||||
)
|
||||
return False
|
||||
|
||||
@@ -469,7 +356,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
try:
|
||||
await olm.share_keys()
|
||||
except Exception as exc:
|
||||
logger.error("Matrix: failed to re-upload device keys: %s", exc, exc_info=True)
|
||||
logger.error("Matrix: failed to re-upload device keys: %s", exc)
|
||||
return False
|
||||
return await self._reverify_keys_after_upload(client, local_ed25519)
|
||||
|
||||
@@ -509,7 +396,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
"Try generating a new access token to get a fresh device.",
|
||||
client.device_id,
|
||||
exc,
|
||||
exc_info=True,
|
||||
)
|
||||
return False
|
||||
return await self._reverify_keys_after_upload(client, local_ed25519)
|
||||
@@ -534,11 +420,9 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
_STORE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Create the HTTP API layer.
|
||||
client_session = _create_matrix_session(self._proxy_url)
|
||||
api = HTTPAPI(
|
||||
base_url=self._homeserver,
|
||||
token=self._access_token or "",
|
||||
client_session=client_session,
|
||||
)
|
||||
|
||||
# Create the client.
|
||||
@@ -581,7 +465,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
logger.error(
|
||||
"Matrix: whoami failed — check MATRIX_ACCESS_TOKEN and MATRIX_HOMESERVER: %s",
|
||||
exc,
|
||||
exc_info=True,
|
||||
)
|
||||
await api.session.close()
|
||||
return False
|
||||
@@ -649,20 +532,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
)
|
||||
await crypto_store.open()
|
||||
|
||||
# Bind the store to the runtime device_id before any
|
||||
# put_account() runs. PgCryptoStore defaults _device_id
|
||||
# to "" and its crypto_account UPSERT never updates the
|
||||
# device_id column on conflict — so once put_account
|
||||
# writes blank, it stays blank forever. That breaks
|
||||
# every downstream device-scoped olm operation: peer
|
||||
# to-device ciphertext can't find our identity key and
|
||||
# no megolm sessions ever land. Setting _device_id here
|
||||
# (in-memory; the on-disk row may not exist yet) makes
|
||||
# the first put_account write the correct value.
|
||||
# DeviceID is a NewType(str) so plain str works at runtime.
|
||||
if client.device_id:
|
||||
await crypto_store.put_device_id(client.device_id)
|
||||
|
||||
crypto_state = _CryptoStateStore(state_store, self._joined_rooms)
|
||||
olm = OlmMachine(client, crypto_store, crypto_state)
|
||||
|
||||
@@ -724,44 +593,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
logger.warning(
|
||||
"Matrix: recovery key verification failed: %s", exc
|
||||
)
|
||||
else:
|
||||
# No recovery key — bootstrap cross-signing if the bot
|
||||
# has none yet. Without this, Element shows "Encrypted
|
||||
# by a device not verified by its owner" on every
|
||||
# message from this bot, indefinitely. mautrix's
|
||||
# generate_recovery_key does the full flow: generates
|
||||
# MSK/SSK/USK, uploads private keys to SSSS, publishes
|
||||
# public keys to the homeserver, and signs the current
|
||||
# device with the new SSK. Some homeservers require UIA
|
||||
# for /keys/device_signing/upload — those will need an
|
||||
# alternate path; Continuwuity and Synapse-with-shared-
|
||||
# secret accept the unauthenticated upload.
|
||||
try:
|
||||
own_xsign = await olm.get_own_cross_signing_public_keys()
|
||||
except Exception as exc:
|
||||
own_xsign = None
|
||||
logger.warning(
|
||||
"Matrix: cross-signing key lookup failed: %s", exc
|
||||
)
|
||||
if own_xsign is None:
|
||||
try:
|
||||
new_recovery_key = await olm.generate_recovery_key()
|
||||
logger.warning(
|
||||
"Matrix: bootstrapped cross-signing for %s. "
|
||||
"SAVE THIS RECOVERY KEY — set "
|
||||
"MATRIX_RECOVERY_KEY for future restarts so "
|
||||
"the bot can re-sign its device after key "
|
||||
"rotation: %s",
|
||||
client.mxid,
|
||||
new_recovery_key,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Matrix: cross-signing bootstrap failed "
|
||||
"(non-fatal — Element will show 'not "
|
||||
"verified by its owner'): %s",
|
||||
exc,
|
||||
)
|
||||
|
||||
client.crypto = olm
|
||||
logger.info(
|
||||
@@ -819,7 +650,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
await asyncio.gather(*tasks)
|
||||
except Exception as exc:
|
||||
logger.warning("Matrix: initial sync event dispatch error: %s", exc)
|
||||
await self._join_pending_invites(sync_data)
|
||||
else:
|
||||
logger.warning(
|
||||
"Matrix: initial sync returned unexpected type %s",
|
||||
@@ -883,8 +713,17 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
chunks = self.truncate_message(formatted, MAX_MESSAGE_LENGTH)
|
||||
|
||||
last_event_id = None
|
||||
for i, chunk in enumerate(chunks):
|
||||
msg_content = self._build_text_message_content(chunk)
|
||||
for chunk in chunks:
|
||||
msg_content: Dict[str, Any] = {
|
||||
"msgtype": "m.text",
|
||||
"body": chunk,
|
||||
}
|
||||
|
||||
# Convert markdown to HTML for rich rendering.
|
||||
html = self._markdown_to_html(chunk)
|
||||
if html and html != chunk:
|
||||
msg_content["format"] = "org.matrix.custom.html"
|
||||
msg_content["formatted_body"] = html
|
||||
|
||||
# Reply-to support.
|
||||
if reply_to:
|
||||
@@ -991,21 +830,25 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
"""Edit an existing message (via m.replace)."""
|
||||
|
||||
formatted = self.format_message(content)
|
||||
new_content = self._build_text_message_content(formatted)
|
||||
msg_content: Dict[str, Any] = {
|
||||
"msgtype": "m.text",
|
||||
"body": f"* {formatted}",
|
||||
"m.new_content": new_content,
|
||||
"m.new_content": {
|
||||
"msgtype": "m.text",
|
||||
"body": formatted,
|
||||
},
|
||||
"m.relates_to": {
|
||||
"rel_type": "m.replace",
|
||||
"event_id": message_id,
|
||||
},
|
||||
}
|
||||
if "m.mentions" in new_content:
|
||||
msg_content["m.mentions"] = new_content["m.mentions"]
|
||||
if "formatted_body" in new_content:
|
||||
|
||||
html = self._markdown_to_html(formatted)
|
||||
if html and html != formatted:
|
||||
msg_content["m.new_content"]["format"] = "org.matrix.custom.html"
|
||||
msg_content["m.new_content"]["formatted_body"] = html
|
||||
msg_content["format"] = "org.matrix.custom.html"
|
||||
msg_content["formatted_body"] = f'* {new_content["formatted_body"]}'
|
||||
msg_content["m.relates_to"] = {
|
||||
"rel_type": "m.replace",
|
||||
"event_id": message_id,
|
||||
}
|
||||
msg_content["formatted_body"] = f"* {html}"
|
||||
|
||||
try:
|
||||
event_id = await self._client.send_message_event(
|
||||
@@ -1038,12 +881,10 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
# Try aiohttp first (always available), fall back to httpx
|
||||
try:
|
||||
import aiohttp as _aiohttp
|
||||
_sess_kw, _req_kw = proxy_kwargs_for_aiohttp(self._proxy_url)
|
||||
async with _aiohttp.ClientSession(**_sess_kw) as http:
|
||||
|
||||
async with _aiohttp.ClientSession(trust_env=True) as http:
|
||||
async with http.get(
|
||||
image_url,
|
||||
timeout=_aiohttp.ClientTimeout(total=30),
|
||||
**_req_kw,
|
||||
image_url, timeout=_aiohttp.ClientTimeout(total=30)
|
||||
) as resp:
|
||||
resp.raise_for_status()
|
||||
data = await resp.read()
|
||||
@@ -1053,10 +894,8 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
)
|
||||
except ImportError:
|
||||
import httpx
|
||||
_httpx_kw: dict = {}
|
||||
if self._proxy_url:
|
||||
_httpx_kw["proxy"] = self._proxy_url
|
||||
async with httpx.AsyncClient(**_httpx_kw) as http:
|
||||
|
||||
async with httpx.AsyncClient() as http:
|
||||
resp = await http.get(image_url, follow_redirects=True, timeout=30)
|
||||
resp.raise_for_status()
|
||||
data = resp.content
|
||||
@@ -1131,56 +970,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
chat_id, video_path, "m.video", caption, reply_to, metadata=metadata
|
||||
)
|
||||
|
||||
async def send_exec_approval(
|
||||
self,
|
||||
chat_id: str,
|
||||
command: str,
|
||||
session_key: str,
|
||||
description: str = "dangerous command",
|
||||
metadata: Optional[dict] = None,
|
||||
) -> SendResult:
|
||||
"""Send a reaction-based exec approval prompt for Matrix."""
|
||||
if not self._client:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
cmd_preview = command[:2000] + "..." if len(command) > 2000 else command
|
||||
text = (
|
||||
"⚠️ **Dangerous command requires approval**\n"
|
||||
f"```\n{cmd_preview}\n```\n"
|
||||
f"Reason: {description}\n\n"
|
||||
"Reply `/approve` to execute, `/approve session` to approve this pattern for the session, "
|
||||
"`/approve always` to approve permanently, or `/deny` to cancel.\n\n"
|
||||
"You can also click the reaction to approve:\n"
|
||||
"✅ = /approve\n"
|
||||
"❎ = /deny"
|
||||
)
|
||||
|
||||
result = await self.send(chat_id, text, metadata=metadata)
|
||||
if not result.success or not result.message_id:
|
||||
return result
|
||||
|
||||
prompt = _MatrixApprovalPrompt(
|
||||
session_key=session_key,
|
||||
chat_id=chat_id,
|
||||
message_id=result.message_id,
|
||||
)
|
||||
old_event = self._approval_prompt_by_session.get(session_key)
|
||||
if old_event:
|
||||
self._approval_prompts_by_event.pop(old_event, None)
|
||||
self._approval_prompts_by_event[result.message_id] = prompt
|
||||
self._approval_prompt_by_session[session_key] = result.message_id
|
||||
|
||||
for emoji in ("✅", "❎"):
|
||||
try:
|
||||
reaction_result = await self._send_reaction(chat_id, result.message_id, emoji)
|
||||
# Save the bot's reaction event_id for later cleanup
|
||||
if reaction_result:
|
||||
prompt.bot_reaction_events[emoji] = str(reaction_result)
|
||||
except Exception as exc:
|
||||
logger.debug("Matrix: failed to add approval reaction %s: %s", emoji, exc)
|
||||
|
||||
return result
|
||||
|
||||
def format_message(self, content: str) -> str:
|
||||
"""Pass-through — Matrix supports standard Markdown natively."""
|
||||
# Strip image markdown; media is uploaded separately.
|
||||
@@ -1312,15 +1101,9 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
next_batch = await client.sync_store.get_next_batch()
|
||||
while not self._closing:
|
||||
try:
|
||||
# Wrap in asyncio.wait_for to guard against TCP-level hangs
|
||||
# that the Matrix long-poll timeout cannot catch. Long-poll
|
||||
# is 30s, so 45s gives 15s slack for network drain.
|
||||
sync_data = await asyncio.wait_for(
|
||||
client.sync(
|
||||
since=next_batch,
|
||||
timeout=30000,
|
||||
),
|
||||
timeout=45.0,
|
||||
sync_data = await client.sync(
|
||||
since=next_batch,
|
||||
timeout=30000,
|
||||
)
|
||||
|
||||
# nio returns SyncError objects (not exceptions) for auth
|
||||
@@ -1356,7 +1139,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
await asyncio.gather(*tasks)
|
||||
except Exception as exc:
|
||||
logger.warning("Matrix: sync event dispatch error: %s", exc)
|
||||
await self._join_pending_invites(sync_data)
|
||||
|
||||
except asyncio.CancelledError:
|
||||
return
|
||||
@@ -1382,92 +1164,13 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
# Event callbacks
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _is_self_sender(self, sender: str) -> bool:
|
||||
"""Return True if the sender refers to the bot's own account.
|
||||
|
||||
Matrix user IDs are byte-compared after trimming whitespace and
|
||||
lowercasing — some homeservers normalize the localpart case
|
||||
differently at different API surfaces, and the reply-loop tail
|
||||
of the "hall of mirrors" bug (#15763) has been observed with the
|
||||
bot's own account bypassing a case-sensitive equality check.
|
||||
|
||||
When ``self._user_id`` is empty (whoami hasn't resolved yet, or
|
||||
login failed), we cannot prove a sender is NOT us, so we return
|
||||
True defensively — an unidentified bot dropping its own events
|
||||
is always preferable to falling into an echo loop.
|
||||
"""
|
||||
own = (self._user_id or "").strip().lower()
|
||||
if not own:
|
||||
return True
|
||||
return sender.strip().lower() == own
|
||||
|
||||
@staticmethod
|
||||
def _is_system_or_bridge_sender(sender: str) -> bool:
|
||||
"""Return True if the sender looks like a system / bridge / appservice
|
||||
identity rather than a real user.
|
||||
|
||||
Appservice namespaces on Matrix conventionally prefix bot / puppet
|
||||
user IDs with an underscore (e.g. ``@_telegram_12345:server``,
|
||||
``@_discord_999:server``, ``@_slack_...:server``). Server-notices
|
||||
bots and bridge-controller bots on many homeservers use the same
|
||||
pattern.
|
||||
|
||||
We treat these as system identities for pairing purposes: they
|
||||
should never be offered a pairing code, because an operator
|
||||
approving the code would hand the bridge itself permanent
|
||||
authorization — and every outbound message relayed by the bridge
|
||||
would then loop back into the agent as an "authorized user
|
||||
message", which is the root of issue #15763.
|
||||
|
||||
Matches:
|
||||
``@_something:server`` — appservice namespace convention
|
||||
``@:server`` — malformed / empty localpart
|
||||
``:server`` — malformed, no leading ``@``
|
||||
"""
|
||||
s = (sender or "").strip()
|
||||
if not s:
|
||||
return True
|
||||
# Localpart is everything between leading '@' and ':'
|
||||
if s.startswith("@"):
|
||||
s = s[1:]
|
||||
if ":" in s:
|
||||
localpart, _, _ = s.partition(":")
|
||||
else:
|
||||
localpart = s
|
||||
if not localpart:
|
||||
return True
|
||||
return localpart.startswith("_")
|
||||
|
||||
async def _on_room_message(self, event: Any) -> None:
|
||||
"""Handle incoming room message events (text, media)."""
|
||||
room_id = str(getattr(event, "room_id", ""))
|
||||
sender = str(getattr(event, "sender", ""))
|
||||
|
||||
# Diagnostic: confirm the callback is firing at all when DEBUG is on.
|
||||
# Helps users troubleshoot silent inbound issues like #5819, #7914, #12614.
|
||||
logger.debug(
|
||||
"Matrix: callback fired — event %s from %s in %s",
|
||||
getattr(event, "event_id", "?"),
|
||||
sender,
|
||||
room_id,
|
||||
)
|
||||
|
||||
# Ignore own messages (case-insensitive; also drops when our own
|
||||
# user_id hasn't been resolved yet — see _is_self_sender docstring
|
||||
# and issue #15763).
|
||||
if self._is_self_sender(sender):
|
||||
return
|
||||
|
||||
# Ignore appservice / bridge / system identities so they never
|
||||
# trigger the pairing flow. Once a bridge user is paired, every
|
||||
# outbound message it relays would loop back as an authorized
|
||||
# user message (the "hall of mirrors" in #15763).
|
||||
if self._is_system_or_bridge_sender(sender):
|
||||
logger.debug(
|
||||
"Matrix: ignoring system/bridge sender %s in %s",
|
||||
sender,
|
||||
room_id,
|
||||
)
|
||||
# Ignore own messages.
|
||||
if sender == self._user_id:
|
||||
return
|
||||
|
||||
# Deduplicate by event ID.
|
||||
@@ -1563,12 +1266,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
in_bot_thread = bool(thread_id and thread_id in self._threads)
|
||||
if self._require_mention and not is_free_room and not in_bot_thread:
|
||||
if not is_mentioned:
|
||||
logger.debug(
|
||||
"Matrix: ignoring message %s in %s — no @mention "
|
||||
"(set MATRIX_REQUIRE_MENTION=false to disable)",
|
||||
event_id,
|
||||
room_id,
|
||||
)
|
||||
return None
|
||||
|
||||
# DM mention-thread.
|
||||
@@ -1581,7 +1278,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
body = self._strip_mention(body)
|
||||
|
||||
# Auto-thread.
|
||||
if not thread_id and ((not is_dm and self._auto_thread) or (is_dm and self._dm_auto_thread)):
|
||||
if not is_dm and not thread_id and self._auto_thread:
|
||||
thread_id = event_id
|
||||
self._threads.mark(thread_id)
|
||||
|
||||
@@ -1823,9 +1520,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
return
|
||||
body, is_dm, chat_type, thread_id, display_name, source = ctx
|
||||
|
||||
if msgtype == "m.image" and _looks_like_matrix_image_filename(body):
|
||||
body = ""
|
||||
|
||||
allow_http_fallback = bool(http_url) and not is_encrypted_media
|
||||
media_urls = (
|
||||
[cached_path]
|
||||
@@ -1855,35 +1549,13 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
"Matrix: invited to %s — joining",
|
||||
room_id,
|
||||
)
|
||||
await self._join_room_by_id(room_id)
|
||||
|
||||
async def _join_room_by_id(self, room_id: str) -> bool:
|
||||
"""Join a room by ID and refresh local caches on success."""
|
||||
if not room_id:
|
||||
return False
|
||||
if room_id in self._joined_rooms:
|
||||
return True
|
||||
try:
|
||||
await self._client.join_room(RoomID(room_id))
|
||||
self._joined_rooms.add(room_id)
|
||||
logger.info("Matrix: joined %s", room_id)
|
||||
await self._refresh_dm_cache()
|
||||
return True
|
||||
except Exception as exc:
|
||||
logger.warning("Matrix: error joining %s: %s", room_id, exc)
|
||||
return False
|
||||
|
||||
async def _join_pending_invites(self, sync_data: Dict[str, Any]) -> None:
|
||||
"""Join rooms still present in rooms.invite after sync processing."""
|
||||
rooms = sync_data.get("rooms", {}) if isinstance(sync_data, dict) else {}
|
||||
invites = rooms.get("invite", {})
|
||||
if not isinstance(invites, dict):
|
||||
return
|
||||
for room_id in invites:
|
||||
if room_id in self._joined_rooms:
|
||||
continue
|
||||
logger.info("Matrix: reconciling pending invite for %s", room_id)
|
||||
await self._join_room_by_id(str(room_id))
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Reactions (send, receive, processing lifecycle)
|
||||
@@ -1968,7 +1640,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
async def _on_reaction(self, event: Any) -> None:
|
||||
"""Handle incoming reaction events."""
|
||||
sender = str(getattr(event, "sender", ""))
|
||||
if self._is_self_sender(sender):
|
||||
if sender == self._user_id:
|
||||
return
|
||||
event_id = str(getattr(event, "event_id", ""))
|
||||
if self._is_duplicate_event(event_id):
|
||||
@@ -1998,51 +1670,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
room_id,
|
||||
)
|
||||
|
||||
# Check if this reaction resolves a pending approval prompt.
|
||||
prompt = self._approval_prompts_by_event.get(reacts_to)
|
||||
if prompt and not prompt.resolved:
|
||||
if room_id != prompt.chat_id:
|
||||
return
|
||||
if self._allowed_user_ids and sender not in self._allowed_user_ids:
|
||||
logger.info(
|
||||
"Matrix: ignoring approval reaction from unauthorized user %s on %s",
|
||||
sender, reacts_to,
|
||||
)
|
||||
return
|
||||
choice = self._approval_reaction_map.get(key)
|
||||
if not choice:
|
||||
return
|
||||
try:
|
||||
from tools.approval import resolve_gateway_approval
|
||||
|
||||
count = resolve_gateway_approval(prompt.session_key, choice)
|
||||
if count:
|
||||
prompt.resolved = True
|
||||
self._approval_prompts_by_event.pop(reacts_to, None)
|
||||
self._approval_prompt_by_session.pop(prompt.session_key, None)
|
||||
logger.info(
|
||||
"Matrix reaction resolved %d approval(s) for session %s "
|
||||
"(choice=%s, user=%s)",
|
||||
count, prompt.session_key, choice, sender,
|
||||
)
|
||||
# Redact bot's seed reactions, leaving only the user's
|
||||
await self._redact_bot_approval_reactions(room_id, prompt)
|
||||
except Exception as exc:
|
||||
logger.error("Failed to resolve gateway approval from Matrix reaction: %s", exc)
|
||||
|
||||
async def _redact_bot_approval_reactions(
|
||||
self,
|
||||
room_id: str,
|
||||
prompt: "_MatrixApprovalPrompt",
|
||||
) -> None:
|
||||
"""Redact the bot's seed ✅/❎ reactions, leaving only the user's reaction."""
|
||||
for emoji, evt_id in prompt.bot_reaction_events.items():
|
||||
try:
|
||||
await self.redact_message(room_id, evt_id, "approval resolved")
|
||||
logger.debug("Matrix: redacted bot reaction %s (%s)", emoji, evt_id)
|
||||
except Exception as exc:
|
||||
logger.debug("Matrix: failed to redact bot reaction %s: %s", emoji, exc)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Text message aggregation (handles Matrix client-side splits)
|
||||
# ------------------------------------------------------------------
|
||||
@@ -2268,7 +1895,11 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
if not self._client or not text:
|
||||
return SendResult(success=False, error="No client or empty text")
|
||||
|
||||
msg_content = self._build_text_message_content(text, msgtype=msgtype)
|
||||
msg_content: Dict[str, Any] = {"msgtype": msgtype, "body": text}
|
||||
html = self._markdown_to_html(text)
|
||||
if html and html != text:
|
||||
msg_content["format"] = "org.matrix.custom.html"
|
||||
msg_content["formatted_body"] = html
|
||||
|
||||
try:
|
||||
event_id = await self._client.send_message_event(
|
||||
@@ -2331,77 +1962,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
# Mention detection helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _build_text_message_content(self, text: str, msgtype: str = "m.text") -> Dict[str, Any]:
|
||||
"""Build Matrix text content with HTML and outbound mention metadata."""
|
||||
msg_content: Dict[str, Any] = {"msgtype": msgtype, "body": text}
|
||||
mention_user_ids = self._extract_outbound_mentions(text)
|
||||
if mention_user_ids:
|
||||
msg_content["m.mentions"] = {"user_ids": mention_user_ids}
|
||||
|
||||
html_source = self._inject_outbound_mention_links(text)
|
||||
html = self._markdown_to_html(html_source)
|
||||
if html and html != text:
|
||||
msg_content["format"] = "org.matrix.custom.html"
|
||||
msg_content["formatted_body"] = html
|
||||
|
||||
return msg_content
|
||||
|
||||
def _extract_outbound_mentions(self, text: str) -> list[str]:
|
||||
"""Return unique Matrix user IDs mentioned in outbound text."""
|
||||
protected, _ = self._protect_outbound_mention_regions(text)
|
||||
seen: Set[str] = set()
|
||||
mentions: list[str] = []
|
||||
for match in _OUTBOUND_MENTION_RE.finditer(protected):
|
||||
user_id = match.group(1)
|
||||
if user_id not in seen:
|
||||
seen.add(user_id)
|
||||
mentions.append(user_id)
|
||||
return mentions
|
||||
|
||||
def _inject_outbound_mention_links(self, text: str) -> str:
|
||||
"""Wrap outbound Matrix mentions in markdown links outside code spans."""
|
||||
if not text:
|
||||
return text
|
||||
|
||||
protected, placeholders = self._protect_outbound_mention_regions(text)
|
||||
|
||||
linked = _OUTBOUND_MENTION_RE.sub(
|
||||
lambda match: f"[{match.group(1)}](https://matrix.to/#/{match.group(1)})",
|
||||
protected,
|
||||
)
|
||||
|
||||
for idx, original in enumerate(placeholders):
|
||||
linked = linked.replace(f"\x00MENTION_PROTECTED{idx}\x00", original)
|
||||
|
||||
return linked
|
||||
|
||||
def _protect_outbound_mention_regions(self, text: str) -> tuple[str, list[str]]:
|
||||
"""Protect markdown regions where outbound mentions should stay literal."""
|
||||
placeholders: list[str] = []
|
||||
|
||||
def _protect(fragment: str) -> str:
|
||||
idx = len(placeholders)
|
||||
placeholders.append(fragment)
|
||||
return f"\x00MENTION_PROTECTED{idx}\x00"
|
||||
|
||||
protected = re.sub(
|
||||
r"```[\s\S]*?```",
|
||||
lambda match: _protect(match.group(0)),
|
||||
text or "",
|
||||
)
|
||||
protected = re.sub(
|
||||
r"`[^`\n]+`",
|
||||
lambda match: _protect(match.group(0)),
|
||||
protected,
|
||||
)
|
||||
protected = re.sub(
|
||||
r"\[[^\]]+\]\([^)]+\)",
|
||||
lambda match: _protect(match.group(0)),
|
||||
protected,
|
||||
)
|
||||
|
||||
return protected, placeholders
|
||||
|
||||
def _is_bot_mentioned(
|
||||
self,
|
||||
body: str,
|
||||
@@ -2436,33 +1996,13 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
return False
|
||||
|
||||
def _strip_mention(self, body: str) -> str:
|
||||
"""Remove explicit bot mentions from message body.
|
||||
"""Strip the bot's full MXID (``@user:server``) from *body*.
|
||||
|
||||
Important: only strip explicit mention tokens (``@user:server`` or
|
||||
``@localpart``). Do NOT strip bare words matching the bot localpart,
|
||||
otherwise normal phrases like "Hermes Agent" become "Agent".
|
||||
The bare localpart is intentionally *not* stripped — it would
|
||||
mangle file paths like ``/home/hermes/media/file.png``.
|
||||
"""
|
||||
if not body:
|
||||
return ""
|
||||
|
||||
# Strip explicit full MXID mentions.
|
||||
if self._user_id:
|
||||
body = body.replace(self._user_id, "")
|
||||
|
||||
# Strip explicit @localpart mentions only (not bare localpart words).
|
||||
if self._user_id and ":" in self._user_id:
|
||||
localpart = self._user_id.split(":")[0].lstrip("@")
|
||||
if localpart:
|
||||
body = re.sub(
|
||||
r'(?<![\w])@' + re.escape(localpart) + r'\b',
|
||||
'',
|
||||
body,
|
||||
flags=re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Normalize spacing after mention removal.
|
||||
body = re.sub(r'[ \t]{2,}', ' ', body)
|
||||
body = re.sub(r'\s+([,.;:!?])', r'\1', body)
|
||||
return body.strip()
|
||||
|
||||
async def _get_display_name(self, room_id: str, user_id: str) -> str:
|
||||
|
||||
@@ -19,7 +19,7 @@ import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.helpers import MessageDeduplicator
|
||||
@@ -412,6 +412,7 @@ class MattermostAdapter(BasePlatformAdapter):
|
||||
|
||||
import aiohttp
|
||||
|
||||
last_exc = None
|
||||
file_data = None
|
||||
ct = "application/octet-stream"
|
||||
fname = url.rsplit("/", 1)[-1].split("?")[0] or f"{kind}.png"
|
||||
@@ -496,100 +497,6 @@ class MattermostAdapter(BasePlatformAdapter):
|
||||
return SendResult(success=False, error="Failed to post with file")
|
||||
return SendResult(success=True, message_id=data["id"])
|
||||
|
||||
async def send_multiple_images(
|
||||
self,
|
||||
chat_id: str,
|
||||
images: List[Tuple[str, str]],
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
human_delay: float = 0.0,
|
||||
) -> None:
|
||||
"""Send a batch of images as a single Mattermost post with multiple attachments.
|
||||
|
||||
Mattermost supports up to 5 ``file_ids`` per post. Each image is
|
||||
uploaded individually (Mattermost's file API is one-at-a-time),
|
||||
then a single post is created referencing all uploaded file_ids
|
||||
at once. Batches larger than 5 are chunked. Falls back to the
|
||||
base per-image loop on total failure.
|
||||
"""
|
||||
if not images:
|
||||
return
|
||||
|
||||
import mimetypes
|
||||
import aiohttp
|
||||
from urllib.parse import unquote as _unquote
|
||||
|
||||
CHUNK = 5 # Mattermost post file_ids cap
|
||||
chunks = [images[i:i + CHUNK] for i in range(0, len(images), CHUNK)]
|
||||
|
||||
for chunk_idx, chunk in enumerate(chunks):
|
||||
if human_delay > 0 and chunk_idx > 0:
|
||||
await asyncio.sleep(human_delay)
|
||||
|
||||
file_ids: List[str] = []
|
||||
caption_parts: List[str] = []
|
||||
try:
|
||||
for image_url, alt_text in chunk:
|
||||
if alt_text:
|
||||
caption_parts.append(alt_text)
|
||||
|
||||
if image_url.startswith("file://"):
|
||||
local_path = _unquote(image_url[7:])
|
||||
p = Path(local_path)
|
||||
if not p.exists():
|
||||
logger.warning("Mattermost: skipping missing image %s", local_path)
|
||||
continue
|
||||
fname = p.name
|
||||
ct = mimetypes.guess_type(fname)[0] or "image/png"
|
||||
file_data = p.read_bytes()
|
||||
else:
|
||||
from tools.url_safety import is_safe_url
|
||||
if not is_safe_url(image_url):
|
||||
logger.warning("Mattermost: blocked unsafe image URL in batch")
|
||||
continue
|
||||
try:
|
||||
async with self._session.get(
|
||||
image_url, timeout=aiohttp.ClientTimeout(total=30)
|
||||
) as resp:
|
||||
if resp.status >= 400:
|
||||
logger.warning(
|
||||
"Mattermost: failed to download image (HTTP %d): %s",
|
||||
resp.status, image_url[:80],
|
||||
)
|
||||
continue
|
||||
file_data = await resp.read()
|
||||
ct = resp.content_type or "image/png"
|
||||
except Exception as dl_err:
|
||||
logger.warning("Mattermost: download failed for %s: %s", image_url[:80], dl_err)
|
||||
continue
|
||||
fname = image_url.rsplit("/", 1)[-1].split("?")[0] or f"image_{len(file_ids)}.png"
|
||||
|
||||
fid = await self._upload_file(chat_id, file_data, fname, ct)
|
||||
if fid:
|
||||
file_ids.append(fid)
|
||||
|
||||
if not file_ids:
|
||||
continue
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"channel_id": chat_id,
|
||||
"message": "\n".join(caption_parts),
|
||||
"file_ids": file_ids,
|
||||
}
|
||||
logger.info(
|
||||
"Mattermost: sending %d image(s) as single post (chunk %d/%d)",
|
||||
len(file_ids), chunk_idx + 1, len(chunks),
|
||||
)
|
||||
data = await self._api_post("posts", payload)
|
||||
if not data or "id" not in data:
|
||||
logger.warning("Mattermost: multi-image post failed, falling back")
|
||||
await super().send_multiple_images(chat_id, chunk, metadata, human_delay=human_delay)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Mattermost: multi-image send failed (chunk %d/%d), falling back: %s",
|
||||
chunk_idx + 1, len(chunks), e, exc_info=True,
|
||||
)
|
||||
await super().send_multiple_images(chat_id, chunk, metadata, human_delay=human_delay)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# WebSocket
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@@ -535,9 +535,6 @@ class QQAdapter(BasePlatformAdapter):
|
||||
quick_disconnect_count = 0
|
||||
else:
|
||||
backoff_idx += 1
|
||||
if backoff_idx >= MAX_RECONNECT_ATTEMPTS:
|
||||
logger.error("[%s] Max reconnect attempts reached (QQCloseError)", self._log_tag)
|
||||
return
|
||||
|
||||
except Exception as exc:
|
||||
if not self._running:
|
||||
@@ -976,18 +973,6 @@ class QQAdapter(BasePlatformAdapter):
|
||||
if not channel_id:
|
||||
return
|
||||
|
||||
# Apply group_policy ACL — guild channels are group-like contexts.
|
||||
# Without this check any member of any guild the bot is in could
|
||||
# bypass the configured allowlist.
|
||||
guild_id = str(d.get("guild_id", ""))
|
||||
author_id = str(author.get("id", ""))
|
||||
if not self._is_group_allowed(guild_id or channel_id, author_id):
|
||||
logger.debug(
|
||||
"[%s] Guild message blocked by ACL: channel=%s user=%s",
|
||||
self._log_tag, channel_id, author_id,
|
||||
)
|
||||
return
|
||||
|
||||
member = d.get("member") if isinstance(d.get("member"), dict) else {}
|
||||
nick = str(member.get("nick", "")) or str(author.get("username", ""))
|
||||
|
||||
@@ -1044,17 +1029,6 @@ class QQAdapter(BasePlatformAdapter):
|
||||
if not guild_id:
|
||||
return
|
||||
|
||||
# Apply dm_policy ACL — guild DMs were previously unauthenticated.
|
||||
# Without this check any member of any guild the bot is in could
|
||||
# bypass the configured allowlist via direct messages.
|
||||
author_id = str(author.get("id", ""))
|
||||
if not self._is_dm_allowed(author_id):
|
||||
logger.debug(
|
||||
"[%s] Guild DM blocked by ACL: guild=%s user=%s",
|
||||
self._log_tag, guild_id, author_id,
|
||||
)
|
||||
return
|
||||
|
||||
text = content
|
||||
att_result = await self._process_attachments(d.get("attachments"))
|
||||
image_urls = att_result["image_urls"]
|
||||
@@ -1980,7 +1954,7 @@ class QQAdapter(BasePlatformAdapter):
|
||||
self, openid: str, content: str, reply_to: Optional[str] = None
|
||||
) -> SendResult:
|
||||
"""Send text to a C2C user via REST API."""
|
||||
self._next_msg_seq(reply_to or openid)
|
||||
msg_seq = self._next_msg_seq(reply_to or openid)
|
||||
body = self._build_text_body(content, reply_to)
|
||||
if reply_to:
|
||||
body["msg_id"] = reply_to
|
||||
@@ -1993,7 +1967,7 @@ class QQAdapter(BasePlatformAdapter):
|
||||
self, group_openid: str, content: str, reply_to: Optional[str] = None
|
||||
) -> SendResult:
|
||||
"""Send text to a group via REST API."""
|
||||
self._next_msg_seq(reply_to or group_openid)
|
||||
msg_seq = self._next_msg_seq(reply_to or group_openid)
|
||||
body = self._build_text_body(content, reply_to)
|
||||
if reply_to:
|
||||
body["msg_id"] = reply_to
|
||||
@@ -2158,6 +2132,11 @@ class QQAdapter(BasePlatformAdapter):
|
||||
|
||||
# Route
|
||||
chat_type = self._guess_chat_type(chat_id)
|
||||
target_path = (
|
||||
f"/v2/users/{chat_id}/files"
|
||||
if chat_type == "c2c"
|
||||
else f"/v2/groups/{chat_id}/files"
|
||||
)
|
||||
|
||||
if chat_type == "guild":
|
||||
# Guild channels don't support native media upload in the same way
|
||||
|
||||
+12
-490
@@ -21,7 +21,7 @@ import time
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from typing import Dict, List, Optional, Any
|
||||
from urllib.parse import quote, unquote
|
||||
|
||||
import httpx
|
||||
@@ -31,7 +31,6 @@ from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
ProcessingOutcome,
|
||||
SendResult,
|
||||
cache_image_from_bytes,
|
||||
cache_audio_from_bytes,
|
||||
@@ -39,17 +38,6 @@ from gateway.platforms.base import (
|
||||
cache_image_from_url,
|
||||
)
|
||||
from gateway.platforms.helpers import redact_phone
|
||||
from gateway.platforms.signal_rate_limit import (
|
||||
SIGNAL_BATCH_PACING_NOTICE_THRESHOLD,
|
||||
SIGNAL_MAX_ATTACHMENTS_PER_MSG,
|
||||
SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
|
||||
SignalRateLimitError,
|
||||
_extract_retry_after_seconds,
|
||||
_format_wait,
|
||||
_is_signal_rate_limit_error,
|
||||
_signal_send_timeout,
|
||||
get_scheduler,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -64,7 +52,6 @@ SSE_RETRY_DELAY_MAX = 60.0
|
||||
HEALTH_CHECK_INTERVAL = 30.0 # seconds between health checks
|
||||
HEALTH_CHECK_STALE_THRESHOLD = 120.0 # seconds without SSE activity before concern
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -175,10 +162,6 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
"""Signal messenger adapter using signal-cli HTTP daemon."""
|
||||
|
||||
platform = Platform.SIGNAL
|
||||
# Signal has no real edit API for already-sent messages. Mark it explicitly
|
||||
# so streaming suppresses the visible cursor instead of leaving a stale tofu
|
||||
# square behind in chat clients when edit attempts fail.
|
||||
SUPPORTS_MESSAGE_EDITING = False
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.SIGNAL)
|
||||
@@ -505,11 +488,6 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
if text and mentions:
|
||||
text = _render_mentions(text, mentions)
|
||||
|
||||
# Extract quote (reply-to) context from Signal dataMessage
|
||||
quote_data = data_message.get("quote") or {}
|
||||
reply_to_id = str(quote_data.get("id")) if quote_data.get("id") else None
|
||||
reply_to_text = quote_data.get("text")
|
||||
|
||||
# Process attachments
|
||||
attachments_data = data_message.get("attachments", [])
|
||||
media_urls = []
|
||||
@@ -563,9 +541,7 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
else:
|
||||
timestamp = datetime.now(tz=timezone.utc)
|
||||
|
||||
# Build and dispatch event.
|
||||
# Store raw envelope data in raw_message so on_processing_start/complete
|
||||
# can extract targetAuthor + targetTimestamp for sendReaction.
|
||||
# Build and dispatch event
|
||||
event = MessageEvent(
|
||||
source=source,
|
||||
text=text or "",
|
||||
@@ -573,9 +549,6 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
media_urls=media_urls,
|
||||
media_types=media_types,
|
||||
timestamp=timestamp,
|
||||
raw_message={"sender": sender, "timestamp_ms": ts_ms},
|
||||
reply_to_message_id=reply_to_id,
|
||||
reply_to_text=reply_to_text,
|
||||
)
|
||||
|
||||
logger.debug("Signal: message from %s in %s: %s",
|
||||
@@ -686,8 +659,6 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
rpc_id: str = None,
|
||||
*,
|
||||
log_failures: bool = True,
|
||||
raise_on_rate_limit: bool = False,
|
||||
timeout: float = 30.0,
|
||||
) -> Any:
|
||||
"""Send a JSON-RPC 2.0 request to signal-cli daemon.
|
||||
|
||||
@@ -696,11 +667,6 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
repeated NETWORK_FAILURE spam for unreachable recipients while
|
||||
still preserving visibility for the first occurrence and for
|
||||
unrelated RPCs.
|
||||
|
||||
When ``raise_on_rate_limit=True``, a Signal ``[429]`` /
|
||||
``RateLimitException`` response raises ``SignalRateLimitError``
|
||||
instead of being swallowed — lets callers (multi-attachment send)
|
||||
opt into backoff-retry without changing default behaviour.
|
||||
"""
|
||||
if not self.client:
|
||||
logger.warning("Signal: RPC called but client not connected")
|
||||
@@ -720,28 +686,20 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
resp = await self.client.post(
|
||||
f"{self.http_url}/api/v1/rpc",
|
||||
json=payload,
|
||||
timeout=timeout,
|
||||
timeout=30.0,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
if "error" in data:
|
||||
err = data["error"]
|
||||
if raise_on_rate_limit:
|
||||
if _is_signal_rate_limit_error(err):
|
||||
err_msg = str(err.get("message", "")) if isinstance(err, dict) else str(err)
|
||||
retry_after = _extract_retry_after_seconds(err)
|
||||
raise SignalRateLimitError(err_msg, retry_after=retry_after)
|
||||
if log_failures:
|
||||
logger.warning("Signal RPC error (%s): %s", method, err)
|
||||
logger.warning("Signal RPC error (%s): %s", method, data["error"])
|
||||
else:
|
||||
logger.debug("Signal RPC error (%s): %s", method, err)
|
||||
logger.debug("Signal RPC error (%s): %s", method, data["error"])
|
||||
return None
|
||||
|
||||
return data.get("result")
|
||||
|
||||
except SignalRateLimitError:
|
||||
raise
|
||||
except Exception as e:
|
||||
if log_failures:
|
||||
logger.warning("Signal RPC %s failed: %s", method, e)
|
||||
@@ -749,159 +707,6 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
logger.debug("Signal RPC %s failed: %s", method, e)
|
||||
return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Formatting — markdown → Signal body ranges
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _markdown_to_signal(text: str) -> tuple:
|
||||
"""Convert markdown to plain text + Signal textStyles list.
|
||||
|
||||
Signal doesn't render markdown. Instead it uses ``bodyRanges``
|
||||
(exposed by signal-cli as ``textStyle`` / ``textStyles`` params)
|
||||
with the format ``start:length:STYLE``.
|
||||
|
||||
Positions are measured in **UTF-16 code units** (not Python code
|
||||
points) because that's what the Signal protocol uses.
|
||||
|
||||
Supported styles: BOLD, ITALIC, STRIKETHROUGH, MONOSPACE.
|
||||
(Signal's SPOILER style is not currently mapped — no standard
|
||||
markdown syntax for it; would need ``||spoiler||`` parsing.)
|
||||
|
||||
Returns ``(plain_text, styles_list)`` where *styles_list* may be
|
||||
empty if there's nothing to format.
|
||||
"""
|
||||
import re
|
||||
|
||||
def _utf16_len(s: str) -> int:
|
||||
"""Length of *s* in UTF-16 code units."""
|
||||
return len(s.encode("utf-16-le")) // 2
|
||||
|
||||
# Pre-process: normalize whitespace before any position tracking
|
||||
# so later operations don't invalidate recorded offsets.
|
||||
text = re.sub(r"\n{3,}", "\n\n", text)
|
||||
text = text.strip()
|
||||
|
||||
styles: list = []
|
||||
|
||||
# --- Phase 1: fenced code blocks ```...``` → MONOSPACE ---
|
||||
_CB = re.compile(r"```[a-zA-Z0-9_+-]*\n?(.*?)```", re.DOTALL)
|
||||
while m := _CB.search(text):
|
||||
inner = m.group(1).rstrip("\n")
|
||||
start = m.start()
|
||||
text = text[: m.start()] + inner + text[m.end() :]
|
||||
styles.append((start, len(inner), "MONOSPACE"))
|
||||
|
||||
# --- Phase 2: heading markers # Foo → Foo (BOLD) ---
|
||||
_HEADING = re.compile(r"^#{1,6}\s+", re.MULTILINE)
|
||||
new_text = ""
|
||||
last_end = 0
|
||||
for m in _HEADING.finditer(text):
|
||||
new_text += text[last_end : m.start()]
|
||||
last_end = m.end()
|
||||
eol = text.find("\n", m.end())
|
||||
if eol == -1:
|
||||
eol = len(text)
|
||||
heading_text = text[m.end() : eol]
|
||||
start = len(new_text)
|
||||
new_text += heading_text
|
||||
styles.append((start, len(heading_text), "BOLD"))
|
||||
last_end = eol
|
||||
new_text += text[last_end:]
|
||||
text = new_text
|
||||
|
||||
# --- Phase 3: inline patterns (single-pass to avoid offset drift) ---
|
||||
# The old code processed each pattern sequentially, stripping markers
|
||||
# and recording positions per-pass. Later passes shifted text without
|
||||
# adjusting earlier positions → bold/italic landed mid-word.
|
||||
#
|
||||
# Fix: collect ALL non-overlapping matches first, then strip every
|
||||
# marker in one pass so positions are computed against the final text.
|
||||
_PATTERNS = [
|
||||
(re.compile(r"\*\*(.+?)\*\*", re.DOTALL), "BOLD"),
|
||||
(re.compile(r"__(.+?)__", re.DOTALL), "BOLD"),
|
||||
(re.compile(r"~~(.+?)~~", re.DOTALL), "STRIKETHROUGH"),
|
||||
(re.compile(r"`(.+?)`"), "MONOSPACE"),
|
||||
(re.compile(r"(?<!\*)\*(?!\*| )(.+?)(?<!\*)\*(?!\*)"), "ITALIC"),
|
||||
(re.compile(r"(?<!\w)_(?!_)(.+?)(?<!_)_(?!\w)"), "ITALIC"),
|
||||
]
|
||||
|
||||
# Collect all non-overlapping matches (earlier patterns win ties).
|
||||
all_matches: list = [] # (start, end, g1_start, g1_end, style)
|
||||
occupied: list = [] # (start, end) intervals already claimed
|
||||
for pat, style in _PATTERNS:
|
||||
for m in pat.finditer(text):
|
||||
ms, me = m.start(), m.end()
|
||||
if not any(ms < oe and me > os for os, oe in occupied):
|
||||
all_matches.append((ms, me, m.start(1), m.end(1), style))
|
||||
occupied.append((ms, me))
|
||||
all_matches.sort()
|
||||
|
||||
# Build removal list so we can adjust Phase 1/2 styles.
|
||||
# Each match removes its prefix markers (start..g1_start) and
|
||||
# suffix markers (g1_end..end).
|
||||
removals: list = [] # (position, length) sorted
|
||||
for ms, me, g1s, g1e, _ in all_matches:
|
||||
if g1s > ms:
|
||||
removals.append((ms, g1s - ms))
|
||||
if me > g1e:
|
||||
removals.append((g1e, me - g1e))
|
||||
removals.sort()
|
||||
|
||||
# Adjust Phase 1/2 styles for characters about to be removed.
|
||||
def _adj(pos: int) -> int:
|
||||
shift = 0
|
||||
for rp, rl in removals:
|
||||
if rp < pos:
|
||||
shift += min(rl, pos - rp)
|
||||
else:
|
||||
break
|
||||
return pos - shift
|
||||
|
||||
adjusted_prior: list = []
|
||||
for s, l, st in styles:
|
||||
ns = _adj(s)
|
||||
ne = _adj(s + l)
|
||||
if ne > ns:
|
||||
adjusted_prior.append((ns, ne - ns, st))
|
||||
|
||||
# Strip all inline markers in one pass → positions are correct.
|
||||
result = ""
|
||||
last_end = 0
|
||||
inline_styles: list = []
|
||||
for ms, me, g1s, g1e, sty in all_matches:
|
||||
result += text[last_end:ms]
|
||||
pos = len(result)
|
||||
inner = text[g1s:g1e]
|
||||
result += inner
|
||||
inline_styles.append((pos, len(inner), sty))
|
||||
last_end = me
|
||||
result += text[last_end:]
|
||||
text = result
|
||||
|
||||
styles = adjusted_prior + inline_styles
|
||||
|
||||
# Convert code-point offsets → UTF-16 code-unit offsets
|
||||
style_strings = []
|
||||
for cp_start, cp_len, stype in sorted(styles):
|
||||
# Safety: skip any out-of-bounds styles
|
||||
if cp_start < 0 or cp_start + cp_len > len(text):
|
||||
continue
|
||||
u16_start = _utf16_len(text[:cp_start])
|
||||
u16_len = _utf16_len(text[cp_start : cp_start + cp_len])
|
||||
style_strings.append(f"{u16_start}:{u16_len}:{stype}")
|
||||
|
||||
return text, style_strings
|
||||
|
||||
def format_message(self, content: str) -> str:
|
||||
"""Strip markdown for plain-text fallback (used by base class).
|
||||
|
||||
The actual rich formatting happens in send() via _markdown_to_signal().
|
||||
"""
|
||||
# This is only called if someone uses the base-class send path.
|
||||
# Our send() override bypasses this entirely.
|
||||
return content
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Sending
|
||||
# ------------------------------------------------------------------
|
||||
@@ -913,22 +718,14 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send a text message with native Signal formatting."""
|
||||
"""Send a text message."""
|
||||
await self._stop_typing_indicator(chat_id)
|
||||
|
||||
plain_text, text_styles = self._markdown_to_signal(content)
|
||||
|
||||
params: Dict[str, Any] = {
|
||||
"account": self.account,
|
||||
"message": plain_text,
|
||||
"message": content,
|
||||
}
|
||||
|
||||
if text_styles:
|
||||
if len(text_styles) == 1:
|
||||
params["textStyle"] = text_styles[0]
|
||||
else:
|
||||
params["textStyles"] = text_styles
|
||||
|
||||
if chat_id.startswith("group:"):
|
||||
params["groupId"] = chat_id[6:]
|
||||
else:
|
||||
@@ -938,10 +735,11 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
|
||||
if result is not None:
|
||||
self._track_sent_timestamp(result)
|
||||
# Signal has no editable message identifier. Returning None keeps the
|
||||
# stream consumer on the non-edit fallback path instead of pretending
|
||||
# future edits can remove an in-progress cursor from the chat thread.
|
||||
return SendResult(success=True, message_id=None)
|
||||
# Use the timestamp from the RPC result as a pseudo message_id.
|
||||
# Signal doesn't have real message IDs, but the stream consumer
|
||||
# needs a truthy value to follow its edit→fallback path correctly.
|
||||
_msg_id = str(result.get("timestamp", "")) if isinstance(result, dict) else None
|
||||
return SendResult(success=True, message_id=_msg_id or None)
|
||||
return SendResult(success=False, error="RPC send failed")
|
||||
|
||||
def _track_sent_timestamp(self, rpc_result) -> None:
|
||||
@@ -1005,178 +803,6 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
self._typing_failures.pop(chat_id, None)
|
||||
self._typing_skip_until.pop(chat_id, None)
|
||||
|
||||
async def send_multiple_images(
|
||||
self,
|
||||
chat_id: str,
|
||||
images: List[Tuple[str, str]],
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
human_delay: float = 0.0,
|
||||
) -> None:
|
||||
"""Send a batch of images via chunked Signal RPC calls.
|
||||
|
||||
Per-image alt texts are dropped — Signal's send RPC only carries
|
||||
one shared message body. Bad images (download failure, missing
|
||||
file, oversize) are skipped with a warning so one bad URL
|
||||
doesn't lose the rest of the batch. ``human_delay`` is ignored:
|
||||
the rate-limit scheduler handles inter-batch pacing.
|
||||
"""
|
||||
if not images:
|
||||
return
|
||||
|
||||
scheduler = get_scheduler()
|
||||
logger.info(
|
||||
"Signal send_multiple_images: received %d image(s) for %s — "
|
||||
"scheduler state: %s",
|
||||
len(images), chat_id[:30], scheduler.state(),
|
||||
)
|
||||
|
||||
await self._stop_typing_indicator(chat_id)
|
||||
|
||||
attachments: List[str] = []
|
||||
skipped_download = 0
|
||||
skipped_missing = 0
|
||||
skipped_oversize = 0
|
||||
for image_url, _alt_text in images:
|
||||
if image_url.startswith("file://"):
|
||||
file_path = unquote(image_url[7:])
|
||||
else:
|
||||
try:
|
||||
file_path = await cache_image_from_url(image_url)
|
||||
except Exception as e:
|
||||
logger.warning("Signal: failed to download image %s: %s", image_url, e)
|
||||
skipped_download += 1
|
||||
continue
|
||||
|
||||
if not file_path or not Path(file_path).exists():
|
||||
logger.warning("Signal: image file not found for %s", image_url)
|
||||
skipped_missing += 1
|
||||
continue
|
||||
|
||||
file_size = Path(file_path).stat().st_size
|
||||
if file_size > SIGNAL_MAX_ATTACHMENT_SIZE:
|
||||
logger.warning(
|
||||
"Signal: image too large (%d bytes), skipping %s", file_size, image_url
|
||||
)
|
||||
skipped_oversize += 1
|
||||
continue
|
||||
|
||||
attachments.append(file_path)
|
||||
|
||||
if not attachments:
|
||||
logger.error(
|
||||
"Signal: no valid images in batch of %d "
|
||||
"(download=%d missing=%d oversize=%d)",
|
||||
len(images), skipped_download, skipped_missing, skipped_oversize,
|
||||
)
|
||||
return
|
||||
|
||||
logger.info(
|
||||
"Signal send_multiple_images: %d/%d images valid, sending in chunks",
|
||||
len(attachments), len(images),
|
||||
)
|
||||
|
||||
base_params: Dict[str, Any] = {
|
||||
"account": self.account,
|
||||
"message": "",
|
||||
}
|
||||
if chat_id.startswith("group:"):
|
||||
base_params["groupId"] = chat_id[6:]
|
||||
else:
|
||||
base_params["recipient"] = [await self._resolve_recipient(chat_id)]
|
||||
|
||||
att_batches = [
|
||||
attachments[i:i + SIGNAL_MAX_ATTACHMENTS_PER_MSG]
|
||||
for i in range(0, len(attachments), SIGNAL_MAX_ATTACHMENTS_PER_MSG)
|
||||
]
|
||||
|
||||
for idx, att_batch in enumerate(att_batches):
|
||||
n = len(att_batch)
|
||||
estimated = scheduler.estimate_wait(n)
|
||||
logger.debug(
|
||||
"Signal batch %d/%d: %d attachments, estimated wait=%.1fs",
|
||||
idx + 1, len(att_batches), n, estimated,
|
||||
)
|
||||
if estimated >= SIGNAL_BATCH_PACING_NOTICE_THRESHOLD:
|
||||
await self._notify_batch_pacing(
|
||||
chat_id, idx + 1, len(att_batches), estimated
|
||||
)
|
||||
|
||||
params = dict(base_params, attachments=att_batch)
|
||||
send_timeout = _signal_send_timeout(n)
|
||||
|
||||
for attempt in range(1, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS + 1):
|
||||
await scheduler.acquire(n)
|
||||
try:
|
||||
_rpc_t0 = time.monotonic()
|
||||
result = await self._rpc(
|
||||
"send", params, raise_on_rate_limit=True, timeout=send_timeout,
|
||||
)
|
||||
_rpc_duration = time.monotonic() - _rpc_t0
|
||||
if result is not None:
|
||||
self._track_sent_timestamp(result)
|
||||
await scheduler.report_rpc_duration(_rpc_duration, n)
|
||||
logger.info(
|
||||
"Signal batch %d/%d: %d attachments sent in %.1fs "
|
||||
"(attempt %d/%d)",
|
||||
idx + 1, len(att_batches), n, _rpc_duration,
|
||||
attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
|
||||
)
|
||||
else:
|
||||
# Assume the server didn't accept the batch, don't deduce tokens
|
||||
logger.error(
|
||||
"Signal: RPC send failed for batch %d/%d (%d attachments, "
|
||||
"attempt %d/%d, rpc_duration=%.1fs)",
|
||||
idx + 1, len(att_batches), n,
|
||||
attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
|
||||
_rpc_duration,
|
||||
)
|
||||
# Retry transient (non-rate-limit) failures once
|
||||
if attempt < SIGNAL_RATE_LIMIT_MAX_ATTEMPTS:
|
||||
backoff = 2.0 ** attempt
|
||||
logger.info(
|
||||
"Signal: retrying batch %d/%d after %.1fs backoff",
|
||||
idx + 1, len(att_batches), backoff,
|
||||
)
|
||||
await asyncio.sleep(backoff)
|
||||
continue
|
||||
break
|
||||
except SignalRateLimitError as e:
|
||||
scheduler.feedback(e.retry_after, n)
|
||||
if attempt >= SIGNAL_RATE_LIMIT_MAX_ATTEMPTS:
|
||||
logger.error(
|
||||
"Signal: rate-limit retries exhausted on batch %d/%d "
|
||||
"(%d attachments lost, server retry_after=%s)",
|
||||
idx + 1, len(att_batches), n,
|
||||
f"{e.retry_after:.0f}s" if e.retry_after else "unknown",
|
||||
)
|
||||
break
|
||||
logger.warning(
|
||||
"Signal: rate-limited on batch %d/%d "
|
||||
"(attempt %d/%d, server retry_after=%s); "
|
||||
"scheduler will pace the retry",
|
||||
idx + 1, len(att_batches),
|
||||
attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
|
||||
f"{e.retry_after:.0f}s" if e.retry_after else "unknown",
|
||||
)
|
||||
|
||||
async def _notify_batch_pacing(
|
||||
self,
|
||||
chat_id: str,
|
||||
next_batch_idx: int,
|
||||
total_batches: int,
|
||||
wait_s: float,
|
||||
) -> None:
|
||||
"""Inform the user when an inter-batch pacing wait crosses the
|
||||
notice threshold. Best-effort; logs and continues on failure."""
|
||||
try:
|
||||
await self.send(
|
||||
chat_id,
|
||||
f"(More images coming — pausing ~{_format_wait(wait_s)} "
|
||||
f"for Signal rate limit, batch {next_batch_idx}/{total_batches}.)",
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("Signal: failed to send pacing notice: %s", e)
|
||||
|
||||
async def send_image(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -1337,110 +963,6 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
_keep_typing finally block to clean up platform-level typing tasks."""
|
||||
await self._stop_typing_indicator(chat_id)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Reactions
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def send_reaction(
|
||||
self,
|
||||
chat_id: str,
|
||||
emoji: str,
|
||||
target_author: str,
|
||||
target_timestamp: int,
|
||||
) -> bool:
|
||||
"""Send a reaction emoji to a specific message via signal-cli RPC.
|
||||
|
||||
Args:
|
||||
chat_id: The chat (phone number or "group:<id>")
|
||||
emoji: Reaction emoji string (e.g. "👀", "✅")
|
||||
target_author: Phone number / UUID of the message author
|
||||
target_timestamp: Signal timestamp (ms) of the message to react to
|
||||
"""
|
||||
params: Dict[str, Any] = {
|
||||
"account": self.account,
|
||||
"emoji": emoji,
|
||||
"targetAuthor": target_author,
|
||||
"targetTimestamp": target_timestamp,
|
||||
}
|
||||
|
||||
if chat_id.startswith("group:"):
|
||||
params["groupId"] = chat_id[6:]
|
||||
else:
|
||||
params["recipient"] = [chat_id]
|
||||
|
||||
result = await self._rpc("sendReaction", params)
|
||||
if result is not None:
|
||||
return True
|
||||
logger.debug("Signal: sendReaction failed (chat=%s, emoji=%s)", chat_id[:20], emoji)
|
||||
return False
|
||||
|
||||
async def remove_reaction(
|
||||
self,
|
||||
chat_id: str,
|
||||
target_author: str,
|
||||
target_timestamp: int,
|
||||
) -> bool:
|
||||
"""Remove a reaction by sending an empty-string emoji."""
|
||||
params: Dict[str, Any] = {
|
||||
"account": self.account,
|
||||
"emoji": "",
|
||||
"targetAuthor": target_author,
|
||||
"targetTimestamp": target_timestamp,
|
||||
"remove": True,
|
||||
}
|
||||
|
||||
if chat_id.startswith("group:"):
|
||||
params["groupId"] = chat_id[6:]
|
||||
else:
|
||||
params["recipient"] = [chat_id]
|
||||
|
||||
result = await self._rpc("sendReaction", params)
|
||||
return result is not None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Processing Lifecycle Hooks (reactions as progress indicators)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _extract_reaction_target(self, event: MessageEvent) -> Optional[tuple]:
|
||||
"""Extract (target_author, target_timestamp) from a MessageEvent.
|
||||
|
||||
Returns None if the event doesn't carry the raw Signal envelope data
|
||||
needed for sendReaction.
|
||||
"""
|
||||
raw = event.raw_message
|
||||
if not isinstance(raw, dict):
|
||||
return None
|
||||
author = raw.get("sender")
|
||||
ts = raw.get("timestamp_ms")
|
||||
if not author or not ts:
|
||||
return None
|
||||
return (author, ts)
|
||||
|
||||
async def on_processing_start(self, event: MessageEvent) -> None:
|
||||
"""React with 👀 when processing begins."""
|
||||
target = self._extract_reaction_target(event)
|
||||
if target:
|
||||
await self.send_reaction(event.source.chat_id, "👀", *target)
|
||||
|
||||
async def on_processing_complete(self, event: MessageEvent, outcome: "ProcessingOutcome") -> None:
|
||||
"""Swap the 👀 reaction for ✅ (success) or ❌ (failure).
|
||||
|
||||
On CANCELLED we leave the 👀 in place — no terminal outcome means
|
||||
the reaction should keep reflecting "in progress" (matches Telegram).
|
||||
"""
|
||||
if outcome == ProcessingOutcome.CANCELLED:
|
||||
return
|
||||
target = self._extract_reaction_target(event)
|
||||
if not target:
|
||||
return
|
||||
chat_id = event.source.chat_id
|
||||
# Remove the in-progress reaction, then add the final one
|
||||
await self.remove_reaction(chat_id, *target)
|
||||
if outcome == ProcessingOutcome.SUCCESS:
|
||||
await self.send_reaction(chat_id, "✅", *target)
|
||||
elif outcome == ProcessingOutcome.FAILURE:
|
||||
await self.send_reaction(chat_id, "❌", *target)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Chat Info
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@@ -1,369 +0,0 @@
|
||||
"""
|
||||
Signal attachment rate-limit scheduler.
|
||||
|
||||
Process-wide token-bucket simulator that mirrors the per-account
|
||||
attachment rate limit signal-cli/Signal-Server enforce. Producers
|
||||
(``SignalAdapter.send_multiple_images`` and the ``send_message`` tool's
|
||||
Signal path) call ``acquire(n)`` before an attachment send; on a 429
|
||||
they call ``feedback(retry_after, n)`` so the model recalibrates from
|
||||
the server's authoritative hint.
|
||||
|
||||
The scheduler serializes concurrent calls through an ``asyncio.Lock``,
|
||||
giving FIFO fairness across agent sessions sharing one signal-cli
|
||||
daemon.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from typing import Any, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
SIGNAL_MAX_ATTACHMENTS_PER_MSG = 32 # per-message attachment cap (source: Signal-{Android,Desktop} source code)
|
||||
SIGNAL_RATE_LIMIT_BUCKET_CAPACITY = 50 # server-side token-bucket capacity for attachments rate limiting
|
||||
SIGNAL_RATE_LIMIT_DEFAULT_RETRY_AFTER = 4 # fallback token refill interval for signal-cli < v0.14.3
|
||||
SIGNAL_RATE_LIMIT_MAX_ATTEMPTS = 2 # initial attempt + 1 retry
|
||||
SIGNAL_BATCH_PACING_NOTICE_THRESHOLD = 10.0 # if estimated waiting time > 10s, notify the user about the delay
|
||||
SIGNAL_RPC_ERROR_RATELIMIT = -5 # signal-cli (v0.14.3+) JSON-RPC error code for RateLimitException
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Errors
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class SignalRateLimitError(Exception):
|
||||
"""
|
||||
Raised by ``SignalAdapter._rpc`` for rate-limit responses when the
|
||||
caller has opted in via ``raise_on_rate_limit=True``.
|
||||
|
||||
Carries the server-supplied per-token Retry-After (in seconds) on
|
||||
signal-cli ≥ v0.14.3
|
||||
``retry_after`` is None when the version doesn't expose it.
|
||||
"""
|
||||
|
||||
def __init__(self, message: str, retry_after: Optional[float] = None) -> None:
|
||||
super().__init__(message)
|
||||
self.retry_after = retry_after
|
||||
|
||||
|
||||
class SignalSchedulerError(Exception):
|
||||
pass
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Detection helpers — used to fish a 429 out of signal-cli's various error
|
||||
# shapes (typed code, [429] substring, libsignal-net RetryLaterException
|
||||
# leaked through AttachmentInvalidException).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# "Retry after 4 seconds" / "retry after 4 second" — libsignal-net's
|
||||
# RetryLaterException string form, surfaced when 429s hit during
|
||||
# attachment upload (signal-cli wraps these as AttachmentInvalidException
|
||||
# rather than RateLimitException, so the typed path doesn't fire).
|
||||
_RETRY_AFTER_RE = re.compile(r"Retry after (\d+(?:\.\d+)?)\s*second", re.IGNORECASE)
|
||||
|
||||
|
||||
def _extract_retry_after_seconds(err: Any) -> Optional[float]:
|
||||
"""Pull the per-token Retry-After window from a signal-cli rate-limit error.
|
||||
|
||||
Tries two sources, in order:
|
||||
1. ``error.data.response.results[*].retryAfterSeconds`` — the
|
||||
structured field signal-cli ≥ v0.14.3 surfaces for plain
|
||||
RateLimitException.
|
||||
2. ``"Retry after N seconds"`` parsed out of the message — covers
|
||||
libsignal-net's RetryLaterException that gets wrapped as
|
||||
AttachmentInvalidException during attachment upload, where the
|
||||
structured field stays null.
|
||||
|
||||
Returns None when neither yields a value.
|
||||
"""
|
||||
msg = ""
|
||||
if isinstance(err, dict):
|
||||
data = err.get("data") or {}
|
||||
response = data.get("response") or {}
|
||||
results = response.get("results") or []
|
||||
candidates = [
|
||||
r.get("retryAfterSeconds") for r in results
|
||||
if isinstance(r, dict) and r.get("retryAfterSeconds")
|
||||
]
|
||||
if candidates:
|
||||
return float(max(candidates))
|
||||
msg = str(err.get("message", ""))
|
||||
else:
|
||||
msg = str(err)
|
||||
match = _RETRY_AFTER_RE.search(msg)
|
||||
return float(match.group(1)) if match else None
|
||||
|
||||
|
||||
def _is_signal_rate_limit_error(err: Any) -> bool:
|
||||
"""True if a signal-cli RPC error reflects a rate-limit failure.
|
||||
|
||||
Matches three layers:
|
||||
- typed ``RATELIMIT_ERROR`` code (signal-cli ≥ v0.14.3, plain
|
||||
RateLimitException)
|
||||
- legacy ``[429] / RateLimitException`` substrings
|
||||
- libsignal-net's ``RetryLaterException`` / ``Retry after N seconds``
|
||||
surfaced inside ``AttachmentInvalidException`` when the rate
|
||||
limit is hit during attachment upload — signal-cli never re-tags
|
||||
these as RateLimitException, so substring is the only signal.
|
||||
"""
|
||||
if isinstance(err, dict) and err.get("code") == SIGNAL_RPC_ERROR_RATELIMIT:
|
||||
return True
|
||||
|
||||
message = (
|
||||
str(err.get("message", ""))
|
||||
if isinstance(err, dict)
|
||||
else str(err)
|
||||
)
|
||||
msg_lower = message.lower()
|
||||
return (
|
||||
"[429]" in message
|
||||
or "ratelimit" in msg_lower
|
||||
or "retrylaterexception" in msg_lower
|
||||
or "retry after" in msg_lower
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Misc helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _format_wait(seconds: float) -> str:
|
||||
"""Human-friendly wait label for user-facing pacing notices."""
|
||||
s = max(0.0, seconds)
|
||||
if s < 90:
|
||||
return f"{int(round(s))}s"
|
||||
return f"{max(1, int(round(s / 60)))} min"
|
||||
|
||||
|
||||
def _signal_send_timeout(num_attachments: int) -> float:
|
||||
"""HTTP timeout for a Signal ``send`` RPC.
|
||||
|
||||
signal-cli uploads attachments serially during the call, so the
|
||||
server-side time scales with batch size. Default 30s is fine for
|
||||
text-only sends but truncates large attachment batches mid-upload —
|
||||
we then log a phantom failure even though signal-cli completes the
|
||||
send a few seconds later. Scale at 5s/attachment with a 60s floor.
|
||||
"""
|
||||
if num_attachments <= 0:
|
||||
return 30.0
|
||||
return max(60.0, 5.0 * num_attachments)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scheduler
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class SignalAttachmentScheduler:
|
||||
"""Process-wide token-bucket simulator for Signal attachment sends.
|
||||
|
||||
The bucket holds up to ``capacity`` tokens (default 50, matching
|
||||
Signal's server-side rate-limit bucket size). Each attachment consumes one
|
||||
token. Tokens refill at ``refill_rate`` tokens/second, calibrated
|
||||
from the per-token Retry-After hint we get from the server when a
|
||||
429 fires. Until we've observed one, we use the documented default
|
||||
(1 token / 4 seconds).
|
||||
|
||||
Concurrent ``acquire(n)`` calls serialize through an
|
||||
``asyncio.Lock`` — natural FIFO across agent sessions hitting the
|
||||
same daemon.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
capacity: float = float(SIGNAL_RATE_LIMIT_BUCKET_CAPACITY),
|
||||
default_retry_after: float = float(SIGNAL_RATE_LIMIT_DEFAULT_RETRY_AFTER),
|
||||
) -> None:
|
||||
self.capacity = float(capacity)
|
||||
self.tokens = float(capacity)
|
||||
self.refill_rate = 1.0 / float(default_retry_after)
|
||||
self.last_refill = time.monotonic()
|
||||
self._lock = asyncio.Lock()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Internals
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _refill(self) -> None:
|
||||
now = time.monotonic()
|
||||
elapsed = now - self.last_refill
|
||||
if elapsed > 0 and self.tokens < self.capacity:
|
||||
self.tokens = min(self.capacity, self.tokens + elapsed * self.refill_rate)
|
||||
self.last_refill = now
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def estimate_wait(self, n: int) -> float:
|
||||
"""Best-effort estimate of the seconds until ``n`` tokens would
|
||||
be available. Used to decide whether to emit a user-facing
|
||||
pacing notice *before* committing to an ``acquire`` that may
|
||||
block silently. Lock-free; small races vs. concurrent acquires
|
||||
are benign for an informational notice.
|
||||
"""
|
||||
now = time.monotonic()
|
||||
elapsed = now - self.last_refill
|
||||
projected = self.tokens
|
||||
if elapsed > 0 and projected < self.capacity:
|
||||
projected = min(self.capacity, projected + elapsed * self.refill_rate)
|
||||
deficit = n - projected
|
||||
if deficit <= 0:
|
||||
return 0.0
|
||||
return deficit / self.refill_rate
|
||||
|
||||
async def acquire(self, n: int) -> float:
|
||||
"""Block until at least ``n`` tokens are available, return the
|
||||
seconds slept.
|
||||
|
||||
Does **not** deduct tokens — the bucket is a read-only model of
|
||||
server-side capacity. Call ``report_rpc_duration()`` after the
|
||||
RPC to synchronise the model with the server timeline.
|
||||
|
||||
Not perfect in case lots of coroutines try to acquire for big
|
||||
uploads (``report_rpc_duration`` will take a long time to get hit)
|
||||
but this is just a simulation. Signal server is ground truth and
|
||||
will raise rate-limit exceptions triggering requeues.
|
||||
|
||||
The lock is released during ``asyncio.sleep`` so other callers
|
||||
can interleave. A retry loop re-checks after each sleep in
|
||||
case the deadline was pessimistic.
|
||||
"""
|
||||
if n <= 0:
|
||||
return 0.0
|
||||
if n > self.capacity:
|
||||
raise SignalSchedulerError(
|
||||
f"Signal scheduler was called requesting {n} tokens "
|
||||
f"(max is {self.capacity})",
|
||||
)
|
||||
|
||||
total_slept = 0.0
|
||||
first_pass = True
|
||||
while True:
|
||||
async with self._lock:
|
||||
self._refill()
|
||||
if self.tokens >= n:
|
||||
if not first_pass or total_slept > 0:
|
||||
logger.debug(
|
||||
"Signal scheduler: tokens sufficient for %d "
|
||||
"(remaining=%.1f, total_slept=%.1fs)",
|
||||
n, self.tokens, total_slept,
|
||||
)
|
||||
return total_slept
|
||||
deficit = n - self.tokens
|
||||
wait = deficit / self.refill_rate
|
||||
if first_pass:
|
||||
logger.info(
|
||||
"Signal scheduler: pausing %.1fs for %d tokens "
|
||||
"(available=%.1f, deficit=%.1f, refill=%.4f/s ≈ %.1fs/token)",
|
||||
wait, n, self.tokens, deficit,
|
||||
self.refill_rate, 1.0 / self.refill_rate,
|
||||
)
|
||||
first_pass = False
|
||||
await asyncio.sleep(wait)
|
||||
total_slept += wait
|
||||
|
||||
async def report_rpc_duration(self, rpc_duration: float, n_attachments: int) -> None:
|
||||
"""Record an attachment-send RPC that just completed.
|
||||
|
||||
Deducts ``n_attachments`` tokens without crediting refill during
|
||||
the upload window. Signal's server checks the bucket at RPC start
|
||||
and does *not* refill during request processing — refill resumes
|
||||
after the response. Crediting upload-time refill causes cumulative
|
||||
drift that eventually triggers 429s.
|
||||
|
||||
Advances ``last_refill`` so the next ``acquire`` / ``_refill``
|
||||
starts counting from this point.
|
||||
"""
|
||||
if n_attachments <= 0:
|
||||
return
|
||||
|
||||
async with self._lock:
|
||||
now = time.monotonic()
|
||||
token_before = self.tokens
|
||||
self.tokens = max(0.0, token_before - float(n_attachments))
|
||||
self.last_refill = now
|
||||
logger.log(
|
||||
logging.INFO if rpc_duration > 10 and n_attachments > 5 else logging.DEBUG,
|
||||
"Signal scheduler: RPC for %d att took %.1fs — "
|
||||
"tokens %.1f → %.1f (deducted=%d, no upload refill credited, refill=%.4fs⁻¹)",
|
||||
n_attachments, rpc_duration,
|
||||
token_before, self.tokens,
|
||||
n_attachments, self.refill_rate,
|
||||
)
|
||||
|
||||
def feedback(self, retry_after: Optional[float], n_attempted: int) -> None:
|
||||
"""Apply server feedback after a 429.
|
||||
|
||||
``retry_after`` is the per-*token* refill window the server
|
||||
reports (None when signal-cli is older than v0.14.3 and didn't
|
||||
surface it).
|
||||
|
||||
When present we calibrate ``refill_rate`` from it:
|
||||
the server is authoritative.
|
||||
"""
|
||||
if retry_after and retry_after > 0:
|
||||
new_rate = 1.0 / float(retry_after)
|
||||
if new_rate != self.refill_rate:
|
||||
logger.info(
|
||||
"Signal scheduler: calibrating refill_rate to %.4f tokens/sec "
|
||||
"(server retry_after=%.1fs per token)",
|
||||
new_rate, retry_after,
|
||||
)
|
||||
self.refill_rate = new_rate
|
||||
self.tokens = 0.0
|
||||
self.last_refill = time.monotonic()
|
||||
|
||||
def state(self) -> dict:
|
||||
"""Return current scheduler state for diagnostic logging (read-only).
|
||||
|
||||
Does not advance ``last_refill`` — safe to call from logging paths
|
||||
without perturbing the bucket.
|
||||
"""
|
||||
now = time.monotonic()
|
||||
elapsed = now - self.last_refill
|
||||
projected = self.tokens
|
||||
if elapsed > 0 and projected < self.capacity:
|
||||
projected = min(self.capacity, projected + elapsed * self.refill_rate)
|
||||
return {
|
||||
"tokens": round(projected, 1),
|
||||
"capacity": int(self.capacity),
|
||||
"refill_rate": round(self.refill_rate, 4),
|
||||
"refill_seconds_per_token": round(1.0 / self.refill_rate, 1) if self.refill_rate > 0 else float("inf"),
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Process-wide singleton
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_scheduler: Optional[SignalAttachmentScheduler] = None
|
||||
|
||||
|
||||
def get_scheduler() -> SignalAttachmentScheduler:
|
||||
"""Return the process-wide scheduler, creating it on first access."""
|
||||
global _scheduler
|
||||
if _scheduler is None:
|
||||
_scheduler = SignalAttachmentScheduler()
|
||||
logger.info(
|
||||
"Signal scheduler: created (capacity=%d tokens, refill=%.4f/s ≈ %.1fs/token)",
|
||||
int(_scheduler.capacity),
|
||||
_scheduler.refill_rate,
|
||||
1.0 / _scheduler.refill_rate,
|
||||
)
|
||||
return _scheduler
|
||||
|
||||
|
||||
def _reset_scheduler() -> None:
|
||||
"""Drop the cached scheduler so the next ``get_scheduler`` call
|
||||
builds a fresh one. Test-only — never call from production paths."""
|
||||
global _scheduler
|
||||
_scheduler = None
|
||||
+70
-1029
File diff suppressed because it is too large
Load Diff
+31
-385
@@ -84,7 +84,6 @@ from gateway.platforms.telegram_network import (
|
||||
discover_fallback_ips,
|
||||
parse_fallback_ip_env,
|
||||
)
|
||||
from utils import atomic_replace
|
||||
|
||||
|
||||
def check_telegram_requirements() -> bool:
|
||||
@@ -123,12 +122,12 @@ def _strip_mdv2(text: str) -> str:
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Markdown table → Telegram-friendly row groups
|
||||
# Markdown table → code block conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
# Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
|
||||
# so pipe tables render as noisy backslash-pipe text with no alignment.
|
||||
# Reformating each row into a bold heading plus bullet list keeps the content
|
||||
# readable on mobile clients while preserving the source data.
|
||||
# Wrapping the table in a fenced code block makes Telegram render it as
|
||||
# monospace preformatted text with columns intact.
|
||||
|
||||
# Matches a GFM table delimiter row: optional outer pipes, cells containing
|
||||
# only dashes (with optional leading/trailing colons for alignment) separated
|
||||
@@ -145,49 +144,13 @@ def _is_table_row(line: str) -> bool:
|
||||
return bool(stripped) and '|' in stripped
|
||||
|
||||
|
||||
def _split_markdown_table_row(line: str) -> list[str]:
|
||||
"""Split a simple GFM table row into stripped cell values."""
|
||||
stripped = line.strip()
|
||||
if stripped.startswith("|"):
|
||||
stripped = stripped[1:]
|
||||
if stripped.endswith("|"):
|
||||
stripped = stripped[:-1]
|
||||
return [cell.strip() for cell in stripped.split("|")]
|
||||
|
||||
|
||||
def _render_table_block_for_telegram(table_block: list[str]) -> str:
|
||||
"""Render a detected GFM table as Telegram-friendly row groups."""
|
||||
if len(table_block) < 3:
|
||||
return "\n".join(table_block)
|
||||
|
||||
headers = _split_markdown_table_row(table_block[0])
|
||||
if len(headers) < 2:
|
||||
return "\n".join(table_block)
|
||||
|
||||
rendered_rows: list[str] = []
|
||||
for index, row in enumerate(table_block[2:], start=1):
|
||||
cells = _split_markdown_table_row(row)
|
||||
if len(cells) < len(headers):
|
||||
cells.extend([""] * (len(headers) - len(cells)))
|
||||
elif len(cells) > len(headers):
|
||||
cells = cells[: len(headers)]
|
||||
|
||||
heading = next((cell for cell in cells if cell), f"Row {index}")
|
||||
rendered_rows.append(f"**{heading}**")
|
||||
rendered_rows.extend(
|
||||
f"• {header}: {value}" for header, value in zip(headers, cells)
|
||||
)
|
||||
|
||||
return "\n\n".join(rendered_rows)
|
||||
|
||||
|
||||
def _wrap_markdown_tables(text: str) -> str:
|
||||
"""Rewrite GFM-style pipe tables into Telegram-friendly bullet groups.
|
||||
"""Wrap GFM-style pipe tables in ``` fences so Telegram renders them.
|
||||
|
||||
Detected by a row containing '|' immediately followed by a delimiter
|
||||
row matching :data:`_TABLE_SEPARATOR_RE`. Subsequent pipe-containing
|
||||
non-blank lines are consumed as the table body and rewritten as
|
||||
per-row bullet groups. Tables inside existing fenced code blocks are left
|
||||
non-blank lines are consumed as the table body and included in the
|
||||
wrapped block. Tables inside existing fenced code blocks are left
|
||||
alone.
|
||||
"""
|
||||
if '|' not in text or '-' not in text:
|
||||
@@ -224,7 +187,9 @@ def _wrap_markdown_tables(text: str) -> str:
|
||||
while j < len(lines) and _is_table_row(lines[j]):
|
||||
table_block.append(lines[j])
|
||||
j += 1
|
||||
out.append(_render_table_block_for_telegram(table_block))
|
||||
out.append('```')
|
||||
out.extend(table_block)
|
||||
out.append('```')
|
||||
i = j
|
||||
continue
|
||||
|
||||
@@ -237,14 +202,14 @@ def _wrap_markdown_tables(text: str) -> str:
|
||||
class TelegramAdapter(BasePlatformAdapter):
|
||||
"""
|
||||
Telegram bot adapter.
|
||||
|
||||
|
||||
Handles:
|
||||
- Receiving messages from users and groups
|
||||
- Sending responses with Telegram markdown
|
||||
- Forum topics (thread_id support)
|
||||
- Media messages
|
||||
"""
|
||||
|
||||
|
||||
# Telegram message limits
|
||||
MAX_MESSAGE_LENGTH = 4096
|
||||
# Threshold for detecting Telegram client-side message splits.
|
||||
@@ -252,7 +217,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
_SPLIT_THRESHOLD = 4000
|
||||
MEDIA_GROUP_WAIT_SECONDS = 0.8
|
||||
_GENERAL_TOPIC_THREAD_ID = "1"
|
||||
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.TELEGRAM)
|
||||
self._app: Optional[Application] = None
|
||||
@@ -286,9 +251,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
self._model_picker_state: Dict[str, dict] = {}
|
||||
# Approval button state: message_id → session_key
|
||||
self._approval_state: Dict[int, str] = {}
|
||||
# Slash-confirm button state: confirm_id → session_key (for /reload-mcp
|
||||
# and any other slash-confirm prompts; see GatewayRunner._request_slash_confirm).
|
||||
self._slash_confirm_state: Dict[str, str] = {}
|
||||
|
||||
@staticmethod
|
||||
def _is_callback_user_authorized(user_id: str) -> bool:
|
||||
@@ -372,49 +334,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
return {"link_preview_options": LinkPreviewOptions(is_disabled=True)}
|
||||
return {"disable_web_page_preview": True}
|
||||
|
||||
async def _drain_polling_connections(self) -> None:
|
||||
"""Reset the httpx connection pool used for getUpdates polling.
|
||||
|
||||
Network errors (especially through proxies like sing-box) can leave
|
||||
httpx connections in a half-closed state that still occupy pool slots.
|
||||
After enough reconnect cycles the pool fills up entirely, causing
|
||||
``Pool timeout: All connections in the connection pool are occupied.``
|
||||
|
||||
We reset ONLY ``_request[0]`` (the getUpdates request) — the general
|
||||
request (``_request[1]``) is left untouched so concurrent
|
||||
``send_message`` / ``edit_message`` calls are never interrupted.
|
||||
|
||||
Implementation note: accesses ``Bot._request[0]`` which is the
|
||||
get-updates ``BaseRequest`` in the PTB 22.x internal tuple
|
||||
``(get_updates_request, general_request)``. There is no public
|
||||
accessor for the polling request; review if upgrading to PTB 23+.
|
||||
"""
|
||||
if not (self._app and self._app.bot):
|
||||
return
|
||||
try:
|
||||
# PTB 22.x: _request is a (get_updates, general) tuple;
|
||||
# no public accessor exists for the polling request.
|
||||
polling_req = self._app.bot._request[0] # noqa: SLF001
|
||||
except Exception:
|
||||
return
|
||||
try:
|
||||
await polling_req.shutdown()
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"[%s] Polling request shutdown failed (non-fatal)",
|
||||
self.name, exc_info=True,
|
||||
)
|
||||
try:
|
||||
await polling_req.initialize()
|
||||
logger.debug(
|
||||
"[%s] Polling request pool drained before reconnect", self.name
|
||||
)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"[%s] Polling request re-initialize failed (non-fatal)",
|
||||
self.name, exc_info=True,
|
||||
)
|
||||
|
||||
async def _handle_polling_network_error(self, error: Exception) -> None:
|
||||
"""Reconnect polling after a transient network interruption.
|
||||
|
||||
@@ -460,8 +379,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
await self._drain_polling_connections()
|
||||
|
||||
try:
|
||||
await self._app.updater.start_polling(
|
||||
allowed_updates=Update.ALL_TYPES,
|
||||
@@ -509,7 +426,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
except Exception:
|
||||
pass
|
||||
await asyncio.sleep(RETRY_DELAY)
|
||||
await self._drain_polling_connections()
|
||||
try:
|
||||
await self._app.updater.start_polling(
|
||||
allowed_updates=Update.ALL_TYPES,
|
||||
@@ -638,7 +554,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
_yaml.dump(config, f, default_flow_style=False, sort_keys=False)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
atomic_replace(tmp_path, config_path)
|
||||
os.replace(tmp_path, config_path)
|
||||
except BaseException:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
@@ -787,6 +703,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
"write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
|
||||
}
|
||||
|
||||
proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
|
||||
disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
|
||||
fallback_ips = self._fallback_ips()
|
||||
if not fallback_ips:
|
||||
@@ -797,8 +714,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
", ".join(fallback_ips),
|
||||
)
|
||||
|
||||
proxy_targets = ["api.telegram.org", *fallback_ips]
|
||||
proxy_url = resolve_proxy_url("TELEGRAM_PROXY", target_hosts=proxy_targets)
|
||||
if fallback_ips and not proxy_url and not disable_fallback:
|
||||
logger.info(
|
||||
"[%s] Telegram fallback IPs active: %s",
|
||||
@@ -997,7 +912,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
self._set_fatal_error("telegram_connect_error", message, retryable=True)
|
||||
logger.error("[%s] Failed to connect to Telegram: %s", self.name, e, exc_info=True)
|
||||
return False
|
||||
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
"""Stop polling/webhook, cancel pending album flushes, and disconnect."""
|
||||
pending_media_group_tasks = list(self._media_group_tasks.values())
|
||||
@@ -1293,31 +1208,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
)
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def delete_message(self, chat_id: str, message_id: str) -> bool:
|
||||
"""Delete a previously sent Telegram message.
|
||||
|
||||
Used by the stream consumer's fresh-final cleanup path (ported
|
||||
from openclaw/openclaw#72038) to remove long-lived preview
|
||||
messages after sending the completed reply as a fresh message.
|
||||
Telegram's Bot API ``deleteMessage`` works for bot-posted
|
||||
messages in the last 48 hours. Failures are non-fatal — the
|
||||
caller leaves the preview in place and logs at debug level.
|
||||
"""
|
||||
if not self._bot:
|
||||
return False
|
||||
try:
|
||||
await self._bot.delete_message(
|
||||
chat_id=int(chat_id),
|
||||
message_id=int(message_id),
|
||||
)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"[%s] Failed to delete Telegram message %s: %s",
|
||||
self.name, message_id, e,
|
||||
)
|
||||
return False
|
||||
|
||||
async def send_update_prompt(
|
||||
self, chat_id: str, prompt: str, default: str = "",
|
||||
session_key: str = "",
|
||||
@@ -1414,48 +1304,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
logger.warning("[%s] send_exec_approval failed: %s", self.name, e)
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_slash_confirm(
|
||||
self, chat_id: str, title: str, message: str, session_key: str,
|
||||
confirm_id: str, metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Render a three-button slash-command confirmation prompt."""
|
||||
if not self._bot:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
# Message body: render as plain text (message already contains
|
||||
# markdown formatting from the gateway primitive).
|
||||
preview = message if len(message) <= 3800 else message[:3800] + "..."
|
||||
|
||||
keyboard = InlineKeyboardMarkup([
|
||||
[
|
||||
InlineKeyboardButton("✅ Approve Once", callback_data=f"sc:once:{confirm_id}"),
|
||||
InlineKeyboardButton("🔒 Always Approve", callback_data=f"sc:always:{confirm_id}"),
|
||||
],
|
||||
[
|
||||
InlineKeyboardButton("❌ Cancel", callback_data=f"sc:cancel:{confirm_id}"),
|
||||
],
|
||||
])
|
||||
|
||||
thread_id = self._metadata_thread_id(metadata)
|
||||
kwargs: Dict[str, Any] = {
|
||||
"chat_id": int(chat_id),
|
||||
"text": preview,
|
||||
"parse_mode": ParseMode.MARKDOWN,
|
||||
"reply_markup": keyboard,
|
||||
**self._link_preview_kwargs(),
|
||||
}
|
||||
message_thread_id = self._message_thread_id_for_send(thread_id)
|
||||
if message_thread_id is not None:
|
||||
kwargs["message_thread_id"] = message_thread_id
|
||||
|
||||
msg = await self._bot.send_message(**kwargs)
|
||||
self._slash_confirm_state[confirm_id] = session_key
|
||||
return SendResult(success=True, message_id=str(msg.message_id))
|
||||
except Exception as e:
|
||||
logger.warning("[%s] send_slash_confirm failed: %s", self.name, e)
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_model_picker(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -1824,68 +1672,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
logger.error("Failed to resolve gateway approval from Telegram button: %s", exc)
|
||||
return
|
||||
|
||||
# --- Slash-confirm callbacks (sc:choice:confirm_id) ---
|
||||
if data.startswith("sc:"):
|
||||
parts = data.split(":", 2)
|
||||
if len(parts) == 3:
|
||||
choice = parts[1] # once, always, cancel
|
||||
confirm_id = parts[2]
|
||||
|
||||
caller_id = str(getattr(query.from_user, "id", ""))
|
||||
if not self._is_callback_user_authorized(caller_id):
|
||||
await query.answer(text="⛔ You are not authorized to answer this prompt.")
|
||||
return
|
||||
|
||||
session_key = self._slash_confirm_state.pop(confirm_id, None)
|
||||
if not session_key:
|
||||
await query.answer(text="This prompt has already been resolved.")
|
||||
return
|
||||
|
||||
label_map = {
|
||||
"once": "✅ Approved once",
|
||||
"always": "🔒 Always approve",
|
||||
"cancel": "❌ Cancelled",
|
||||
}
|
||||
user_display = getattr(query.from_user, "first_name", "User")
|
||||
label = label_map.get(choice, "Resolved")
|
||||
|
||||
await query.answer(text=label)
|
||||
|
||||
try:
|
||||
await query.edit_message_text(
|
||||
text=f"{label} by {user_display}",
|
||||
parse_mode=ParseMode.MARKDOWN,
|
||||
reply_markup=None,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Resolve via the module-level primitive. The runner stored
|
||||
# a handler keyed by session_key; we run it on the event
|
||||
# loop and (if it returns a string) send it as a follow-up
|
||||
# message in the same chat.
|
||||
try:
|
||||
from tools import slash_confirm as _slash_confirm_mod
|
||||
result_text = await _slash_confirm_mod.resolve(
|
||||
session_key, confirm_id, choice,
|
||||
)
|
||||
if result_text and query.message:
|
||||
# Inherit the prompt message's thread so the reply
|
||||
# lands in the same supergroup topic / reply chain.
|
||||
thread_id = getattr(query.message, "message_thread_id", None)
|
||||
send_kwargs: Dict[str, Any] = {
|
||||
"chat_id": int(query.message.chat_id),
|
||||
"text": result_text,
|
||||
"parse_mode": ParseMode.MARKDOWN,
|
||||
**self._link_preview_kwargs(),
|
||||
}
|
||||
if thread_id is not None:
|
||||
send_kwargs["message_thread_id"] = thread_id
|
||||
await self._bot.send_message(**send_kwargs)
|
||||
except Exception as exc:
|
||||
logger.error("[%s] slash-confirm callback failed: %s", self.name, exc, exc_info=True)
|
||||
return
|
||||
|
||||
# --- Update prompt callbacks ---
|
||||
if not data.startswith("update_prompt:"):
|
||||
return
|
||||
@@ -1951,9 +1737,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
|
||||
|
||||
with open(audio_path, "rb") as audio_file:
|
||||
ext = os.path.splitext(audio_path)[1].lower()
|
||||
# .ogg / .opus files -> send as voice (round playable bubble)
|
||||
if ext in (".ogg", ".opus"):
|
||||
# .ogg files -> send as voice (round playable bubble)
|
||||
if audio_path.endswith((".ogg", ".opus")):
|
||||
_voice_thread = self._metadata_thread_id(metadata)
|
||||
msg = await self._bot.send_voice(
|
||||
chat_id=int(chat_id),
|
||||
@@ -1962,8 +1747,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
reply_to_message_id=int(reply_to) if reply_to else None,
|
||||
message_thread_id=self._message_thread_id_for_send(_voice_thread),
|
||||
)
|
||||
elif ext in (".mp3", ".m4a"):
|
||||
# Telegram's Bot API sendAudio only accepts MP3 / M4A.
|
||||
else:
|
||||
# .mp3 and others -> send as audio file
|
||||
_audio_thread = self._metadata_thread_id(metadata)
|
||||
msg = await self._bot.send_audio(
|
||||
chat_id=int(chat_id),
|
||||
@@ -1972,16 +1757,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
reply_to_message_id=int(reply_to) if reply_to else None,
|
||||
message_thread_id=self._message_thread_id_for_send(_audio_thread),
|
||||
)
|
||||
else:
|
||||
# Formats Telegram can't play natively (.wav, .flac, ...)
|
||||
# — fall back to document delivery instead of raising.
|
||||
return await self.send_document(
|
||||
chat_id=chat_id,
|
||||
file_path=audio_path,
|
||||
caption=caption,
|
||||
reply_to=reply_to,
|
||||
metadata=metadata,
|
||||
)
|
||||
return SendResult(success=True, message_id=str(msg.message_id))
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
@@ -1991,118 +1766,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
exc_info=True,
|
||||
)
|
||||
return await super().send_voice(chat_id, audio_path, caption, reply_to)
|
||||
|
||||
async def send_multiple_images(
|
||||
self,
|
||||
chat_id: str,
|
||||
images: List[tuple],
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
human_delay: float = 0.0,
|
||||
) -> None:
|
||||
"""Send a batch of images natively via Telegram's media group API.
|
||||
|
||||
Telegram's ``send_media_group`` bundles up to 10 photos/videos into
|
||||
a single album. Larger batches are chunked. Animated GIFs cannot
|
||||
go into a media group (they require ``send_animation``), so they
|
||||
are peeled off and sent individually via the base default path.
|
||||
|
||||
URL-based photos go into the group directly; local files are
|
||||
opened as byte streams. On failure the whole batch falls back to
|
||||
the base adapter's per-image loop.
|
||||
"""
|
||||
if not self._bot:
|
||||
return
|
||||
if not images:
|
||||
return
|
||||
|
||||
try:
|
||||
from telegram import InputMediaPhoto
|
||||
except Exception as exc: # pragma: no cover - missing SDK
|
||||
logger.warning(
|
||||
"[%s] InputMediaPhoto unavailable, falling back to per-image send: %s",
|
||||
self.name, exc,
|
||||
)
|
||||
await super().send_multiple_images(chat_id, images, metadata, human_delay)
|
||||
return
|
||||
|
||||
# Peel off animations — they need send_animation, not send_media_group
|
||||
animations: List[tuple] = []
|
||||
photos: List[tuple] = []
|
||||
for image_url, alt_text in images:
|
||||
if not image_url.startswith("file://") and self._is_animation_url(image_url):
|
||||
animations.append((image_url, alt_text))
|
||||
else:
|
||||
photos.append((image_url, alt_text))
|
||||
|
||||
# Animations: route through the base default (per-image send_animation)
|
||||
if animations:
|
||||
await super().send_multiple_images(
|
||||
chat_id, animations, metadata, human_delay=human_delay,
|
||||
)
|
||||
|
||||
if not photos:
|
||||
return
|
||||
|
||||
from urllib.parse import unquote as _unquote
|
||||
_thread = self._metadata_thread_id(metadata)
|
||||
_thread_id = self._message_thread_id_for_send(_thread)
|
||||
|
||||
# Chunk into groups of 10 (Telegram's album limit)
|
||||
CHUNK = 10
|
||||
chunks = [photos[i:i + CHUNK] for i in range(0, len(photos), CHUNK)]
|
||||
|
||||
for chunk_idx, chunk in enumerate(chunks):
|
||||
if human_delay > 0 and chunk_idx > 0:
|
||||
await asyncio.sleep(human_delay)
|
||||
|
||||
media: List[Any] = []
|
||||
opened_files: List[Any] = []
|
||||
try:
|
||||
for image_url, alt_text in chunk:
|
||||
caption = alt_text[:1024] if alt_text else None
|
||||
if image_url.startswith("file://"):
|
||||
local_path = _unquote(image_url[7:])
|
||||
if not os.path.exists(local_path):
|
||||
logger.warning(
|
||||
"[%s] Skipping missing image in media group: %s",
|
||||
self.name, local_path,
|
||||
)
|
||||
continue
|
||||
fh = open(local_path, "rb")
|
||||
opened_files.append(fh)
|
||||
media.append(InputMediaPhoto(media=fh, caption=caption))
|
||||
else:
|
||||
media.append(InputMediaPhoto(media=image_url, caption=caption))
|
||||
|
||||
if not media:
|
||||
continue
|
||||
|
||||
logger.info(
|
||||
"[%s] Sending media group of %d photo(s) (chunk %d/%d)",
|
||||
self.name, len(media), chunk_idx + 1, len(chunks),
|
||||
)
|
||||
await self._bot.send_media_group(
|
||||
chat_id=int(chat_id),
|
||||
media=media,
|
||||
message_thread_id=_thread_id,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"[%s] send_media_group failed (chunk %d/%d), falling back to per-image: %s",
|
||||
self.name, chunk_idx + 1, len(chunks), e,
|
||||
exc_info=True,
|
||||
)
|
||||
# Fallback: send each photo in this chunk individually
|
||||
await super().send_multiple_images(
|
||||
chat_id, chunk, metadata, human_delay=human_delay,
|
||||
)
|
||||
finally:
|
||||
for fh in opened_files:
|
||||
try:
|
||||
fh.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
async def send_image_file(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -2269,7 +1933,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
)
|
||||
# Final fallback: send URL as text
|
||||
return await super().send_image(chat_id, image_url, caption, reply_to)
|
||||
|
||||
|
||||
async def send_animation(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -2331,7 +1995,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
e,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
|
||||
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
|
||||
"""Get information about a Telegram chat."""
|
||||
if not self._bot:
|
||||
@@ -2365,7 +2029,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
exc_info=True,
|
||||
)
|
||||
return {"name": str(chat_id), "type": "dm", "error": str(e)}
|
||||
|
||||
|
||||
def format_message(self, content: str) -> str:
|
||||
"""
|
||||
Convert standard markdown to Telegram MarkdownV2 format.
|
||||
@@ -2390,8 +2054,10 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
text = content
|
||||
|
||||
# 0) Rewrite GFM-style pipe tables into Telegram-friendly row groups
|
||||
# before the normal MarkdownV2 conversions run.
|
||||
# 0) Pre-wrap GFM-style pipe tables in ``` fences. Telegram can't
|
||||
# render tables natively, but fenced code blocks render as
|
||||
# monospace preformatted text with columns intact. The wrapped
|
||||
# tables then flow through step (1) below as protected regions.
|
||||
text = _wrap_markdown_tables(text)
|
||||
|
||||
# 1) Protect fenced code blocks (``` ... ```)
|
||||
@@ -2537,7 +2203,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
text = ''.join(_safe_parts)
|
||||
|
||||
return text
|
||||
|
||||
|
||||
# ── Group mention gating ──────────────────────────────────────────────
|
||||
|
||||
def _telegram_require_mention(self) -> bool:
|
||||
@@ -2661,26 +2327,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
user = getattr(entity, "user", None)
|
||||
if user and getattr(user, "id", None) == bot_id:
|
||||
return True
|
||||
elif entity_type == "bot_command" and expected:
|
||||
# Telegram's official group-disambiguation form for slash
|
||||
# commands (``/cmd@botname``) is emitted as a single
|
||||
# ``bot_command`` entity covering the whole span — there
|
||||
# is no accompanying ``mention`` entity. Treat it as a
|
||||
# direct address to this bot when the ``@botname`` suffix
|
||||
# matches. This is the form Telegram's own command menu
|
||||
# autocomplete produces in groups, so dropping it at the
|
||||
# mention gate would break /new, /reset, /help, ... for
|
||||
# every group that has ``require_mention`` enabled (#15415).
|
||||
offset = int(getattr(entity, "offset", -1))
|
||||
length = int(getattr(entity, "length", 0))
|
||||
if offset < 0 or length <= 0:
|
||||
continue
|
||||
command_text = source_text[offset:offset + length]
|
||||
at_index = command_text.find("@")
|
||||
if at_index < 0:
|
||||
continue
|
||||
if command_text[at_index:].strip().lower() == expected:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _message_matches_mention_patterns(self, message: Message) -> bool:
|
||||
@@ -2752,7 +2398,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
event = self._build_message_event(update.message, MessageType.TEXT, update_id=update.update_id)
|
||||
event.text = self._clean_bot_trigger_text(event.text)
|
||||
self._enqueue_text_event(event)
|
||||
|
||||
|
||||
async def _handle_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
"""Handle incoming command messages."""
|
||||
if not update.message or not update.message.text:
|
||||
@@ -2762,7 +2408,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
event = self._build_message_event(update.message, MessageType.COMMAND, update_id=update.update_id)
|
||||
await self.handle_message(event)
|
||||
|
||||
|
||||
async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
"""Handle incoming location/venue pin messages."""
|
||||
if not update.message:
|
||||
@@ -3120,7 +2766,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
return
|
||||
|
||||
await self.handle_message(event)
|
||||
|
||||
|
||||
async def _queue_media_group_event(self, media_group_id: str, event: MessageEvent) -> None:
|
||||
"""Buffer Telegram media-group items so albums arrive as one logical event.
|
||||
|
||||
|
||||
@@ -43,10 +43,10 @@ _DOH_PROVIDERS: list[dict] = [
|
||||
_SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]
|
||||
|
||||
|
||||
def _resolve_proxy_url(target_hosts=None) -> str | None:
|
||||
def _resolve_proxy_url() -> str | None:
|
||||
# Delegate to shared implementation (env vars + macOS system proxy detection)
|
||||
from gateway.platforms.base import resolve_proxy_url
|
||||
return resolve_proxy_url("TELEGRAM_PROXY", target_hosts=target_hosts)
|
||||
return resolve_proxy_url("TELEGRAM_PROXY")
|
||||
|
||||
|
||||
class TelegramFallbackTransport(httpx.AsyncBaseTransport):
|
||||
@@ -60,7 +60,7 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):
|
||||
|
||||
def __init__(self, fallback_ips: Iterable[str], **transport_kwargs):
|
||||
self._fallback_ips = [ip for ip in dict.fromkeys(_normalize_fallback_ips(fallback_ips))]
|
||||
proxy_url = _resolve_proxy_url(target_hosts=[_TELEGRAM_API_HOST, *self._fallback_ips])
|
||||
proxy_url = _resolve_proxy_url()
|
||||
if proxy_url and "proxy" not in transport_kwargs:
|
||||
transport_kwargs["proxy"] = proxy_url
|
||||
self._primary = httpx.AsyncHTTPTransport(**transport_kwargs)
|
||||
|
||||
@@ -202,22 +202,26 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
if deliver_type == "github_comment":
|
||||
return await self._deliver_github_comment(content, delivery)
|
||||
|
||||
# Cross-platform delivery — any platform with a gateway adapter.
|
||||
# Check both built-in names and plugin-registered platforms.
|
||||
_BUILTIN_DELIVER_PLATFORMS = {
|
||||
"telegram", "discord", "slack", "signal", "sms", "whatsapp",
|
||||
"matrix", "mattermost", "homeassistant", "email", "dingtalk",
|
||||
"feishu", "wecom", "wecom_callback", "weixin", "bluebubbles",
|
||||
"qqbot", "yuanbao",
|
||||
}
|
||||
_is_known_platform = deliver_type in _BUILTIN_DELIVER_PLATFORMS
|
||||
if not _is_known_platform:
|
||||
try:
|
||||
from gateway.platform_registry import platform_registry
|
||||
_is_known_platform = platform_registry.is_registered(deliver_type)
|
||||
except Exception:
|
||||
pass
|
||||
if self.gateway_runner and _is_known_platform:
|
||||
# Cross-platform delivery — any platform with a gateway adapter
|
||||
if self.gateway_runner and deliver_type in (
|
||||
"telegram",
|
||||
"discord",
|
||||
"slack",
|
||||
"signal",
|
||||
"sms",
|
||||
"whatsapp",
|
||||
"matrix",
|
||||
"mattermost",
|
||||
"homeassistant",
|
||||
"email",
|
||||
"dingtalk",
|
||||
"feishu",
|
||||
"wecom",
|
||||
"wecom_callback",
|
||||
"weixin",
|
||||
"bluebubbles",
|
||||
"qqbot",
|
||||
):
|
||||
return await self._deliver_cross_platform(
|
||||
deliver_type, content, delivery
|
||||
)
|
||||
|
||||
@@ -508,11 +508,6 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
self._remember_chat_req_id(chat_id, self._payload_req_id(payload))
|
||||
|
||||
text, reply_text = self._extract_text(body)
|
||||
# Strip leading @mention in group chats so slash commands like
|
||||
# "@BotName /approve" are correctly recognized as "/approve".
|
||||
# Mirrors what the Telegram adapter does (re.sub @botname).
|
||||
if is_group and text:
|
||||
text = re.sub(r"^@\S+\s*", "", text).strip()
|
||||
media_urls, media_types = await self._extract_media(body)
|
||||
message_type = self._derive_message_type(body, text, media_types)
|
||||
has_reply_context = bool(reply_text and (text or media_urls))
|
||||
|
||||
@@ -89,21 +89,8 @@ MAX_CONSECUTIVE_FAILURES = 3
|
||||
RETRY_DELAY_SECONDS = 2
|
||||
BACKOFF_DELAY_SECONDS = 30
|
||||
SESSION_EXPIRED_ERRCODE = -14
|
||||
RATE_LIMIT_ERRCODE = -2 # iLink frequency limit — backoff and retry
|
||||
MESSAGE_DEDUP_TTL_SECONDS = 300
|
||||
|
||||
|
||||
def _is_stale_session_ret(
|
||||
ret: "Optional[int]", errcode: "Optional[int]", errmsg: "Optional[str]",
|
||||
) -> bool:
|
||||
"""True when iLink returns ret=-2 / errcode=-2 with 'unknown error',
|
||||
which is a stale-session signal (same as errcode=-14) rather than
|
||||
a genuine rate limit."""
|
||||
if ret != RATE_LIMIT_ERRCODE and errcode != RATE_LIMIT_ERRCODE:
|
||||
return False
|
||||
return (errmsg or "").lower() == "unknown error"
|
||||
|
||||
|
||||
MEDIA_IMAGE = 1
|
||||
MEDIA_VIDEO = 2
|
||||
MEDIA_FILE = 3
|
||||
@@ -1126,7 +1113,7 @@ async def qr_login(
|
||||
class WeixinAdapter(BasePlatformAdapter):
|
||||
"""Native Hermes adapter for Weixin personal accounts."""
|
||||
|
||||
MAX_MESSAGE_LENGTH = 2000
|
||||
MAX_MESSAGE_LENGTH = 4000
|
||||
|
||||
# WeChat does not support editing sent messages — streaming must use the
|
||||
# fallback "send-final-only" path so the cursor (▉) is never left visible.
|
||||
@@ -1151,10 +1138,10 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
extra.get("cdn_base_url") or os.getenv("WEIXIN_CDN_BASE_URL", WEIXIN_CDN_BASE_URL)
|
||||
).strip().rstrip("/")
|
||||
self._send_chunk_delay_seconds = float(
|
||||
extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "1.5")
|
||||
extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "0.35")
|
||||
)
|
||||
self._send_chunk_retries = int(
|
||||
extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "4")
|
||||
extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "2")
|
||||
)
|
||||
self._send_chunk_retry_delay_seconds = float(
|
||||
extra.get("send_chunk_retry_delay_seconds")
|
||||
@@ -1222,17 +1209,6 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
self._mark_connected()
|
||||
_LIVE_ADAPTERS[self._token] = self
|
||||
logger.info("[%s] Connected account=%s base=%s", self.name, _safe_id(self._account_id), self._base_url)
|
||||
if self._group_policy != "disabled":
|
||||
logger.warning(
|
||||
"[%s] WEIXIN_GROUP_POLICY=%s is set, but QR-login connects an iLink bot "
|
||||
"identity (e.g. ...@im.bot) which typically cannot be invited into ordinary "
|
||||
"WeChat groups. iLink usually does not deliver ordinary-group events for "
|
||||
"these accounts, so group messages may never reach Hermes regardless of this "
|
||||
"policy. If group delivery doesn't work, the limitation is on the iLink side, "
|
||||
"not in Hermes.",
|
||||
self.name,
|
||||
self._group_policy,
|
||||
)
|
||||
return True
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
@@ -1277,8 +1253,7 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
ret = response.get("ret", 0)
|
||||
errcode = response.get("errcode", 0)
|
||||
if ret not in (0, None) or errcode not in (0, None):
|
||||
if (ret == SESSION_EXPIRED_ERRCODE or errcode == SESSION_EXPIRED_ERRCODE
|
||||
or _is_stale_session_ret(ret, errcode, response.get("errmsg"))):
|
||||
if ret == SESSION_EXPIRED_ERRCODE or errcode == SESSION_EXPIRED_ERRCODE:
|
||||
logger.error("[%s] Session expired; pausing for 10 minutes", self.name)
|
||||
await asyncio.sleep(600)
|
||||
consecutive_failures = 0
|
||||
@@ -1543,7 +1518,6 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
is_session_expired = (
|
||||
ret == SESSION_EXPIRED_ERRCODE
|
||||
or errcode == SESSION_EXPIRED_ERRCODE
|
||||
or _is_stale_session_ret(ret, errcode, resp.get("errmsg"))
|
||||
)
|
||||
# Session expired — strip token and retry once
|
||||
if is_session_expired and not retried_without_token and context_token:
|
||||
@@ -1557,28 +1531,6 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
self.name, _safe_id(chat_id),
|
||||
)
|
||||
continue
|
||||
# Rate limit (-2) — backoff and retry
|
||||
is_rate_limited = (
|
||||
ret == RATE_LIMIT_ERRCODE
|
||||
or errcode == RATE_LIMIT_ERRCODE
|
||||
)
|
||||
if is_rate_limited:
|
||||
errmsg = resp.get("errmsg") or resp.get("msg") or "rate limited"
|
||||
# Record the error so we raise a descriptive
|
||||
# RuntimeError (instead of AssertionError) if the
|
||||
# loop exhausts with the server still rate-limiting.
|
||||
last_error = RuntimeError(
|
||||
f"iLink sendmessage rate limited: ret={ret} errcode={errcode} errmsg={errmsg}"
|
||||
)
|
||||
if attempt >= self._send_chunk_retries:
|
||||
break
|
||||
wait = self._send_chunk_retry_delay_seconds * 3 # 3x backoff for rate limit
|
||||
logger.warning(
|
||||
"[%s] rate limited for %s; backing off %.1fs before retry",
|
||||
self.name, _safe_id(chat_id), wait,
|
||||
)
|
||||
await asyncio.sleep(wait)
|
||||
continue
|
||||
errmsg = resp.get("errmsg") or resp.get("msg") or "unknown error"
|
||||
raise RuntimeError(
|
||||
f"iLink sendmessage error: ret={ret} errcode={errcode} errmsg={errmsg}"
|
||||
@@ -1620,7 +1572,7 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
_, image_cleaned = self.extract_images(cleaned_content)
|
||||
local_files, final_content = self.extract_local_files(image_cleaned)
|
||||
|
||||
_AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a", ".flac"}
|
||||
_AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a"}
|
||||
_VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"}
|
||||
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,645 +0,0 @@
|
||||
"""
|
||||
yuanbao_media.py — 元宝平台媒体处理模块
|
||||
|
||||
提供 COS 上传、文件下载、TIM 媒体消息构建等功能。
|
||||
移植自 TypeScript 版 media.ts(yuanbao-openclaw-plugin),
|
||||
使用 httpx 替代 cos-nodejs-sdk-v5,避免引入额外 SDK 依赖。
|
||||
|
||||
COS 上传流程:
|
||||
1. 调用 genUploadInfo 获取临时凭证(tmpSecretId/tmpSecretKey/sessionToken)
|
||||
2. 用临时凭证通过 HMAC-SHA1 签名构建 Authorization 头
|
||||
3. HTTP PUT 上传到 COS
|
||||
|
||||
TIM 消息体构建:
|
||||
- buildImageMsgBody() → TIMImageElem
|
||||
- buildFileMsgBody() → TIMFileElem
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import logging
|
||||
import os
|
||||
import secrets
|
||||
import struct
|
||||
import time
|
||||
import urllib.parse
|
||||
from typing import Optional, Any
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ============ 常量 ============
|
||||
|
||||
UPLOAD_INFO_PATH = "/api/resource/genUploadInfo"
|
||||
DEFAULT_API_DOMAIN = "yuanbao.tencent.com"
|
||||
DEFAULT_MAX_SIZE_MB = 50
|
||||
|
||||
# COS 加速域名后缀(优先使用全球加速)
|
||||
COS_USE_ACCELERATE = True
|
||||
|
||||
# ============ 类型映射 ============
|
||||
|
||||
# MIME → image_format 数字(TIM 协议字段)
|
||||
_MIME_TO_IMAGE_FORMAT: dict[str, int] = {
|
||||
"image/jpeg": 1,
|
||||
"image/jpg": 1,
|
||||
"image/gif": 2,
|
||||
"image/png": 3,
|
||||
"image/bmp": 4,
|
||||
"image/webp": 255,
|
||||
"image/heic": 255,
|
||||
"image/tiff": 255,
|
||||
}
|
||||
|
||||
# 文件扩展名 → MIME
|
||||
_EXT_TO_MIME: dict[str, str] = {
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".png": "image/png",
|
||||
".gif": "image/gif",
|
||||
".webp": "image/webp",
|
||||
".bmp": "image/bmp",
|
||||
".heic": "image/heic",
|
||||
".tiff": "image/tiff",
|
||||
".ico": "image/x-icon",
|
||||
".pdf": "application/pdf",
|
||||
".doc": "application/msword",
|
||||
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
".xls": "application/vnd.ms-excel",
|
||||
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
".ppt": "application/vnd.ms-powerpoint",
|
||||
".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
".txt": "text/plain",
|
||||
".zip": "application/zip",
|
||||
".tar": "application/x-tar",
|
||||
".gz": "application/gzip",
|
||||
".mp3": "audio/mpeg",
|
||||
".mp4": "video/mp4",
|
||||
".wav": "audio/wav",
|
||||
".ogg": "audio/ogg",
|
||||
".webm": "video/webm",
|
||||
}
|
||||
|
||||
|
||||
# ============ 工具函数 ============
|
||||
|
||||
def guess_mime_type(filename: str) -> str:
|
||||
"""根据文件扩展名猜测 MIME 类型。"""
|
||||
ext = os.path.splitext(filename)[-1].lower()
|
||||
return _EXT_TO_MIME.get(ext, "application/octet-stream")
|
||||
|
||||
|
||||
def is_image(filename: str, mime_type: str = "") -> bool:
|
||||
"""判断是否为图片类型。"""
|
||||
if mime_type.startswith("image/"):
|
||||
return True
|
||||
ext = os.path.splitext(filename)[-1].lower()
|
||||
return ext in {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".heic", ".tiff", ".ico"}
|
||||
|
||||
|
||||
def get_image_format(mime_type: str) -> int:
|
||||
"""获取 TIM 图片格式编号。"""
|
||||
return _MIME_TO_IMAGE_FORMAT.get(mime_type.lower(), 255)
|
||||
|
||||
|
||||
def md5_hex(data: bytes) -> str:
|
||||
"""计算 MD5 十六进制摘要。"""
|
||||
return hashlib.md5(data).hexdigest()
|
||||
|
||||
|
||||
def generate_file_id() -> str:
|
||||
"""生成随机文件 ID(32 位 hex)。"""
|
||||
return secrets.token_hex(16)
|
||||
|
||||
|
||||
|
||||
# ============ 图片尺寸解析(纯 Python,无需 Pillow) ============
|
||||
|
||||
def parse_image_size(data: bytes) -> Optional[dict[str, int]]:
|
||||
"""
|
||||
解析图片宽高(支持 JPEG/PNG/GIF/WebP),无需第三方依赖。
|
||||
返回 {"width": w, "height": h} 或 None(无法识别)。
|
||||
"""
|
||||
return (
|
||||
_parse_png_size(data)
|
||||
or _parse_jpeg_size(data)
|
||||
or _parse_gif_size(data)
|
||||
or _parse_webp_size(data)
|
||||
)
|
||||
|
||||
|
||||
def _parse_png_size(buf: bytes) -> Optional[dict[str, int]]:
|
||||
if len(buf) < 24:
|
||||
return None
|
||||
if buf[:4] != b"\x89PNG":
|
||||
return None
|
||||
w = struct.unpack(">I", buf[16:20])[0]
|
||||
h = struct.unpack(">I", buf[20:24])[0]
|
||||
return {"width": w, "height": h}
|
||||
|
||||
|
||||
def _parse_jpeg_size(buf: bytes) -> Optional[dict[str, int]]:
|
||||
if len(buf) < 4 or buf[0] != 0xFF or buf[1] != 0xD8:
|
||||
return None
|
||||
i = 2
|
||||
while i < len(buf) - 9:
|
||||
if buf[i] != 0xFF:
|
||||
i += 1
|
||||
continue
|
||||
marker = buf[i + 1]
|
||||
if marker in (0xC0, 0xC2):
|
||||
h = struct.unpack(">H", buf[i + 5: i + 7])[0]
|
||||
w = struct.unpack(">H", buf[i + 7: i + 9])[0]
|
||||
return {"width": w, "height": h}
|
||||
if i + 3 < len(buf):
|
||||
i += 2 + struct.unpack(">H", buf[i + 2: i + 4])[0]
|
||||
else:
|
||||
break
|
||||
return None
|
||||
|
||||
|
||||
def _parse_gif_size(buf: bytes) -> Optional[dict[str, int]]:
|
||||
if len(buf) < 10:
|
||||
return None
|
||||
sig = buf[:6].decode("ascii", errors="replace")
|
||||
if sig not in ("GIF87a", "GIF89a"):
|
||||
return None
|
||||
w = struct.unpack("<H", buf[6:8])[0]
|
||||
h = struct.unpack("<H", buf[8:10])[0]
|
||||
return {"width": w, "height": h}
|
||||
|
||||
|
||||
def _parse_webp_size(buf: bytes) -> Optional[dict[str, int]]:
|
||||
if len(buf) < 16:
|
||||
return None
|
||||
if buf[:4] != b"RIFF" or buf[8:12] != b"WEBP":
|
||||
return None
|
||||
chunk = buf[12:16].decode("ascii", errors="replace")
|
||||
if chunk == "VP8 ":
|
||||
if len(buf) >= 30 and buf[23] == 0x9D and buf[24] == 0x01 and buf[25] == 0x2A:
|
||||
w = struct.unpack("<H", buf[26:28])[0] & 0x3FFF
|
||||
h = struct.unpack("<H", buf[28:30])[0] & 0x3FFF
|
||||
return {"width": w, "height": h}
|
||||
elif chunk == "VP8L":
|
||||
if len(buf) >= 25 and buf[20] == 0x2F:
|
||||
bits = struct.unpack("<I", buf[21:25])[0]
|
||||
w = (bits & 0x3FFF) + 1
|
||||
h = ((bits >> 14) & 0x3FFF) + 1
|
||||
return {"width": w, "height": h}
|
||||
elif chunk == "VP8X":
|
||||
if len(buf) >= 30:
|
||||
w = (buf[24] | (buf[25] << 8) | (buf[26] << 16)) + 1
|
||||
h = (buf[27] | (buf[28] << 8) | (buf[29] << 16)) + 1
|
||||
return {"width": w, "height": h}
|
||||
return None
|
||||
|
||||
|
||||
# ============ URL 下载 ============
|
||||
|
||||
async def download_url(
|
||||
url: str,
|
||||
max_size_mb: int = DEFAULT_MAX_SIZE_MB,
|
||||
) -> tuple[bytes, str]:
|
||||
"""
|
||||
下载 URL 内容,返回 (bytes, content_type)。
|
||||
|
||||
Args:
|
||||
url: HTTP(S) URL
|
||||
max_size_mb: 最大允许大小(MB),超过则抛出异常
|
||||
|
||||
Returns:
|
||||
(data_bytes, content_type_string)
|
||||
|
||||
Raises:
|
||||
ValueError: 内容超过大小限制
|
||||
httpx.HTTPError: 网络/HTTP 错误
|
||||
"""
|
||||
max_bytes = max_size_mb * 1024 * 1024
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
# 先 HEAD 检查大小
|
||||
try:
|
||||
head = await client.head(url)
|
||||
content_length = int(head.headers.get("content-length", 0) or 0)
|
||||
if content_length > 0 and content_length > max_bytes:
|
||||
raise ValueError(
|
||||
f"文件过大: {content_length / 1024 / 1024:.1f} MB > {max_size_mb} MB"
|
||||
)
|
||||
except httpx.HTTPStatusError:
|
||||
pass # 部分服务器不支持 HEAD,忽略
|
||||
|
||||
# GET 下载(流式读取,防止超限)
|
||||
async with client.stream("GET", url) as resp:
|
||||
resp.raise_for_status()
|
||||
|
||||
content_type = resp.headers.get("content-type", "").split(";")[0].strip()
|
||||
|
||||
chunks: list[bytes] = []
|
||||
downloaded = 0
|
||||
async for chunk in resp.aiter_bytes(65536):
|
||||
downloaded += len(chunk)
|
||||
if downloaded > max_bytes:
|
||||
raise ValueError(
|
||||
f"文件过大: 已超过 {max_size_mb} MB 限制"
|
||||
)
|
||||
chunks.append(chunk)
|
||||
|
||||
data = b"".join(chunks)
|
||||
return data, content_type
|
||||
|
||||
|
||||
# ============ COS 鉴权(HMAC-SHA1) ============
|
||||
|
||||
def _cos_sign(
|
||||
method: str,
|
||||
path: str,
|
||||
params: dict[str, str],
|
||||
headers: dict[str, str],
|
||||
secret_id: str,
|
||||
secret_key: str,
|
||||
start_time: Optional[int] = None,
|
||||
expire_seconds: int = 3600,
|
||||
) -> str:
|
||||
"""
|
||||
构建 COS 请求签名(q-sign-algorithm=sha1 方案)。
|
||||
参考:https://cloud.tencent.com/document/product/436/7778
|
||||
|
||||
Args:
|
||||
method: HTTP 方法(小写,如 "put")
|
||||
path: URL 路径(URL encode 后的小写)
|
||||
params: URL 查询参数 dict(用于签名)
|
||||
headers: 参与签名的请求头 dict(key 需小写)
|
||||
secret_id: 临时 SecretId(tmpSecretId)
|
||||
secret_key: 临时 SecretKey(tmpSecretKey)
|
||||
start_time: 签名起始 Unix 时间戳(默认 now)
|
||||
expire_seconds: 签名有效期(秒,默认 3600)
|
||||
|
||||
Returns:
|
||||
Authorization header 值(完整字符串)
|
||||
"""
|
||||
now = int(time.time())
|
||||
q_sign_time = f"{start_time or now};{(start_time or now) + expire_seconds}"
|
||||
|
||||
# Step 1: SignKey = HMAC-SHA1(SecretKey, q-sign-time)
|
||||
sign_key = hmac.new(
|
||||
secret_key.encode("utf-8"),
|
||||
q_sign_time.encode("utf-8"),
|
||||
hashlib.sha1,
|
||||
).hexdigest()
|
||||
|
||||
# Step 2: HttpString
|
||||
# 参数和头部需按字典序排列,key 小写
|
||||
sorted_params = sorted((k.lower(), urllib.parse.quote(str(v), safe="") ) for k, v in params.items())
|
||||
sorted_headers = sorted((k.lower(), urllib.parse.quote(str(v), safe="") ) for k, v in headers.items())
|
||||
|
||||
url_param_list = ";".join(k for k, _ in sorted_params)
|
||||
url_params = "&".join(f"{k}={v}" for k, v in sorted_params)
|
||||
header_list = ";".join(k for k, _ in sorted_headers)
|
||||
header_str = "&".join(f"{k}={v}" for k, v in sorted_headers)
|
||||
|
||||
http_string = "\n".join([
|
||||
method.lower(),
|
||||
path,
|
||||
url_params,
|
||||
header_str,
|
||||
"",
|
||||
])
|
||||
|
||||
# Step 3: StringToSign = sha1 hash of HttpString
|
||||
sha1_of_http = hashlib.sha1(http_string.encode("utf-8")).hexdigest()
|
||||
string_to_sign = "\n".join([
|
||||
"sha1",
|
||||
q_sign_time,
|
||||
sha1_of_http,
|
||||
"",
|
||||
])
|
||||
|
||||
# Step 4: Signature = HMAC-SHA1(SignKey, StringToSign)
|
||||
signature = hmac.new(
|
||||
sign_key.encode("utf-8"),
|
||||
string_to_sign.encode("utf-8"),
|
||||
hashlib.sha1,
|
||||
).hexdigest()
|
||||
|
||||
return (
|
||||
f"q-sign-algorithm=sha1"
|
||||
f"&q-ak={secret_id}"
|
||||
f"&q-sign-time={q_sign_time}"
|
||||
f"&q-key-time={q_sign_time}"
|
||||
f"&q-header-list={header_list}"
|
||||
f"&q-url-param-list={url_param_list}"
|
||||
f"&q-signature={signature}"
|
||||
)
|
||||
|
||||
|
||||
# ============ 主要公开 API ============
|
||||
|
||||
async def get_cos_credentials(
|
||||
app_key: str,
|
||||
api_domain: str,
|
||||
token: str,
|
||||
filename: str = "file",
|
||||
file_id: Optional[str] = None,
|
||||
bot_id: str = "",
|
||||
route_env: str = "",
|
||||
) -> dict:
|
||||
"""
|
||||
调用 genUploadInfo 接口获取 COS 临时密钥及上传配置。
|
||||
|
||||
Args:
|
||||
app_key: 应用 Key(用于 X-ID 头)
|
||||
api_domain: API 域名(如 https://bot.yuanbao.tencent.com)
|
||||
token: 当前有效的签票 token(X-Token 头)
|
||||
filename: 待上传的文件名(含扩展名)
|
||||
file_id: 客户端生成的唯一文件 ID(不传则自动生成)
|
||||
bot_id: Bot 账号 ID(用于 X-ID 头)
|
||||
|
||||
Returns:
|
||||
COS 上传配置 dict,包含以下字段:
|
||||
bucketName (str) — COS Bucket 名称
|
||||
region (str) — COS 地域
|
||||
location (str) — 上传 Key(对象路径)
|
||||
encryptTmpSecretId (str) — 临时 SecretId
|
||||
encryptTmpSecretKey(str) — 临时 SecretKey
|
||||
encryptToken (str) — SessionToken
|
||||
startTime (int) — 凭证起始时间戳(Unix)
|
||||
expiredTime (int) — 凭证过期时间戳(Unix)
|
||||
resourceUrl (str) — 上传后的公网访问 URL
|
||||
resourceID (str) — 资源 ID(可选)
|
||||
|
||||
Raises:
|
||||
RuntimeError: 接口返回非 0 code 或字段缺失
|
||||
"""
|
||||
if file_id is None:
|
||||
file_id = generate_file_id()
|
||||
|
||||
upload_url = f"{api_domain.rstrip('/')}{UPLOAD_INFO_PATH}"
|
||||
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"X-Token": token,
|
||||
"X-ID": bot_id or app_key,
|
||||
"X-Source": "web",
|
||||
}
|
||||
if route_env:
|
||||
headers["X-Route-Env"] = route_env
|
||||
body = {
|
||||
"fileName": filename,
|
||||
"fileId": file_id,
|
||||
"docFrom": "localDoc",
|
||||
"docOpenId": "",
|
||||
}
|
||||
|
||||
async with httpx.AsyncClient(timeout=15.0) as client:
|
||||
resp = await client.post(upload_url, json=body, headers=headers)
|
||||
resp.raise_for_status()
|
||||
result: dict[str, Any] = resp.json()
|
||||
|
||||
code = result.get("code")
|
||||
if code != 0 and code is not None:
|
||||
raise RuntimeError(
|
||||
f"genUploadInfo 失败: code={code}, msg={result.get('msg', '')}"
|
||||
)
|
||||
|
||||
data = result.get("data") or result
|
||||
required_fields = ["bucketName", "location"]
|
||||
missing = [f for f in required_fields if not data.get(f)]
|
||||
if missing:
|
||||
raise RuntimeError(
|
||||
f"genUploadInfo 返回字段不完整: 缺少字段 {missing}"
|
||||
)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
async def upload_to_cos(
|
||||
file_bytes: bytes,
|
||||
filename: str,
|
||||
content_type: str,
|
||||
credentials: dict,
|
||||
bucket: str,
|
||||
region: str,
|
||||
) -> dict:
|
||||
"""
|
||||
通过 httpx PUT 请求将文件上传到 COS。
|
||||
使用临时凭证(tmpSecretId/tmpSecretKey/sessionToken)构建 HMAC-SHA1 签名。
|
||||
|
||||
Args:
|
||||
file_bytes: 文件二进制内容
|
||||
filename: 文件名(用于辅助计算 MIME、UUID)
|
||||
content_type: MIME 类型(如 "image/jpeg")
|
||||
credentials: get_cos_credentials() 返回的 dict,包含:
|
||||
encryptTmpSecretId → tmpSecretId
|
||||
encryptTmpSecretKey → tmpSecretKey
|
||||
encryptToken → sessionToken
|
||||
location → COS key(对象路径)
|
||||
resourceUrl → 上传后公网 URL
|
||||
startTime → 凭证起始时间(Unix)
|
||||
expiredTime → 凭证过期时间(Unix)
|
||||
bucket: COS Bucket 名称(如 chatbot-1234567890)
|
||||
region: COS 地域(如 ap-guangzhou)
|
||||
|
||||
Returns:
|
||||
上传结果 dict,包含:
|
||||
url (str) — COS 公网访问 URL
|
||||
uuid (str) — 文件内容 MD5
|
||||
size (int) — 文件大小(字节)
|
||||
width (int, optional) — 图片宽度(仅图片)
|
||||
height (int, optional) — 图片高度(仅图片)
|
||||
|
||||
Raises:
|
||||
httpx.HTTPStatusError: COS 返回非 2xx 状态
|
||||
RuntimeError: credentials 字段缺失
|
||||
"""
|
||||
secret_id: str = credentials.get("encryptTmpSecretId", "")
|
||||
secret_key: str = credentials.get("encryptTmpSecretKey", "")
|
||||
session_token: str = credentials.get("encryptToken", "")
|
||||
cos_key: str = credentials.get("location", "")
|
||||
resource_url: str = credentials.get("resourceUrl", "")
|
||||
start_time: Optional[int] = credentials.get("startTime")
|
||||
expired_time: Optional[int] = credentials.get("expiredTime")
|
||||
|
||||
if not secret_id or not secret_key or not cos_key:
|
||||
raise RuntimeError(
|
||||
f"COS credentials 不完整: secretId={bool(secret_id)}, "
|
||||
f"secretKey={bool(secret_key)}, location={bool(cos_key)}"
|
||||
)
|
||||
|
||||
# 构建 COS 上传 URL(优先使用全球加速域名)
|
||||
if COS_USE_ACCELERATE:
|
||||
cos_host = f"{bucket}.cos.accelerate.myqcloud.com"
|
||||
else:
|
||||
cos_host = f"{bucket}.cos.{region}.myqcloud.com"
|
||||
|
||||
# URL encode cos_key(保留 /)
|
||||
encoded_key = urllib.parse.quote(cos_key, safe="/")
|
||||
cos_url = f"https://{cos_host}/{encoded_key.lstrip('/')}"
|
||||
|
||||
# 确定 Content-Type
|
||||
if not content_type or content_type == "application/octet-stream":
|
||||
if is_image(filename):
|
||||
content_type = guess_mime_type(filename)
|
||||
else:
|
||||
content_type = "application/octet-stream"
|
||||
|
||||
# 计算文件 MD5 + size
|
||||
file_uuid = md5_hex(file_bytes)
|
||||
file_size = len(file_bytes)
|
||||
|
||||
# 参与签名的请求头
|
||||
sign_headers = {
|
||||
"host": cos_host,
|
||||
"content-type": content_type,
|
||||
"x-cos-security-token": session_token,
|
||||
}
|
||||
|
||||
# 计算签名有效期
|
||||
now = int(time.time())
|
||||
sign_start = start_time if start_time else now
|
||||
sign_expire = (expired_time - now) if expired_time and expired_time > now else 3600
|
||||
|
||||
authorization = _cos_sign(
|
||||
method="put",
|
||||
path=f"/{encoded_key.lstrip('/')}",
|
||||
params={},
|
||||
headers=sign_headers,
|
||||
secret_id=secret_id,
|
||||
secret_key=secret_key,
|
||||
start_time=sign_start,
|
||||
expire_seconds=sign_expire,
|
||||
)
|
||||
|
||||
put_headers = {
|
||||
"Authorization": authorization,
|
||||
"Content-Type": content_type,
|
||||
"x-cos-security-token": session_token,
|
||||
}
|
||||
|
||||
logger.info(
|
||||
"COS PUT: bucket=%s region=%s key=%s size=%d mime=%s",
|
||||
bucket, region, cos_key, file_size, content_type,
|
||||
)
|
||||
|
||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||
resp = await client.put(
|
||||
cos_url,
|
||||
content=file_bytes,
|
||||
headers=put_headers,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
|
||||
# 解析图片尺寸(仅图片类型)
|
||||
result: dict[str, Any] = {
|
||||
"url": resource_url or cos_url,
|
||||
"uuid": file_uuid,
|
||||
"size": file_size,
|
||||
}
|
||||
|
||||
if content_type.startswith("image/"):
|
||||
size_info = parse_image_size(file_bytes)
|
||||
if size_info:
|
||||
result["width"] = size_info["width"]
|
||||
result["height"] = size_info["height"]
|
||||
|
||||
logger.info(
|
||||
"COS 上传成功: url=%s size=%d",
|
||||
result["url"], file_size,
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
# ============ TIM 媒体消息构建 ============
|
||||
|
||||
def build_image_msg_body(
|
||||
url: str,
|
||||
uuid: Optional[str] = None,
|
||||
filename: Optional[str] = None,
|
||||
size: int = 0,
|
||||
width: int = 0,
|
||||
height: int = 0,
|
||||
mime_type: str = "",
|
||||
) -> list[dict]:
|
||||
"""
|
||||
构建腾讯 IM TIMImageElem 消息体。
|
||||
参考:https://cloud.tencent.com/document/product/269/2720
|
||||
|
||||
Args:
|
||||
url: 图片公网访问 URL(COS resourceUrl)
|
||||
uuid: 文件 UUID(MD5 或其他唯一标识)
|
||||
filename: 文件名(uuid 为空时作为备用)
|
||||
size: 文件大小(字节)
|
||||
width: 图片宽度(像素)
|
||||
height: 图片高度(像素)
|
||||
mime_type: MIME 类型(用于确定 image_format)
|
||||
|
||||
Returns:
|
||||
TIMImageElem 消息体列表(适合直接放入 msg_body)
|
||||
"""
|
||||
_uuid = uuid or filename or _basename_from_url(url) or "image"
|
||||
image_format = get_image_format(mime_type) if mime_type else 255
|
||||
|
||||
return [
|
||||
{
|
||||
"msg_type": "TIMImageElem",
|
||||
"msg_content": {
|
||||
"uuid": _uuid,
|
||||
"image_format": image_format,
|
||||
"image_info_array": [
|
||||
{
|
||||
"type": 1, # 1 = 原图
|
||||
"size": size,
|
||||
"width": width,
|
||||
"height": height,
|
||||
"url": url,
|
||||
}
|
||||
],
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
def build_file_msg_body(
|
||||
url: str,
|
||||
filename: str,
|
||||
uuid: Optional[str] = None,
|
||||
size: int = 0,
|
||||
) -> list[dict]:
|
||||
"""
|
||||
构建腾讯 IM TIMFileElem 消息体。
|
||||
参考:https://cloud.tencent.com/document/product/269/2720
|
||||
|
||||
Args:
|
||||
url: 文件公网访问 URL(COS resourceUrl)
|
||||
filename: 文件名(含扩展名)
|
||||
uuid: 文件 UUID(MD5 或其他唯一标识,不传则使用 filename)
|
||||
size: 文件大小(字节)
|
||||
|
||||
Returns:
|
||||
TIMFileElem 消息体列表(适合直接放入 msg_body)
|
||||
"""
|
||||
_uuid = uuid or filename
|
||||
|
||||
return [
|
||||
{
|
||||
"msg_type": "TIMFileElem",
|
||||
"msg_content": {
|
||||
"uuid": _uuid,
|
||||
"file_name": filename,
|
||||
"file_size": size,
|
||||
"url": url,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
# ============ 内部工具 ============
|
||||
|
||||
def _basename_from_url(url: str) -> str:
|
||||
"""从 URL 提取文件名。"""
|
||||
try:
|
||||
parsed = urllib.parse.urlparse(url)
|
||||
return os.path.basename(parsed.path)
|
||||
except Exception:
|
||||
return ""
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,558 +0,0 @@
|
||||
"""
|
||||
Yuanbao sticker (TIMFaceElem) support.
|
||||
|
||||
Ported from yuanbao-openclaw-plugin/src/sticker/.
|
||||
|
||||
TIMFaceElem wire format:
|
||||
{
|
||||
"msg_type": "TIMFaceElem",
|
||||
"msg_content": {
|
||||
"index": 0, # always 0 per Yuanbao convention
|
||||
"data": "<json>", # serialised sticker metadata
|
||||
}
|
||||
}
|
||||
|
||||
The `data` field carries a JSON string with the sticker's metadata so the
|
||||
receiver can look up the correct asset in the emoji pack.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import unicodedata
|
||||
from typing import Optional
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sticker catalogue – ported from builtin-stickers.json
|
||||
# Key : canonical name (Chinese)
|
||||
# Value : {sticker_id, package_id, name, description, width, height, formats}
|
||||
# ---------------------------------------------------------------------------
|
||||
STICKER_MAP: dict[str, dict] = {
|
||||
"六六六": {
|
||||
"sticker_id": "278", "package_id": "1003", "name": "六六六",
|
||||
"description": "666 厉害 牛 棒 绝了 好强 awesome",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"我想开了": {
|
||||
"sticker_id": "262", "package_id": "1003", "name": "我想开了",
|
||||
"description": "想开 佛系 释怀 顿悟 看淡了 无所谓",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"害羞": {
|
||||
"sticker_id": "130", "package_id": "1003", "name": "害羞",
|
||||
"description": "腼腆 不好意思 脸红 娇羞 羞涩 捂脸",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"比心": {
|
||||
"sticker_id": "252", "package_id": "1003", "name": "比心",
|
||||
"description": "笔芯 爱你 爱心手势 love heart 喜欢你",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"委屈": {
|
||||
"sticker_id": "125", "package_id": "1003", "name": "委屈",
|
||||
"description": "难过 想哭 可怜巴巴 瘪嘴 受伤 被欺负",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"亲亲": {
|
||||
"sticker_id": "146", "package_id": "1003", "name": "亲亲",
|
||||
"description": "么么 mua 亲一下 kiss 飞吻 啵",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"酷": {
|
||||
"sticker_id": "131", "package_id": "1003", "name": "酷",
|
||||
"description": "帅 墨镜 cool 高冷 有型 swagger",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"睡": {
|
||||
"sticker_id": "145", "package_id": "1003", "name": "睡",
|
||||
"description": "睡觉 困 zzZ 打盹 躺平 休眠 sleepy",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"发呆": {
|
||||
"sticker_id": "152", "package_id": "1003", "name": "发呆",
|
||||
"description": "懵 愣住 放空 呆滞 出神 脑子空白",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"可怜": {
|
||||
"sticker_id": "157", "package_id": "1003", "name": "可怜",
|
||||
"description": "卖萌 求饶 委屈巴巴 弱小 拜托 眼巴巴",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"摊手": {
|
||||
"sticker_id": "200", "package_id": "1003", "name": "摊手",
|
||||
"description": "无奈 没办法 耸肩 随便 那咋整 whatever",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"头大": {
|
||||
"sticker_id": "213", "package_id": "1003", "name": "头大",
|
||||
"description": "头疼 烦恼 郁闷 难搞 崩溃 一团乱",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"吓": {
|
||||
"sticker_id": "256", "package_id": "1003", "name": "吓",
|
||||
"description": "害怕 惊恐 震惊 吓一跳 恐怖 怂",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"吐血": {
|
||||
"sticker_id": "203", "package_id": "1003", "name": "吐血",
|
||||
"description": "无语 崩溃 被雷 内伤 一口老血 屮",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"哼": {
|
||||
"sticker_id": "185", "package_id": "1003", "name": "哼",
|
||||
"description": "傲娇 生气 不满 撇嘴 不理 赌气",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"嘿嘿": {
|
||||
"sticker_id": "220", "package_id": "1003", "name": "嘿嘿",
|
||||
"description": "坏笑 猥琐笑 偷笑 憨笑 得意 你懂的",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"头秃": {
|
||||
"sticker_id": "218", "package_id": "1003", "name": "头秃",
|
||||
"description": "程序员 加班 焦虑 没头发 秃了 肝爆",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"暗中观察": {
|
||||
"sticker_id": "221", "package_id": "1003", "name": "暗中观察",
|
||||
"description": "窥屏 潜水 偷偷看 角落 围观 屏住呼吸",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"我酸了": {
|
||||
"sticker_id": "224", "package_id": "1003", "name": "我酸了",
|
||||
"description": "嫉妒 柠檬精 羡慕 吃柠檬 眼红 恰柠檬",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"打call": {
|
||||
"sticker_id": "246", "package_id": "1003", "name": "打call",
|
||||
"description": "应援 加油 支持 喝彩 助威 call",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"庆祝": {
|
||||
"sticker_id": "251", "package_id": "1003", "name": "庆祝",
|
||||
"description": "祝贺 开心 耶 party 胜利 干杯",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"奋斗": {
|
||||
"sticker_id": "151", "package_id": "1003", "name": "奋斗",
|
||||
"description": "努力 加油 拼搏 冲 干劲 卷起来",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"惊讶": {
|
||||
"sticker_id": "143", "package_id": "1003", "name": "惊讶",
|
||||
"description": "震惊 哇 不敢相信 OMG 居然 这么离谱",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"疑问": {
|
||||
"sticker_id": "144", "package_id": "1003", "name": "疑问",
|
||||
"description": "问号 不懂 啥 为什么 啥情况 懵逼问",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"仔细分析": {
|
||||
"sticker_id": "248", "package_id": "1003", "name": "仔细分析",
|
||||
"description": "思考 推敲 认真 研究 琢磨 让我想想",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"撅嘴": {
|
||||
"sticker_id": "184", "package_id": "1003", "name": "撅嘴",
|
||||
"description": "嘟嘴 卖萌 不高兴 撒娇 嘴翘",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"泪奔": {
|
||||
"sticker_id": "199", "package_id": "1003", "name": "泪奔",
|
||||
"description": "大哭 伤心 破防 感动哭 泪流满面 呜呜",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"尊嘟假嘟": {
|
||||
"sticker_id": "276", "package_id": "1003", "name": "尊嘟假嘟",
|
||||
"description": "真的假的 真假 可爱问 你骗我 是不是",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"略略略": {
|
||||
"sticker_id": "113", "package_id": "1003", "name": "略略略",
|
||||
"description": "调皮 吐舌 不服 略 气死你 鬼脸",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"困": {
|
||||
"sticker_id": "180", "package_id": "1003", "name": "困",
|
||||
"description": "想睡 倦 打哈欠 睁不开眼 好困啊 sleepy",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"折磨": {
|
||||
"sticker_id": "181", "package_id": "1003", "name": "折磨",
|
||||
"description": "难受 痛苦 煎熬 蚌埠住了 受不了 要命",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"抠鼻": {
|
||||
"sticker_id": "182", "package_id": "1003", "name": "抠鼻",
|
||||
"description": "不屑 无聊 淡定 无所谓 鄙视 挖鼻",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"鼓掌": {
|
||||
"sticker_id": "183", "package_id": "1003", "name": "鼓掌",
|
||||
"description": "拍手 叫好 赞同 666 喝彩 掌声",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"斜眼笑": {
|
||||
"sticker_id": "204", "package_id": "1003", "name": "斜眼笑",
|
||||
"description": "滑稽 坏笑 doge 意味深长 阴阳怪气 嘿嘿嘿",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"辣眼睛": {
|
||||
"sticker_id": "216", "package_id": "1003", "name": "辣眼睛",
|
||||
"description": "看不下去 cringe 毁三观 太丑了 瞎了",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"哦哟": {
|
||||
"sticker_id": "217", "package_id": "1003", "name": "哦哟",
|
||||
"description": "惊讶 起哄 哇哦 有戏 不简单 哟",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"吃瓜": {
|
||||
"sticker_id": "222", "package_id": "1003", "name": "吃瓜",
|
||||
"description": "围观 看戏 八卦 路人 看热闹 板凳",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"狗头": {
|
||||
"sticker_id": "225", "package_id": "1003", "name": "狗头",
|
||||
"description": "doge 保命 开玩笑 滑稽 反讽 懂的都懂",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"敬礼": {
|
||||
"sticker_id": "227", "package_id": "1003", "name": "敬礼",
|
||||
"description": "salute 尊重 收到 遵命 致敬 报告",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"哦": {
|
||||
"sticker_id": "231", "package_id": "1003", "name": "哦",
|
||||
"description": "知道了 明白 敷衍 嗯 这样啊 收到",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"拿到红包": {
|
||||
"sticker_id": "236", "package_id": "1003", "name": "拿到红包",
|
||||
"description": "红包 谢谢老板 发财 开心 抢到了 欧气",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"牛吖": {
|
||||
"sticker_id": "239", "package_id": "1003", "name": "牛吖",
|
||||
"description": "牛 厉害 强 666 佩服 大佬",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"贴贴": {
|
||||
"sticker_id": "272", "package_id": "1003", "name": "贴贴",
|
||||
"description": "抱抱 亲昵 蹭蹭 亲密 靠靠 撒娇贴",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"爱心": {
|
||||
"sticker_id": "138", "package_id": "1003", "name": "爱心",
|
||||
"description": "心 love 喜欢你 红心 示爱 么么哒",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"晚安": {
|
||||
"sticker_id": "170", "package_id": "1003", "name": "晚安",
|
||||
"description": "好梦 睡了 night 早点休息 安啦 moon",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"太阳": {
|
||||
"sticker_id": "176", "package_id": "1003", "name": "太阳",
|
||||
"description": "晴天 早上好 阳光 morning 好天气 日",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"柠檬": {
|
||||
"sticker_id": "266", "package_id": "1003", "name": "柠檬",
|
||||
"description": "酸 嫉妒 柠檬精 羡慕 我酸 恰柠檬",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"大冤种": {
|
||||
"sticker_id": "267", "package_id": "1003", "name": "大冤种",
|
||||
"description": "倒霉 吃亏 自嘲 好心没好报 背锅 工具人",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"吐了": {
|
||||
"sticker_id": "132", "package_id": "1003", "name": "吐了",
|
||||
"description": "恶心 yue 受不了 嫌弃 想吐 生理不适",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"怒": {
|
||||
"sticker_id": "134", "package_id": "1003", "name": "怒",
|
||||
"description": "生气 愤怒 火大 暴躁 气炸 怼",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"玫瑰": {
|
||||
"sticker_id": "165", "package_id": "1003", "name": "玫瑰",
|
||||
"description": "花 示爱 表白 浪漫 送你花 情人节",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"凋谢": {
|
||||
"sticker_id": "119", "package_id": "1003", "name": "凋谢",
|
||||
"description": "花谢 失恋 难过 枯萎 心碎 凉了",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"点赞": {
|
||||
"sticker_id": "159", "package_id": "1003", "name": "点赞",
|
||||
"description": "赞 认同 好棒 good like 大拇指 顶",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"握手": {
|
||||
"sticker_id": "164", "package_id": "1003", "name": "握手",
|
||||
"description": "合作 你好 商务 hello deal 成交 友好",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"抱拳": {
|
||||
"sticker_id": "163", "package_id": "1003", "name": "抱拳",
|
||||
"description": "谢谢 失敬 江湖 承让 拜托 有礼",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"ok": {
|
||||
"sticker_id": "169", "package_id": "1003", "name": "ok",
|
||||
"description": "好的 收到 没问题 okay 行 可以 懂了",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"拳头": {
|
||||
"sticker_id": "174", "package_id": "1003", "name": "拳头",
|
||||
"description": "加油 干 冲 fight 力量 击拳 硬气",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"鞭炮": {
|
||||
"sticker_id": "191", "package_id": "1003", "name": "鞭炮",
|
||||
"description": "过年 喜庆 爆竹 春节 噼里啪啦 红",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"烟花": {
|
||||
"sticker_id": "258", "package_id": "1003", "name": "烟花",
|
||||
"description": "庆典 漂亮 新年 嘭 绽放 节日快乐",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def get_sticker_by_name(name: str) -> Optional[dict]:
|
||||
"""
|
||||
按名称查找贴纸,支持模糊匹配。
|
||||
|
||||
匹配优先级:
|
||||
1. 完全相等(name)
|
||||
2. name 包含查询词(前缀/子串)
|
||||
3. description 包含查询词(同义词搜索)
|
||||
4. 通用模糊评分(与 sticker-search 同算法),命中即返回得分最高的一条
|
||||
|
||||
返回 sticker dict,找不到返回 None。
|
||||
"""
|
||||
if not name:
|
||||
return None
|
||||
|
||||
query = name.strip()
|
||||
|
||||
if query in STICKER_MAP:
|
||||
return STICKER_MAP[query]
|
||||
|
||||
for key, sticker in STICKER_MAP.items():
|
||||
if query in key or key in query:
|
||||
return sticker
|
||||
|
||||
for sticker in STICKER_MAP.values():
|
||||
desc = sticker.get("description", "")
|
||||
if query in desc:
|
||||
return sticker
|
||||
|
||||
matches = search_stickers(query, limit=1)
|
||||
return matches[0] if matches else None
|
||||
|
||||
|
||||
def get_random_sticker(category: str = None) -> dict:
|
||||
"""
|
||||
随机返回一个贴纸。
|
||||
|
||||
若指定 category,则在 description 中含有该关键词的贴纸里随机选取;
|
||||
category 为 None 时从全表随机。
|
||||
"""
|
||||
if category:
|
||||
candidates = [
|
||||
s for s in STICKER_MAP.values()
|
||||
if category in s.get("description", "") or category in s.get("name", "")
|
||||
]
|
||||
if candidates:
|
||||
return random.choice(candidates)
|
||||
return random.choice(list(STICKER_MAP.values()))
|
||||
|
||||
|
||||
def get_sticker_by_id(sticker_id: str) -> Optional[dict]:
|
||||
"""按 sticker_id 精确查找贴纸。"""
|
||||
if not sticker_id:
|
||||
return None
|
||||
sid = str(sticker_id).strip()
|
||||
for sticker in STICKER_MAP.values():
|
||||
if sticker.get("sticker_id") == sid:
|
||||
return sticker
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 模糊搜索(对齐 chatbot-web yuanbao-openclaw-plugin/sticker-cache.ts.searchStickers)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_PUNCT_RE = re.compile(r"[\s\u3000\-_·.,,。!!??\"“”'‘’、/\\]+")
|
||||
|
||||
|
||||
def _normalize_text(raw: str) -> str:
|
||||
return unicodedata.normalize("NFKC", str(raw or "")).strip().lower()
|
||||
|
||||
|
||||
def _compact_text(raw: str) -> str:
|
||||
return _PUNCT_RE.sub("", _normalize_text(raw))
|
||||
|
||||
|
||||
def _multiset_char_hit_ratio(needle: str, haystack: str) -> float:
|
||||
if not needle:
|
||||
return 0.0
|
||||
bag: dict[str, int] = {}
|
||||
for ch in haystack:
|
||||
bag[ch] = bag.get(ch, 0) + 1
|
||||
hits = 0
|
||||
for ch in needle:
|
||||
n = bag.get(ch, 0)
|
||||
if n > 0:
|
||||
hits += 1
|
||||
bag[ch] = n - 1
|
||||
return hits / len(needle)
|
||||
|
||||
|
||||
def _bigram_jaccard(a: str, b: str) -> float:
|
||||
if len(a) < 2 or len(b) < 2:
|
||||
return 0.0
|
||||
A = {a[i:i + 2] for i in range(len(a) - 1)}
|
||||
B = {b[i:i + 2] for i in range(len(b) - 1)}
|
||||
inter = len(A & B)
|
||||
union = len(A) + len(B) - inter
|
||||
return inter / union if union else 0.0
|
||||
|
||||
|
||||
def _longest_subsequence_ratio(needle: str, haystack: str) -> float:
|
||||
if not needle:
|
||||
return 0.0
|
||||
j = 0
|
||||
for ch in haystack:
|
||||
if j >= len(needle):
|
||||
break
|
||||
if ch == needle[j]:
|
||||
j += 1
|
||||
return j / len(needle)
|
||||
|
||||
|
||||
def _score_field(haystack: str, query: str) -> float:
|
||||
hay = _normalize_text(haystack)
|
||||
q = _normalize_text(query)
|
||||
if not hay or not q:
|
||||
return 0.0
|
||||
hay_c = _compact_text(haystack)
|
||||
q_c = _compact_text(query)
|
||||
best = 0.0
|
||||
if hay == q:
|
||||
best = max(best, 100.0)
|
||||
if q in hay:
|
||||
best = max(best, 92 + min(6, len(q)))
|
||||
if len(q) >= 2 and hay.startswith(q):
|
||||
best = max(best, 88.0)
|
||||
if q_c and q_c in hay_c:
|
||||
best = max(best, 86.0)
|
||||
best = max(best, _multiset_char_hit_ratio(q_c, hay_c) * 62)
|
||||
best = max(best, _bigram_jaccard(q_c, hay_c) * 58)
|
||||
best = max(best, _longest_subsequence_ratio(q_c, hay_c) * 52)
|
||||
if len(q) == 1 and q in hay:
|
||||
best = max(best, 68.0)
|
||||
return best
|
||||
|
||||
|
||||
def search_stickers(query: str, limit: int = 10) -> list[dict]:
|
||||
"""
|
||||
在内置贴纸表中按模糊匹配排序返回前 N 条结果。
|
||||
|
||||
评分综合 name/description 字段的子串、字符多重集覆盖、bigram Jaccard、子序列比例。
|
||||
name 权重略高于 description(×0.88)。空 query 时按字典顺序返回前 N 条。
|
||||
"""
|
||||
safe_limit = max(1, min(500, int(limit) if limit else 10))
|
||||
if not query or not _normalize_text(query):
|
||||
return list(STICKER_MAP.values())[:safe_limit]
|
||||
|
||||
scored: list[tuple[float, dict]] = []
|
||||
for sticker in STICKER_MAP.values():
|
||||
name_s = _score_field(sticker.get("name", ""), query)
|
||||
desc_s = _score_field(sticker.get("description", ""), query) * 0.88
|
||||
sid = str(sticker.get("sticker_id", "")).strip()
|
||||
q_norm = _normalize_text(query)
|
||||
id_s = 0.0
|
||||
if sid and q_norm:
|
||||
sid_norm = _normalize_text(sid)
|
||||
if sid_norm == q_norm:
|
||||
id_s = 100.0
|
||||
elif q_norm in sid_norm:
|
||||
id_s = 84.0
|
||||
scored.append((max(name_s, desc_s, id_s), sticker))
|
||||
|
||||
scored.sort(key=lambda x: x[0], reverse=True)
|
||||
top = scored[0][0] if scored else 0
|
||||
if top <= 0:
|
||||
return [s for _, s in scored[:safe_limit]]
|
||||
|
||||
if top >= 22:
|
||||
floor = 18.0
|
||||
elif top >= 12:
|
||||
floor = max(10.0, top * 0.5)
|
||||
else:
|
||||
floor = max(6.0, top * 0.35)
|
||||
|
||||
filtered = [pair for pair in scored if pair[0] >= floor]
|
||||
out = filtered if filtered else scored
|
||||
return [s for _, s in out[:safe_limit]]
|
||||
|
||||
|
||||
def build_face_msg_body(
|
||||
face_index: int,
|
||||
face_type: int = 1,
|
||||
data: Optional[str] = None,
|
||||
) -> list:
|
||||
"""
|
||||
构造 TIMFaceElem 消息体。
|
||||
|
||||
Yuanbao 约定:
|
||||
- index 固定传 0(服务端通过 data 字段识别具体表情)
|
||||
- data 为 JSON 字符串,包含 sticker_id / package_id 等字段
|
||||
|
||||
Args:
|
||||
face_index: 保留字段,暂时不影响 wire format(Yuanbao 固定 index=0)。
|
||||
当 face_index > 0 时视为旧版 QQ 表情 ID,直接放入 index。
|
||||
face_type: 保留字段(兼容旧接口,当前未使用)。
|
||||
data: 已序列化的 JSON 字符串;为 None 时仅传 index。
|
||||
|
||||
Returns:
|
||||
符合 Yuanbao TIM 协议的 msg_body list,如::
|
||||
|
||||
[{"msg_type": "TIMFaceElem", "msg_content": {"index": 0, "data": "..."}}]
|
||||
"""
|
||||
msg_content: dict = {"index": face_index}
|
||||
if data is not None:
|
||||
msg_content["data"] = data
|
||||
return [{"msg_type": "TIMFaceElem", "msg_content": msg_content}]
|
||||
|
||||
|
||||
def build_sticker_msg_body(sticker: dict) -> list:
|
||||
"""
|
||||
从 STICKER_MAP 中的 sticker dict 直接构造 TIMFaceElem 消息体。
|
||||
|
||||
这是 send_sticker() 的内部辅助,确保 data 字段与原始 JS 插件一致。
|
||||
"""
|
||||
data_payload = json.dumps(
|
||||
{
|
||||
"sticker_id": sticker["sticker_id"],
|
||||
"package_id": sticker["package_id"],
|
||||
"width": sticker.get("width", 128),
|
||||
"height": sticker.get("height", 128),
|
||||
"formats": sticker.get("formats", "png"),
|
||||
"name": sticker["name"],
|
||||
},
|
||||
ensure_ascii=False,
|
||||
separators=(",", ":"),
|
||||
)
|
||||
return build_face_msg_body(face_index=0, data=data_payload)
|
||||
+777
-2915
File diff suppressed because it is too large
Load Diff
@@ -1,150 +0,0 @@
|
||||
"""Gateway runtime-metadata footer.
|
||||
|
||||
Renders a compact footer showing runtime state (model, context %, cwd) and
|
||||
appends it to the FINAL message of an agent turn when enabled. Off by default
|
||||
to keep replies minimal.
|
||||
|
||||
Config (``~/.hermes/config.yaml``)::
|
||||
|
||||
display:
|
||||
runtime_footer:
|
||||
enabled: true # off by default
|
||||
fields: [model, context_pct, cwd] # order shown; drop any to hide
|
||||
|
||||
Per-platform overrides live under ``display.platforms.<platform>.runtime_footer``.
|
||||
Users can toggle the global setting with ``/footer on|off`` from both the CLI
|
||||
and any gateway platform.
|
||||
|
||||
The footer is appended to the final response text in ``gateway/run.py`` right
|
||||
before returning the response to the adapter send path — so it only lands on
|
||||
the final message a user sees, not on tool-progress updates or streaming
|
||||
partials. When streaming is on and the final text has already been delivered
|
||||
piecemeal, the footer is sent as a separate trailing message via
|
||||
``send_trailing_footer()``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable, Optional
|
||||
|
||||
_DEFAULT_FIELDS: tuple[str, ...] = ("model", "context_pct", "cwd")
|
||||
_SEP = " · "
|
||||
|
||||
|
||||
def _home_relative_cwd(cwd: str) -> str:
|
||||
"""Return *cwd* with ``$HOME`` collapsed to ``~``. Empty string if unset."""
|
||||
if not cwd:
|
||||
return ""
|
||||
try:
|
||||
home = os.path.expanduser("~")
|
||||
p = os.path.abspath(cwd)
|
||||
if home and (p == home or p.startswith(home + os.sep)):
|
||||
return "~" + p[len(home):]
|
||||
return p
|
||||
except Exception:
|
||||
return cwd
|
||||
|
||||
|
||||
def _model_short(model: Optional[str]) -> str:
|
||||
"""Drop ``vendor/`` prefix for readability (``openai/gpt-5.4`` → ``gpt-5.4``)."""
|
||||
if not model:
|
||||
return ""
|
||||
return model.rsplit("/", 1)[-1]
|
||||
|
||||
|
||||
def resolve_footer_config(
|
||||
user_config: dict[str, Any] | None,
|
||||
platform_key: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Resolve effective runtime-footer config for *platform_key*.
|
||||
|
||||
Merge order (later wins):
|
||||
1. Built-in defaults (enabled=False)
|
||||
2. ``display.runtime_footer``
|
||||
3. ``display.platforms.<platform_key>.runtime_footer``
|
||||
"""
|
||||
resolved = {"enabled": False, "fields": list(_DEFAULT_FIELDS)}
|
||||
cfg = (user_config or {}).get("display") or {}
|
||||
|
||||
global_cfg = cfg.get("runtime_footer")
|
||||
if isinstance(global_cfg, dict):
|
||||
if "enabled" in global_cfg:
|
||||
resolved["enabled"] = bool(global_cfg.get("enabled"))
|
||||
if isinstance(global_cfg.get("fields"), list) and global_cfg["fields"]:
|
||||
resolved["fields"] = [str(f) for f in global_cfg["fields"]]
|
||||
|
||||
if platform_key:
|
||||
platforms = cfg.get("platforms") or {}
|
||||
plat_cfg = platforms.get(platform_key)
|
||||
if isinstance(plat_cfg, dict):
|
||||
plat_footer = plat_cfg.get("runtime_footer")
|
||||
if isinstance(plat_footer, dict):
|
||||
if "enabled" in plat_footer:
|
||||
resolved["enabled"] = bool(plat_footer.get("enabled"))
|
||||
if isinstance(plat_footer.get("fields"), list) and plat_footer["fields"]:
|
||||
resolved["fields"] = [str(f) for f in plat_footer["fields"]]
|
||||
|
||||
return resolved
|
||||
|
||||
|
||||
def format_runtime_footer(
|
||||
*,
|
||||
model: Optional[str],
|
||||
context_tokens: int,
|
||||
context_length: Optional[int],
|
||||
cwd: Optional[str] = None,
|
||||
fields: Iterable[str] = _DEFAULT_FIELDS,
|
||||
) -> str:
|
||||
"""Render the footer line, or return "" if no fields have data.
|
||||
|
||||
Fields are skipped silently when their underlying data is missing — a
|
||||
partially-populated footer is better than a line with ``?%`` or empty slots.
|
||||
"""
|
||||
parts: list[str] = []
|
||||
for field in fields:
|
||||
if field == "model":
|
||||
m = _model_short(model)
|
||||
if m:
|
||||
parts.append(m)
|
||||
elif field == "context_pct":
|
||||
if context_length and context_length > 0 and context_tokens >= 0:
|
||||
pct = max(0, min(100, round((context_tokens / context_length) * 100)))
|
||||
parts.append(f"{pct}%")
|
||||
elif field == "cwd":
|
||||
rel = _home_relative_cwd(cwd or os.environ.get("TERMINAL_CWD", ""))
|
||||
if rel:
|
||||
parts.append(rel)
|
||||
# Unknown field names are silently ignored.
|
||||
|
||||
if not parts:
|
||||
return ""
|
||||
return _SEP.join(parts)
|
||||
|
||||
|
||||
def build_footer_line(
|
||||
*,
|
||||
user_config: dict[str, Any] | None,
|
||||
platform_key: str | None,
|
||||
model: Optional[str],
|
||||
context_tokens: int,
|
||||
context_length: Optional[int],
|
||||
cwd: Optional[str] = None,
|
||||
) -> str:
|
||||
"""Top-level entry point used by gateway/run.py.
|
||||
|
||||
Returns the footer text (empty string when disabled or no data). Callers
|
||||
append this to the final response themselves, preserving a single blank
|
||||
line of separation.
|
||||
"""
|
||||
cfg = resolve_footer_config(user_config, platform_key)
|
||||
if not cfg.get("enabled"):
|
||||
return ""
|
||||
return format_runtime_footer(
|
||||
model=model,
|
||||
context_tokens=context_tokens,
|
||||
context_length=context_length,
|
||||
cwd=cwd,
|
||||
fields=cfg.get("fields") or _DEFAULT_FIELDS,
|
||||
)
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user