Compare commits
22 Commits
bb/cli-res
...
sid/founda
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
987962f453 | ||
|
|
25072fe690 | ||
|
|
ff99611e16 | ||
|
|
76aebd73c3 | ||
|
|
a1e667b9f2 | ||
|
|
4b16341975 | ||
|
|
65ca3ba93b | ||
|
|
8bff8bf2c0 | ||
|
|
acd1f17b88 | ||
|
|
850973295e | ||
|
|
847ffca715 | ||
|
|
67bc441099 | ||
|
|
a9ed7cb3b4 | ||
|
|
15ac253b11 | ||
|
|
fb6d37495b | ||
|
|
72e7c0ce34 | ||
|
|
f8d2365795 | ||
|
|
ca2b6a529e | ||
|
|
224e6d46d9 | ||
|
|
d3dde0b459 | ||
|
|
3f4c5ac71e | ||
|
|
08c378356d |
@@ -5,15 +5,7 @@
|
||||
|
||||
# Dependencies
|
||||
node_modules
|
||||
**/node_modules
|
||||
.venv
|
||||
**/.venv
|
||||
|
||||
# Built artifacts that are regenerated inside the image. Excluded so local
|
||||
# rebuilds on the developer's machine don't invalidate the npm-install layer
|
||||
# that now depends on the full ui-tui/packages/hermes-ink/ tree being present.
|
||||
ui-tui/dist/
|
||||
ui-tui/packages/hermes-ink/dist/
|
||||
|
||||
# CI/CD
|
||||
.github
|
||||
@@ -22,10 +14,3 @@ ui-tui/packages/hermes-ink/dist/
|
||||
.env
|
||||
|
||||
*.md
|
||||
|
||||
# Runtime data (bind-mounted at /opt/data; must not leak into build context)
|
||||
data/
|
||||
|
||||
# Compose/profile runtime state (bind-mounted; avoid ownership/secret issues)
|
||||
hermes-config/
|
||||
runtime/
|
||||
|
||||
80
.env.example
80
.env.example
@@ -14,14 +14,6 @@
|
||||
# LLM_MODEL is no longer read from .env — this line is kept for reference only.
|
||||
# LLM_MODEL=anthropic/claude-opus-4.6
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (NovitaAI)
|
||||
# =============================================================================
|
||||
# NovitaAI — 90+ models, pay-per-use
|
||||
# Get your key at: https://novita.ai/settings/key-management
|
||||
# NOVITA_API_KEY=
|
||||
# NOVITA_BASE_URL=https://api.novita.ai/openai/v1 # Override default base URL
|
||||
|
||||
# =============================================================================
|
||||
# LLM PROVIDER (Google AI Studio / Gemini)
|
||||
# =============================================================================
|
||||
@@ -151,18 +143,6 @@
|
||||
# Also requires ~/.honcho/config.json with enabled=true (see README).
|
||||
# HONCHO_API_KEY=
|
||||
|
||||
# =============================================================================
|
||||
# HYPERLIQUID OPTIONAL SKILL
|
||||
# =============================================================================
|
||||
# Optional defaults for the Hyperliquid skill in optional-skills/blockchain/hyperliquid
|
||||
#
|
||||
# Hyperliquid API base URL override
|
||||
# Default: https://api.hyperliquid.xyz
|
||||
# HYPERLIQUID_API_URL=https://api.hyperliquid-testnet.xyz
|
||||
#
|
||||
# Default address for account-level commands like state, fills, orders, and review
|
||||
# HYPERLIQUID_USER_ADDRESS=0x0000000000000000000000000000000000000000
|
||||
|
||||
# =============================================================================
|
||||
# TERMINAL TOOL CONFIGURATION
|
||||
# =============================================================================
|
||||
@@ -264,15 +244,6 @@ BROWSERBASE_PROXIES=true
|
||||
# Uses custom Chromium build to avoid bot detection altogether
|
||||
BROWSERBASE_ADVANCED_STEALTH=false
|
||||
|
||||
# Browser engine for local mode (default: auto = Chrome)
|
||||
# "auto" — use Chrome (don't pass --engine flag)
|
||||
# "lightpanda" — use Lightpanda (1.3-5.8x faster navigation, no screenshots)
|
||||
# "chrome" — explicitly request Chrome
|
||||
# Requires agent-browser v0.25.3+. Lightpanda commands that fail or return
|
||||
# empty results are automatically retried with Chrome.
|
||||
# Also configurable via browser.engine in config.yaml.
|
||||
# AGENT_BROWSER_ENGINE=auto
|
||||
|
||||
# Browser session timeout in seconds (default: 300)
|
||||
# Sessions are cleaned up after this duration of inactivity
|
||||
BROWSER_SESSION_TIMEOUT=300
|
||||
@@ -281,20 +252,6 @@ BROWSER_SESSION_TIMEOUT=300
|
||||
# Browser sessions are automatically closed after this period of no activity
|
||||
BROWSER_INACTIVITY_TIMEOUT=120
|
||||
|
||||
# Camofox local anti-detection browser (Camoufox-based Firefox).
|
||||
# Set CAMOFOX_URL to route the browser tools through a local Camofox server
|
||||
# instead of agent-browser/Browserbase. See docs/user-guide/features/browser.md.
|
||||
# CAMOFOX_URL=http://localhost:9377
|
||||
|
||||
# Externally managed Camofox sessions — when another app owns the visible
|
||||
# Camofox browser, set these so Hermes shares the same userId/profile instead
|
||||
# of creating its own isolated session.
|
||||
# CAMOFOX_USER_ID=
|
||||
# CAMOFOX_SESSION_KEY=
|
||||
# Set to true to reuse an already-open Camofox tab for this identity before
|
||||
# creating a new one (useful for gateway restarts).
|
||||
# CAMOFOX_ADOPT_EXISTING_TAB=false
|
||||
|
||||
# =============================================================================
|
||||
# SESSION LOGGING
|
||||
# =============================================================================
|
||||
@@ -441,40 +398,3 @@ IMAGE_TOOLS_DEBUG=false
|
||||
# Override STT provider endpoints (for proxies or self-hosted instances)
|
||||
# GROQ_BASE_URL=https://api.groq.com/openai/v1
|
||||
# STT_OPENAI_BASE_URL=https://api.openai.com/v1
|
||||
|
||||
# =============================================================================
|
||||
# MICROSOFT TEAMS INTEGRATION
|
||||
# =============================================================================
|
||||
# Register a Bot in Azure: https://dev.botframework.com/ → "Register a bot"
|
||||
# Or use Azure Portal: Azure Active Directory → App registrations → New registration
|
||||
# Then add the bot to Teams via the Bot Framework or App Studio.
|
||||
#
|
||||
# TEAMS_CLIENT_ID= # Azure AD App (client) ID
|
||||
# TEAMS_CLIENT_SECRET= # Azure AD client secret value
|
||||
# TEAMS_TENANT_ID= # Azure AD tenant ID (or "common" for multi-tenant)
|
||||
# TEAMS_ALLOWED_USERS= # Comma-separated AAD object IDs or UPNs
|
||||
# TEAMS_ALLOW_ALL_USERS=false # Set true to skip the allowlist
|
||||
# TEAMS_HOME_CHANNEL= # Default channel/chat ID for cron delivery
|
||||
# TEAMS_HOME_CHANNEL_NAME= # Display name for the home channel
|
||||
# TEAMS_PORT=3978 # Webhook listen port (Bot Framework default)
|
||||
|
||||
# =============================================================================
|
||||
# GOOGLE CHAT INTEGRATION
|
||||
# =============================================================================
|
||||
# Connects via Cloud Pub/Sub pull subscription (no public URL required).
|
||||
# Setup walkthrough: website/docs/user-guide/messaging/google_chat.md.
|
||||
# 1. Create a GCP project, enable the Google Chat API and Cloud Pub/Sub.
|
||||
# 2. Create a Service Account with roles/pubsub.subscriber on the
|
||||
# subscription (NOT project-wide); download the JSON key.
|
||||
# 3. Configure your Chat app at console.cloud.google.com/apis/credentials
|
||||
# → Google Chat API → Configuration → Cloud Pub/Sub topic.
|
||||
# 4. (Optional, for native attachment delivery) Each user runs
|
||||
# `/setup-files` once in their own DM after Pub/Sub is wired up.
|
||||
#
|
||||
# GOOGLE_CHAT_PROJECT_ID= # GCP project hosting the topic (or set GOOGLE_CLOUD_PROJECT)
|
||||
# GOOGLE_CHAT_SUBSCRIPTION_NAME= # Full path: projects/<id>/subscriptions/<name>
|
||||
# GOOGLE_CHAT_SERVICE_ACCOUNT_JSON= # Path to SA JSON (or set GOOGLE_APPLICATION_CREDENTIALS)
|
||||
# GOOGLE_CHAT_ALLOWED_USERS= # Comma-separated emails allowed to talk to the bot
|
||||
# GOOGLE_CHAT_ALLOW_ALL_USERS=false # Set true to skip the allowlist
|
||||
# GOOGLE_CHAT_HOME_CHANNEL= # Default space (spaces/XXXX) for cron delivery
|
||||
# GOOGLE_CHAT_HOME_CHANNEL_NAME= # Display name for the home channel
|
||||
|
||||
5
.git-blame-ignore-revs
Normal file
5
.git-blame-ignore-revs
Normal file
@@ -0,0 +1,5 @@
|
||||
# hermes_agent package restructure (PR 1/3)
|
||||
# Commit 2: pure git mv — all source files into hermes_agent/
|
||||
65ca3ba93b3fa7fd2b15af5b62d54020061f3672
|
||||
# Commit 3: rewrite all imports for hermes_agent package
|
||||
4b16341975a1217588054f567d0f76dc5a3cc481
|
||||
47
.github/actions/hermes-smoke-test/action.yml
vendored
47
.github/actions/hermes-smoke-test/action.yml
vendored
@@ -1,47 +0,0 @@
|
||||
name: Hermes smoke test
|
||||
description: >
|
||||
Run the image's built-in entrypoint against `--help` and `dashboard --help`
|
||||
to catch basic runtime regressions before publishing. Requires the image
|
||||
to already be loaded into the local Docker daemon under `image`.
|
||||
|
||||
Works identically on amd64 and arm64 runners.
|
||||
|
||||
inputs:
|
||||
image:
|
||||
description: Fully-qualified image tag (e.g. nousresearch/hermes-agent:test)
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- name: Ensure /tmp/hermes-test is hermes-writable
|
||||
shell: bash
|
||||
run: |
|
||||
# The image runs as the hermes user (UID 10000). GitHub Actions
|
||||
# creates /tmp/hermes-test root-owned by default, which hermes
|
||||
# can't write to — chown it to match the in-container UID before
|
||||
# bind-mounting. Real users doing `docker run -v ~/.hermes:...`
|
||||
# with their own UID hit the same issue and have their own
|
||||
# remediations (HERMES_UID env var, or chown locally).
|
||||
mkdir -p /tmp/hermes-test
|
||||
sudo chown -R 10000:10000 /tmp/hermes-test
|
||||
|
||||
- name: hermes --help
|
||||
shell: bash
|
||||
run: |
|
||||
docker run --rm \
|
||||
-v /tmp/hermes-test:/opt/data \
|
||||
--entrypoint /opt/hermes/docker/entrypoint.sh \
|
||||
"${{ inputs.image }}" --help
|
||||
|
||||
- name: hermes dashboard --help
|
||||
shell: bash
|
||||
run: |
|
||||
# Regression guard for #9153: dashboard was present in source but
|
||||
# missing from the published image. If this fails, something in
|
||||
# the Dockerfile is excluding the dashboard subcommand from the
|
||||
# installed package.
|
||||
docker run --rm \
|
||||
-v /tmp/hermes-test:/opt/data \
|
||||
--entrypoint /opt/hermes/docker/entrypoint.sh \
|
||||
"${{ inputs.image }}" dashboard --help
|
||||
14
.github/actions/nix-setup/action.yml
vendored
14
.github/actions/nix-setup/action.yml
vendored
@@ -1,18 +1,8 @@
|
||||
name: 'Setup Nix'
|
||||
description: 'Install Nix and configure Cachix binary cache'
|
||||
|
||||
inputs:
|
||||
cachix-auth-token:
|
||||
description: 'Cachix auth token (enables push). Omit for read-only.'
|
||||
required: false
|
||||
default: ''
|
||||
description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
|
||||
- uses: cachix/cachix-action@1eb2ef646ac0255473d23a5907ad7b04ce94065c # v17
|
||||
with:
|
||||
name: hermes-agent
|
||||
authToken: ${{ inputs.cachix-auth-token }}
|
||||
continue-on-error: true
|
||||
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
|
||||
|
||||
44
.github/dependabot.yml
vendored
44
.github/dependabot.yml
vendored
@@ -1,44 +0,0 @@
|
||||
# Dependabot configuration for hermes-agent.
|
||||
#
|
||||
# Deliberately scoped to github-actions only.
|
||||
#
|
||||
# We do NOT enable Dependabot for pip / npm / any source-dependency ecosystem
|
||||
# because we pin source dependencies exactly (uv.lock, package-lock.json) as
|
||||
# part of our supply-chain posture. Automatic version-bump PRs against those
|
||||
# pins would undermine the strategy — pins are moved deliberately, after
|
||||
# review, not on a schedule.
|
||||
#
|
||||
# github-actions is the exception: action pins (we use full commit SHAs per
|
||||
# supply-chain policy) must be updated when upstream actions publish
|
||||
# patches — usually themselves security fixes. Dependabot opens a PR with
|
||||
# the new SHA and release notes; we review and merge like any other PR.
|
||||
#
|
||||
# Security-update PRs for source dependencies (opened ONLY when a CVE is
|
||||
# published affecting a currently-pinned version) are enabled separately
|
||||
# via the repo's Dependabot security updates setting
|
||||
# (Settings → Code security → Dependabot → Dependabot security updates).
|
||||
# Those are CVE-only, not schedule-driven, and do not conflict with our
|
||||
# pinning strategy — they fire when a pinned version becomes known-bad,
|
||||
# which is exactly when we want to move the pin.
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "monday"
|
||||
open-pull-requests-limit: 5
|
||||
labels:
|
||||
- "dependencies"
|
||||
- "github-actions"
|
||||
commit-message:
|
||||
prefix: "chore(actions)"
|
||||
include: "scope"
|
||||
groups:
|
||||
# Batch routine action bumps into one PR per week to reduce noise.
|
||||
# Security updates still open individually and bypass grouping.
|
||||
actions-minor-patch:
|
||||
update-types:
|
||||
- "minor"
|
||||
- "patch"
|
||||
13
.github/workflows/deploy-site.yml
vendored
13
.github/workflows/deploy-site.yml
vendored
@@ -53,9 +53,6 @@ jobs:
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Build skills index (if not already present)
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
@@ -76,16 +73,6 @@ jobs:
|
||||
run: |
|
||||
mkdir -p _site/docs
|
||||
cp -r website/build/* _site/docs/
|
||||
# llms.txt / llms-full.txt are also published at the site root
|
||||
# (https://hermes-agent.nousresearch.com/llms.txt) because some
|
||||
# agents and IDE plugins probe the classic root-level path rather
|
||||
# than /docs/llms.txt. Same file, two URLs, one source of truth.
|
||||
if [ -f website/build/llms.txt ]; then
|
||||
cp website/build/llms.txt _site/llms.txt
|
||||
fi
|
||||
if [ -f website/build/llms-full.txt ]; then
|
||||
cp website/build/llms-full.txt _site/llms-full.txt
|
||||
fi
|
||||
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3
|
||||
|
||||
527
.github/workflows/docker-publish.yml
vendored
527
.github/workflows/docker-publish.yml
vendored
@@ -10,60 +10,37 @@ on:
|
||||
- 'Dockerfile'
|
||||
- 'docker/**'
|
||||
- '.github/workflows/docker-publish.yml'
|
||||
- '.github/actions/hermes-smoke-test/**'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- '**/*.py'
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- 'Dockerfile'
|
||||
- 'docker/**'
|
||||
- '.github/workflows/docker-publish.yml'
|
||||
- '.github/actions/hermes-smoke-test/**'
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# Concurrency: push/release runs are NEVER cancelled so every merge gets its
|
||||
# own SHA-tagged image; :main and :latest are guarded separately by the
|
||||
# move-main and move-latest jobs. PR runs reuse a PR-scoped group with
|
||||
# cancel-in-progress: true so rapid pushes to the same PR collapse to the
|
||||
# latest commit.
|
||||
concurrency:
|
||||
group: docker-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
env:
|
||||
IMAGE_NAME: nousresearch/hermes-agent
|
||||
group: docker-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build amd64 natively. This job also runs the smoke tests (basic --help
|
||||
# and the dashboard subcommand regression guard from #9153), because amd64
|
||||
# is the only arch we can `load` into the local daemon on an amd64 runner.
|
||||
# ---------------------------------------------------------------------------
|
||||
build-amd64:
|
||||
build-and-push:
|
||||
# Only run on the upstream repository, not on forks
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 45
|
||||
outputs:
|
||||
digest: ${{ steps.push.outputs.digest }}
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
# Build once, load into the local daemon for smoke testing. Cached
|
||||
# to gha with a per-arch scope; the push step below reuses every
|
||||
# layer from this build.
|
||||
# Build amd64 only so we can `load` the image for smoke testing.
|
||||
# `load: true` cannot export a multi-arch manifest to the local daemon.
|
||||
# The multi-arch build follows on push to main / release.
|
||||
- name: Build image (amd64, smoke test)
|
||||
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
|
||||
with:
|
||||
@@ -71,14 +48,24 @@ jobs:
|
||||
file: Dockerfile
|
||||
load: true
|
||||
platforms: linux/amd64
|
||||
tags: ${{ env.IMAGE_NAME }}:test
|
||||
cache-from: type=gha,scope=docker-amd64
|
||||
cache-to: type=gha,mode=max,scope=docker-amd64
|
||||
tags: nousresearch/hermes-agent:test
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
- name: Smoke test image
|
||||
uses: ./.github/actions/hermes-smoke-test
|
||||
with:
|
||||
image: ${{ env.IMAGE_NAME }}:test
|
||||
- name: Test image starts
|
||||
run: |
|
||||
# The image runs as the hermes user (UID 10000). GitHub Actions
|
||||
# creates /tmp/hermes-test root-owned by default, which hermes
|
||||
# can't write to — chown it to match the in-container UID before
|
||||
# bind-mounting. Real users doing `docker run -v ~/.hermes:...`
|
||||
# with their own UID hit the same issue and have their own
|
||||
# remediations (HERMES_UID env var, or chown locally).
|
||||
mkdir -p /tmp/hermes-test
|
||||
sudo chown -R 10000:10000 /tmp/hermes-test
|
||||
docker run --rm \
|
||||
-v /tmp/hermes-test:/opt/data \
|
||||
--entrypoint /opt/hermes/docker/entrypoint.sh \
|
||||
nousresearch/hermes-agent:test --help
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
@@ -87,448 +74,26 @@ jobs:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
# Push amd64 by digest only (no tag). The merge job assembles the
|
||||
# tagged manifest list. `push-by-digest=true` is docker's recommended
|
||||
# pattern for multi-runner multi-platform builds.
|
||||
#
|
||||
# We apply the OCI revision label here (and again on arm64) because
|
||||
# the move-main / move-latest jobs read it off the linux/amd64
|
||||
# sub-manifest config of the floating tag to decide whether it's safe
|
||||
# to advance. The label must be on each per-arch image — manifest
|
||||
# lists themselves don't carry image config labels.
|
||||
- name: Push amd64 by digest
|
||||
id: push
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
platforms: linux/amd64
|
||||
labels: |
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
outputs: type=image,name=${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
|
||||
cache-from: type=gha,scope=docker-amd64
|
||||
cache-to: type=gha,mode=max,scope=docker-amd64
|
||||
|
||||
# Write the digest to a file and upload it as an artifact so the
|
||||
# merge job can stitch both per-arch digests into a manifest list.
|
||||
- name: Export digest
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
run: |
|
||||
mkdir -p /tmp/digests
|
||||
digest="${{ steps.push.outputs.digest }}"
|
||||
touch "/tmp/digests/${digest#sha256:}"
|
||||
|
||||
- name: Upload digest artifact
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: digest-amd64
|
||||
path: /tmp/digests/*
|
||||
if-no-files-found: error
|
||||
retention-days: 1
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Build arm64 natively on GitHub's free arm64 runner. This replaces the
|
||||
# previous QEMU-emulated arm64 build, which was ~5-10x slower and shared
|
||||
# a cache scope with amd64. Matches the amd64 job's shape: build+load,
|
||||
# smoke test, then on push/release push by digest.
|
||||
# ---------------------------------------------------------------------------
|
||||
build-arm64:
|
||||
if: github.repository == 'NousResearch/hermes-agent'
|
||||
runs-on: ubuntu-24.04-arm
|
||||
timeout-minutes: 45
|
||||
outputs:
|
||||
digest: ${{ steps.push.outputs.digest }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
# Build once, load into the local daemon for smoke testing. Cached
|
||||
# to gha with a per-arch scope; the push step below reuses every
|
||||
# layer from this build.
|
||||
- name: Build image (arm64, smoke test)
|
||||
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
load: true
|
||||
platforms: linux/arm64
|
||||
tags: ${{ env.IMAGE_NAME }}:test
|
||||
cache-from: type=gha,scope=docker-arm64
|
||||
cache-to: type=gha,mode=max,scope=docker-arm64
|
||||
|
||||
- name: Smoke test image
|
||||
uses: ./.github/actions/hermes-smoke-test
|
||||
with:
|
||||
image: ${{ env.IMAGE_NAME }}:test
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Push arm64 by digest
|
||||
id: push
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
platforms: linux/arm64
|
||||
labels: |
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
outputs: type=image,name=${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
|
||||
cache-from: type=gha,scope=docker-arm64
|
||||
cache-to: type=gha,mode=max,scope=docker-arm64
|
||||
|
||||
- name: Export digest
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
run: |
|
||||
mkdir -p /tmp/digests
|
||||
digest="${{ steps.push.outputs.digest }}"
|
||||
touch "/tmp/digests/${digest#sha256:}"
|
||||
|
||||
- name: Upload digest artifact
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: digest-arm64
|
||||
path: /tmp/digests/*
|
||||
if-no-files-found: error
|
||||
retention-days: 1
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stitch both per-arch digests into a single tagged multi-arch manifest.
|
||||
# This is a registry-side operation — no building, no layer re-push —
|
||||
# so it runs in ~30 seconds. On main pushes it produces :sha-<sha>.
|
||||
# On releases it produces :<release_tag_name>.
|
||||
# ---------------------------------------------------------------------------
|
||||
merge:
|
||||
if: github.repository == 'NousResearch/hermes-agent' && (github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release')
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build-amd64, build-arm64]
|
||||
timeout-minutes: 10
|
||||
outputs:
|
||||
pushed_sha_tag: ${{ steps.mark_pushed.outputs.pushed }}
|
||||
pushed_release_tag: ${{ steps.mark_release_pushed.outputs.pushed }}
|
||||
release_tag: ${{ steps.tag.outputs.tag }}
|
||||
steps:
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
|
||||
with:
|
||||
path: /tmp/digests
|
||||
pattern: digest-*
|
||||
merge-multiple: true
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
# Compute the tag for this run. Main pushes use sha-<sha> (so every
|
||||
# commit gets its own immutable tag); releases use the release tag name.
|
||||
- name: Compute tag
|
||||
id: tag
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "release" ]; then
|
||||
echo "tag=${{ github.event.release.tag_name }}" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "tag=sha-${{ github.sha }}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Create manifest list and push
|
||||
working-directory: /tmp/digests
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Build the arg array from each digest file (filename = the digest
|
||||
# hex, with no sha256: prefix; empty file content, only the name
|
||||
# matters). Using an array avoids shellcheck SC2046 and keeps
|
||||
# every digest a single argv token even under pathological names.
|
||||
args=()
|
||||
for digest_file in *; do
|
||||
args+=("${IMAGE_NAME}@sha256:${digest_file}")
|
||||
done
|
||||
docker buildx imagetools create \
|
||||
-t "${IMAGE_NAME}:${TAG}" \
|
||||
"${args[@]}"
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
TAG: ${{ steps.tag.outputs.tag }}
|
||||
|
||||
- name: Inspect image
|
||||
run: |
|
||||
docker buildx imagetools inspect "${IMAGE_NAME}:${TAG}"
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
TAG: ${{ steps.tag.outputs.tag }}
|
||||
|
||||
# Signal to move-main that the SHA tag is live. Only on main pushes;
|
||||
# releases set pushed_release_tag instead.
|
||||
- name: Mark SHA tag pushed
|
||||
id: mark_pushed
|
||||
- name: Push multi-arch image (main branch)
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
run: echo "pushed=true" >> "$GITHUB_OUTPUT"
|
||||
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
push: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
tags: nousresearch/hermes-agent:latest
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
# Signal to move-latest that the release tag is live.
|
||||
- name: Mark release tag pushed
|
||||
id: mark_release_pushed
|
||||
- name: Push multi-arch image (release)
|
||||
if: github.event_name == 'release'
|
||||
run: echo "pushed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Move :main to point at the SHA tag the merge job pushed.
|
||||
#
|
||||
# :main is the floating tag that tracks the tip of the main branch. Every
|
||||
# merge to main retags :main forward. Users who want "latest dev build"
|
||||
# pull :main; users who want stable releases pull :latest.
|
||||
#
|
||||
# The real serialization guarantee comes from the top-level concurrency
|
||||
# group (`docker-${{ github.ref }}` with `cancel-in-progress: false`),
|
||||
# which ensures at most one workflow run for this ref executes at a time.
|
||||
# That means two move-main steps for the same ref cannot overlap.
|
||||
#
|
||||
# This job has its own concurrency group as defense-in-depth: if the
|
||||
# top-level group is ever loosened, queued move-mains will run serially
|
||||
# in arrival order, each one running the ancestor check below and either
|
||||
# advancing :main or skipping. `cancel-in-progress: false` matches the
|
||||
# top-level setting — we don't want rapid pushes to cancel a queued
|
||||
# move-main, because the ancestor check is the real safety mechanism
|
||||
# and queueing is cheap (move-main is a ~30s registry op).
|
||||
#
|
||||
# Combined with the ancestor check, this means :main only ever moves
|
||||
# forward in git history.
|
||||
# ---------------------------------------------------------------------------
|
||||
move-main:
|
||||
if: |
|
||||
github.repository == 'NousResearch/hermes-agent'
|
||||
&& github.event_name == 'push'
|
||||
&& github.ref == 'refs/heads/main'
|
||||
&& needs.merge.outputs.pushed_sha_tag == 'true'
|
||||
needs: merge
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
concurrency:
|
||||
group: docker-move-main-${{ github.ref }}
|
||||
cancel-in-progress: false
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
|
||||
with:
|
||||
fetch-depth: 1000
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
# Read the git revision label off the current :main manifest, then
|
||||
# use `git merge-base --is-ancestor` to check whether our commit is a
|
||||
# descendant of it. If :main doesn't exist yet, or its label is
|
||||
# missing, we treat that as "safe to publish". If another run already
|
||||
# advanced :main past us (or diverged), we skip and leave it alone.
|
||||
- name: Decide whether to move :main
|
||||
id: main_check
|
||||
run: |
|
||||
set -euo pipefail
|
||||
image=nousresearch/hermes-agent
|
||||
|
||||
# Pull the JSON for the linux/amd64 sub-manifest's config and extract
|
||||
# the OCI revision label with jq — Go template field access can't
|
||||
# handle dots in map keys, so using json+jq is the robust route.
|
||||
image_json=$(
|
||||
docker buildx imagetools inspect "${image}:main" \
|
||||
--format '{{ json (index .Image "linux/amd64") }}' \
|
||||
2>/dev/null || true
|
||||
)
|
||||
|
||||
if [ -z "${image_json}" ]; then
|
||||
echo "No existing :main (or inspect failed) — safe to publish."
|
||||
echo "push_main=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
current_sha=$(
|
||||
printf '%s' "${image_json}" \
|
||||
| jq -r '.config.Labels."org.opencontainers.image.revision" // ""'
|
||||
)
|
||||
|
||||
if [ -z "${current_sha}" ]; then
|
||||
echo "Registry :main has no revision label — safe to publish."
|
||||
echo "push_main=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Registry :main is at ${current_sha}"
|
||||
echo "This run is at ${GITHUB_SHA}"
|
||||
|
||||
if [ "${current_sha}" = "${GITHUB_SHA}" ]; then
|
||||
echo ":main already points at our SHA — nothing to do."
|
||||
echo "push_main=false" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Make sure we have the :main commit locally for merge-base.
|
||||
if ! git cat-file -e "${current_sha}^{commit}" 2>/dev/null; then
|
||||
git fetch --no-tags --prune origin \
|
||||
"+refs/heads/main:refs/remotes/origin/main" \
|
||||
|| true
|
||||
fi
|
||||
|
||||
if ! git cat-file -e "${current_sha}^{commit}" 2>/dev/null; then
|
||||
echo "Registry :main points at an unknown commit (${current_sha}); refusing to overwrite."
|
||||
echo "push_main=false" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Our SHA must be a descendant of the current :main to be safe.
|
||||
if git merge-base --is-ancestor "${current_sha}" "${GITHUB_SHA}"; then
|
||||
echo "Our commit is a descendant of :main — safe to advance."
|
||||
echo "push_main=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "Another run advanced :main past us (or diverged) — leaving it alone."
|
||||
echo "push_main=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# Retag the already-pushed SHA manifest as :main. This is a registry-
|
||||
# side operation — no rebuild, no layer re-push — so it's quick and
|
||||
# atomic per-tag. The ancestor check above plus the cancel-in-progress
|
||||
# concurrency on this job together guarantee we only ever move :main
|
||||
# forward in git history.
|
||||
- name: Move :main to this SHA
|
||||
if: steps.main_check.outputs.push_main == 'true'
|
||||
run: |
|
||||
set -euo pipefail
|
||||
image=nousresearch/hermes-agent
|
||||
docker buildx imagetools create \
|
||||
--tag "${image}:main" \
|
||||
"${image}:sha-${GITHUB_SHA}"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Move :latest to point at the release tag the merge job pushed.
|
||||
#
|
||||
# :latest is the floating tag that tracks the most recent stable release.
|
||||
# Only `release: published` events advance it — never main pushes.
|
||||
#
|
||||
# We still run an ancestor check against the existing :latest so that a
|
||||
# backport release on an older branch (e.g. patching v1.1.5 after v1.2.3
|
||||
# is out) doesn't drag :latest backwards. The check is the same shape as
|
||||
# move-main: read the OCI revision label off the current :latest, look up
|
||||
# that commit in git, and only advance if our release commit is a strict
|
||||
# descendant.
|
||||
# ---------------------------------------------------------------------------
|
||||
move-latest:
|
||||
if: |
|
||||
github.repository == 'NousResearch/hermes-agent'
|
||||
&& github.event_name == 'release'
|
||||
&& needs.merge.outputs.pushed_release_tag == 'true'
|
||||
needs: merge
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
concurrency:
|
||||
group: docker-move-latest
|
||||
cancel-in-progress: false
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
fetch-depth: 1000
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Decide whether to move :latest
|
||||
id: latest_check
|
||||
run: |
|
||||
set -euo pipefail
|
||||
image=nousresearch/hermes-agent
|
||||
|
||||
image_json=$(
|
||||
docker buildx imagetools inspect "${image}:latest" \
|
||||
--format '{{ json (index .Image "linux/amd64") }}' \
|
||||
2>/dev/null || true
|
||||
)
|
||||
|
||||
if [ -z "${image_json}" ]; then
|
||||
echo "No existing :latest (or inspect failed) — safe to publish."
|
||||
echo "push_latest=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
current_sha=$(
|
||||
printf '%s' "${image_json}" \
|
||||
| jq -r '.config.Labels."org.opencontainers.image.revision" // ""'
|
||||
)
|
||||
|
||||
if [ -z "${current_sha}" ]; then
|
||||
echo "Registry :latest has no revision label — safe to publish."
|
||||
echo "push_latest=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Registry :latest is at ${current_sha}"
|
||||
echo "This release is at ${GITHUB_SHA}"
|
||||
|
||||
if [ "${current_sha}" = "${GITHUB_SHA}" ]; then
|
||||
echo ":latest already points at our SHA — nothing to do."
|
||||
echo "push_latest=false" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Make sure we have the :latest commit locally for merge-base.
|
||||
# Releases can be cut from any branch, so fetch broadly.
|
||||
if ! git cat-file -e "${current_sha}^{commit}" 2>/dev/null; then
|
||||
git fetch --no-tags --prune origin \
|
||||
"+refs/heads/main:refs/remotes/origin/main" \
|
||||
|| true
|
||||
fi
|
||||
|
||||
if ! git cat-file -e "${current_sha}^{commit}" 2>/dev/null; then
|
||||
echo "Registry :latest points at an unknown commit (${current_sha}); refusing to overwrite."
|
||||
echo "push_latest=false" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Our release SHA must be a descendant of the current :latest.
|
||||
# Backport releases on older branches won't satisfy this and will
|
||||
# be left alone — :latest stays on the newer release.
|
||||
if git merge-base --is-ancestor "${current_sha}" "${GITHUB_SHA}"; then
|
||||
echo "Our release commit is a descendant of :latest — safe to advance."
|
||||
echo "push_latest=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "Existing :latest is newer than this release (likely a backport) — leaving it alone."
|
||||
echo "push_latest=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# Retag the already-pushed release manifest as :latest.
|
||||
- name: Move :latest to this release tag
|
||||
if: steps.latest_check.outputs.push_latest == 'true'
|
||||
env:
|
||||
RELEASE_TAG: ${{ needs.merge.outputs.release_tag }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
image=nousresearch/hermes-agent
|
||||
docker buildx imagetools create \
|
||||
--tag "${image}:latest" \
|
||||
"${image}:${RELEASE_TAG}"
|
||||
context: .
|
||||
file: Dockerfile
|
||||
push: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
tags: nousresearch/hermes-agent:${{ github.event.release.tag_name }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
3
.github/workflows/docs-site-checks.yml
vendored
3
.github/workflows/docs-site-checks.yml
vendored
@@ -36,9 +36,6 @@ jobs:
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Lint docs diagrams
|
||||
run: npm run lint:diagrams
|
||||
working-directory: website
|
||||
|
||||
202
.github/workflows/lint.yml
vendored
202
.github/workflows/lint.yml
vendored
@@ -1,202 +0,0 @@
|
||||
name: Lint (ruff + ty)
|
||||
|
||||
# Two things here:
|
||||
# 1. Advisory diff — ruff + ty diagnostics as a diff vs the target branch.
|
||||
# Posts a Markdown summary and a PR comment. Exit zero always.
|
||||
# 2. Blocking ``ruff check .`` — enforces the explicit rules in
|
||||
# ``[tool.ruff.lint.select]`` (currently PLW1514). Failure blocks merge.
|
||||
# Separate job so the advisory diff still runs and posts even when
|
||||
# enforcement fails.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
- "website/**"
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- "**/*.md"
|
||||
- "docs/**"
|
||||
- "website/**"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write # needed to post/update PR comments
|
||||
|
||||
concurrency:
|
||||
group: lint-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
lint-diff:
|
||||
name: ruff + ty diff
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
fetch-depth: 0 # need full history for merge-base + worktree
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Install ruff + ty
|
||||
run: |
|
||||
uv tool install ruff
|
||||
uv tool install ty
|
||||
|
||||
- name: Determine base ref
|
||||
id: base
|
||||
run: |
|
||||
# For PRs, diff against the merge base with the target branch.
|
||||
# For pushes to main, diff against the previous commit on main.
|
||||
if [ "${{ github.event_name }}" = "pull_request" ]; then
|
||||
BASE_SHA=$(git merge-base "origin/${{ github.base_ref }}" HEAD)
|
||||
BASE_REF="origin/${{ github.base_ref }}"
|
||||
else
|
||||
BASE_SHA=$(git rev-parse HEAD~1 2>/dev/null || git rev-parse HEAD)
|
||||
BASE_REF="HEAD~1"
|
||||
fi
|
||||
echo "sha=${BASE_SHA}" >> "$GITHUB_OUTPUT"
|
||||
echo "ref=${BASE_REF}" >> "$GITHUB_OUTPUT"
|
||||
echo "Base SHA: ${BASE_SHA}"
|
||||
echo "Base ref: ${BASE_REF}"
|
||||
|
||||
- name: Run ruff + ty on HEAD
|
||||
run: |
|
||||
mkdir -p .lint-reports/head
|
||||
ruff check --output-format json --exit-zero \
|
||||
> .lint-reports/head/ruff.json || true
|
||||
ty check --output-format gitlab --exit-zero \
|
||||
> .lint-reports/head/ty.json || true
|
||||
echo "HEAD ruff: $(wc -c < .lint-reports/head/ruff.json) bytes"
|
||||
echo "HEAD ty: $(wc -c < .lint-reports/head/ty.json) bytes"
|
||||
|
||||
- name: Run ruff + ty on base (via git worktree)
|
||||
run: |
|
||||
mkdir -p .lint-reports/base
|
||||
# Use a worktree so we don't clobber the main checkout. If the basex
|
||||
# SHA is identical to HEAD (e.g. first commit), skip and leave the
|
||||
# base reports empty — the diff script handles missing files.
|
||||
HEAD_SHA=$(git rev-parse HEAD)
|
||||
BASE_SHA="${{ steps.base.outputs.sha }}"
|
||||
if [ "$BASE_SHA" = "$HEAD_SHA" ]; then
|
||||
echo "Base SHA == HEAD SHA, skipping base scan."
|
||||
echo '[]' > .lint-reports/base/ruff.json
|
||||
echo '[]' > .lint-reports/base/ty.json
|
||||
else
|
||||
git worktree add --detach /tmp/lint-base "$BASE_SHA"
|
||||
(
|
||||
cd /tmp/lint-base
|
||||
ruff check --output-format json --exit-zero \
|
||||
> "$GITHUB_WORKSPACE/.lint-reports/base/ruff.json" || true
|
||||
ty check --output-format gitlab --exit-zero \
|
||||
> "$GITHUB_WORKSPACE/.lint-reports/base/ty.json" || true
|
||||
)
|
||||
git worktree remove --force /tmp/lint-base
|
||||
fi
|
||||
echo "base ruff: $(wc -c < .lint-reports/base/ruff.json) bytes"
|
||||
echo "base ty: $(wc -c < .lint-reports/base/ty.json) bytes"
|
||||
|
||||
- name: Generate diff summary
|
||||
run: |
|
||||
python scripts/lint_diff.py \
|
||||
--base-ruff .lint-reports/base/ruff.json \
|
||||
--head-ruff .lint-reports/head/ruff.json \
|
||||
--base-ty .lint-reports/base/ty.json \
|
||||
--head-ty .lint-reports/head/ty.json \
|
||||
--base-ref "${{ steps.base.outputs.ref }}" \
|
||||
--head-ref "${{ github.event_name == 'pull_request' && github.head_ref || github.ref_name }}" \
|
||||
--output .lint-reports/summary.md
|
||||
cat .lint-reports/summary.md >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
- name: Upload reports as artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
|
||||
with:
|
||||
name: lint-reports
|
||||
path: .lint-reports/
|
||||
retention-days: 14
|
||||
|
||||
- name: Post / update PR comment
|
||||
if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
|
||||
continue-on-error: true
|
||||
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7
|
||||
with:
|
||||
script: |
|
||||
const fs = require('fs');
|
||||
const body = fs.readFileSync('.lint-reports/summary.md', 'utf8');
|
||||
const marker = '<!-- lint-diff-summary -->';
|
||||
const fullBody = marker + '\n' + body;
|
||||
|
||||
const { data: comments } = await github.rest.issues.listComments({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
});
|
||||
const existing = comments.find(c => c.body && c.body.includes(marker));
|
||||
if (existing) {
|
||||
await github.rest.issues.updateComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
comment_id: existing.id,
|
||||
body: fullBody,
|
||||
});
|
||||
} else {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: context.issue.number,
|
||||
body: fullBody,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
ruff-blocking:
|
||||
# Enforce the rules in pyproject.toml [tool.ruff.lint.select]. Currently
|
||||
# PLW1514 (unspecified-encoding) — catches bare ``open()`` /
|
||||
# ``read_text()`` / ``write_text()`` calls that default to locale
|
||||
# encoding on Windows. Failure here blocks merge; the advisory
|
||||
# ``lint-diff`` job above runs independently so reviewers still get
|
||||
# the diff comment even when enforcement fails.
|
||||
name: ruff enforcement (blocking)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
- name: Install ruff
|
||||
run: uv tool install ruff
|
||||
|
||||
- name: ruff check .
|
||||
# No --exit-zero, no || true. Exit code propagates to the job,
|
||||
# which propagates to the required-check gate.
|
||||
run: |
|
||||
ruff check .
|
||||
|
||||
windows-footguns:
|
||||
# Static guardrails on Windows-unsafe Python primitives — os.kill(pid, 0),
|
||||
# os.killpg, os.setsid, signal.SIGKILL without getattr fallback,
|
||||
# shebang scripts via subprocess, bare open() without encoding=, etc.
|
||||
# See scripts/check-windows-footguns.py for the full rule list.
|
||||
name: Windows footguns (blocking)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Run footgun checker
|
||||
run: python scripts/check-windows-footguns.py --all
|
||||
68
.github/workflows/nix-lockfile-check.yml
vendored
Normal file
68
.github/workflows/nix-lockfile-check.yml
vendored
Normal file
@@ -0,0 +1,68 @@
|
||||
name: Nix Lockfile Check
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
concurrency:
|
||||
group: nix-lockfile-check-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
check:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
|
||||
- name: Resolve head SHA
|
||||
id: sha
|
||||
shell: bash
|
||||
run: |
|
||||
FULL="${{ github.event.pull_request.head.sha || github.sha }}"
|
||||
echo "full=$FULL" >> "$GITHUB_OUTPUT"
|
||||
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Check lockfile hashes
|
||||
id: check
|
||||
continue-on-error: true
|
||||
env:
|
||||
LINK_SHA: ${{ steps.sha.outputs.full }}
|
||||
run: nix run .#fix-lockfiles -- --check
|
||||
|
||||
- name: Post sticky PR comment (stale)
|
||||
if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
message: |
|
||||
### ⚠️ npm lockfile hash out of date
|
||||
|
||||
Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
|
||||
|
||||
The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
|
||||
|
||||
${{ steps.check.outputs.report }}
|
||||
|
||||
#### Apply the fix
|
||||
|
||||
- [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
|
||||
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
|
||||
- Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
|
||||
|
||||
- name: Clear sticky PR comment (resolved)
|
||||
if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
delete: true
|
||||
|
||||
- name: Fail if stale
|
||||
if: steps.check.outputs.stale == 'true'
|
||||
run: exit 1
|
||||
111
.github/workflows/nix-lockfile-fix.yml
vendored
111
.github/workflows/nix-lockfile-fix.yml
vendored
@@ -1,13 +1,6 @@
|
||||
name: Nix Lockfile Fix
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'ui-tui/package-lock.json'
|
||||
- 'ui-tui/package.json'
|
||||
- 'web/package-lock.json'
|
||||
- 'web/package.json'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
pr_number:
|
||||
@@ -26,105 +19,9 @@ concurrency:
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
# ── Auto-fix on main ───────────────────────────────────────────────
|
||||
# Fires when a push to main touches package.json or package-lock.json
|
||||
# in ui-tui/ or web/. Runs fix-lockfiles and pushes the hash
|
||||
# update commit directly to main so Nix builds never stay broken.
|
||||
#
|
||||
# Safety invariants:
|
||||
# 1. The fix commit only touches nix/*.nix files, which are NOT in
|
||||
# the paths filter above, so this cannot re-trigger itself.
|
||||
# 2. An explicit file-whitelist check before commit aborts if
|
||||
# fix-lockfiles ever modifies unexpected files.
|
||||
# 3. Job-level concurrency with cancel-in-progress: true ensures
|
||||
# back-to-back pushes collapse to the newest; ref: main checkout
|
||||
# always operates on the latest branch state.
|
||||
# 4. Uses a GitHub App token (not GITHUB_TOKEN) so the fix commit
|
||||
# triggers downstream nix.yml verification.
|
||||
auto-fix-main:
|
||||
if: github.event_name == 'push'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 25
|
||||
concurrency:
|
||||
group: auto-fix-main
|
||||
cancel-in-progress: true
|
||||
steps:
|
||||
- name: Generate GitHub App token
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@7bfa3a4717ef143a604ee0a99d859b8886a96d00 # v1.9.3
|
||||
with:
|
||||
app-id: ${{ secrets.APP_ID }}
|
||||
private-key: ${{ secrets.APP_PRIVATE_KEY }}
|
||||
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
ref: main
|
||||
token: ${{ steps.app-token.outputs.token }}
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Apply lockfile hashes
|
||||
id: apply
|
||||
run: nix run .#fix-lockfiles -- --apply
|
||||
|
||||
- name: Commit & push
|
||||
if: steps.apply.outputs.changed == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Ensure only nix files were modified — prevents accidental
|
||||
# self-triggering if fix-lockfiles ever touches package files.
|
||||
unexpected="$(git diff --name-only | grep -Ev '^nix/(tui|web)\.nix$' || true)"
|
||||
if [ -n "$unexpected" ]; then
|
||||
echo "::error::Unexpected modified files: $unexpected"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Record the base SHA before committing — used to detect package
|
||||
# file changes if we need to rebase after a non-fast-forward push.
|
||||
BASE_SHA="$(git rev-parse HEAD)"
|
||||
|
||||
git config user.name 'github-actions[bot]'
|
||||
git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
|
||||
git add nix/tui.nix nix/web.nix
|
||||
git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
|
||||
-m "Source: $GITHUB_SHA" \
|
||||
-m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
|
||||
|
||||
# Retry push with rebase in case main advanced with an unrelated
|
||||
# commit during the nix build. Without this, a non-fast-forward
|
||||
# rejection silently loses the fix. If package files changed during
|
||||
# the rebase, abort — a fresh auto-fix run will handle the new state.
|
||||
for attempt in 1 2 3; do
|
||||
if git push origin HEAD:main; then
|
||||
exit 0
|
||||
fi
|
||||
echo "::warning::Push attempt $attempt failed (non-fast-forward?), rebasing…"
|
||||
git fetch origin main
|
||||
|
||||
# If package files changed between our base and the new main,
|
||||
# our computed hashes are stale. Abort and let the next triggered
|
||||
# run recompute from the correct package-lock state.
|
||||
pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
|
||||
'ui-tui/package-lock.json' 'ui-tui/package.json' \
|
||||
'web/package-lock.json' 'web/package.json' || true)"
|
||||
if [ -n "$pkg_changed" ]; then
|
||||
echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
git rebase origin/main
|
||||
done
|
||||
echo "::error::Failed to push after 3 rebase attempts"
|
||||
exit 1
|
||||
|
||||
# ── PR fix (manual / checkbox) ─────────────────────────────────────
|
||||
# Existing behavior: run on manual dispatch OR when a task-list
|
||||
# checkbox in the sticky lockfile-check comment flips from [ ] to [x].
|
||||
fix:
|
||||
# Run on manual dispatch OR when a task-list checkbox in the sticky
|
||||
# lockfile-check comment flips from `[ ]` to `[x]`.
|
||||
if: |
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'issue_comment'
|
||||
@@ -202,12 +99,10 @@ jobs:
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Apply lockfile hashes
|
||||
id: apply
|
||||
run: nix run .#fix-lockfiles
|
||||
run: nix run .#fix-lockfiles -- --apply
|
||||
|
||||
- name: Commit & push
|
||||
if: steps.apply.outputs.changed == 'true'
|
||||
|
||||
84
.github/workflows/nix.yml
vendored
84
.github/workflows/nix.yml
vendored
@@ -7,7 +7,6 @@ on:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
concurrency:
|
||||
group: nix-${{ github.ref }}
|
||||
@@ -23,95 +22,12 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Resolve head SHA
|
||||
if: github.event_name == 'pull_request'
|
||||
id: sha
|
||||
shell: bash
|
||||
run: |
|
||||
FULL="${{ github.event.pull_request.head.sha || github.sha }}"
|
||||
echo "full=$FULL" >> "$GITHUB_OUTPUT"
|
||||
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Check flake
|
||||
id: flake
|
||||
if: runner.os == 'Linux'
|
||||
continue-on-error: true
|
||||
run: nix flake check --print-build-logs
|
||||
|
||||
- name: Build package
|
||||
id: build
|
||||
if: runner.os == 'Linux'
|
||||
continue-on-error: true
|
||||
run: nix build --print-build-logs
|
||||
|
||||
# When the real Nix build fails, run a targeted diagnostic to see if
|
||||
# the failure is specifically a stale npm lockfile hash in one of the
|
||||
# known npm subpackages (tui / web). This avoids surfacing a generic
|
||||
# "build failed" message when the fix is a single known command.
|
||||
- name: Diagnose npm lockfile hashes
|
||||
id: hash_check
|
||||
if: (steps.flake.outcome == 'failure' || steps.build.outcome == 'failure') && runner.os == 'Linux'
|
||||
continue-on-error: true
|
||||
env:
|
||||
LINK_SHA: ${{ steps.sha.outputs.full }}
|
||||
run: nix run .#fix-lockfiles -- --check
|
||||
|
||||
# If fix-lockfiles itself crashes (infrastructure blip, cache throttle,
|
||||
# etc.) it won't set stale=true/false. Treat that as a distinct failure
|
||||
# mode rather than silently ignoring it.
|
||||
- name: Fail if hash check crashed without reporting
|
||||
if: steps.hash_check.outcome == 'failure' && steps.hash_check.outputs.stale != 'true' && steps.hash_check.outputs.stale != 'false'
|
||||
run: |
|
||||
echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
|
||||
exit 1
|
||||
|
||||
- name: Post sticky PR comment (stale hashes)
|
||||
if: steps.hash_check.outputs.stale == 'true' && github.event_name == 'pull_request'
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
message: |
|
||||
### ⚠️ npm lockfile hash out of date
|
||||
|
||||
Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
|
||||
|
||||
The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
|
||||
|
||||
${{ steps.hash_check.outputs.report }}
|
||||
|
||||
#### Apply the fix
|
||||
|
||||
- [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
|
||||
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
|
||||
- Or locally: `nix run .#fix-lockfiles` and commit the diff
|
||||
|
||||
# Clear the sticky comment when either the build passed outright (no
|
||||
# hash check needed) or the hash check explicitly returned stale=false
|
||||
# (build failed for a non-hash reason).
|
||||
- name: Clear sticky PR comment (resolved)
|
||||
if: |
|
||||
github.event_name == 'pull_request' &&
|
||||
runner.os == 'Linux' &&
|
||||
(steps.hash_check.outputs.stale == 'false' ||
|
||||
(steps.flake.outcome == 'success' && steps.build.outcome == 'success'))
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
delete: true
|
||||
|
||||
- name: Final fail if build or flake failed
|
||||
if: steps.flake.outcome == 'failure' || steps.build.outcome == 'failure'
|
||||
run: |
|
||||
if [ "${{ steps.hash_check.outputs.stale }}" == "true" ]; then
|
||||
echo "::error::Nix build failed due to stale npm lockfile hash. Run: nix run .#fix-lockfiles"
|
||||
else
|
||||
echo "::error::Nix build/flake check failed. See logs above."
|
||||
fi
|
||||
exit 1
|
||||
|
||||
- name: Evaluate flake (macOS)
|
||||
if: runner.os == 'macOS'
|
||||
run: nix flake show --json > /dev/null
|
||||
|
||||
67
.github/workflows/osv-scanner.yml
vendored
67
.github/workflows/osv-scanner.yml
vendored
@@ -1,67 +0,0 @@
|
||||
name: OSV-Scanner
|
||||
|
||||
# Scans lockfiles (uv.lock, package-lock.json) against the OSV vulnerability
|
||||
# database. Runs on every PR that touches a lockfile and on a weekly schedule
|
||||
# against main.
|
||||
#
|
||||
# This is detection-only — OSV-Scanner does NOT open PRs or modify pins.
|
||||
# It reports known CVEs in currently-pinned dependency versions so we can
|
||||
# decide when and how to patch on our own schedule. Our pinning strategy
|
||||
# (full SHA / exact version) is preserved; only the notification signal
|
||||
# is added.
|
||||
#
|
||||
# Complements the existing supply-chain-audit.yml workflow (which scans
|
||||
# for malicious code patterns in PR diffs) by covering the orthogonal
|
||||
# "currently-pinned dep became known-vulnerable" case.
|
||||
#
|
||||
# Uses Google's officially-recommended reusable workflow, pinned by SHA.
|
||||
# Findings land in the repo's Security tab (Code Scanning > OSV-Scanner).
|
||||
# fail-on-vuln is disabled so the job does not block merges on pre-existing
|
||||
# vulnerabilities in pinned deps that we may need to patch deliberately.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'uv.lock'
|
||||
- 'pyproject.toml'
|
||||
- 'package.json'
|
||||
- 'package-lock.json'
|
||||
- 'ui-tui/package.json'
|
||||
- 'ui-tui/package-lock.json'
|
||||
- 'website/package.json'
|
||||
- 'website/package-lock.json'
|
||||
- '.github/workflows/osv-scanner.yml'
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'uv.lock'
|
||||
- 'pyproject.toml'
|
||||
- 'package.json'
|
||||
- 'package-lock.json'
|
||||
- 'ui-tui/package-lock.json'
|
||||
- 'website/package-lock.json'
|
||||
schedule:
|
||||
# Weekly scan against main — catches CVEs published after merge for
|
||||
# deps that haven't changed since.
|
||||
- cron: '0 9 * * 1'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
# Required by the reusable workflow to upload SARIF to the Security tab.
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
jobs:
|
||||
scan:
|
||||
name: Scan lockfiles
|
||||
uses: google/osv-scanner-action/.github/workflows/osv-scanner-reusable.yml@c51854704019a247608d928f370c98740469d4b5 # v2.3.5
|
||||
with:
|
||||
# Scan explicit lockfiles rather than recursing, so we only look at
|
||||
# the three sources of truth and skip vendored / test / worktree dirs.
|
||||
scan-args: |-
|
||||
--lockfile=uv.lock
|
||||
--lockfile=ui-tui/package-lock.json
|
||||
--lockfile=website/package-lock.json
|
||||
fail-on-vuln: false
|
||||
5
.github/workflows/tests.yml
vendored
5
.github/workflows/tests.yml
vendored
@@ -55,14 +55,11 @@ jobs:
|
||||
|
||||
e2e:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Install system dependencies
|
||||
run: sudo apt-get update && sudo apt-get install -y ripgrep
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
|
||||
119
.github/workflows/uv-lockfile-check.yml
vendored
119
.github/workflows/uv-lockfile-check.yml
vendored
@@ -1,119 +0,0 @@
|
||||
name: uv.lock check
|
||||
|
||||
# Verify uv.lock is in sync with pyproject.toml. Blocking check — PRs
|
||||
# that modify pyproject.toml without regenerating uv.lock (or vice versa)
|
||||
# must not merge, because the Docker build's `uv sync --frozen` step will
|
||||
# fail on a stale lockfile and we'd rather catch it here than in the
|
||||
# docker-publish workflow on main.
|
||||
#
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# IMPORTANT: this check runs against the MERGED state, not just your branch
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
#
|
||||
# For `pull_request` events, GitHub checks out `refs/pull/<N>/merge` by
|
||||
# default — a synthetic commit that merges your PR branch into the CURRENT
|
||||
# state of `main`. That means the pyproject.toml evaluated here is
|
||||
# `main's pyproject.toml + your PR's changes to pyproject.toml`, not just
|
||||
# what's on your branch.
|
||||
#
|
||||
# Failure mode this creates: if `main` has advanced since you branched
|
||||
# (e.g. someone merged a PR that added a dep to pyproject.toml + its
|
||||
# corresponding uv.lock entries), your branch's uv.lock is missing those
|
||||
# new entries. `uv lock --check` resolves against the merged pyproject
|
||||
# and sees a lockfile that doesn't cover all the current deps → fails
|
||||
# with "The lockfile at uv.lock needs to be updated."
|
||||
#
|
||||
# This can be confusing: `uv lock --check` passes locally (your branch
|
||||
# is internally consistent) but fails in CI (merged state isn't).
|
||||
#
|
||||
# Fix is to sync your branch with main and regenerate the lockfile:
|
||||
#
|
||||
# git fetch origin main
|
||||
# git rebase origin/main # or merge, whatever the repo prefers
|
||||
# uv lock # regenerates uv.lock against new pyproject.toml
|
||||
# git add uv.lock
|
||||
# git commit -m "chore: refresh uv.lock after rebase onto main"
|
||||
# git push --force-with-lease # if you rebased
|
||||
#
|
||||
# If you also changed pyproject.toml in your PR, `uv lock` handles that
|
||||
# at the same time — one regeneration covers both your changes and the
|
||||
# drift from main.
|
||||
#
|
||||
# This is the correct behavior! The check is protecting main's Docker
|
||||
# build: a post-merge build would see the same merged state and fail
|
||||
# the same way. Better to catch it here than after merge.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- '.github/workflows/uv-lockfile-check.yml'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- '.github/workflows/uv-lockfile-check.yml'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: uv-lockfile-check-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
jobs:
|
||||
check:
|
||||
name: uv lock --check
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
# `uv lock --check` re-resolves the project from pyproject.toml and
|
||||
# compares the result to uv.lock, exiting non-zero if they disagree.
|
||||
# No network writes, no file modifications.
|
||||
#
|
||||
# On PRs this runs against the merge commit (see comment at the top
|
||||
# of this file) — failures often mean "your branch is behind main,
|
||||
# rebase and regenerate uv.lock."
|
||||
- name: Verify uv.lock is up-to-date
|
||||
run: |
|
||||
if ! uv lock --check; then
|
||||
cat <<'EOF' >> "$GITHUB_STEP_SUMMARY"
|
||||
## ❌ uv.lock is out of sync with pyproject.toml
|
||||
|
||||
**If this is a PR:** this check runs against the merged state
|
||||
(your branch + current `main`), not just your branch. If
|
||||
`uv lock --check` passes locally, your branch is likely behind
|
||||
`main` — recent changes to `pyproject.toml` on `main` aren't
|
||||
reflected in your branch's `uv.lock` yet.
|
||||
|
||||
To fix, sync with main and regenerate the lockfile:
|
||||
|
||||
```bash
|
||||
git fetch origin main
|
||||
git rebase origin/main # or `git merge origin/main`
|
||||
uv lock # regenerate against new pyproject.toml
|
||||
git add uv.lock
|
||||
git commit -m "chore: refresh uv.lock after syncing with main"
|
||||
git push --force-with-lease # drop --force-with-lease if you merged
|
||||
```
|
||||
|
||||
**If you only changed pyproject.toml:** run `uv lock` locally
|
||||
and commit the result.
|
||||
|
||||
This check is blocking because the Docker image build uses
|
||||
`uv sync --frozen --extra all`, which rejects stale lockfiles
|
||||
— catching it here avoids a ~15 min failed docker-publish run
|
||||
on `main` post-merge.
|
||||
EOF
|
||||
echo "::error title=uv.lock out of sync::Run \`uv lock\` locally and commit the result. If on a PR, sync with main first."
|
||||
exit 1
|
||||
fi
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,4 +1,3 @@
|
||||
.DS_Store
|
||||
/venv/
|
||||
/_pycache/
|
||||
*.pyc*
|
||||
@@ -69,4 +68,3 @@ mini-swe-agent/
|
||||
.nix-stamps/
|
||||
result
|
||||
website/static/api/skills-index.json
|
||||
models-dev-upstream/
|
||||
|
||||
650
AGENTS.md
650
AGENTS.md
@@ -5,108 +5,100 @@ Instructions for AI coding assistants and developers working on the hermes-agent
|
||||
## Development Environment
|
||||
|
||||
```bash
|
||||
# Prefer .venv; fall back to venv if that's what your checkout has.
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
source venv/bin/activate # ALWAYS activate before running Python
|
||||
```
|
||||
|
||||
`scripts/run_tests.sh` probes `.venv` first, then `venv`, then
|
||||
`$HOME/.hermes/hermes-agent/venv` (for worktrees that share a venv with the
|
||||
main checkout).
|
||||
|
||||
## Project Structure
|
||||
|
||||
File counts shift constantly — don't treat the tree below as exhaustive.
|
||||
The canonical source is the filesystem. The notes call out the load-bearing
|
||||
entry points you'll actually edit.
|
||||
|
||||
```
|
||||
hermes-agent/
|
||||
├── run_agent.py # AIAgent class — core conversation loop (~12k LOC)
|
||||
├── model_tools.py # Tool orchestration, discover_builtin_tools(), handle_function_call()
|
||||
├── toolsets.py # Toolset definitions, _HERMES_CORE_TOOLS list
|
||||
├── cli.py # HermesCLI class — interactive CLI orchestrator (~11k LOC)
|
||||
├── hermes_state.py # SessionDB — SQLite session store (FTS5 search)
|
||||
├── hermes_constants.py # get_hermes_home(), display_hermes_home() — profile-aware paths
|
||||
├── hermes_logging.py # setup_logging() — agent.log / errors.log / gateway.log (profile-aware)
|
||||
├── batch_runner.py # Parallel batch processing
|
||||
├── agent/ # Agent internals (provider adapters, memory, caching, compression, etc.)
|
||||
├── hermes_cli/ # CLI subcommands, setup wizard, plugins loader, skin engine
|
||||
├── tools/ # Tool implementations — auto-discovered via tools/registry.py
|
||||
│ └── environments/ # Terminal backends (local, docker, ssh, modal, daytona, singularity)
|
||||
├── gateway/ # Messaging gateway — run.py + session.py + platforms/
|
||||
│ ├── platforms/ # Adapter per platform (telegram, discord, slack, whatsapp,
|
||||
│ │ # homeassistant, signal, matrix, mattermost, email, sms,
|
||||
│ │ # dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
|
||||
│ │ # yuanbao, webhook, api_server, ...). See ADDING_A_PLATFORM.md.
|
||||
│ └── builtin_hooks/ # Extension point for always-registered gateway hooks (none shipped)
|
||||
├── plugins/ # Plugin system (see "Plugins" section below)
|
||||
│ ├── memory/ # Memory-provider plugins (honcho, mem0, supermemory, ...)
|
||||
│ ├── context_engine/ # Context-engine plugins
|
||||
│ ├── model-providers/ # Inference backend plugins (openrouter, anthropic, gmi, ...)
|
||||
│ ├── kanban/ # Multi-agent board dispatcher + worker plugin
|
||||
│ ├── hermes-achievements/ # Gamified achievement tracking
|
||||
│ ├── observability/ # Metrics / traces / logs plugin
|
||||
│ ├── image_gen/ # Image-generation providers
|
||||
│ └── <others>/ # disk-cleanup, example-dashboard, google_meet, platforms,
|
||||
│ # spotify, strike-freedom-cockpit, ...
|
||||
├── optional-skills/ # Heavier/niche skills shipped but NOT active by default
|
||||
├── skills/ # Built-in skills bundled with the repo
|
||||
├── ui-tui/ # Ink (React) terminal UI — `hermes --tui`
|
||||
│ └── src/ # entry.tsx, app.tsx, gatewayClient.ts + app/components/hooks/lib
|
||||
├── hermes_agent/ # Single installable package
|
||||
│ ├── agent/ # Core conversation loop and agent internals
|
||||
│ │ ├── loop.py # AIAgent class — core conversation loop
|
||||
│ │ ├── prompt_builder.py # System prompt assembly
|
||||
│ │ ├── context/ # Context management (engine, compressor, references)
|
||||
│ │ ├── memory/ # Memory management (manager, provider)
|
||||
│ │ ├── image_gen/ # Image generation (provider, registry)
|
||||
│ │ ├── display.py # KawaiiSpinner, tool preview formatting
|
||||
│ │ ├── skill_commands.py # Skill slash commands (shared CLI/gateway)
|
||||
│ │ └── trajectory.py # Trajectory saving helpers
|
||||
│ ├── providers/ # LLM provider adapters and transports
|
||||
│ │ ├── anthropic_adapter.py # Anthropic adapter
|
||||
│ │ ├── anthropic_transport.py # Anthropic transport
|
||||
│ │ ├── metadata.py # Model context lengths, token estimation
|
||||
│ │ ├── auxiliary.py # Auxiliary LLM client (vision, summarization)
|
||||
│ │ ├── caching.py # Anthropic prompt caching
|
||||
│ │ └── credential_pool.py # Credential management
|
||||
│ ├── tools/ # Tool implementations
|
||||
│ │ ├── dispatch.py # Tool orchestration, discover_builtin_tools()
|
||||
│ │ ├── toolsets.py # Toolset definitions
|
||||
│ │ ├── registry.py # Central tool registry
|
||||
│ │ ├── terminal.py # Terminal orchestration
|
||||
│ │ ├── browser/ # Browser tools (tool, cdp, camofox, providers/)
|
||||
│ │ ├── mcp/ # MCP client and server
|
||||
│ │ ├── skills/ # Skill management (manager, tool, hub, guard, sync)
|
||||
│ │ ├── media/ # Voice, TTS, transcription, image gen
|
||||
│ │ ├── files/ # File operations (tools, operations, state)
|
||||
│ │ └── security/ # Path security, URL safety, approval
|
||||
│ ├── backends/ # Terminal backends (local, docker, ssh, modal, daytona, singularity)
|
||||
│ ├── cli/ # CLI subcommands and setup
|
||||
│ │ ├── main.py # Entry point — all `hermes` subcommands
|
||||
│ │ ├── repl.py # HermesCLI class — interactive CLI orchestrator
|
||||
│ │ ├── config.py # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
|
||||
│ │ ├── commands.py # Slash command definitions
|
||||
│ │ ├── auth/ # Provider credential resolution
|
||||
│ │ ├── models/ # Model catalog, provider lists, switching
|
||||
│ │ └── ui/ # Banner, colors, skin engine, callbacks, tips
|
||||
│ ├── gateway/ # Messaging platform gateway
|
||||
│ │ ├── run.py # Main loop, slash commands, message dispatch
|
||||
│ │ ├── session.py # SessionStore — conversation persistence
|
||||
│ │ └── platforms/ # Adapters: telegram, discord, slack, whatsapp, etc.
|
||||
│ ├── acp/ # ACP server (VS Code / Zed / JetBrains integration)
|
||||
│ ├── cron/ # Scheduler (jobs.py, scheduler.py)
|
||||
│ ├── plugins/ # Plugin system (memory providers, context engines)
|
||||
│ ├── constants.py # Shared constants
|
||||
│ ├── state.py # SessionDB — SQLite session store
|
||||
│ ├── logging.py # Logging configuration
|
||||
│ └── utils.py # Shared utilities
|
||||
├── tui_gateway/ # Python JSON-RPC backend for the TUI
|
||||
├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration)
|
||||
├── cron/ # Scheduler — jobs.py, scheduler.py
|
||||
├── ui-tui/ # Ink (React) terminal UI — `hermes --tui`
|
||||
├── environments/ # RL training environments (Atropos)
|
||||
├── scripts/ # run_tests.sh, release.py, auxiliary scripts
|
||||
├── website/ # Docusaurus docs site
|
||||
└── tests/ # Pytest suite (~17k tests across ~900 files as of May 2026)
|
||||
├── tests/ # Pytest suite
|
||||
└── web/ # Vite + React web dashboard
|
||||
```
|
||||
|
||||
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
|
||||
**Logs:** `~/.hermes/logs/` — `agent.log` (INFO+), `errors.log` (WARNING+),
|
||||
`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
|
||||
Browse with `hermes logs [--follow] [--level ...] [--session ...]`.
|
||||
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)
|
||||
|
||||
## File Dependency Chain
|
||||
|
||||
```
|
||||
tools/registry.py (no deps — imported by all tool files)
|
||||
hermes_agent/tools/registry.py (no deps — imported by all tool files)
|
||||
↑
|
||||
tools/*.py (each calls registry.register() at import time)
|
||||
hermes_agent/tools/*.py (each calls registry.register() at import time)
|
||||
↑
|
||||
model_tools.py (imports tools/registry + triggers tool discovery)
|
||||
hermes_agent/tools/dispatch.py (imports registry + triggers tool discovery)
|
||||
↑
|
||||
run_agent.py, cli.py, batch_runner.py, environments/
|
||||
hermes_agent/agent/loop.py, hermes_agent/cli/repl.py, environments/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## AIAgent Class (run_agent.py)
|
||||
|
||||
The real `AIAgent.__init__` takes ~60 parameters (credentials, routing, callbacks,
|
||||
session context, budget, credential pool, etc.). The signature below is the
|
||||
minimum subset you'll usually touch — read `run_agent.py` for the full list.
|
||||
## AIAgent Class (hermes_agent/agent/loop.py)
|
||||
|
||||
```python
|
||||
class AIAgent:
|
||||
def __init__(self,
|
||||
base_url: str = None,
|
||||
api_key: str = None,
|
||||
provider: str = None,
|
||||
api_mode: str = None, # "chat_completions" | "codex_responses" | ...
|
||||
model: str = "", # empty → resolved from config/provider later
|
||||
max_iterations: int = 90, # tool-calling iterations (shared with subagents)
|
||||
model: str = "anthropic/claude-opus-4.6",
|
||||
max_iterations: int = 90,
|
||||
enabled_toolsets: list = None,
|
||||
disabled_toolsets: list = None,
|
||||
quiet_mode: bool = False,
|
||||
save_trajectories: bool = False,
|
||||
platform: str = None, # "cli", "telegram", etc.
|
||||
platform: str = None, # "cli", "telegram", etc.
|
||||
session_id: str = None,
|
||||
skip_context_files: bool = False,
|
||||
skip_memory: bool = False,
|
||||
credential_pool=None,
|
||||
# ... plus callbacks, thread/user/chat IDs, iteration_budget, fallback_model,
|
||||
# checkpoints config, prefill_messages, service_tier, reasoning_config, etc.
|
||||
# ... plus provider, api_mode, callbacks, routing params
|
||||
): ...
|
||||
|
||||
def chat(self, message: str) -> str:
|
||||
@@ -119,13 +111,10 @@ class AIAgent:
|
||||
|
||||
### Agent Loop
|
||||
|
||||
The core loop is inside `run_conversation()` — entirely synchronous, with
|
||||
interrupt checks, budget tracking, and a one-turn grace call:
|
||||
The core loop is inside `run_conversation()` — entirely synchronous:
|
||||
|
||||
```python
|
||||
while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) \
|
||||
or self._budget_grace_call:
|
||||
if self._interrupt_requested: break
|
||||
while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
|
||||
response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas)
|
||||
if response.tool_calls:
|
||||
for tool_call in response.tool_calls:
|
||||
@@ -136,19 +125,18 @@ while (api_call_count < self.max_iterations and self.iteration_budget.remaining
|
||||
return response.content
|
||||
```
|
||||
|
||||
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`.
|
||||
Reasoning content is stored in `assistant_msg["reasoning"]`.
|
||||
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`.
|
||||
|
||||
---
|
||||
|
||||
## CLI Architecture (cli.py)
|
||||
## CLI Architecture (hermes_agent/cli/repl.py)
|
||||
|
||||
- **Rich** for banner/panels, **prompt_toolkit** for input with autocomplete
|
||||
- **KawaiiSpinner** (`agent/display.py`) — animated faces during API calls, `┊` activity feed for tool results
|
||||
- `load_cli_config()` in cli.py merges hardcoded defaults + user config YAML
|
||||
- **Skin engine** (`hermes_cli/skin_engine.py`) — data-driven CLI theming; initialized from `display.skin` config key at startup; skins customize banner colors, spinner faces/verbs/wings, tool prefix, response box, branding text
|
||||
- **KawaiiSpinner** (`hermes_agent/agent/display.py`) — animated faces during API calls, `┊` activity feed for tool results
|
||||
- `load_cli_config()` in repl.py merges hardcoded defaults + user config YAML
|
||||
- **Skin engine** (`hermes_agent/cli/ui/skin_engine.py`) — data-driven CLI theming; initialized from `display.skin` config key at startup; skins customize banner colors, spinner faces/verbs/wings, tool prefix, response box, branding text
|
||||
- `process_command()` is a method on `HermesCLI` — dispatches on canonical command name resolved via `resolve_command()` from the central registry
|
||||
- Skill slash commands: `agent/skill_commands.py` scans `~/.hermes/skills/`, injects as **user message** (not system prompt) to preserve prompt caching
|
||||
- Skill slash commands: `hermes_agent/agent/skill_commands.py` scans `~/.hermes/skills/`, injects as **user message** (not system prompt) to preserve prompt caching
|
||||
|
||||
### Slash Command Registry (`hermes_cli/commands.py`)
|
||||
|
||||
@@ -246,33 +234,11 @@ npm run fmt # prettier
|
||||
npm test # vitest
|
||||
```
|
||||
|
||||
### TUI in the Dashboard (`hermes dashboard` → `/chat`)
|
||||
|
||||
The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
|
||||
|
||||
- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
|
||||
- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
|
||||
- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
|
||||
- Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
|
||||
|
||||
**Do not re-implement the primary chat experience in React.** The main transcript, composer/input flow (including slash-command behavior), and PTY-backed terminal belong to the embedded `hermes --tui` — anything new you add to Ink shows up in the dashboard automatically. If you find yourself rebuilding the transcript or composer for the dashboard, stop and extend Ink instead.
|
||||
|
||||
**Structured React UI around the TUI is allowed when it is not a second chat surface.** Sidebar widgets, inspectors, summaries, status panels, and similar supporting views (e.g. `ChatSidebar`, `ModelPickerDialog`, `ToolCall`) are fine when they complement the embedded TUI rather than replacing the transcript / composer / terminal. Keep their state independent of the PTY child's session and surface their failures non-destructively so the terminal pane keeps working unimpaired.
|
||||
|
||||
---
|
||||
|
||||
## Adding New Tools
|
||||
|
||||
For most custom or local-only tools, do **not** edit Hermes core. Use the plugin
|
||||
route instead: create `~/.hermes/plugins/<name>/plugin.yaml` and
|
||||
`~/.hermes/plugins/<name>/__init__.py`, then register tools with
|
||||
`ctx.register_tool(...)`. Plugin toolsets are discovered automatically and can be
|
||||
enabled or disabled without touching `tools/` or `toolsets.py`.
|
||||
|
||||
Use the built-in route below only when the user is explicitly contributing a new
|
||||
core Hermes tool that should ship in the base system.
|
||||
|
||||
Built-in/core tools require changes in **2 files**:
|
||||
Requires changes in **2 files**:
|
||||
|
||||
**1. Create `tools/your_tool.py`:**
|
||||
```python
|
||||
@@ -295,9 +261,9 @@ registry.register(
|
||||
)
|
||||
```
|
||||
|
||||
**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset. **This step is required:** auto-discovery imports the tool and registers its schema, but the tool is only *exposed to an agent* if its name appears in a toolset. `_HERMES_CORE_TOOLS` is not dead code — it's the default bundle every platform's base toolset inherits from.
|
||||
**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.
|
||||
|
||||
Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain. Wiring into a toolset is still a deliberate, manual step.
|
||||
Auto-discovery: any `hermes_agent/tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain.
|
||||
|
||||
The registry handles schema collection, dispatch, availability checking, and error wrapping. All handlers MUST return a JSON string.
|
||||
|
||||
@@ -305,7 +271,7 @@ The registry handles schema collection, dispatch, availability checking, and err
|
||||
|
||||
**State files**: If a tool stores persistent state (caches, logs, checkpoints), use `get_hermes_home()` for the base directory — never `Path.home() / ".hermes"`. This ensures each profile gets its own state.
|
||||
|
||||
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `tools/todo_tool.py` for the pattern.
|
||||
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern.
|
||||
|
||||
---
|
||||
|
||||
@@ -313,29 +279,9 @@ The registry handles schema collection, dispatch, availability checking, and err
|
||||
|
||||
### config.yaml options:
|
||||
1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
|
||||
2. Bump `_config_version` (check the current value at the top of `DEFAULT_CONFIG`)
|
||||
ONLY if you need to actively migrate/transform existing user config
|
||||
(renaming keys, changing structure). Adding a new key to an existing
|
||||
section is handled automatically by the deep-merge and does NOT require
|
||||
a version bump.
|
||||
2. Bump `_config_version` (currently 5) to trigger migration for existing users
|
||||
|
||||
### Top-level `config.yaml` sections (non-exhaustive):
|
||||
|
||||
`model`, `agent`, `terminal`, `compression`, `display`, `stt`, `tts`,
|
||||
`memory`, `security`, `delegation`, `smart_model_routing`, `checkpoints`,
|
||||
`auxiliary`, `curator`, `skills`, `gateway`, `logging`, `cron`, `profiles`,
|
||||
`plugins`, `honcho`.
|
||||
|
||||
`auxiliary` holds per-task overrides for side-LLM work (curator, vision,
|
||||
embedding, title generation, session_search, etc.) — each task can pin
|
||||
its own provider/model/base_url/max_tokens/reasoning_effort. See
|
||||
`agent/auxiliary_client.py::_resolve_auto` for resolution order.
|
||||
|
||||
`curator` holds the background skill-maintenance config —
|
||||
`enabled`, `interval_hours`, `min_idle_hours`, `stale_after_days`,
|
||||
`archive_after_days`, `backup` (nested).
|
||||
|
||||
### .env variables (SECRETS ONLY — API keys, tokens, passwords):
|
||||
### .env variables:
|
||||
1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
|
||||
```python
|
||||
"NEW_API_KEY": {
|
||||
@@ -347,29 +293,13 @@ its own provider/model/base_url/max_tokens/reasoning_effort. See
|
||||
},
|
||||
```
|
||||
|
||||
Non-secret settings (timeouts, thresholds, feature flags, paths, display
|
||||
preferences) belong in `config.yaml`, not `.env`. If internal code needs an
|
||||
env var mirror for backward compatibility, bridge it from `config.yaml` to
|
||||
the env var in code (see `gateway_timeout`, `terminal.cwd` → `TERMINAL_CWD`).
|
||||
|
||||
### Config loaders (three paths — know which one you're in):
|
||||
### Config loaders (two separate systems):
|
||||
|
||||
| Loader | Used by | Location |
|
||||
|--------|---------|----------|
|
||||
| `load_cli_config()` | CLI mode | `cli.py` — merges CLI-specific defaults + user YAML |
|
||||
| `load_config()` | `hermes tools`, `hermes setup`, most CLI subcommands | `hermes_cli/config.py` — merges `DEFAULT_CONFIG` + user YAML |
|
||||
| Direct YAML load | Gateway runtime | `gateway/run.py` + `gateway/config.py` — reads user YAML raw |
|
||||
|
||||
If you add a new key and the CLI sees it but the gateway doesn't (or vice
|
||||
versa), you're on the wrong loader. Check `DEFAULT_CONFIG` coverage.
|
||||
|
||||
### Working directory:
|
||||
- **CLI** — uses the process's current directory (`os.getcwd()`).
|
||||
- **Messaging** — uses `terminal.cwd` from `config.yaml`. The gateway bridges this
|
||||
to the `TERMINAL_CWD` env var for child tools. **`MESSAGING_CWD` has been
|
||||
removed** — the config loader prints a deprecation warning if it's set in
|
||||
`.env`. Same for `TERMINAL_CWD` in `.env`; the canonical setting is
|
||||
`terminal.cwd` in `config.yaml`.
|
||||
| `load_cli_config()` | CLI mode | `cli.py` |
|
||||
| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` |
|
||||
| Direct YAML load | Gateway | `gateway/run.py` |
|
||||
|
||||
---
|
||||
|
||||
@@ -462,380 +392,7 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml.
|
||||
|
||||
---
|
||||
|
||||
## Plugins
|
||||
|
||||
Hermes has two plugin surfaces. Both live under `plugins/` in the repo so
|
||||
repo-shipped plugins can be discovered alongside user-installed ones in
|
||||
`~/.hermes/plugins/` and pip-installed entry points.
|
||||
|
||||
### General plugins (`hermes_cli/plugins.py` + `plugins/<name>/`)
|
||||
|
||||
`PluginManager` discovers plugins from `~/.hermes/plugins/`, `./.hermes/plugins/`,
|
||||
and pip entry points. Each plugin exposes a `register(ctx)` function that
|
||||
can:
|
||||
|
||||
- Register Python-callback lifecycle hooks:
|
||||
`pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`,
|
||||
`on_session_start`, `on_session_end`
|
||||
- Register new tools via `ctx.register_tool(...)`
|
||||
- Register CLI subcommands via `ctx.register_cli_command(...)` — the
|
||||
plugin's argparse tree is wired into `hermes` at startup so
|
||||
`hermes <pluginname> <subcmd>` works with no change to `main.py`
|
||||
|
||||
Hooks are invoked from `model_tools.py` (pre/post tool) and `run_agent.py`
|
||||
(lifecycle). **Discovery timing pitfall:** `discover_plugins()` only runs
|
||||
as a side effect of importing `model_tools.py`. Code paths that read plugin
|
||||
state without importing `model_tools.py` first must call `discover_plugins()`
|
||||
explicitly (it's idempotent).
|
||||
|
||||
### Memory-provider plugins (`plugins/memory/<name>/`)
|
||||
|
||||
Separate discovery system for pluggable memory backends. Current built-in
|
||||
providers include **honcho, mem0, supermemory, byterover, hindsight,
|
||||
holographic, openviking, retaindb**.
|
||||
|
||||
Each provider implements the `MemoryProvider` ABC (see `agent/memory_provider.py`)
|
||||
and is orchestrated by `agent/memory_manager.py`. Lifecycle hooks include
|
||||
`sync_turn(turn_messages)`, `prefetch(query)`, `shutdown()`, and optional
|
||||
`post_setup(hermes_home, config)` for setup-wizard integration.
|
||||
|
||||
**CLI commands via `plugins/memory/<name>/cli.py`:** if a memory plugin
|
||||
defines `register_cli(subparser)`, `discover_plugin_cli_commands()` finds
|
||||
it at argparse setup time and wires it into `hermes <plugin>`. The
|
||||
framework only exposes CLI commands for the **currently active** memory
|
||||
provider (read from `memory.provider` in config.yaml), so disabled
|
||||
providers don't clutter `hermes --help`.
|
||||
|
||||
**Rule (Teknium, May 2026):** plugins MUST NOT modify core files
|
||||
(`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.).
|
||||
If a plugin needs a capability the framework doesn't expose, expand the
|
||||
generic plugin surface (new hook, new ctx method) — never hardcode
|
||||
plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
|
||||
honcho argparse from `main.py` for exactly this reason.
|
||||
|
||||
**No new in-tree memory providers (policy, May 2026):** the set of
|
||||
built-in memory providers under `plugins/memory/` is closed. New memory
|
||||
backends must ship as **standalone plugin repos** that users install
|
||||
into `~/.hermes/plugins/` (or via pip entry points) — they implement
|
||||
the same `MemoryProvider` ABC, register through the same discovery
|
||||
path, and integrate via `hermes memory setup` / `post_setup()` without
|
||||
landing in this tree. PRs that add a new directory under
|
||||
`plugins/memory/` will be closed with a pointer to publish the
|
||||
provider as its own repo. Existing in-tree providers stay; bug fixes
|
||||
to them are welcome.
|
||||
|
||||
### Model-provider plugins (`plugins/model-providers/<name>/`)
|
||||
|
||||
Every inference backend (openrouter, anthropic, gmi, deepseek, nvidia, …)
|
||||
ships as a plugin here. Each plugin's `__init__.py` calls
|
||||
`providers.register_provider(ProviderProfile(...))` at module load.
|
||||
`providers/__init__.py._discover_providers()` is a **lazy, separate
|
||||
discovery system** — scanned on first `get_provider_profile()` or
|
||||
`list_providers()` call, NOT by the general PluginManager.
|
||||
|
||||
Scan order:
|
||||
1. Bundled: `<repo>/plugins/model-providers/<name>/`
|
||||
2. User: `$HERMES_HOME/plugins/model-providers/<name>/`
|
||||
3. Legacy: `<repo>/providers/<name>.py` (back-compat)
|
||||
|
||||
User plugins of the same name override bundled ones — `register_provider()`
|
||||
is last-writer-wins. This lets third parties swap out any built-in
|
||||
profile without a repo patch.
|
||||
|
||||
The general PluginManager records `kind: model-provider` manifests but does
|
||||
NOT import them (would double-instantiate `ProviderProfile`). Plugins
|
||||
without an explicit `kind:` get auto-coerced via a source-text heuristic
|
||||
(`register_provider` + `ProviderProfile` in `__init__.py`).
|
||||
|
||||
Full authoring guide: `website/docs/developer-guide/model-provider-plugin.md`.
|
||||
|
||||
### Dashboard / context-engine / image-gen plugin directories
|
||||
|
||||
`plugins/context_engine/`, `plugins/image_gen/`, etc. follow the same
|
||||
pattern (ABC + orchestrator + per-plugin directory). Context engines
|
||||
plug into `agent/context_engine.py`; image-gen providers into
|
||||
`agent/image_gen_provider.py`. Reference / docs-companion plugins
|
||||
(`example-dashboard`, `strike-freedom-cockpit`, `plugin-llm-example`,
|
||||
`plugin-llm-async-example`) live in the
|
||||
[`hermes-example-plugins`](https://github.com/NousResearch/hermes-example-plugins)
|
||||
companion repo, not in this tree.
|
||||
|
||||
---
|
||||
|
||||
## Skills
|
||||
|
||||
Two parallel surfaces:
|
||||
|
||||
- **`skills/`** — built-in skills shipped and loadable by default.
|
||||
Organized by category directories (e.g. `skills/github/`, `skills/mlops/`).
|
||||
- **`optional-skills/`** — heavier or niche skills shipped with the repo but
|
||||
NOT active by default. Installed explicitly via
|
||||
`hermes skills install official/<category>/<skill>`. Adapter lives in
|
||||
`tools/skills_hub.py` (`OptionalSkillSource`). Categories include
|
||||
`autonomous-ai-agents`, `blockchain`, `communication`, `creative`,
|
||||
`devops`, `email`, `health`, `mcp`, `migration`, `mlops`, `productivity`,
|
||||
`research`, `security`, `web-development`.
|
||||
|
||||
When reviewing skill PRs, check which directory they target — heavy-dep or
|
||||
niche skills belong in `optional-skills/`.
|
||||
|
||||
### SKILL.md frontmatter
|
||||
|
||||
Standard fields: `name`, `description`, `version`, `author`, `license`,
|
||||
`platforms` (OS-gating list: `[macos]`, `[linux, macos]`, ...),
|
||||
`metadata.hermes.tags`, `metadata.hermes.category`,
|
||||
`metadata.hermes.related_skills`, `metadata.hermes.config` (config.yaml
|
||||
settings the skill needs — stored under `skills.config.<key>`, prompted
|
||||
during setup, injected at load time).
|
||||
|
||||
Top-level `tags:` and `category:` are also accepted and mirrored from
|
||||
`metadata.hermes.*` by the loader.
|
||||
|
||||
### Skill authoring standards (HARDLINE)
|
||||
|
||||
Every new or modernized skill — bundled, optional, or contributed —
|
||||
must meet these standards before merge. Reviewers reject PRs that
|
||||
violate them.
|
||||
|
||||
1. **`description` ≤ 60 characters, one sentence, ends with a period.**
|
||||
Long descriptions bloat skill listings and dilute the model's
|
||||
attention when many skills are loaded. State the capability, not
|
||||
the implementation. No marketing words ("powerful",
|
||||
"comprehensive", "seamless", "advanced"). Don't repeat the skill
|
||||
name. Verify with:
|
||||
```python
|
||||
import re, pathlib
|
||||
m = re.search(r'^description: (.*)$',
|
||||
pathlib.Path('skills/<cat>/<name>/SKILL.md').read_text(),
|
||||
re.MULTILINE)
|
||||
assert len(m.group(1)) <= 60, len(m.group(1))
|
||||
```
|
||||
|
||||
2. **Tools referenced in SKILL.md prose must be native Hermes tools or
|
||||
MCP servers the skill explicitly expects.** When the skill needs a
|
||||
capability, point at the proper tool by name in backticks
|
||||
(`` `terminal` ``, `` `web_extract` ``, `` `read_file` ``,
|
||||
`` `patch` ``, `` `search_files` ``, `` `vision_analyze` ``,
|
||||
`` `browser_navigate` ``, `` `delegate_task` ``, etc.). Do NOT
|
||||
name shell utilities the agent already has wrapped — `grep` →
|
||||
`search_files`, `cat`/`head`/`tail` → `read_file`, `sed`/`awk` →
|
||||
`patch`, `find`/`ls` → `search_files target='files'`. If the skill
|
||||
depends on an MCP server, name the MCP server and document the
|
||||
expected setup in `## Prerequisites`. Anything else (third-party
|
||||
CLIs, shell pipelines, etc.) is fair game inside script files but
|
||||
should not be the headline interaction surface in the prose.
|
||||
|
||||
3. **`platforms:` gating audited against actual script imports.**
|
||||
Skills that use POSIX-only primitives (`fcntl`, `termios`,
|
||||
`os.setsid`, `os.kill(pid, 0)` for liveness, `/proc`, `/tmp`
|
||||
hardcoded, `signal.SIGKILL`, bash heredocs, `osascript`, `apt`,
|
||||
`systemctl`) must declare their supported platforms. Default
|
||||
posture: try to fix it cross-platform first — `tempfile.gettempdir`,
|
||||
`pathlib.Path`, `psutil.pid_exists`, Python-level filtering instead
|
||||
of `grep`. Gate to a narrower set only when the dependency is
|
||||
genuinely platform-bound.
|
||||
|
||||
4. **`author` credits the human contributor first.** For external
|
||||
contributions, the contributor's real name + GitHub handle goes
|
||||
first; "Hermes Agent" is the secondary collaborator. If the
|
||||
contributor's commit shows "Hermes Agent" as author (because they
|
||||
used Hermes to draft the skill), replace it with their actual name
|
||||
— credit the human, not the tool.
|
||||
|
||||
5. **SKILL.md body uses the modern section order.** `# <Skill> Skill`
|
||||
title, 2-3 sentence intro stating what it does and doesn't do,
|
||||
`## When to Use`, `## Prerequisites`, `## How to Run`,
|
||||
`## Quick Reference`, `## Procedure`, `## Pitfalls`,
|
||||
`## Verification`. Target ~200 lines for a complex skill,
|
||||
~100 lines for a simple one. Cut redundant intro fluff, marketing
|
||||
prose, and re-explanations of env vars already in
|
||||
`## Prerequisites`.
|
||||
|
||||
6. **Scripts go in `scripts/`, references in `references/`,
|
||||
templates in `templates/`.** Don't expect the model to inline-write
|
||||
parsers, XML walkers, or non-trivial logic every call — ship a
|
||||
helper script. Reference it from SKILL.md by path relative to the
|
||||
skill directory.
|
||||
|
||||
7. **Tests live at `tests/skills/test_<skill>_skill.py`** and use only
|
||||
stdlib + pytest + `unittest.mock`. No live network calls. Run via
|
||||
`scripts/run_tests.sh tests/skills/test_<skill>_skill.py -q`.
|
||||
|
||||
8. **`.env.example` additions are isolated to a clearly delimited
|
||||
block.** Don't touch the surrounding file — contributor-supplied
|
||||
`.env.example` versions are usually stale and edits outside the
|
||||
skill's own block must be dropped during salvage.
|
||||
|
||||
The full salvage / modernization checklist for external skill PRs
|
||||
lives in the `hermes-agent-dev` skill at
|
||||
`references/new-skill-pr-salvage.md` — load it before polishing
|
||||
contributor skill PRs.
|
||||
|
||||
---
|
||||
|
||||
## Toolsets
|
||||
|
||||
All toolsets are defined in `toolsets.py` as a single `TOOLSETS` dict.
|
||||
Each platform's adapter picks a base toolset (e.g. Telegram uses
|
||||
`"messaging"`); `_HERMES_CORE_TOOLS` is the default bundle most
|
||||
platforms inherit from.
|
||||
|
||||
Current toolset keys: `browser`, `clarify`, `code_execution`, `cronjob`,
|
||||
`debugging`, `delegation`, `discord`, `discord_admin`, `feishu_doc`,
|
||||
`feishu_drive`, `file`, `homeassistant`, `image_gen`, `kanban`, `memory`,
|
||||
`messaging`, `moa`, `rl`, `safe`, `search`, `session_search`, `skills`,
|
||||
`spotify`, `terminal`, `todo`, `tts`, `video`, `vision`, `web`, `yuanbao`.
|
||||
|
||||
Enable/disable per platform via `hermes tools` (the curses UI) or the
|
||||
`tools.<platform>.enabled` / `tools.<platform>.disabled` lists in
|
||||
`config.yaml`.
|
||||
|
||||
---
|
||||
|
||||
## Delegation (`delegate_task`)
|
||||
|
||||
`tools/delegate_tool.py` spawns a subagent with an isolated
|
||||
context + terminal session. Synchronous: the parent waits for the
|
||||
child's summary before continuing its own loop — if the parent is
|
||||
interrupted, the child is cancelled.
|
||||
|
||||
Two shapes:
|
||||
|
||||
- **Single:** pass `goal` (+ optional `context`, `toolsets`).
|
||||
- **Batch (parallel):** pass `tasks: [...]` — each gets its own subagent
|
||||
running concurrently. Concurrency is capped by
|
||||
`delegation.max_concurrent_children` (default 3).
|
||||
|
||||
Roles:
|
||||
|
||||
- `role="leaf"` (default) — focused worker. Cannot call `delegate_task`,
|
||||
`clarify`, `memory`, `send_message`, `execute_code`.
|
||||
- `role="orchestrator"` — retains `delegate_task` so it can spawn its
|
||||
own workers. Gated by `delegation.orchestrator_enabled` (default true)
|
||||
and bounded by `delegation.max_spawn_depth` (default 2).
|
||||
|
||||
Key config knobs (under `delegation:` in `config.yaml`):
|
||||
`max_concurrent_children`, `max_spawn_depth`, `child_timeout_seconds`,
|
||||
`orchestrator_enabled`, `subagent_auto_approve`, `inherit_mcp_toolsets`,
|
||||
`max_iterations`.
|
||||
|
||||
Synchronicity rule: delegate_task is **not** durable. For long-running
|
||||
work that must outlive the current turn, use `cronjob` or
|
||||
`terminal(background=True, notify_on_complete=True)` instead.
|
||||
|
||||
---
|
||||
|
||||
## Curator (skill lifecycle)
|
||||
|
||||
Background skill-maintenance system that tracks usage on agent-created
|
||||
skills and auto-archives stale ones. Users never lose skills; archives
|
||||
go to `~/.hermes/skills/.archive/` and are restorable.
|
||||
|
||||
- **Core:** `agent/curator.py` (review loop, auto-transitions, LLM review
|
||||
prompt) + `agent/curator_backup.py` (pre-run tar.gz snapshots).
|
||||
- **CLI:** `hermes_cli/curator.py` wires `hermes curator <verb>` where
|
||||
verbs are: `status`, `run`, `pause`, `resume`, `pin`, `unpin`,
|
||||
`archive`, `restore`, `prune`, `backup`, `rollback`.
|
||||
- **Telemetry:** `tools/skill_usage.py` owns the sidecar
|
||||
`~/.hermes/skills/.usage.json` — per-skill `use_count`, `view_count`,
|
||||
`patch_count`, `last_activity_at`, `state` (active / stale /
|
||||
archived), `pinned`.
|
||||
|
||||
Invariants:
|
||||
- Curator only touches skills with `created_by: "agent"` provenance —
|
||||
bundled + hub-installed skills are off-limits.
|
||||
- Never deletes; max destructive action is archive.
|
||||
- Pinned skills are exempt from every auto-transition and from the
|
||||
LLM review pass.
|
||||
- `skill_manage(action="delete")` refuses pinned skills; patch/edit/
|
||||
write_file/remove_file go through so the agent can keep improving
|
||||
pinned skills.
|
||||
|
||||
Config section (`curator:` in `config.yaml`):
|
||||
`enabled`, `interval_hours`, `min_idle_hours`, `stale_after_days`,
|
||||
`archive_after_days`, `backup.*`.
|
||||
|
||||
Full user-facing docs: `website/docs/user-guide/features/curator.md`.
|
||||
|
||||
---
|
||||
|
||||
## Cron (scheduled jobs)
|
||||
|
||||
`cron/jobs.py` (job store) + `cron/scheduler.py` (tick loop). Agents
|
||||
schedule jobs via the `cronjob` tool; users via `hermes cron <verb>`
|
||||
(`list`, `add`, `edit`, `pause`, `resume`, `run`, `remove`) or the
|
||||
`/cron` slash command.
|
||||
|
||||
Supported schedule formats:
|
||||
- Duration: `"30m"`, `"2h"`, `"1d"`
|
||||
- "every" phrase: `"every 2h"`, `"every monday 9am"`
|
||||
- 5-field cron expression: `"0 9 * * *"`
|
||||
- ISO timestamp (one-shot): `"2026-06-01T09:00:00Z"`
|
||||
|
||||
Per-job fields include `skills` (load specific skills), `model` /
|
||||
`provider` overrides, `script` (pre-run data-collection script whose
|
||||
stdout is injected into the prompt; `no_agent=True` turns the script
|
||||
into the entire job), `context_from` (chain job A's last output into
|
||||
job B's prompt), `workdir` (run in a specific directory with its
|
||||
`AGENTS.md`/`CLAUDE.md` loaded), and multi-platform delivery.
|
||||
|
||||
Hardening invariants:
|
||||
- **3-minute hard interrupt** on cron sessions — runaway agent loops
|
||||
cannot monopolize the scheduler.
|
||||
- Catchup window: half the job's period, clamped to 120s–2h.
|
||||
- Grace window: 120s for one-shot jobs whose fire time was missed.
|
||||
- File lock at `~/.hermes/cron/.tick.lock` prevents duplicate ticks
|
||||
across processes.
|
||||
- Cron sessions pass `skip_memory=True` by default; memory providers
|
||||
intentionally do not run during cron.
|
||||
|
||||
Cron deliveries are **not** mirrored into the target gateway session —
|
||||
they land in their own cron session with a header/footer frame so the
|
||||
main conversation's message-role alternation stays intact.
|
||||
|
||||
---
|
||||
|
||||
## Kanban (multi-agent work queue)
|
||||
|
||||
Durable SQLite-backed board that lets multiple profiles / workers
|
||||
collaborate on shared tasks. Users drive it via `hermes kanban <verb>`;
|
||||
workers spawned by the dispatcher drive it via a dedicated `kanban_*`
|
||||
toolset so their schema footprint is zero when they're not inside a
|
||||
kanban task.
|
||||
|
||||
- **CLI:** `hermes_cli/kanban.py` wires `hermes kanban` with verbs
|
||||
`init`, `create`, `list` (alias `ls`), `show`, `assign`, `link`,
|
||||
`unlink`, `comment`, `complete`, `block`, `unblock`, `archive`,
|
||||
`tail`, plus less-commonly-used `watch`, `stats`, `runs`, `log`,
|
||||
`assignees`, `heartbeat`, `notify-*`, `dispatch`, `daemon`, `gc`.
|
||||
- **Worker toolset:** `tools/kanban_tools.py` exposes `kanban_show`,
|
||||
`kanban_complete`, `kanban_block`, `kanban_heartbeat`, `kanban_comment`,
|
||||
`kanban_create`, `kanban_link` — gated by `HERMES_KANBAN_TASK` so
|
||||
the schema only appears for processes actually running as a worker.
|
||||
- **Dispatcher:** long-lived loop that (default every 60s) reclaims
|
||||
stale claims, promotes ready tasks, atomically claims, and spawns
|
||||
assigned profiles. Runs **inside the gateway** by default via
|
||||
`kanban.dispatch_in_gateway: true`.
|
||||
- **Plugin assets:** `plugins/kanban/dashboard/` (web UI) +
|
||||
`plugins/kanban/systemd/` (`hermes-kanban-dispatcher.service` for
|
||||
standalone dispatcher deployment).
|
||||
|
||||
Isolation model:
|
||||
- **Board** is the hard boundary — workers are spawned with
|
||||
`HERMES_KANBAN_BOARD` pinned in their env so they can't see other
|
||||
boards.
|
||||
- **Tenant** is a soft namespace *within* a board — one specialist
|
||||
fleet can serve multiple businesses with workspace-path + memory-key
|
||||
isolation.
|
||||
- After ~5 consecutive spawn failures on the same task the dispatcher
|
||||
auto-blocks it to prevent spin loops.
|
||||
|
||||
Full user-facing docs: `website/docs/user-guide/features/kanban.md`.
|
||||
|
||||
---
|
||||
|
||||
## Important Policies
|
||||
|
||||
### Prompt Caching Must Not Break
|
||||
|
||||
Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:**
|
||||
@@ -845,10 +402,9 @@ Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT i
|
||||
|
||||
Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression.
|
||||
|
||||
Slash commands that mutate system-prompt state (skills, tools, memory, etc.)
|
||||
must be **cache-aware**: default to deferred invalidation (change takes
|
||||
effect next session), with an opt-in `--now` flag for immediate
|
||||
invalidation. See `/skills install --now` for the canonical pattern.
|
||||
### Working Directory Behavior
|
||||
- **CLI**: Uses current directory (`.` → `os.getcwd()`)
|
||||
- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory)
|
||||
|
||||
### Background Process Notifications (Gateway)
|
||||
|
||||
@@ -870,7 +426,7 @@ Hermes supports **profiles** — multiple fully isolated instances, each with it
|
||||
`HERMES_HOME` directory (config, API keys, memory, sessions, skills, gateway, etc.).
|
||||
|
||||
The core mechanism: `_apply_profile_override()` in `hermes_cli/main.py` sets
|
||||
`HERMES_HOME` before any module imports. All `get_hermes_home()` references
|
||||
`HERMES_HOME` before any module imports. All 119+ references to `get_hermes_home()`
|
||||
automatically scope to the active profile.
|
||||
|
||||
### Rules for profile-safe code
|
||||
@@ -927,45 +483,17 @@ Use `get_hermes_home()` from `hermes_constants` for code paths. Use `display_her
|
||||
for user-facing print/log messages. Hardcoding `~/.hermes` breaks profiles — each profile
|
||||
has its own `HERMES_HOME` directory. This was the source of 5 bugs fixed in PR #3575.
|
||||
|
||||
### DO NOT introduce new `simple_term_menu` usage
|
||||
Existing call sites in `hermes_cli/main.py` remain for legacy fallback only;
|
||||
the preferred UI is curses (stdlib) because `simple_term_menu` has
|
||||
ghost-duplication rendering bugs in tmux/iTerm2 with arrow keys. New
|
||||
interactive menus must use `hermes_cli/curses_ui.py` — see
|
||||
`hermes_cli/tools_config.py` for the canonical pattern.
|
||||
### DO NOT use `simple_term_menu` for interactive menus
|
||||
Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern.
|
||||
|
||||
### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
|
||||
Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`.
|
||||
|
||||
### `_last_resolved_tool_names` is a process-global in `model_tools.py`
|
||||
### `_last_resolved_tool_names` is a process-global in `hermes_agent/tools/dispatch.py`
|
||||
`_run_single_child()` in `delegate_tool.py` saves and restores this global around subagent execution. If you add new code that reads this global, be aware it may be temporarily stale during child agent runs.
|
||||
|
||||
### DO NOT hardcode cross-tool references in schema descriptions
|
||||
Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.
|
||||
|
||||
### The gateway has TWO message guards — both must bypass approval/control commands
|
||||
When an agent is running, messages pass through two sequential guards:
|
||||
(1) **base adapter** (`gateway/platforms/base.py`) queues messages in
|
||||
`_pending_messages` when `session_key in self._active_sessions`, and
|
||||
(2) **gateway runner** (`gateway/run.py`) intercepts `/stop`, `/new`,
|
||||
`/queue`, `/status`, `/approve`, `/deny` before they reach
|
||||
`running_agent.interrupt()`. Any new command that must reach the runner
|
||||
while the agent is blocked (e.g. approval prompts) MUST bypass BOTH
|
||||
guards and be dispatched inline, not via `_process_message_background()`
|
||||
(which races session lifecycle).
|
||||
|
||||
### Squash merges from stale branches silently revert recent fixes
|
||||
Before squash-merging a PR, ensure the branch is up to date with `main`
|
||||
(`git fetch origin main && git reset --hard origin/main` in the worktree,
|
||||
then re-apply the PR's commits). A stale branch's version of an unrelated
|
||||
file will silently overwrite recent fixes on main when squashed. Verify
|
||||
with `git diff HEAD~1..HEAD` after merging — unexpected deletions are a
|
||||
red flag.
|
||||
|
||||
### Don't wire in dead code without E2E validation
|
||||
Unused code that was never shipped was dead for a reason. Before wiring an
|
||||
unused module into a live code path, E2E test the real resolution chain
|
||||
with actual imports (not mocks) against a temp `HERMES_HOME`.
|
||||
Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `hermes_agent/tools/dispatch.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.
|
||||
|
||||
### Tests must not write to `~/.hermes/`
|
||||
The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.
|
||||
@@ -1022,7 +550,7 @@ If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
|
||||
pytest directly), at minimum activate the venv and pass `-n 4`:
|
||||
|
||||
```bash
|
||||
source .venv/bin/activate # or: source venv/bin/activate
|
||||
source venv/bin/activate
|
||||
python -m pytest tests/ -q -n 4
|
||||
```
|
||||
|
||||
|
||||
269
CONTRIBUTING.md
269
CONTRIBUTING.md
@@ -9,7 +9,7 @@ Thank you for contributing to Hermes Agent! This guide covers everything you nee
|
||||
We value contributions in this order:
|
||||
|
||||
1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
|
||||
2. **Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. We want Hermes to work everywhere.
|
||||
2. **Cross-platform compatibility** — Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere.
|
||||
3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
|
||||
4. **Performance and robustness** — retry logic, error handling, graceful degradation.
|
||||
5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
|
||||
@@ -49,34 +49,16 @@ If your skill is specialized, community-contributed, or niche, it's better suite
|
||||
|
||||
---
|
||||
|
||||
## Memory Providers: Ship as a Standalone Plugin
|
||||
|
||||
**We are no longer accepting new memory providers into this repo.** The set of built-in providers under `plugins/memory/` (honcho, mem0, supermemory, byterover, hindsight, holographic, openviking, retaindb) is closed. If you want to add a new memory backend, publish it as a **standalone plugin repo** that users install into `~/.hermes/plugins/` (or via a pip entry point).
|
||||
|
||||
Standalone memory plugins:
|
||||
|
||||
- Implement the same `MemoryProvider` ABC (`agent/memory_provider.py`) — `sync_turn`, `prefetch`, `shutdown`, and optionally `post_setup(hermes_home, config)` for setup-wizard integration
|
||||
- Use the same discovery system — `discover_memory_providers()` picks them up from user/project plugin directories and pip entry points
|
||||
- Integrate with `hermes memory setup` via `post_setup()` — no need to touch core code
|
||||
- Can register their own CLI subcommands via `register_cli(subparser)` in a `cli.py` file
|
||||
- Get all the same lifecycle hooks and config plumbing as in-tree providers
|
||||
|
||||
PRs that add a new directory under `plugins/memory/` will be closed with a pointer to publish the provider as its own repo. Existing in-tree providers stay; bug fixes to them are welcome.
|
||||
|
||||
This isn't a quality bar — it's a coupling-and-maintenance decision. Memory providers are the most common plugin type and they shouldn't all live in this tree.
|
||||
|
||||
---
|
||||
|
||||
## Development Setup
|
||||
|
||||
### Prerequisites
|
||||
|
||||
| Requirement | Notes |
|
||||
|-------------|-------|
|
||||
| **Git** | With `--recurse-submodules` support, and the `git-lfs` extension installed |
|
||||
| **Git** | With `--recurse-submodules` support |
|
||||
| **Python 3.11+** | uv will install it if missing |
|
||||
| **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
|
||||
| **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |
|
||||
| **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge |
|
||||
|
||||
### Clone and install
|
||||
|
||||
@@ -106,7 +88,7 @@ cp cli-config.yaml.example ~/.hermes/config.yaml
|
||||
touch ~/.hermes/.env
|
||||
|
||||
# Add at minimum an LLM provider key:
|
||||
echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
|
||||
echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
|
||||
```
|
||||
|
||||
### Run
|
||||
@@ -124,11 +106,6 @@ hermes chat -q "Hello"
|
||||
### Run tests
|
||||
|
||||
```bash
|
||||
# Preferred — matches CI (hermetic env, 4 xdist workers); see AGENTS.md
|
||||
scripts/run_tests.sh
|
||||
|
||||
# Alternative (activate the venv first). The wrapper is still recommended
|
||||
# for parity with GitHub Actions before you open a PR:
|
||||
pytest tests/ -v
|
||||
```
|
||||
|
||||
@@ -309,18 +286,16 @@ registry.register(
|
||||
)
|
||||
```
|
||||
|
||||
**Wire into a toolset (required):** Built-in tools are auto-discovered: any
|
||||
`tools/*.py` file that contains a top-level `registry.register(...)` call is
|
||||
imported by `discover_builtin_tools()` in `tools/registry.py` when `model_tools`
|
||||
loads. There is **no** manual import list in `model_tools.py` to maintain.
|
||||
Then add the import to `model_tools.py` in the `_modules` list:
|
||||
|
||||
You must still add the tool name to the appropriate list in `toolsets.py`
|
||||
(for example `_HERMES_CORE_TOOLS` or a dedicated toolset); otherwise the tool
|
||||
registers but is never exposed to the agent. If you introduce a new toolset,
|
||||
add it in `toolsets.py` and wire it into the relevant platform presets.
|
||||
```python
|
||||
_modules = [
|
||||
# ... existing modules ...
|
||||
"tools.my_tool",
|
||||
]
|
||||
```
|
||||
|
||||
See `AGENTS.md` (section **Adding New Tools**) for profile-aware paths and
|
||||
plugin vs core guidance.
|
||||
If it's a new toolset, add it to `toolsets.py` and to the relevant platform presets.
|
||||
|
||||
---
|
||||
|
||||
@@ -479,58 +454,6 @@ Gateway and messaging sessions never collect secrets in-band; they instruct the
|
||||
|
||||
See `skills/gifs/gif-search/` and `skills/email/himalaya/` for examples.
|
||||
|
||||
### Skill authoring standards (HARDLINE)
|
||||
|
||||
Every new or modernized skill — bundled, optional, or contributed — must meet these standards before merge. Reviewers reject PRs that violate them.
|
||||
|
||||
1. **`description` ≤ 60 characters, one sentence, ends with a period.** Long descriptions bloat the skill listing UI and dilute the model's attention when many skills are loaded. State the capability, not the implementation. No marketing words ("powerful", "comprehensive", "seamless", "advanced"). Don't repeat the skill name. Verify with:
|
||||
```python
|
||||
import re, pathlib
|
||||
m = re.search(r'^description: (.*)$',
|
||||
pathlib.Path('skills/<cat>/<name>/SKILL.md').read_text(),
|
||||
re.MULTILINE)
|
||||
assert len(m.group(1)) <= 60, len(m.group(1))
|
||||
```
|
||||
|
||||
Good: `Search arXiv papers by keyword, author, category, or ID.`
|
||||
Bad: `A powerful and comprehensive skill that allows the agent to search arXiv for relevant academic papers using various criteria including keywords, authors, and categories.`
|
||||
|
||||
2. **Tools referenced in SKILL.md prose must be native Hermes tools or MCP servers the skill explicitly expects.** When the skill needs a capability, point at the proper tool by name in backticks: `` `terminal` ``, `` `web_extract` ``, `` `web_search` ``, `` `read_file` ``, `` `write_file` ``, `` `patch` ``, `` `search_files` ``, `` `vision_analyze` ``, `` `browser_navigate` ``, `` `delegate_task` ``, `` `image_generate` ``, `` `text_to_speech` ``, `` `cronjob` ``, `` `memory` ``, `` `skill_view` ``, `` `todo` ``, `` `execute_code` ``.
|
||||
|
||||
Do NOT name shell utilities the agent already has wrapped:
|
||||
|
||||
| Don't say | Say |
|
||||
|---|---|
|
||||
| `grep`, `rg` | `search_files` |
|
||||
| `cat`, `head`, `tail` | `read_file` |
|
||||
| `sed`, `awk` | `patch` |
|
||||
| `find`, `ls` | `search_files` (with `target='files'`) |
|
||||
| `curl` for content extraction | `web_extract` |
|
||||
| `echo > file`, `cat <<EOF` | `write_file` |
|
||||
|
||||
If the skill depends on an MCP server, name the MCP server and document its setup in `## Prerequisites`. Third-party CLIs (e.g. `ffmpeg`, `gh`, a specific SDK) are fine to invoke from inside script files, but the prose should frame the interaction as "invoke through the `terminal` tool", not as a manual shell session.
|
||||
|
||||
3. **`platforms:` gating audited against actual script imports.** Skills that use POSIX-only primitives (`fcntl`, `termios`, `os.setsid`, `os.kill(pid, 0)` for liveness, `/proc`, hardcoded `/tmp` paths, `signal.SIGKILL`, bash heredocs, `osascript`, `apt`, `systemctl`) must declare their supported platforms via the `platforms:` frontmatter. Default posture is to fix it cross-platform first — `tempfile.gettempdir()`, `pathlib.Path`, `psutil.pid_exists()`, Python-level filtering instead of `grep`. Gate to a narrower set only when the dependency is genuinely platform-bound (e.g. `osascript` is macOS-only, `/proc` is Linux-only).
|
||||
|
||||
4. **`author` credits the human contributor first.** For external contributions, the contributor's real name + GitHub handle goes first (`Jane Doe (jane-doe)`); "Hermes Agent" is the secondary collaborator. If the contributor's commit shows "Hermes Agent" as author because they used Hermes to draft the skill, replace it with their actual name — credit the human, not the tool.
|
||||
|
||||
5. **SKILL.md body uses the modern section order.** `# <Skill> Skill` title, 2-3 sentence intro stating what it does and what it doesn't do, then:
|
||||
- `## When to Use` — trigger conditions
|
||||
- `## Prerequisites` — env vars, install steps, MCP setup, API key sourcing
|
||||
- `## How to Run` — canonical invocation through the `terminal` tool
|
||||
- `## Quick Reference` — flat command/API reference
|
||||
- `## Procedure` — numbered steps with copy-paste commands
|
||||
- `## Pitfalls` — known limits, rate limits, things that look broken but aren't
|
||||
- `## Verification` — single command that proves the skill works
|
||||
|
||||
Target ~200 lines for a complex skill, ~100 lines for a simple one. Cut redundant intro fluff, marketing prose, and re-explanations of env vars already documented in `## Prerequisites`.
|
||||
|
||||
6. **Scripts go in `scripts/`, references in `references/`, templates in `templates/`.** Don't expect the model to inline-write parsers, XML walkers, or non-trivial logic every call — ship a helper script. Reference scripts from SKILL.md by path relative to the skill directory.
|
||||
|
||||
7. **Tests live at `tests/skills/test_<skill>_skill.py`** and use only stdlib + pytest + `unittest.mock`. No live network calls. Run via `scripts/run_tests.sh tests/skills/test_<skill>_skill.py -q`. Must pass under the hermetic CI env (no API keys leaking through). Use `monkeypatch` and `tmp_path` for any env-var or filesystem dependencies.
|
||||
|
||||
8. **`.env.example` additions are isolated to a clearly delimited block.** Don't touch the surrounding file — contributor-supplied `.env.example` versions are usually stale, and edits outside the skill's own block will be dropped during salvage. Comment all values with `#` (it's documentation, not live config).
|
||||
|
||||
### Skill guidelines
|
||||
|
||||
- **No external dependencies unless absolutely necessary.** Prefer stdlib Python, curl, and existing Hermes tools (`web_extract`, `terminal`, `read_file`).
|
||||
@@ -571,7 +494,7 @@ branding:
|
||||
agent_name: "My Agent"
|
||||
welcome: "Welcome message"
|
||||
response_label: " ⚔ Agent "
|
||||
prompt_symbol: "⚔"
|
||||
prompt_symbol: "⚔ ❯ "
|
||||
|
||||
tool_prefix: "╎" # Tool output line prefix
|
||||
```
|
||||
@@ -592,57 +515,11 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl
|
||||
|
||||
## Cross-Platform Compatibility
|
||||
|
||||
Hermes runs on Linux, macOS, and native Windows (plus WSL2). When writing code
|
||||
that touches the OS, assume *any* platform can hit your code path.
|
||||
|
||||
> **Before you PR:** run `scripts/check-windows-footguns.py` to catch the
|
||||
> common Windows-unsafe patterns in your diff. It's grep-based and cheap;
|
||||
> CI runs it on every PR too.
|
||||
Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS:
|
||||
|
||||
### Critical rules
|
||||
|
||||
1. **Never call `os.kill(pid, 0)` for liveness checks.** `os.kill(pid, 0)`
|
||||
is a standard POSIX idiom to check "is this PID alive" — the signal 0
|
||||
is a no-op permission check. **On Windows it is NOT a no-op.** Python's
|
||||
Windows `os.kill` maps `sig=0` to `CTRL_C_EVENT` (they collide at the
|
||||
integer value 0) and routes it through `GenerateConsoleCtrlEvent(0, pid)`,
|
||||
which broadcasts Ctrl+C to the **entire console process group** containing
|
||||
the target PID. "Probe if alive" silently becomes "kill the target and
|
||||
often unrelated processes sharing its console." See [bpo-14484](https://bugs.python.org/issue14484)
|
||||
(open since 2012 — will never be fixed for compat reasons).
|
||||
|
||||
**Preferred:** use `psutil` (a core dependency — always available):
|
||||
|
||||
```python
|
||||
import psutil
|
||||
if psutil.pid_exists(pid):
|
||||
# process is alive — safe on every platform
|
||||
...
|
||||
```
|
||||
|
||||
If you specifically need the hermes wrapper (it has a stdlib fallback
|
||||
for scaffold-phase imports before pip install finishes), use
|
||||
`gateway.status._pid_exists(pid)`. It calls `psutil.pid_exists` first
|
||||
and falls back to a hand-rolled `OpenProcess + WaitForSingleObject`
|
||||
dance on Windows only when psutil is somehow missing.
|
||||
|
||||
Audit grep for new callsites: `rg "os\.kill\([^,]+,\s*0\s*\)"`. Any hit
|
||||
in non-test code is presumptively a Windows silent-kill bug.
|
||||
|
||||
2. **Use `shutil.which()` before shelling out — don't assume Windows has
|
||||
tools Linux has.** `wmic` was removed in Windows 10 21H1 and later. `ps`,
|
||||
`kill`, `grep`, `awk`, `fuser`, `lsof`, `pgrep`, and most POSIX CLI tools
|
||||
simply don't exist on Windows. Test availability with
|
||||
`shutil.which("tool")` and fall back to a Windows-native equivalent —
|
||||
usually PowerShell via `subprocess.run(["powershell", "-NoProfile",
|
||||
"-Command", ...])`.
|
||||
|
||||
For process enumeration: PowerShell's `Get-CimInstance Win32_Process` is
|
||||
the modern replacement for `wmic process`. See
|
||||
`hermes_cli/gateway.py::_scan_gateway_pids` for the pattern.
|
||||
|
||||
3. **`termios` and `fcntl` are Unix-only.** Always catch both `ImportError`
|
||||
and `NotImplementedError`:
|
||||
1. **`termios` and `fcntl` are Unix-only.** Always catch both `ImportError` and `NotImplementedError`:
|
||||
```python
|
||||
try:
|
||||
from simple_term_menu import TerminalMenu
|
||||
@@ -655,126 +532,24 @@ that touches the OS, assume *any* platform can hit your code path.
|
||||
idx = int(input("Choice: ")) - 1
|
||||
```
|
||||
|
||||
4. **File encoding.** Windows may save `.env` files in `cp1252`. Always
|
||||
handle encoding errors:
|
||||
2. **File encoding.** Windows may save `.env` files in `cp1252`. Always handle encoding errors:
|
||||
```python
|
||||
try:
|
||||
load_dotenv(env_path)
|
||||
except UnicodeDecodeError:
|
||||
load_dotenv(env_path, encoding="latin-1")
|
||||
```
|
||||
Config files (`config.yaml`) may be saved with a UTF-8 BOM by Notepad and
|
||||
similar editors — use `encoding="utf-8-sig"` when reading files that
|
||||
could have been touched by a Windows GUI editor.
|
||||
|
||||
5. **Process management.** `os.setsid()`, `os.killpg()`, `os.fork()`,
|
||||
`os.getuid()`, and POSIX signal handling differ on Windows. Guard with
|
||||
`platform.system()`, `sys.platform`, or `hasattr(os, "setsid")`:
|
||||
3. **Process management.** `os.setsid()`, `os.killpg()`, and signal handling differ on Windows. Use platform checks:
|
||||
```python
|
||||
import platform
|
||||
if platform.system() != "Windows":
|
||||
kwargs["preexec_fn"] = os.setsid
|
||||
else:
|
||||
kwargs["creationflags"] = subprocess.CREATE_NEW_PROCESS_GROUP
|
||||
```
|
||||
|
||||
**Preferred:** for killing a process AND its children (what `os.killpg`
|
||||
does on POSIX), use `psutil` — it works on every platform:
|
||||
```python
|
||||
import psutil
|
||||
try:
|
||||
parent = psutil.Process(pid)
|
||||
# Kill children first (leaf-up), then the parent.
|
||||
for child in parent.children(recursive=True):
|
||||
child.kill()
|
||||
parent.kill()
|
||||
except psutil.NoSuchProcess:
|
||||
pass
|
||||
```
|
||||
4. **Path separators.** Use `pathlib.Path` instead of string concatenation with `/`.
|
||||
|
||||
6. **Signals that don't exist on Windows: `SIGALRM`, `SIGCHLD`, `SIGHUP`,
|
||||
`SIGUSR1`, `SIGUSR2`, `SIGPIPE`, `SIGQUIT`, `SIGKILL`.** Python's
|
||||
`signal` module raises `AttributeError` at import time if you reference
|
||||
them on Windows. Use `getattr(signal, "SIGKILL", signal.SIGTERM)` or
|
||||
gate the whole block behind a platform check. `loop.add_signal_handler`
|
||||
raises `NotImplementedError` on Windows — always catch it.
|
||||
|
||||
7. **Path separators.** Use `pathlib.Path` instead of string concatenation
|
||||
with `/`. Forward slashes work almost everywhere on Windows, but
|
||||
`subprocess.run(["cmd.exe", "/c", ...])` and other shell contexts can
|
||||
require backslashes — convert with `str(path)` at the subprocess boundary,
|
||||
not inside Python logic.
|
||||
|
||||
8. **Symlinks need elevated privileges on Windows** (unless Developer Mode is
|
||||
on). Tests that create symlinks need `@pytest.mark.skipif(sys.platform ==
|
||||
"win32", reason="Symlinks require elevated privileges on Windows")`.
|
||||
|
||||
9. **POSIX file modes (0o600, 0o644, etc.) are NOT enforced on NTFS** by
|
||||
default. Tests that assert on `stat().st_mode & 0o777` must skip on
|
||||
Windows — the concept doesn't translate. Use ACLs (`icacls`, `pywin32`)
|
||||
for Windows secret-file protection if needed.
|
||||
|
||||
10. **Detached background daemons on Windows need `pythonw.exe`, NOT
|
||||
`python.exe`.** `python.exe` always allocates or attaches to a console,
|
||||
which makes it vulnerable to `CTRL_C_EVENT` broadcasts from any sibling
|
||||
process. `pythonw.exe` is the no-console variant. Combine with
|
||||
`CREATE_NO_WINDOW | DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP |
|
||||
CREATE_BREAKAWAY_FROM_JOB` in `subprocess.Popen(creationflags=...)`.
|
||||
See `hermes_cli/gateway_windows.py::_spawn_detached` for the reference
|
||||
implementation.
|
||||
|
||||
11. **`subprocess.Popen` with `.cmd` or `.bat` shims needs `shutil.which`
|
||||
to resolve.** Passing `"agent-browser"` to `Popen` on Windows finds
|
||||
the extensionless POSIX shebang shim in `node_modules/.bin/`, which
|
||||
`CreateProcessW` can't execute — you'll get `WinError 193 "not a valid
|
||||
Win32 application"`. Use `shutil.which("agent-browser", path=local_bin)`
|
||||
which honors PATHEXT and picks the `.CMD` variant on Windows.
|
||||
|
||||
12. **Don't use shell shebangs as a way to run Python.** `#!/usr/bin/env
|
||||
python` only works when the file is executed through a Unix shell.
|
||||
`subprocess.run(["./myscript.py"])` on Windows fails even if the file
|
||||
has a shebang line. Always invoke Python explicitly:
|
||||
`[sys.executable, "myscript.py"]`.
|
||||
|
||||
13. **Shell commands in installers.** If you change `scripts/install.sh`,
|
||||
make the equivalent change in `scripts/install.ps1`. The two scripts
|
||||
are the canonical example of "works on Linux does not mean works on
|
||||
Windows" and have drifted multiple times — keep them in lockstep.
|
||||
|
||||
14. **Known paths that are OneDrive-redirected on Windows:** Desktop,
|
||||
Documents, Pictures, Videos. The "real" path when OneDrive Backup is
|
||||
enabled is `%USERPROFILE%\OneDrive\Desktop` (etc.), NOT
|
||||
`%USERPROFILE%\Desktop` (which exists as an empty husk). Resolve the
|
||||
real location via `ctypes` + `SHGetKnownFolderPath` or by reading the
|
||||
`Shell Folders` registry key — never assume `~/Desktop`.
|
||||
|
||||
15. **CRLF vs LF in generated scripts.** Windows `cmd.exe` and `schtasks`
|
||||
parse line-by-line; mixed or LF-only line endings can break multi-line
|
||||
`.cmd` / `.bat` files. Use `open(path, "w", encoding="utf-8",
|
||||
newline="\r\n")` — or `open(path, "wb")` + explicit bytes — when
|
||||
generating scripts Windows will execute.
|
||||
|
||||
16. **Two different quoting schemes in one command line.** `subprocess.run
|
||||
(["schtasks", "/TR", some_cmd])` → schtasks itself parses `/TR`, AND
|
||||
the `some_cmd` string is re-parsed by `cmd.exe` when the task fires.
|
||||
Different parsers, different escape rules. Use two separate quoting
|
||||
helpers and never cross them. See `hermes_cli/gateway_windows.py::
|
||||
_quote_cmd_script_arg` and `_quote_schtasks_arg` for the reference
|
||||
pair.
|
||||
|
||||
### Testing cross-platform
|
||||
|
||||
Tests that use POSIX-only syscalls need a skip marker. Common ones:
|
||||
- Symlinks → `@pytest.mark.skipif(sys.platform == "win32", ...)`
|
||||
- `0o600` file modes → `@pytest.mark.skipif(sys.platform.startswith("win"), ...)`
|
||||
- `signal.SIGALRM` → Unix-only (see `tests/conftest.py::_enforce_test_timeout`)
|
||||
- `os.setsid` / `os.fork` → Unix-only
|
||||
- Live Winsock / Windows-specific regression tests →
|
||||
`@pytest.mark.skipif(sys.platform != "win32", reason="Windows-specific regression")`
|
||||
|
||||
If you monkeypatch `sys.platform` for cross-platform tests, also patch
|
||||
`platform.system()` / `platform.release()` / `platform.mac_ver()` — each
|
||||
re-reads the real OS independently, so half-patched tests still route
|
||||
through the wrong branch on a Windows runner.
|
||||
5. **Shell commands in installers.** If you change `scripts/install.sh`, check if the equivalent change is needed in `scripts/install.ps1`.
|
||||
|
||||
---
|
||||
|
||||
@@ -820,9 +595,9 @@ refactor/description # Code restructuring
|
||||
|
||||
### Before submitting
|
||||
|
||||
1. **Run tests**: `scripts/run_tests.sh` (recommended; same as CI) or `pytest tests/ -v` with the project venv activated
|
||||
1. **Run tests**: `pytest tests/ -v`
|
||||
2. **Test manually**: Run `hermes` and exercise the code path you changed
|
||||
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2
|
||||
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS
|
||||
4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.
|
||||
|
||||
### PR description
|
||||
|
||||
83
Dockerfile
83
Dockerfile
@@ -10,11 +10,9 @@ ENV PYTHONUNBUFFERED=1
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
|
||||
|
||||
# Install system dependencies in one layer, clear APT cache
|
||||
# tini reaps orphaned zombie processes (MCP stdio subprocesses, git, bun, etc.)
|
||||
# that would otherwise accumulate when hermes runs as PID 1. See #15012.
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential curl nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
|
||||
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
|
||||
@@ -28,90 +26,29 @@ WORKDIR /opt/hermes
|
||||
# ---------- Layer-cached dependency install ----------
|
||||
# Copy only package manifests first so npm install + Playwright are cached
|
||||
# unless the lockfiles themselves change.
|
||||
#
|
||||
# ui-tui/packages/hermes-ink/ is copied IN FULL (not just its manifests)
|
||||
# because it is referenced as a `file:` workspace dependency from
|
||||
# ui-tui/package.json. Copying the tree up front lets npm resolve the
|
||||
# workspace to real content instead of stopping at a bare package.json.
|
||||
COPY package.json package-lock.json ./
|
||||
COPY web/package.json web/package-lock.json web/
|
||||
COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
|
||||
COPY ui-tui/packages/hermes-ink/ ui-tui/packages/hermes-ink/
|
||||
|
||||
# `npm_config_install_links=false` forces npm to install `file:` deps as
|
||||
# symlinks (the npm 10+ default) even on Debian's older bundled npm 9.x,
|
||||
# which defaults to `install-links=true` and installs file deps as *copies*.
|
||||
# The host-side package-lock.json is generated with a newer npm that uses
|
||||
# symlinks, so an install-as-copy produces a hidden node_modules/.package-lock.json
|
||||
# that permanently disagrees with the root lock on the @hermes/ink entry.
|
||||
# That disagreement trips the TUI launcher's `_tui_need_npm_install()`
|
||||
# check on every startup and triggers a runtime `npm install` that then
|
||||
# fails with EACCES (node_modules/ is root-owned from build time).
|
||||
ENV npm_config_install_links=false
|
||||
|
||||
RUN npm install --prefer-offline --no-audit && \
|
||||
npx playwright install --with-deps chromium --only-shell && \
|
||||
(cd web && npm install --prefer-offline --no-audit) && \
|
||||
(cd ui-tui && npm install --prefer-offline --no-audit) && \
|
||||
npm cache clean --force
|
||||
|
||||
# ---------- Layer-cached Python dependency install ----------
|
||||
# Copy only pyproject.toml + uv.lock so the Python dep resolve + wheel
|
||||
# download + native-extension compile layer is cached unless those inputs
|
||||
# change. Before this split the Python install sat after `COPY . .`, so
|
||||
# every source-only commit re-did ~4-5 min of dep work on cold builds.
|
||||
#
|
||||
# README.md is referenced by pyproject.toml's `readme =` field, but it's
|
||||
# excluded from the build context by .dockerignore's `*.md`. uv's build
|
||||
# frontend stats the readme path during dep resolution, so we `touch` an
|
||||
# empty placeholder — the real README is restored by `COPY . .` below.
|
||||
#
|
||||
# `uv sync --frozen --no-install-project --extra all` installs only the
|
||||
# deps reachable through the composite `[all]` extra (handpicked set
|
||||
# intended for the production image). We do NOT use `--all-extras`:
|
||||
# that would pull in `[rl]` (atroposlib + tinker + torch + wandb from
|
||||
# git), `[yc-bench]` (another git dep), and `[termux-all]` (Android
|
||||
# redundancy), none of which belong in the published container.
|
||||
#
|
||||
# The editable link is created after the source copy below.
|
||||
COPY pyproject.toml uv.lock ./
|
||||
RUN touch ./README.md
|
||||
RUN uv sync --frozen --no-install-project --extra all
|
||||
|
||||
# ---------- Source code ----------
|
||||
# .dockerignore excludes node_modules, so the installs above survive.
|
||||
COPY --chown=hermes:hermes . .
|
||||
|
||||
# Build browser dashboard and terminal UI assets.
|
||||
RUN cd web && npm run build && \
|
||||
cd ../ui-tui && npm run build
|
||||
# Build web dashboard (Vite outputs to hermes_agent/cli/web_dist/)
|
||||
RUN cd web && npm run build
|
||||
|
||||
# ---------- Permissions ----------
|
||||
# Make install dir world-readable so any HERMES_UID can read it at runtime.
|
||||
# The venv needs to be traversable too.
|
||||
# node_modules trees additionally need to be writable by the hermes user
|
||||
# so the runtime `npm install` triggered by _tui_need_npm_install() in
|
||||
# hermes_cli/main.py succeeds (see #18800). /opt/hermes/web is build-time
|
||||
# only (HERMES_WEB_DIST points at hermes_cli/web_dist) and is intentionally
|
||||
# not chowned here.
|
||||
# The .venv MUST be hermes-writable so lazy_deps.py can install platform
|
||||
# packages (discord.py, telegram, slack, etc.) at first gateway boot.
|
||||
# Without this, `uv pip install` fails with EACCES and all messaging
|
||||
# adapters silently fail to load. See tools/lazy_deps.py.
|
||||
USER root
|
||||
RUN chmod -R a+rX /opt/hermes && \
|
||||
chown -R hermes:hermes /opt/hermes/.venv /opt/hermes/ui-tui /opt/hermes/node_modules
|
||||
# Start as root so the entrypoint can usermod/groupmod + gosu.
|
||||
# If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000).
|
||||
|
||||
# ---------- Link hermes-agent itself (editable) ----------
|
||||
# Deps are already installed in the cached layer above; `--no-deps` makes
|
||||
# this a fast (~1s) egg-link creation with no resolution or downloads.
|
||||
RUN uv pip install --no-cache-dir --no-deps -e "."
|
||||
# ---------- Python virtualenv ----------
|
||||
RUN chown hermes:hermes /opt/hermes
|
||||
USER hermes
|
||||
RUN uv venv && \
|
||||
uv pip install --no-cache-dir -e ".[all]"
|
||||
|
||||
# ---------- Runtime ----------
|
||||
ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
|
||||
ENV HERMES_WEB_DIST=/opt/hermes/hermes_agent/cli/web_dist
|
||||
ENV HERMES_HOME=/opt/data
|
||||
ENV PATH="/opt/data/.local/bin:${PATH}"
|
||||
VOLUME [ "/opt/data" ]
|
||||
ENTRYPOINT [ "/usr/bin/tini", "-g", "--", "/opt/hermes/docker/entrypoint.sh" ]
|
||||
ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
graft hermes_agent
|
||||
graft skills
|
||||
graft optional-skills
|
||||
global-exclude __pycache__
|
||||
|
||||
38
README.md
38
README.md
@@ -9,12 +9,11 @@
|
||||
<a href="https://discord.gg/NousResearch"><img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"></a>
|
||||
<a href="https://github.com/NousResearch/hermes-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT"></a>
|
||||
<a href="https://nousresearch.com"><img src="https://img.shields.io/badge/Built%20by-Nous%20Research-blueviolet?style=for-the-badge" alt="Built by Nous Research"></a>
|
||||
<a href="README.zh-CN.md"><img src="https://img.shields.io/badge/Lang-中文-red?style=for-the-badge" alt="中文"></a>
|
||||
</p>
|
||||
|
||||
**The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
|
||||
|
||||
Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NovitaAI](https://novita.ai) (AI-native cloud for Model API, Agent Sandbox, and GPU Cloud), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
|
||||
Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
|
||||
|
||||
<table>
|
||||
<tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
|
||||
@@ -22,7 +21,7 @@ Use any model you want — [Nous Portal](https://portal.nousresearch.com), [Open
|
||||
<tr><td><b>A closed learning loop</b></td><td>Agent-curated memory with periodic nudges. Autonomous skill creation after complex tasks. Skills self-improve during use. FTS5 session search with LLM summarization for cross-session recall. <a href="https://github.com/plastic-labs/honcho">Honcho</a> dialectic user modeling. Compatible with the <a href="https://agentskills.io">agentskills.io</a> open standard.</td></tr>
|
||||
<tr><td><b>Scheduled automations</b></td><td>Built-in cron scheduler with delivery to any platform. Daily reports, nightly backups, weekly audits — all in natural language, running unattended.</td></tr>
|
||||
<tr><td><b>Delegates and parallelizes</b></td><td>Spawn isolated subagents for parallel workstreams. Write Python scripts that call tools via RPC, collapsing multi-step pipelines into zero-context-cost turns.</td></tr>
|
||||
<tr><td><b>Runs anywhere, not just your laptop</b></td><td>Seven terminal backends — local, Docker, SSH, Singularity, Modal, Daytona, and Vercel Sandbox. Daytona and Modal offer serverless persistence — your agent's environment hibernates when idle and wakes on demand, costing nearly nothing between sessions. Run it on a $5 VPS or a GPU cluster.</td></tr>
|
||||
<tr><td><b>Runs anywhere, not just your laptop</b></td><td>Six terminal backends — local, Docker, SSH, Daytona, Singularity, and Modal. Daytona and Modal offer serverless persistence — your agent's environment hibernates when idle and wakes on demand, costing nearly nothing between sessions. Run it on a $5 VPS or a GPU cluster.</td></tr>
|
||||
<tr><td><b>Research-ready</b></td><td>Batch trajectory generation, Atropos RL environments, trajectory compression for training the next generation of tool-calling models.</td></tr>
|
||||
</table>
|
||||
|
||||
@@ -30,29 +29,15 @@ Use any model you want — [Nous Portal](https://portal.nousresearch.com), [Open
|
||||
|
||||
## Quick Install
|
||||
|
||||
### Linux, macOS, WSL2, Termux
|
||||
|
||||
```bash
|
||||
curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
|
||||
```
|
||||
|
||||
### Windows (native, PowerShell) — Early Beta
|
||||
|
||||
> **Heads up:** Native Windows support is **early beta**. It installs and runs, but hasn't been road-tested as broadly as our Linux/macOS/WSL2 paths. Please [file issues](https://github.com/NousResearch/hermes-agent/issues) when you hit rough edges. For the most battle-tested Windows setup today, run the Linux/macOS one-liner above inside **WSL2**.
|
||||
|
||||
Run this in PowerShell:
|
||||
|
||||
```powershell
|
||||
irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.ps1 | iex
|
||||
```
|
||||
|
||||
The installer handles everything: uv, Python 3.11, Node.js, ripgrep, ffmpeg, **and a portable Git Bash** (MinGit, unpacked to `%LOCALAPPDATA%\hermes\git` — no admin required, completely isolated from any system Git install). Hermes uses this bundled Git Bash to run shell commands.
|
||||
|
||||
If you already have Git installed, the installer detects it and uses that instead. Otherwise a ~45MB MinGit download is all you need — it won't touch or interfere with any system Git.
|
||||
Works on Linux, macOS, WSL2, and Android via Termux. The installer handles the platform-specific setup for you.
|
||||
|
||||
> **Android / Termux:** The tested manual path is documented in the [Termux guide](https://hermes-agent.nousresearch.com/docs/getting-started/termux). On Termux, Hermes installs a curated `.[termux]` extra because the full `.[all]` extra currently pulls Android-incompatible voice dependencies.
|
||||
>
|
||||
> **Windows:** Native Windows is supported as an **early beta** — the PowerShell one-liner above installs everything, but expect rough edges and please file issues when you hit them. If you'd rather use WSL2 (our most battle-tested Windows path), the Linux command works there too. Native Windows install lives under `%LOCALAPPDATA%\hermes`; WSL2 installs under `~/.hermes` as on Linux. The only Hermes feature that currently needs WSL2 specifically is the browser-based dashboard chat pane (it uses a POSIX PTY — classic CLI and gateway both run natively).
|
||||
> **Windows:** Native Windows is not supported. Please install [WSL2](https://learn.microsoft.com/en-us/windows/wsl/install) and run the command above.
|
||||
|
||||
After installation:
|
||||
|
||||
@@ -91,7 +76,7 @@ Hermes has two entry points: start the terminal UI with `hermes`, or run the gat
|
||||
| Set a personality | `/personality [name]` | `/personality [name]` |
|
||||
| Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` |
|
||||
| Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` |
|
||||
| Browse skills | `/skills` or `/<skill-name>` | `/<skill-name>` |
|
||||
| Browse skills | `/skills` or `/<skill-name>` | `/skills` or `/<skill-name>` |
|
||||
| Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message |
|
||||
| Platform-specific status | `/platforms` | `/status`, `/sethome` |
|
||||
|
||||
@@ -169,13 +154,17 @@ Manual path (equivalent to the above):
|
||||
|
||||
```bash
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv venv .venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
uv venv venv --python 3.11
|
||||
source venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
scripts/run_tests.sh
|
||||
python -m pytest tests/ -q
|
||||
```
|
||||
|
||||
> **RL Training (optional):** The RL/Atropos integration (`environments/`) — see [`CONTRIBUTING.md`](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md#development-setup) for the full setup.
|
||||
> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
|
||||
> ```bash
|
||||
> git submodule update --init tinker-atropos
|
||||
> uv pip install -e "./tinker-atropos"
|
||||
> ```
|
||||
|
||||
---
|
||||
|
||||
@@ -184,6 +173,7 @@ scripts/run_tests.sh
|
||||
- 💬 [Discord](https://discord.gg/NousResearch)
|
||||
- 📚 [Skills Hub](https://agentskills.io)
|
||||
- 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
|
||||
- 💡 [Discussions](https://github.com/NousResearch/hermes-agent/discussions)
|
||||
- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.
|
||||
|
||||
---
|
||||
|
||||
186
README.zh-CN.md
186
README.zh-CN.md
@@ -1,186 +0,0 @@
|
||||
<p align="center">
|
||||
<img src="assets/banner.png" alt="Hermes Agent" width="100%">
|
||||
</p>
|
||||
|
||||
# Hermes Agent ☤
|
||||
|
||||
<p align="center">
|
||||
<a href="https://hermes-agent.nousresearch.com/docs/"><img src="https://img.shields.io/badge/Docs-hermes--agent.nousresearch.com-FFD700?style=for-the-badge" alt="Documentation"></a>
|
||||
<a href="https://discord.gg/NousResearch"><img src="https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord"></a>
|
||||
<a href="https://github.com/NousResearch/hermes-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT"></a>
|
||||
<a href="https://nousresearch.com"><img src="https://img.shields.io/badge/Built%20by-Nous%20Research-blueviolet?style=for-the-badge" alt="Built by Nous Research"></a>
|
||||
<a href="README.md"><img src="https://img.shields.io/badge/Lang-English-lightgrey?style=for-the-badge" alt="English"></a>
|
||||
</p>
|
||||
|
||||
**由 [Nous Research](https://nousresearch.com) 构建的自进化 AI 代理。** 它是唯一内置学习闭环的智能代理——从经验中创建技能,在使用中改进技能,主动持久化知识,搜索过往对话,并在跨会话中逐步构建对你的深度理解。可以在 $5 的 VPS 上运行,也可以在 GPU 集群上运行,或者使用几乎零成本的 Serverless 基础设施。它不绑定你的笔记本——你可以在 Telegram 上与它对话,而它在云端 VM 上工作。
|
||||
|
||||
支持任意模型——[Nous Portal](https://portal.nousresearch.com)、[OpenRouter](https://openrouter.ai)(200+ 模型)、[NVIDIA NIM](https://build.nvidia.com)(Nemotron)、[小米 MiMo](https://platform.xiaomimimo.com)、[z.ai/GLM](https://z.ai)、[Kimi/Moonshot](https://platform.moonshot.ai)、[MiniMax](https://www.minimax.io)、[Hugging Face](https://huggingface.co)、OpenAI,或自定义端点。使用 `hermes model` 即可切换——无需改代码,无锁定。
|
||||
|
||||
<table>
|
||||
<tr><td><b>真正的终端界面</b></td><td>完整的 TUI,支持多行编辑、斜杠命令自动补全、对话历史、中断重定向和流式工具输出。</td></tr>
|
||||
<tr><td><b>随你所在</b></td><td>Telegram、Discord、Slack、WhatsApp、Signal 和 CLI——全部从单个网关进程运行。语音备忘录转写、跨平台对话连续性。</td></tr>
|
||||
<tr><td><b>闭环学习</b></td><td>代理管理记忆并定期自我提醒。复杂任务后自动创建技能。技能在使用中自我改进。FTS5 会话搜索配合 LLM 摘要实现跨会话回溯。<a href="https://github.com/plastic-labs/honcho">Honcho</a> 辩证式用户建模。兼容 <a href="https://agentskills.io">agentskills.io</a> 开放标准。</td></tr>
|
||||
<tr><td><b>定时自动化</b></td><td>内置 cron 调度器,支持向任何平台投递。日报、夜间备份、周审计——全部用自然语言描述,无人值守运行。</td></tr>
|
||||
<tr><td><b>委派与并行</b></td><td>生成隔离子代理处理并行工作流。编写 Python 脚本通过 RPC 调用工具,将多步管道压缩为零上下文开销的轮次。</td></tr>
|
||||
<tr><td><b>随处运行</b></td><td>六种终端后端——本地、Docker、SSH、Daytona、Singularity 和 Modal。Daytona 和 Modal 提供 Serverless 持久化——代理环境空闲时休眠、按需唤醒,空闲期间几乎零成本。$5 VPS 或 GPU 集群都能跑。</td></tr>
|
||||
<tr><td><b>研究就绪</b></td><td>批量轨迹生成、Atropos RL 环境、轨迹压缩——用于训练下一代工具调用模型。</td></tr>
|
||||
</table>
|
||||
|
||||
---
|
||||
|
||||
## 快速安装
|
||||
|
||||
```bash
|
||||
curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
|
||||
```
|
||||
|
||||
支持 Linux、macOS、WSL2 和 Android (Termux)。安装程序会自动处理平台特定的配置。
|
||||
|
||||
> **Android / Termux:** 已测试的手动安装路径请参考 [Termux 指南](https://hermes-agent.nousresearch.com/docs/getting-started/termux)。在 Termux 上,Hermes 会安装精选的 `.[termux]` 扩展,因为完整的 `.[all]` 扩展会拉取 Android 不兼容的语音依赖。
|
||||
>
|
||||
> **Windows:** 原生 Windows 不受支持。请安装 [WSL2](https://learn.microsoft.com/zh-cn/windows/wsl/install) 并运行上述命令。
|
||||
|
||||
安装后:
|
||||
|
||||
```bash
|
||||
source ~/.bashrc # 重新加载 shell(或: source ~/.zshrc)
|
||||
hermes # 开始对话!
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 快速入门
|
||||
|
||||
```bash
|
||||
hermes # 交互式 CLI — 开始对话
|
||||
hermes model # 选择 LLM 提供商和模型
|
||||
hermes tools # 配置启用的工具
|
||||
hermes config set # 设置单个配置项
|
||||
hermes gateway # 启动消息网关(Telegram、Discord 等)
|
||||
hermes setup # 运行完整设置向导(一次性配置所有内容)
|
||||
hermes claw migrate # 从 OpenClaw 迁移(如果来自 OpenClaw)
|
||||
hermes update # 更新到最新版本
|
||||
hermes doctor # 诊断问题
|
||||
```
|
||||
|
||||
📖 **[完整文档 →](https://hermes-agent.nousresearch.com/docs/)**
|
||||
|
||||
## CLI 与消息平台 快速对照
|
||||
|
||||
Hermes 有两种入口:用 `hermes` 启动终端 UI,或运行网关从 Telegram、Discord、Slack、WhatsApp、Signal 或 Email 与之对话。进入对话后,许多斜杠命令在两种界面中通用。
|
||||
|
||||
| 操作 | CLI | 消息平台 |
|
||||
|------|-----|----------|
|
||||
| 开始对话 | `hermes` | 运行 `hermes gateway setup` + `hermes gateway start`,然后给机器人发消息 |
|
||||
| 开始新对话 | `/new` 或 `/reset` | `/new` 或 `/reset` |
|
||||
| 更换模型 | `/model [provider:model]` | `/model [provider:model]` |
|
||||
| 设置人格 | `/personality [name]` | `/personality [name]` |
|
||||
| 重试或撤销上一轮 | `/retry`、`/undo` | `/retry`、`/undo` |
|
||||
| 压缩上下文 / 查看用量 | `/compress`、`/usage`、`/insights [--days N]` | `/compress`、`/usage`、`/insights [days]` |
|
||||
| 浏览技能 | `/skills` 或 `/<skill-name>` | `/skills` 或 `/<skill-name>` |
|
||||
| 中断当前工作 | `Ctrl+C` 或发送新消息 | `/stop` 或发送新消息 |
|
||||
| 平台特定状态 | `/platforms` | `/status`、`/sethome` |
|
||||
|
||||
完整命令列表请参阅 [CLI 指南](https://hermes-agent.nousresearch.com/docs/user-guide/cli) 和 [消息网关指南](https://hermes-agent.nousresearch.com/docs/user-guide/messaging)。
|
||||
|
||||
---
|
||||
|
||||
## 文档
|
||||
|
||||
所有文档位于 **[hermes-agent.nousresearch.com/docs](https://hermes-agent.nousresearch.com/docs/)**:
|
||||
|
||||
| 章节 | 内容 |
|
||||
|------|------|
|
||||
| [快速开始](https://hermes-agent.nousresearch.com/docs/getting-started/quickstart) | 安装 → 设置 → 2 分钟内开始首次对话 |
|
||||
| [CLI 使用](https://hermes-agent.nousresearch.com/docs/user-guide/cli) | 命令、快捷键、人格、会话 |
|
||||
| [配置](https://hermes-agent.nousresearch.com/docs/user-guide/configuration) | 配置文件、提供商、模型、所有选项 |
|
||||
| [消息网关](https://hermes-agent.nousresearch.com/docs/user-guide/messaging) | Telegram、Discord、Slack、WhatsApp、Signal、Home Assistant |
|
||||
| [安全](https://hermes-agent.nousresearch.com/docs/user-guide/security) | 命令审批、DM 配对、容器隔离 |
|
||||
| [工具与工具集](https://hermes-agent.nousresearch.com/docs/user-guide/features/tools) | 40+ 工具、工具集系统、终端后端 |
|
||||
| [技能系统](https://hermes-agent.nousresearch.com/docs/user-guide/features/skills) | 过程记忆、技能中心、创建技能 |
|
||||
| [记忆](https://hermes-agent.nousresearch.com/docs/user-guide/features/memory) | 持久记忆、用户画像、最佳实践 |
|
||||
| [MCP 集成](https://hermes-agent.nousresearch.com/docs/user-guide/features/mcp) | 连接任意 MCP 服务器扩展能力 |
|
||||
| [定时调度](https://hermes-agent.nousresearch.com/docs/user-guide/features/cron) | 定时任务与平台投递 |
|
||||
| [上下文文件](https://hermes-agent.nousresearch.com/docs/user-guide/features/context-files) | 影响每次对话的项目上下文 |
|
||||
| [架构](https://hermes-agent.nousresearch.com/docs/developer-guide/architecture) | 项目结构、代理循环、关键类 |
|
||||
| [贡献](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) | 开发设置、PR 流程、代码风格 |
|
||||
| [CLI 参考](https://hermes-agent.nousresearch.com/docs/reference/cli-commands) | 所有命令和标志 |
|
||||
| [环境变量](https://hermes-agent.nousresearch.com/docs/reference/environment-variables) | 完整环境变量参考 |
|
||||
|
||||
---
|
||||
|
||||
## 从 OpenClaw 迁移
|
||||
|
||||
如果你来自 OpenClaw,Hermes 可以自动导入你的设置、记忆、技能和 API 密钥。
|
||||
|
||||
**首次安装时:** 安装向导(`hermes setup`)会自动检测 `~/.openclaw` 并在配置开始前提供迁移选项。
|
||||
|
||||
**安装后任意时间:**
|
||||
|
||||
```bash
|
||||
hermes claw migrate # 交互式迁移(完整预设)
|
||||
hermes claw migrate --dry-run # 预览将要迁移的内容
|
||||
hermes claw migrate --preset user-data # 仅迁移用户数据,不含密钥
|
||||
hermes claw migrate --overwrite # 覆盖已有冲突
|
||||
```
|
||||
|
||||
导入内容:
|
||||
- **SOUL.md** — 人格文件
|
||||
- **记忆** — MEMORY.md 和 USER.md 条目
|
||||
- **技能** — 用户创建的技能 → `~/.hermes/skills/openclaw-imports/`
|
||||
- **命令白名单** — 审批模式
|
||||
- **消息设置** — 平台配置、允许用户、工作目录
|
||||
- **API 密钥** — 白名单中的密钥(Telegram、OpenRouter、OpenAI、Anthropic、ElevenLabs)
|
||||
- **TTS 资产** — 工作区音频文件
|
||||
- **工作区指令** — AGENTS.md(使用 `--workspace-target`)
|
||||
|
||||
使用 `hermes claw migrate --help` 查看所有选项,或使用 `openclaw-migration` 技能进行交互式代理引导迁移(含干运行预览)。
|
||||
|
||||
---
|
||||
|
||||
## 贡献
|
||||
|
||||
欢迎贡献!请参阅 [贡献指南](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) 了解开发设置、代码风格和 PR 流程。
|
||||
|
||||
贡献者快速开始——克隆并使用 `setup-hermes.sh`:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/NousResearch/hermes-agent.git
|
||||
cd hermes-agent
|
||||
./setup-hermes.sh # 安装 uv、创建 venv、安装 .[all]、创建符号链接 ~/.local/bin/hermes
|
||||
./hermes # 自动检测 venv,无需先 source
|
||||
```
|
||||
|
||||
手动安装(等效于上述命令):
|
||||
|
||||
```bash
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uv venv venv --python 3.11
|
||||
source venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
python -m pytest tests/ -q
|
||||
```
|
||||
|
||||
> **RL 训练(可选):** 如需参与 RL/Tinker-Atropos 集成开发:
|
||||
> ```bash
|
||||
> git submodule update --init tinker-atropos
|
||||
> uv pip install -e "./tinker-atropos"
|
||||
> ```
|
||||
|
||||
---
|
||||
|
||||
## 社区
|
||||
|
||||
- 💬 [Discord](https://discord.gg/NousResearch)
|
||||
- 📚 [技能中心](https://agentskills.io)
|
||||
- 🐛 [问题反馈](https://github.com/NousResearch/hermes-agent/issues)
|
||||
- 💡 [讨论区](https://github.com/NousResearch/hermes-agent/discussions)
|
||||
- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — 社区微信桥接:在同一微信账号上运行 Hermes Agent 和 OpenClaw。
|
||||
|
||||
---
|
||||
|
||||
## 许可证
|
||||
|
||||
MIT — 详见 [LICENSE](LICENSE)。
|
||||
|
||||
由 [Nous Research](https://nousresearch.com) 构建。
|
||||
@@ -1,453 +0,0 @@
|
||||
# Hermes Agent v0.11.0 (v2026.4.23)
|
||||
|
||||
**Release Date:** April 23, 2026
|
||||
**Since v0.9.0:** 1,556 commits · 761 merged PRs · 1,314 files changed · 224,174 insertions · 29 community contributors (290 including co-authors)
|
||||
|
||||
> The Interface release — a full React/Ink rewrite of the interactive CLI, a pluggable transport architecture underneath every provider, native AWS Bedrock support, five new inference paths, a 17th messaging platform (QQBot), a dramatically expanded plugin surface, and GPT-5.5 via Codex OAuth.
|
||||
|
||||
This release also folds in all the highlights deferred from v0.10.0 (which shipped only the Nous Tool Gateway) — so it covers roughly two weeks of work across the whole stack.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **New Ink-based TUI** — `hermes --tui` is now a full React/Ink rewrite of the interactive CLI, with a Python JSON-RPC backend (`tui_gateway`). Sticky composer, live streaming with OSC-52 clipboard support, stable picker keys, status bar with per-turn stopwatch and git branch, `/clear` confirm, light-theme preset, and a subagent spawn observability overlay. ~310 commits to `ui-tui/` + `tui_gateway/`. (@OutThisLife + Teknium)
|
||||
|
||||
- **Transport ABC + Native AWS Bedrock** — Format conversion and HTTP transport were extracted from `run_agent.py` into a pluggable `agent/transports/` layer. `AnthropicTransport`, `ChatCompletionsTransport`, `ResponsesApiTransport`, and `BedrockTransport` each own their own format conversion and API shape. Native AWS Bedrock support via the Converse API ships on top of the new abstraction. ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549), [#13347](https://github.com/NousResearch/hermes-agent/pull/13347), [#13366](https://github.com/NousResearch/hermes-agent/pull/13366), [#13430](https://github.com/NousResearch/hermes-agent/pull/13430), [#13805](https://github.com/NousResearch/hermes-agent/pull/13805), [#13814](https://github.com/NousResearch/hermes-agent/pull/13814) — @kshitijk4poor + Teknium)
|
||||
|
||||
- **Five new inference paths** — Native NVIDIA NIM ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774)), Arcee AI ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276)), Step Plan ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893)), Google Gemini CLI OAuth ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270)), and Vercel ai-gateway with pricing + dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223) — @jerilynzheng). Plus Gemini routed through the native AI Studio API for better performance ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674)).
|
||||
|
||||
- **GPT-5.5 over Codex OAuth** — OpenAI's new GPT-5.5 reasoning model is now available through your ChatGPT Codex OAuth, with live model discovery wired into the model picker so new OpenAI releases show up without catalog updates. ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
|
||||
|
||||
- **QQBot — 17th supported platform** — Native QQBot adapter via QQ Official API v2, with QR scan-to-configure setup wizard, streaming cursor, emoji reactions, and DM/group policy gating that matches WeCom/Weixin parity. ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
|
||||
|
||||
- **Plugin surface expanded** — Plugins can now register slash commands (`register_command`), dispatch tools directly (`dispatch_tool`), block tool execution from hooks (`pre_tool_call` can veto), rewrite tool results (`transform_tool_result`), transform terminal output (`transform_terminal_output`), ship image_gen backends, and add custom dashboard tabs. The bundled disk-cleanup plugin is opt-in by default as a reference implementation. ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377), [#10626](https://github.com/NousResearch/hermes-agent/pull/10626), [#10763](https://github.com/NousResearch/hermes-agent/pull/10763), [#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#12929](https://github.com/NousResearch/hermes-agent/pull/12929), [#12944](https://github.com/NousResearch/hermes-agent/pull/12944), [#12972](https://github.com/NousResearch/hermes-agent/pull/12972), [#13799](https://github.com/NousResearch/hermes-agent/pull/13799), [#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
|
||||
|
||||
- **`/steer` — mid-run agent nudges** — `/steer <prompt>` injects a note that the running agent sees after its next tool call, without interrupting the turn or breaking prompt cache. For when you want to course-correct an agent in-flight. ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
|
||||
|
||||
- **Shell hooks** — Wire any shell script as a Hermes lifecycle hook (pre_tool_call, post_tool_call, on_session_start, etc.) without writing a Python plugin. ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
|
||||
|
||||
- **Webhook direct-delivery mode** — Webhook subscriptions can now forward payloads straight to a platform chat without going through the agent — zero-LLM push notifications for alerting, uptime checks, and event streams. ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
|
||||
|
||||
- **Smarter delegation** — Subagents now have an explicit `orchestrator` role that can spawn their own workers, with configurable `max_spawn_depth` (default flat). Concurrent sibling subagents share filesystem state through a file-coordination layer so they don't clobber each other's edits. ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691), [#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
|
||||
- **Auxiliary models — configurable UI + main-model-first** — `hermes model` has a dedicated "Configure auxiliary models" screen for per-task overrides (compression, vision, session_search, title_generation). `auto` routing now defaults to the main model for side tasks across all users (previously aggregator users were silently routed to a cheap provider-side default). ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891), [#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
|
||||
|
||||
- **Dashboard plugin system + live theme switching** — The web dashboard is now extensible. Third-party plugins can add custom tabs, widgets, and views without forking. Paired with a live-switching theme system — themes now control colors, fonts, layout, and density — so users can hot-swap the dashboard look without a reload. Same theming discipline the CLI has, now on the web. ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#10687](https://github.com/NousResearch/hermes-agent/pull/10687), [#14725](https://github.com/NousResearch/hermes-agent/pull/14725))
|
||||
|
||||
- **Dashboard polish** — i18n (English + Chinese), react-router sidebar layout, mobile-responsive, Vercel deployment, real per-session API call tracking, and one-click update + gateway restart buttons. ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), [#9370](https://github.com/NousResearch/hermes-agent/pull/9370), [#9453](https://github.com/NousResearch/hermes-agent/pull/9453), [#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#13526](https://github.com/NousResearch/hermes-agent/pull/13526), [#14004](https://github.com/NousResearch/hermes-agent/pull/14004) — @austinpickett + @DeployFaith + Teknium)
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Core Agent & Architecture
|
||||
|
||||
### Transport Layer (NEW)
|
||||
- **Transport ABC** abstracts format conversion and HTTP transport from `run_agent.py` into `agent/transports/` ([#13347](https://github.com/NousResearch/hermes-agent/pull/13347))
|
||||
- **AnthropicTransport** — Anthropic Messages API path ([#13366](https://github.com/NousResearch/hermes-agent/pull/13366), @kshitijk4poor)
|
||||
- **ChatCompletionsTransport** — default path for OpenAI-compatible providers ([#13805](https://github.com/NousResearch/hermes-agent/pull/13805))
|
||||
- **ResponsesApiTransport** — OpenAI Responses API + Codex build_kwargs wiring ([#13430](https://github.com/NousResearch/hermes-agent/pull/13430), @kshitijk4poor)
|
||||
- **BedrockTransport** — AWS Bedrock Converse API transport ([#13814](https://github.com/NousResearch/hermes-agent/pull/13814))
|
||||
|
||||
### Provider & Model Support
|
||||
- **Native AWS Bedrock provider** via Converse API ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549))
|
||||
- **NVIDIA NIM native provider** (salvage of #11703) ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774))
|
||||
- **Arcee AI direct provider** ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276))
|
||||
- **Step Plan provider** (salvage #6005) ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893), @kshitijk4poor)
|
||||
- **Google Gemini CLI OAuth** inference provider ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270))
|
||||
- **Vercel ai-gateway** with pricing, attribution, and dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223), @jerilynzheng)
|
||||
- **GPT-5.5 over Codex OAuth** with live model discovery in the picker ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
|
||||
- **Gemini routed through native AI Studio API** ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674))
|
||||
- **xAI Grok upgraded to Responses API** ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
|
||||
- **Ollama improvements** — Cloud provider support, GLM continuation, `think=false` control, surrogate sanitization, `/v1` hint ([#10782](https://github.com/NousResearch/hermes-agent/pull/10782))
|
||||
- **Kimi K2.6** across OpenRouter, Nous Portal, native Kimi, and HuggingFace ([#13148](https://github.com/NousResearch/hermes-agent/pull/13148), [#13152](https://github.com/NousResearch/hermes-agent/pull/13152), [#13169](https://github.com/NousResearch/hermes-agent/pull/13169))
|
||||
- **Kimi K2.5** promoted to first position in all model suggestion lists ([#11745](https://github.com/NousResearch/hermes-agent/pull/11745), @kshitijk4poor)
|
||||
- **Xiaomi MiMo v2.5-pro + v2.5** on OpenRouter, Nous Portal, and native ([#14184](https://github.com/NousResearch/hermes-agent/pull/14184), [#14635](https://github.com/NousResearch/hermes-agent/pull/14635), @kshitijk4poor)
|
||||
- **GLM-5V-Turbo** for coding plan ([#9907](https://github.com/NousResearch/hermes-agent/pull/9907))
|
||||
- **Claude Opus 4.7** in Nous Portal catalog ([#11398](https://github.com/NousResearch/hermes-agent/pull/11398))
|
||||
- **OpenRouter elephant-alpha** in curated lists ([#9378](https://github.com/NousResearch/hermes-agent/pull/9378))
|
||||
- **OpenCode-Go** — Kimi K2.6 and Qwen3.5/3.6 Plus in curated catalog ([#13429](https://github.com/NousResearch/hermes-agent/pull/13429))
|
||||
- **minimax/minimax-m2.5:free** in OpenRouter catalog ([#13836](https://github.com/NousResearch/hermes-agent/pull/13836))
|
||||
- **`/model` merges models.dev entries** for lesser-loved providers ([#14221](https://github.com/NousResearch/hermes-agent/pull/14221))
|
||||
- **Per-provider + per-model `request_timeout_seconds`** config ([#12652](https://github.com/NousResearch/hermes-agent/pull/12652))
|
||||
- **Configurable API retry count** via `agent.api_max_retries` ([#14730](https://github.com/NousResearch/hermes-agent/pull/14730))
|
||||
- **ctx_size context length key** for Lemonade server (salvage #8536) ([#14215](https://github.com/NousResearch/hermes-agent/pull/14215))
|
||||
- **Custom provider display name prompt** ([#9420](https://github.com/NousResearch/hermes-agent/pull/9420))
|
||||
- **Recommendation badges** on tool provider selection ([#9929](https://github.com/NousResearch/hermes-agent/pull/9929))
|
||||
- Fix: correct GPT-5 family context lengths in fallback defaults ([#9309](https://github.com/NousResearch/hermes-agent/pull/9309))
|
||||
- Fix: clamp `minimal` reasoning effort to `low` on Responses API ([#9429](https://github.com/NousResearch/hermes-agent/pull/9429))
|
||||
- Fix: strip reasoning item IDs from Responses API input when `store=False` ([#10217](https://github.com/NousResearch/hermes-agent/pull/10217))
|
||||
- Fix: OpenViking correct account default + commit session on `/new` and compress ([#10463](https://github.com/NousResearch/hermes-agent/pull/10463))
|
||||
- Fix: Kimi `/coding` thinking block survival + empty reasoning_content + block ordering (multiple PRs)
|
||||
- Fix: don't send Anthropic thinking to api.kimi.com/coding ([#13826](https://github.com/NousResearch/hermes-agent/pull/13826))
|
||||
- Fix: send `max_tokens`, `reasoning_effort`, and `thinking` for Kimi/Moonshot
|
||||
- Fix: stream reasoning content through OpenAI-compatible providers that emit it
|
||||
|
||||
### Agent Loop & Conversation
|
||||
- **`/steer <prompt>`** — mid-run agent nudges after next tool call ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
|
||||
- **Orchestrator role + configurable spawn depth** for `delegate_task` (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
|
||||
- **Cross-agent file state coordination** for concurrent subagents ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
- **Compressor smart collapse, dedup, anti-thrashing**, template upgrade, hardening ([#10088](https://github.com/NousResearch/hermes-agent/pull/10088))
|
||||
- **Compression summaries respect the conversation's language** ([#12556](https://github.com/NousResearch/hermes-agent/pull/12556))
|
||||
- **Compression model falls back to main model** on permanent 503/404 ([#10093](https://github.com/NousResearch/hermes-agent/pull/10093))
|
||||
- **Auto-continue interrupted agent work** after gateway restart ([#9934](https://github.com/NousResearch/hermes-agent/pull/9934))
|
||||
- **Activity heartbeats** prevent false gateway inactivity timeouts ([#10501](https://github.com/NousResearch/hermes-agent/pull/10501))
|
||||
- **Auxiliary models UI** — dedicated screen for per-task overrides ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891))
|
||||
- **Auxiliary auto routing defaults to main model** for all users ([#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
|
||||
- **PLATFORM_HINTS for Matrix, Mattermost, Feishu** ([#14428](https://github.com/NousResearch/hermes-agent/pull/14428), @alt-glitch)
|
||||
- Fix: reset retry counters after compression; stop poisoning conversation history ([#10055](https://github.com/NousResearch/hermes-agent/pull/10055))
|
||||
- Fix: break compression-exhaustion infinite loop and auto-reset session ([#10063](https://github.com/NousResearch/hermes-agent/pull/10063))
|
||||
- Fix: stale agent timeout, uv venv detection, empty response after tools ([#10065](https://github.com/NousResearch/hermes-agent/pull/10065))
|
||||
- Fix: prevent premature loop exit when weak models return empty after substantive tool calls ([#10472](https://github.com/NousResearch/hermes-agent/pull/10472))
|
||||
- Fix: preserve pre-start terminal interrupts ([#10504](https://github.com/NousResearch/hermes-agent/pull/10504))
|
||||
- Fix: improve interrupt responsiveness during concurrent tool execution ([#10935](https://github.com/NousResearch/hermes-agent/pull/10935))
|
||||
- Fix: word-wrap spinner, interruptable agent join, and delegate_task interrupt ([#10940](https://github.com/NousResearch/hermes-agent/pull/10940))
|
||||
- Fix: `/stop` no longer resets the session ([#9224](https://github.com/NousResearch/hermes-agent/pull/9224))
|
||||
- Fix: honor interrupts during MCP tool waits ([#9382](https://github.com/NousResearch/hermes-agent/pull/9382), @helix4u)
|
||||
- Fix: break stuck session resume loops after repeated restarts ([#9941](https://github.com/NousResearch/hermes-agent/pull/9941))
|
||||
- Fix: empty response nudge crash + placeholder leak to cron targets ([#11021](https://github.com/NousResearch/hermes-agent/pull/11021))
|
||||
- Fix: streaming cursor sanitization to prevent message truncation (multiple PRs)
|
||||
- Fix: resolve `context_length` for plugin context engines ([#9238](https://github.com/NousResearch/hermes-agent/pull/9238))
|
||||
|
||||
### Session & Memory
|
||||
- **Auto-prune old sessions + VACUUM state.db** at startup ([#13861](https://github.com/NousResearch/hermes-agent/pull/13861))
|
||||
- **Honcho overhaul** — context injection, 5-tool surface, cost safety, session isolation ([#10619](https://github.com/NousResearch/hermes-agent/pull/10619))
|
||||
- **Hindsight richer session-scoped retain metadata** (salvage of #6290) ([#13987](https://github.com/NousResearch/hermes-agent/pull/13987))
|
||||
- Fix: deduplicate memory provider tools to prevent 400 on strict providers ([#10511](https://github.com/NousResearch/hermes-agent/pull/10511))
|
||||
- Fix: discover user-installed memory providers from `$HERMES_HOME/plugins/` ([#10529](https://github.com/NousResearch/hermes-agent/pull/10529))
|
||||
- Fix: add `on_memory_write` bridge to sequential tool execution path ([#10507](https://github.com/NousResearch/hermes-agent/pull/10507))
|
||||
- Fix: preserve `session_id` across `previous_response_id` chains in `/v1/responses` ([#10059](https://github.com/NousResearch/hermes-agent/pull/10059))
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ New Ink-based TUI
|
||||
|
||||
A full React/Ink rewrite of the interactive CLI — invoked via `hermes --tui` or `HERMES_TUI=1`. Shipped across ~310 commits to `ui-tui/` and `tui_gateway/`.
|
||||
|
||||
### TUI Foundations
|
||||
- New TUI based on Ink + Python JSON-RPC backend
|
||||
- Prettier + ESLint + vitest tooling for `ui-tui/`
|
||||
- Entry split between `src/entry.tsx` (TTY gate) and `src/app.tsx` (state machine)
|
||||
- Persistent `_SlashWorker` subprocess for slash command dispatch
|
||||
|
||||
### UX & Features
|
||||
- **Stable picker keys, /clear confirm, light-theme preset** ([#12312](https://github.com/NousResearch/hermes-agent/pull/12312), @OutThisLife)
|
||||
- **Git branch in status bar** cwd label ([#12305](https://github.com/NousResearch/hermes-agent/pull/12305), @OutThisLife)
|
||||
- **Per-turn elapsed stopwatch in FaceTicker + done-in sys line** ([#13105](https://github.com/NousResearch/hermes-agent/pull/13105), @OutThisLife)
|
||||
- **Subagent spawn observability overlay** ([#14045](https://github.com/NousResearch/hermes-agent/pull/14045), @OutThisLife)
|
||||
- **Per-prompt elapsed stopwatch in status bar** ([#12948](https://github.com/NousResearch/hermes-agent/pull/12948))
|
||||
- Sticky composer that freezes during scroll
|
||||
- OSC-52 clipboard support for copy across SSH sessions
|
||||
- Virtualized history rendering for performance
|
||||
- Slash command autocomplete via `complete.slash` RPC
|
||||
- Path autocomplete via `complete.path` RPC
|
||||
- Dozens of resize/ghosting/sticky-prompt fixes landed through the week
|
||||
|
||||
### Structural Refactors
|
||||
- Decomposed `app.tsx` into `app/event-handler`, `app/slash-handler`, `app/stores`, `app/hooks` ([#14640](https://github.com/NousResearch/hermes-agent/pull/14640) and surrounding)
|
||||
- Component split: `branding.tsx`, `markdown.tsx`, `prompts.tsx`, `sessionPicker.tsx`, `messageLine.tsx`, `thinking.tsx`, `maskedPrompt.tsx`
|
||||
- Hook split: `useCompletion`, `useInputHistory`, `useQueue`, `useVirtualHistory`
|
||||
|
||||
---
|
||||
|
||||
## 📱 Messaging Platforms (Gateway)
|
||||
|
||||
### New Platforms
|
||||
- **QQBot (17th platform)** — QQ Official API v2 adapter with QR setup, streaming, package split ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
|
||||
|
||||
### Telegram
|
||||
- **Dedicated `TELEGRAM_PROXY` env var + config.yaml proxy support** (closes #9414, #6530, #9074, #7786) ([#10681](https://github.com/NousResearch/hermes-agent/pull/10681))
|
||||
- **`ignored_threads` config** for Telegram groups ([#9530](https://github.com/NousResearch/hermes-agent/pull/9530))
|
||||
- **Config option to disable link previews** (closes #8728) ([#10610](https://github.com/NousResearch/hermes-agent/pull/10610))
|
||||
- **Auto-wrap markdown tables** in code blocks ([#11794](https://github.com/NousResearch/hermes-agent/pull/11794))
|
||||
- Fix: prevent duplicate replies when stream task is cancelled ([#9319](https://github.com/NousResearch/hermes-agent/pull/9319))
|
||||
- Fix: prevent streaming cursor (▉) from appearing as standalone messages ([#9538](https://github.com/NousResearch/hermes-agent/pull/9538))
|
||||
- Fix: retry transient tool sends + cold-boot budget ([#10947](https://github.com/NousResearch/hermes-agent/pull/10947))
|
||||
- Fix: Markdown special char escaping in `send_exec_approval`
|
||||
- Fix: parentheses in URLs during MarkdownV2 link conversion
|
||||
- Fix: Unicode dash normalization in model switch (closes iOS smart-punctuation issue)
|
||||
- Many platform hint / streaming / session-key fixes
|
||||
|
||||
### Discord
|
||||
- **Forum channel support** (salvage of #10145 + media + polish) ([#11920](https://github.com/NousResearch/hermes-agent/pull/11920))
|
||||
- **`DISCORD_ALLOWED_ROLES`** for role-based access control ([#11608](https://github.com/NousResearch/hermes-agent/pull/11608))
|
||||
- **Config option to disable slash commands** (salvage #13130) ([#14315](https://github.com/NousResearch/hermes-agent/pull/14315))
|
||||
- **Native `send_animation`** for inline GIF playback ([#10283](https://github.com/NousResearch/hermes-agent/pull/10283))
|
||||
- **`send_message` Discord media attachments** ([#10246](https://github.com/NousResearch/hermes-agent/pull/10246))
|
||||
- **`/skill` command group** with category subcommands ([#9909](https://github.com/NousResearch/hermes-agent/pull/9909))
|
||||
- **Extract reply text from message references** ([#9781](https://github.com/NousResearch/hermes-agent/pull/9781))
|
||||
|
||||
### Feishu
|
||||
- **Intelligent reply on document comments** with 3-tier access control ([#11898](https://github.com/NousResearch/hermes-agent/pull/11898))
|
||||
- **Show processing state via reactions** on user messages ([#12927](https://github.com/NousResearch/hermes-agent/pull/12927))
|
||||
- **Preserve @mention context for agent consumption** (salvage #13874) ([#14167](https://github.com/NousResearch/hermes-agent/pull/14167))
|
||||
|
||||
### DingTalk
|
||||
- **`require_mention` + `allowed_users` gating** (parity with Slack/Telegram/Discord) ([#11564](https://github.com/NousResearch/hermes-agent/pull/11564))
|
||||
- **QR-code device-flow authorization** for setup wizard ([#11574](https://github.com/NousResearch/hermes-agent/pull/11574))
|
||||
- **AI Cards streaming, emoji reactions, and media handling** (salvage of #10985) ([#11910](https://github.com/NousResearch/hermes-agent/pull/11910))
|
||||
|
||||
### WhatsApp
|
||||
- **`send_voice`** — native audio message delivery ([#13002](https://github.com/NousResearch/hermes-agent/pull/13002))
|
||||
- **`dm_policy` and `group_policy`** parity with WeCom/Weixin/QQ adapters ([#13151](https://github.com/NousResearch/hermes-agent/pull/13151))
|
||||
|
||||
### WeCom / Weixin
|
||||
- **WeCom QR-scan bot creation + interactive setup wizard** (salvage #13923) ([#13961](https://github.com/NousResearch/hermes-agent/pull/13961))
|
||||
|
||||
### Signal
|
||||
- **Media delivery support** via `send_message` ([#13178](https://github.com/NousResearch/hermes-agent/pull/13178))
|
||||
|
||||
### Slack
|
||||
- **Per-thread sessions for DMs by default** ([#10987](https://github.com/NousResearch/hermes-agent/pull/10987))
|
||||
|
||||
### BlueBubbles (iMessage)
|
||||
- Group chat session separation, webhook registration & auth fixes ([#9806](https://github.com/NousResearch/hermes-agent/pull/9806))
|
||||
|
||||
### Gateway Core
|
||||
- **Gateway proxy mode** — forward messages to a remote API server ([#9787](https://github.com/NousResearch/hermes-agent/pull/9787))
|
||||
- **Per-channel ephemeral prompts** (Discord, Telegram, Slack, Mattermost) ([#10564](https://github.com/NousResearch/hermes-agent/pull/10564))
|
||||
- **Surface plugin slash commands** natively on all platforms + decision-capable command hook ([#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
|
||||
- **Support document/archive extensions in MEDIA: tag extraction** (salvage #8255) ([#14307](https://github.com/NousResearch/hermes-agent/pull/14307))
|
||||
- **Recognize `.pdf` in MEDIA: tag extraction** ([#13683](https://github.com/NousResearch/hermes-agent/pull/13683))
|
||||
- **`--all` flag for `gateway start` and `restart`** ([#10043](https://github.com/NousResearch/hermes-agent/pull/10043))
|
||||
- **Notify active sessions on gateway shutdown** + update health check ([#9850](https://github.com/NousResearch/hermes-agent/pull/9850))
|
||||
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
|
||||
- Fix: suppress duplicate replies on interrupt and streaming flood control ([#10235](https://github.com/NousResearch/hermes-agent/pull/10235))
|
||||
- Fix: close temporary agents after one-off tasks ([#11028](https://github.com/NousResearch/hermes-agent/pull/11028), @kshitijk4poor)
|
||||
- Fix: busy-session ack when user messages during active agent run ([#10068](https://github.com/NousResearch/hermes-agent/pull/10068))
|
||||
- Fix: route watch-pattern notifications to the originating session ([#10460](https://github.com/NousResearch/hermes-agent/pull/10460))
|
||||
- Fix: preserve notify context in executor threads ([#10921](https://github.com/NousResearch/hermes-agent/pull/10921), @kshitijk4poor)
|
||||
- Fix: avoid duplicate replies after interrupted long tasks ([#11018](https://github.com/NousResearch/hermes-agent/pull/11018))
|
||||
- Fix: unlink stale PID + lock files on cleanup
|
||||
- Fix: force-unlink stale PID file after `--replace` takeover
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Tool System
|
||||
|
||||
### Plugin Surface (major expansion)
|
||||
- **`register_command()`** — plugins can now add slash commands ([#10626](https://github.com/NousResearch/hermes-agent/pull/10626))
|
||||
- **`dispatch_tool()`** — plugins can invoke tools from their code ([#10763](https://github.com/NousResearch/hermes-agent/pull/10763))
|
||||
- **`pre_tool_call` blocking** — plugins can veto tool execution ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377))
|
||||
- **`transform_tool_result`** — plugins rewrite tool results generically ([#12972](https://github.com/NousResearch/hermes-agent/pull/12972))
|
||||
- **`transform_terminal_output`** — plugins rewrite terminal tool output ([#12929](https://github.com/NousResearch/hermes-agent/pull/12929))
|
||||
- **Namespaced skill registration** for plugin skill bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
|
||||
- **Opt-in-by-default + bundled disk-cleanup plugin** (salvage #12212) ([#12944](https://github.com/NousResearch/hermes-agent/pull/12944))
|
||||
- **Pluggable `image_gen` backends + OpenAI provider** ([#13799](https://github.com/NousResearch/hermes-agent/pull/13799))
|
||||
- **`openai-codex` image_gen plugin** (gpt-image-2 via Codex OAuth) ([#14317](https://github.com/NousResearch/hermes-agent/pull/14317))
|
||||
- **Shell hooks** — wire shell scripts as hook callbacks ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
|
||||
|
||||
### Browser
|
||||
- **`browser_cdp` raw DevTools Protocol passthrough** ([#12369](https://github.com/NousResearch/hermes-agent/pull/12369))
|
||||
- Camofox hardening + connection stability across the window
|
||||
|
||||
### Execute Code
|
||||
- **Project/strict execution modes** (default: project) ([#11971](https://github.com/NousResearch/hermes-agent/pull/11971))
|
||||
|
||||
### Image Generation
|
||||
- **Multi-model FAL support** with picker in `hermes tools` ([#11265](https://github.com/NousResearch/hermes-agent/pull/11265))
|
||||
- **Recraft V3 → V4 Pro, Nano Banana → Pro upgrades** ([#11406](https://github.com/NousResearch/hermes-agent/pull/11406))
|
||||
- **GPT Image 2** in FAL catalog ([#13677](https://github.com/NousResearch/hermes-agent/pull/13677))
|
||||
- **xAI image generation provider** (grok-imagine-image) ([#14765](https://github.com/NousResearch/hermes-agent/pull/14765))
|
||||
|
||||
### TTS / STT / Voice
|
||||
- **Google Gemini TTS provider** ([#11229](https://github.com/NousResearch/hermes-agent/pull/11229))
|
||||
- **xAI Grok STT provider** ([#14473](https://github.com/NousResearch/hermes-agent/pull/14473))
|
||||
- **xAI TTS** (shipped with Responses API upgrade) ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
|
||||
- **KittenTTS local provider** (salvage of #2109) ([#13395](https://github.com/NousResearch/hermes-agent/pull/13395))
|
||||
- **CLI record beep toggle** ([#13247](https://github.com/NousResearch/hermes-agent/pull/13247), @helix4u)
|
||||
|
||||
### Webhook / Cron
|
||||
- **Webhook direct-delivery mode** — zero-LLM push notifications ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
|
||||
- **Cron `wakeAgent` gate** — scripts can skip the agent entirely ([#12373](https://github.com/NousResearch/hermes-agent/pull/12373))
|
||||
- **Cron per-job `enabled_toolsets`** — cap token overhead + cost per job ([#14767](https://github.com/NousResearch/hermes-agent/pull/14767))
|
||||
|
||||
### Delegate
|
||||
- **Orchestrator role** + configurable spawn depth (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
|
||||
- **Cross-agent file state coordination** ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
|
||||
|
||||
### File / Patch
|
||||
- **`patch` — "did you mean?" feedback** when patch fails to match ([#13435](https://github.com/NousResearch/hermes-agent/pull/13435))
|
||||
|
||||
### API Server
|
||||
- **Stream `/v1/responses` SSE tool events** (salvage #9779) ([#10049](https://github.com/NousResearch/hermes-agent/pull/10049))
|
||||
- **Inline image inputs** on `/v1/chat/completions` and `/v1/responses` ([#12969](https://github.com/NousResearch/hermes-agent/pull/12969))
|
||||
|
||||
### Docker / Podman
|
||||
- **Entry-level Podman support** — `find_docker()` + rootless entrypoint ([#10066](https://github.com/NousResearch/hermes-agent/pull/10066))
|
||||
- **Add docker-cli to Docker image** (salvage #10096) ([#14232](https://github.com/NousResearch/hermes-agent/pull/14232))
|
||||
- **File-sync back to host on teardown** (salvage of #8189 + hardening) ([#11291](https://github.com/NousResearch/hermes-agent/pull/11291))
|
||||
|
||||
### MCP
|
||||
- 12 MCP improvements across the window (status, timeout handling, tool-call forwarding, etc.)
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Skills Ecosystem
|
||||
|
||||
### Skill System
|
||||
- **Namespaced skill registration** for plugin bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
|
||||
- **`hermes skills reset`** to un-stick bundled skills ([#11468](https://github.com/NousResearch/hermes-agent/pull/11468))
|
||||
- **Skills guard opt-in** — `config.skills.guard_agent_created` (default off) ([#14557](https://github.com/NousResearch/hermes-agent/pull/14557))
|
||||
- **Bundled skill scripts runnable out of the box** ([#13384](https://github.com/NousResearch/hermes-agent/pull/13384))
|
||||
- **`xitter` replaced with `xurl`** — the official X API CLI ([#12303](https://github.com/NousResearch/hermes-agent/pull/12303))
|
||||
- **MiniMax-AI/cli as default skill tap** (salvage #7501) ([#14493](https://github.com/NousResearch/hermes-agent/pull/14493))
|
||||
- **Fuzzy `@` file completions + mtime sorting** ([#9467](https://github.com/NousResearch/hermes-agent/pull/9467))
|
||||
|
||||
### New Skills
|
||||
- **concept-diagrams** (salvage of #11045, @v1k22) ([#11363](https://github.com/NousResearch/hermes-agent/pull/11363))
|
||||
- **architecture-diagram** (Cocoon AI port) ([#9906](https://github.com/NousResearch/hermes-agent/pull/9906))
|
||||
- **pixel-art** with hardware palettes and video animation ([#12663](https://github.com/NousResearch/hermes-agent/pull/12663), [#12725](https://github.com/NousResearch/hermes-agent/pull/12725))
|
||||
- **baoyu-comic** ([#13257](https://github.com/NousResearch/hermes-agent/pull/13257), @JimLiu)
|
||||
- **baoyu-infographic** — 21 layouts × 21 styles (salvage #9901) ([#12254](https://github.com/NousResearch/hermes-agent/pull/12254))
|
||||
- **page-agent** — embed Alibaba's in-page GUI agent in your webapp ([#13976](https://github.com/NousResearch/hermes-agent/pull/13976))
|
||||
- **fitness-nutrition** optional skill + optional env var support ([#9355](https://github.com/NousResearch/hermes-agent/pull/9355))
|
||||
- **drug-discovery** — ChEMBL, PubChem, OpenFDA, ADMET ([#9443](https://github.com/NousResearch/hermes-agent/pull/9443))
|
||||
- **touchdesigner-mcp** (salvage of #10081) ([#12298](https://github.com/NousResearch/hermes-agent/pull/12298))
|
||||
- **adversarial-ux-test** optional skill (salvage of #2494, @omnissiah-comelse) ([#13425](https://github.com/NousResearch/hermes-agent/pull/13425))
|
||||
- **maps** — added `guest_house`, `camp_site`, and dual-key bakery lookup ([#13398](https://github.com/NousResearch/hermes-agent/pull/13398))
|
||||
- **llm-wiki** — port provenance markers, source hashing, and quality signals ([#13700](https://github.com/NousResearch/hermes-agent/pull/13700))
|
||||
|
||||
---
|
||||
|
||||
## 📊 Web Dashboard
|
||||
|
||||
- **i18n (English + Chinese) language switcher** ([#9453](https://github.com/NousResearch/hermes-agent/pull/9453))
|
||||
- **Live-switching theme system** ([#10687](https://github.com/NousResearch/hermes-agent/pull/10687))
|
||||
- **Dashboard plugin system** — extend the web UI with custom tabs ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951))
|
||||
- **react-router, sidebar layout, sticky header, dropdown component** ([#9370](https://github.com/NousResearch/hermes-agent/pull/9370), @austinpickett)
|
||||
- **Responsive for mobile** ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), @DeployFaith)
|
||||
- **Vercel deployment** ([#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#11061](https://github.com/NousResearch/hermes-agent/pull/11061), @austinpickett)
|
||||
- **Context window config support** ([#9357](https://github.com/NousResearch/hermes-agent/pull/9357))
|
||||
- **HTTP health probe for cross-container gateway detection** ([#9894](https://github.com/NousResearch/hermes-agent/pull/9894))
|
||||
- **Update + restart gateway buttons** ([#13526](https://github.com/NousResearch/hermes-agent/pull/13526), @austinpickett)
|
||||
- **Real API call count per session** (salvages #10140) ([#14004](https://github.com/NousResearch/hermes-agent/pull/14004))
|
||||
|
||||
---
|
||||
|
||||
## 🖱️ CLI & User Experience
|
||||
|
||||
- **Dynamic shell completion for bash, zsh, and fish** ([#9785](https://github.com/NousResearch/hermes-agent/pull/9785))
|
||||
- **Light-mode skins + skin-aware completion menus** ([#9461](https://github.com/NousResearch/hermes-agent/pull/9461))
|
||||
- **Numbered keyboard shortcuts** on approval and clarify prompts ([#13416](https://github.com/NousResearch/hermes-agent/pull/13416))
|
||||
- **Markdown stripping, compact multiline previews, external editor** ([#12934](https://github.com/NousResearch/hermes-agent/pull/12934))
|
||||
- **`--ignore-user-config` and `--ignore-rules` flags** (port codex#18646) ([#14277](https://github.com/NousResearch/hermes-agent/pull/14277))
|
||||
- **Account limits section in `/usage`** ([#13428](https://github.com/NousResearch/hermes-agent/pull/13428))
|
||||
- **Doctor: Command Installation check** for `hermes` bin symlink ([#10112](https://github.com/NousResearch/hermes-agent/pull/10112))
|
||||
- **ESC cancels secret/sudo prompts**, clearer skip messaging ([#9902](https://github.com/NousResearch/hermes-agent/pull/9902))
|
||||
- Fix: agent-facing text uses `display_hermes_home()` instead of hardcoded `~/.hermes` ([#10285](https://github.com/NousResearch/hermes-agent/pull/10285))
|
||||
- Fix: enforce `config.yaml` as sole CWD source + deprecate `.env` CWD vars + add `hermes memory reset` ([#11029](https://github.com/NousResearch/hermes-agent/pull/11029))
|
||||
|
||||
---
|
||||
|
||||
## 🔒 Security & Reliability
|
||||
|
||||
- **Global toggle to allow private/internal URL resolution** ([#14166](https://github.com/NousResearch/hermes-agent/pull/14166))
|
||||
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
|
||||
- **Telegram callback authorization** on update prompts ([#10536](https://github.com/NousResearch/hermes-agent/pull/10536))
|
||||
- **SECURITY.md** added ([#10532](https://github.com/NousResearch/hermes-agent/pull/10532), @I3eg1nner)
|
||||
- **Warn about legacy hermes.service units** during `hermes update` ([#11918](https://github.com/NousResearch/hermes-agent/pull/11918))
|
||||
- **Complete ASCII-locale UnicodeEncodeError recovery** for `api_messages`/`reasoning_content` (closes #6843) ([#10537](https://github.com/NousResearch/hermes-agent/pull/10537))
|
||||
- **Prevent stale `os.environ` leak** after `clear_session_vars` ([#10527](https://github.com/NousResearch/hermes-agent/pull/10527))
|
||||
- **Prevent agent hang when backgrounding processes** via terminal tool ([#10584](https://github.com/NousResearch/hermes-agent/pull/10584))
|
||||
- Many smaller session-resume, interrupt, streaming, and memory-race fixes throughout the window
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Notable Bug Fixes
|
||||
|
||||
The `fix:` category in this window covers 482 PRs. Highlights:
|
||||
|
||||
- Streaming cursor artifacts filtered from Matrix, Telegram, WhatsApp, Discord (multiple PRs)
|
||||
- `<think>` and `<thought>` blocks filtered from gateway stream consumers ([#9408](https://github.com/NousResearch/hermes-agent/pull/9408))
|
||||
- Gateway display.streaming root-config override regression ([#9799](https://github.com/NousResearch/hermes-agent/pull/9799))
|
||||
- Context `session_search` coerces limit to int (prevents TypeError) ([#10522](https://github.com/NousResearch/hermes-agent/pull/10522))
|
||||
- Memory tool stays available when `fcntl` is unavailable (Windows) ([#9783](https://github.com/NousResearch/hermes-agent/pull/9783))
|
||||
- Trajectory compressor credentials load from `HERMES_HOME/.env` ([#9632](https://github.com/NousResearch/hermes-agent/pull/9632), @Dusk1e)
|
||||
- `@_context_completions` no longer crashes on `@` mention ([#9683](https://github.com/NousResearch/hermes-agent/pull/9683), @kshitijk4poor)
|
||||
- Group session `user_id` no longer treated as `thread_id` in shutdown notifications ([#10546](https://github.com/NousResearch/hermes-agent/pull/10546))
|
||||
- Telegram `platform_hint` — markdown is supported (closes #8261) ([#10612](https://github.com/NousResearch/hermes-agent/pull/10612))
|
||||
- Doctor checks for Kimi China credentials fixed
|
||||
- Streaming: don't suppress final response when commentary message is sent ([#10540](https://github.com/NousResearch/hermes-agent/pull/10540))
|
||||
- Rapid Telegram follow-ups no longer get cut off
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing & CI
|
||||
|
||||
- **Contributor attribution CI check** on PRs ([#9376](https://github.com/NousResearch/hermes-agent/pull/9376))
|
||||
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
|
||||
- Test count stabilized post-Transport refactor; CI matrix held green through the transport rollout
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
- Atropos + wandb links in user guide
|
||||
- ACP / VS Code / Zed / JetBrains integration docs refresh
|
||||
- Webhook subscription docs updated for direct-delivery mode
|
||||
- Plugin author guide expanded for new hooks (`register_command`, `dispatch_tool`, `transform_tool_result`)
|
||||
- Transport layer developer guide added
|
||||
- Website removed Discussions link from README
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
### Core
|
||||
- **@teknium1** (Teknium)
|
||||
|
||||
### Top Community Contributors (by merged PR count)
|
||||
- **@kshitijk4poor** — 49 PRs · Transport refactor (AnthropicTransport, ResponsesApiTransport), Step Plan provider, Xiaomi MiMo v2.5 support, numerous gateway fixes, promoted Kimi K2.5, @ mention crash fix
|
||||
- **@OutThisLife** (Brooklyn) — 31 PRs · TUI polish, git branch in status bar, per-turn stopwatch, stable picker keys, `/clear` confirm, light-theme preset, subagent spawn observability overlay
|
||||
- **@helix4u** — 11 PRs · Voice CLI record beep, MCP tool interrupt handling, assorted stability fixes
|
||||
- **@austinpickett** — 8 PRs · Dashboard react-router + sidebar + sticky header + dropdown, Vercel deployment, update + restart buttons
|
||||
- **@alt-glitch** — 8 PRs · PLATFORM_HINTS for Matrix/Mattermost/Feishu, Matrix fixes
|
||||
- **@ethernet8023** — 3 PRs
|
||||
- **@benbarclay** — 3 PRs
|
||||
- **@Aslaaen** — 2 PRs
|
||||
|
||||
### Also contributing
|
||||
@jerilynzheng (ai-gateway pricing), @JimLiu (baoyu-comic skill), @Dusk1e (trajectory compressor credentials), @DeployFaith (mobile-responsive dashboard), @LeonSGP43, @v1k22 (concept-diagrams), @omnissiah-comelse (adversarial-ux-test), @coekfung (Telegram MarkdownV2 expandable blockquotes), @liftaris (TUI provider resolution), @arihantsethia (skill analytics dashboard), @topcheer + @xing8star (QQBot foundation), @kovyrin, @I3eg1nner (SECURITY.md), @PeterBerthelsen, @lengxii, @priveperfumes, @sjz-ks, @cuyua9, @Disaster-Terminator, @leozeli, @LehaoLin, @trevthefoolish, @loongfay, @MrNiceRicee, @WideLee, @bluefishs, @malaiwah, @bobashopcashier, @dsocolobsky, @iamagenius00, @IAvecilla, @aniruddhaadak80, @Es1la, @asheriif, @walli, @jquesnelle (original Tool Gateway work).
|
||||
|
||||
### All Contributors (alphabetical)
|
||||
|
||||
@0xyg3n, @10ishq, @A-afflatus, @Abnertheforeman, @admin28980, @adybag14-cyber, @akhater, @alexzhu0,
|
||||
@AllardQuek, @alt-glitch, @aniruddhaadak80, @anna-oake, @anniesurla, @anthhub, @areu01or00, @arihantsethia,
|
||||
@arthurbr11, @asheriif, @Aslaaen, @Asunfly, @austinpickett, @AviArora02-commits, @AxDSan, @azhengbot, @Bartok9,
|
||||
@benbarclay, @bennytimz, @bernylinville, @bingo906, @binhnt92, @bkadish, @bluefishs, @bobashopcashier,
|
||||
@brantzh6, @BrennerSpear, @brianclemens, @briandevans, @brooklynnicholson, @bugkill3r, @buray, @burtenshaw,
|
||||
@cdanis, @cgarwood82, @ChimingLiu, @chongweiliu, @christopherwoodall, @coekfung, @cola-runner, @corazzione,
|
||||
@counterposition, @cresslank, @cuyua9, @cypres0099, @danieldoderlein, @davetist, @davidvv, @DeployFaith,
|
||||
@Dev-Mriganka, @devorun, @dieutx, @Disaster-Terminator, @dodo-reach, @draix, @DrStrangerUJN, @dsocolobsky,
|
||||
@Dusk1e, @dyxushuai, @elkimek, @elmatadorgh, @emozilla, @entropidelic, @Erosika, @erosika, @Es1la, @etcircle,
|
||||
@etherman-os, @ethernet8023, @fancydirty, @farion1231, @fatinghenji, @Fatty911, @fengtianyu88, @Feranmi10,
|
||||
@flobo3, @francip, @fuleinist, @g-guthrie, @GenKoKo, @gianfrancopiana, @gnanam1990, @GuyCui, @haileymarshall,
|
||||
@haimu0x, @handsdiff, @hansnow, @hedgeho9X, @helix4u, @hengm3467, @HenkDz, @heykb, @hharry11, @HiddenPuppy,
|
||||
@honghua, @houko, @houziershi, @hsy5571616, @huangke19, @hxp-plus, @Hypn0sis, @I3eg1nner, @iacker,
|
||||
@iamagenius00, @IAvecilla, @iborazzi, @Ifkellx, @ifrederico, @imink, @isaachuangGMICLOUD, @ismell0992-afk,
|
||||
@j0sephz, @Jaaneek, @jackjin1997, @JackTheGit, @jaffarkeikei, @jerilynzheng, @JiaDe-Wu, @Jiawen-lee, @JimLiu,
|
||||
@jinzheng8115, @jneeee, @jplew, @jquesnelle, @Julientalbot, @Junass1, @jvcl, @kagura-agent, @keifergu,
|
||||
@kevinskysunny, @keyuyuan, @konsisumer, @kovyrin, @kshitijk4poor, @leeyang1990, @LehaoLin, @lengxii,
|
||||
@LeonSGP43, @leozeli, @li0near, @liftaris, @Lind3ey, @Linux2010, @liujinkun2025, @LLQWQ, @Llugaes, @lmoncany,
|
||||
@longsizhuo, @lrawnsley, @Lubrsy706, @lumenradley, @luyao618, @lvnilesh, @LVT382009, @m0n5t3r, @Magaav,
|
||||
@MagicRay1217, @malaiwah, @manuelschipper, @Marvae, @MassiveMassimo, @mavrickdeveloper, @maxchernin, @memosr,
|
||||
@meng93, @mengjian-github, @MestreY0d4-Uninter, @Mibayy, @MikeFac, @mikewaters, @milkoor, @minorgod,
|
||||
@MrNiceRicee, @ms-alan, @mvanhorn, @n-WN, @N0nb0at, @Nan93, @NIDNASSER-Abdelmajid, @nish3451, @niyoh120,
|
||||
@nocoo, @nosleepcassette, @NousResearch, @ogzerber, @omnissiah-comelse, @Only-Code-A, @opriz, @OwenYWT, @pedh,
|
||||
@pefontana, @PeterBerthelsen, @phpoh, @pinion05, @plgonzalezrx8, @pradeep7127, @priveperfumes,
|
||||
@projectadmin-dev, @PStarH, @rnijhara, @Roy-oss1, @roytian1217, @RucchiZ, @Ruzzgar, @RyanLee-Dev, @Salt-555,
|
||||
@Sanjays2402, @sgaofen, @sharziki, @shenuu, @shin4, @SHL0MS, @shushuzn, @sicnuyudidi, @simon-gtcl,
|
||||
@simon-marcus, @sirEven, @Sisyphus, @sjz-ks, @snreynolds, @Societus, @Somme4096, @sontianye, @sprmn24,
|
||||
@StefanIsMe, @stephenschoettler, @Swift42, @taeng0204, @taeuk178, @tannerfokkens-maker, @TaroballzChen,
|
||||
@ten-ltw, @teyrebaz33, @Tianworld, @topcheer, @Tranquil-Flow, @trevthefoolish, @TroyMitchell911, @UNLINEARITY,
|
||||
@v1k22, @vivganes, @vominh1919, @vrinek, @VTRiot, @WadydX, @walli, @wenhao7, @WhiteWorld, @WideLee, @wujhsu,
|
||||
@WuTianyi123, @Wysie, @xandersbell, @xiaoqiang243, @xiayh0107, @xinpengdr, @Xowiek, @ycbai, @yeyitech, @ygd58,
|
||||
@youngDoo, @yudaiyan, @Yukipukii1, @yule975, @yyq4193, @yzx9, @ZaynJarvis, @zhang9w0v5, @zhanggttry,
|
||||
@zhangxicen, @zhongyueming1121, @zhouxiaoya12, @zons-zhaozhy
|
||||
|
||||
Also: @maelrx, @Marco Rutsch, @MaxsolcuCrypto, @Mind-Dragon, @Paul Bergeron, @say8hi, @whitehatjr1001.
|
||||
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.4.13...v2026.4.23](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.23)
|
||||
@@ -1,505 +0,0 @@
|
||||
# Hermes Agent v0.12.0 (v2026.4.30)
|
||||
|
||||
**Release Date:** April 30, 2026
|
||||
**Since v0.11.0:** 1,096 commits · 550 merged PRs · 1,270 files changed · 217,776 insertions · 213 community contributors (including co-authors)
|
||||
|
||||
> The Curator release — Hermes Agent now maintains itself. An autonomous background Curator grades, prunes, and consolidates your skill library on its own schedule. The self-improvement loop that reviews what to save got a substantial upgrade. Four new inference providers, a 18th messaging platform, a 19th via Teams plugin, native Spotify + Google Meet integrations, ComfyUI and TouchDesigner-MCP moved from optional to bundled-by-default, and a ~57% cut to visible TUI cold start.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **Autonomous Curator** — `hermes curator` runs as a background agent on the gateway's cron ticker (7-day cycle default). It grades your skill library, consolidates related skills, prunes dead ones, and writes per-run reports to `logs/curator/run.json` + `REPORT.md`. Archived skills are classified consolidated-vs-pruned via model + heuristic. Defense-in-depth gates protect bundled/hub skills from mutation. Unified under `auxiliary.curator` — pick the curator's model in `hermes model`, manage it from the dashboard. `hermes curator status` ranks skills by usage (most-used / least-used). ([#17277](https://github.com/NousResearch/hermes-agent/pull/17277), [#17307](https://github.com/NousResearch/hermes-agent/pull/17307), [#17941](https://github.com/NousResearch/hermes-agent/pull/17941), [#17868](https://github.com/NousResearch/hermes-agent/pull/17868), [#18033](https://github.com/NousResearch/hermes-agent/pull/18033))
|
||||
|
||||
- **Self-improvement loop — substantially upgraded** — The background review fork (the core of Hermes' self-improvement: after each turn it decides what memories/skills to save or update) is now class-first (rubric-based rather than free-form), active-update biased (prefers the skill the agent just loaded), handles `references/`/`templates/` sub-files, and properly inherits the parent's live runtime (provider, model, credentials actually propagate). Restricted to memory + skills toolsets so it can't sprawl. Memory providers shut down cleanly. Prior-turn tool messages excluded from the summary so the fork sees a clean context. ([#16026](https://github.com/NousResearch/hermes-agent/pull/16026), [#17213](https://github.com/NousResearch/hermes-agent/pull/17213), [#16099](https://github.com/NousResearch/hermes-agent/pull/16099), [#16569](https://github.com/NousResearch/hermes-agent/pull/16569), [#16204](https://github.com/NousResearch/hermes-agent/pull/16204), [#15057](https://github.com/NousResearch/hermes-agent/pull/15057))
|
||||
|
||||
- **Skill integrations — major expansion** — **ComfyUI v5** with official CLI + REST + hardware-gated local install, moved from optional to **built-in by default** ([#17610](https://github.com/NousResearch/hermes-agent/pull/17610), [#17631](https://github.com/NousResearch/hermes-agent/pull/17631), [#17734](https://github.com/NousResearch/hermes-agent/pull/17734)). **TouchDesigner-MCP** bundled by default, expanded with GLSL, post-FX, audio, geometry, and 9 new reference docs ([#16753](https://github.com/NousResearch/hermes-agent/pull/16753), [#16624](https://github.com/NousResearch/hermes-agent/pull/16624), [#16768](https://github.com/NousResearch/hermes-agent/pull/16768) — @kshitijk4poor + @SHL0MS). **Humanizer** skill ports a text-cleaner that strips AI-isms ([#16787](https://github.com/NousResearch/hermes-agent/pull/16787)). **claude-design** HTML artifact skill + design-md (Google DESIGN.md spec) + airtable salvage + `skill_manage` edits in `external_dirs` + direct-URL skill install + `/reload-skills` slash command. ([#16358](https://github.com/NousResearch/hermes-agent/pull/16358), [#14876](https://github.com/NousResearch/hermes-agent/pull/14876), [#16291](https://github.com/NousResearch/hermes-agent/pull/16291), [#17512](https://github.com/NousResearch/hermes-agent/pull/17512), [#16323](https://github.com/NousResearch/hermes-agent/pull/16323), [#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
|
||||
|
||||
- **LM Studio — first-class provider** — upgraded from a custom-endpoint alias to a full-blown native provider: dedicated auth, `hermes doctor` checks, reasoning transport, live `/models` listing. (Salvage of @kshitijk4poor's #17061.) ([#17102](https://github.com/NousResearch/hermes-agent/pull/17102))
|
||||
|
||||
- **Four more new inference providers** — **GMI Cloud** (first-class, salvage of #11955 — @isaachuangGMICLOUD), **Azure AI Foundry** with auto-detection, **MiniMax OAuth** with PKCE browser flow (salvage #15203), **Tencent Tokenhub** (salvage of #16860). ([#16663](https://github.com/NousResearch/hermes-agent/pull/16663), [#15845](https://github.com/NousResearch/hermes-agent/pull/15845), [#17524](https://github.com/NousResearch/hermes-agent/pull/17524), [#16960](https://github.com/NousResearch/hermes-agent/pull/16960))
|
||||
|
||||
- **Pluggable gateway platforms + Microsoft Teams** — the gateway is now a plugin host. Drop-in messaging adapters live outside the core, and Microsoft Teams is the first plugin-shipped platform. (Salvage of #17664.) ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751), [#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
|
||||
- **Tencent 元宝 (Yuanbao) — 18th messaging platform** — native gateway adapter with text + media delivery. ([#16298](https://github.com/NousResearch/hermes-agent/pull/16298), [#17424](https://github.com/NousResearch/hermes-agent/pull/17424))
|
||||
|
||||
- **Spotify — native tools + bundled skill + wizard** — 7 tools (play, search, queue, playlists, devices) behind PKCE OAuth, interactive setup wizard, bundled skill, surfacing in `hermes tools`, cron usage documented. ([#15121](https://github.com/NousResearch/hermes-agent/pull/15121), [#15130](https://github.com/NousResearch/hermes-agent/pull/15130), [#15154](https://github.com/NousResearch/hermes-agent/pull/15154), [#15180](https://github.com/NousResearch/hermes-agent/pull/15180))
|
||||
|
||||
- **Google Meet plugin** — join calls, transcribe, speak, follow up. Realtime OpenAI transport + Node bot server, full pipeline bundled as a plugin. ([#16364](https://github.com/NousResearch/hermes-agent/pull/16364))
|
||||
|
||||
- **`hermes -z` one-shot mode + `hermes update --check`** — non-interactive `hermes -z <prompt>` with `--model`/`--provider`/`HERMES_INFERENCE_MODEL`. `hermes update --check` preflight. Opt-in pre-update HERMES_HOME backup. ([#15702](https://github.com/NousResearch/hermes-agent/pull/15702), [#15704](https://github.com/NousResearch/hermes-agent/pull/15704), [#15841](https://github.com/NousResearch/hermes-agent/pull/15841), [#16539](https://github.com/NousResearch/hermes-agent/pull/16539), [#16566](https://github.com/NousResearch/hermes-agent/pull/16566))
|
||||
|
||||
- **Models dashboard tab + in-browser model config** — rich per-model analytics, switch main + auxiliary models from the dashboard. ([#17745](https://github.com/NousResearch/hermes-agent/pull/17745), [#17802](https://github.com/NousResearch/hermes-agent/pull/17802))
|
||||
|
||||
- **Remote model catalog manifest** — OpenRouter + Nous Portal model catalogs are now pulled from a remote manifest so new models show up without a release. ([#16033](https://github.com/NousResearch/hermes-agent/pull/16033))
|
||||
|
||||
- **Native multimodal image routing** — images now route based on the model's actual vision capability rather than provider defaults. ([#16506](https://github.com/NousResearch/hermes-agent/pull/16506))
|
||||
|
||||
- **Gateway media parity** — native multi-image sending across Telegram, Discord, Slack, Mattermost, Email, and Signal; centralized audio routing with FLAC support + Telegram document fallback. ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909), [#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
|
||||
|
||||
- **TUI catches up to (and past) the classic CLI** — LaTeX rendering (@austinpickett), `/reload` .env hot-reload, pluggable busy-indicator styles (@OutThisLife, #13610), opt-in auto-resume of last session, expanded light-terminal auto-detection, session delete from `/resume` picker with `d`, modified mouse-wheel line scroll, and a `/mouse` toggle that kills ConPTY's phantom mouse injection (@kevin-ho). ([#17175](https://github.com/NousResearch/hermes-agent/pull/17175), [#17286](https://github.com/NousResearch/hermes-agent/pull/17286), [#17150](https://github.com/NousResearch/hermes-agent/pull/17150), [#17130](https://github.com/NousResearch/hermes-agent/pull/17130), [#17113](https://github.com/NousResearch/hermes-agent/pull/17113), [#17668](https://github.com/NousResearch/hermes-agent/pull/17668), [#17669](https://github.com/NousResearch/hermes-agent/pull/17669), [#15488](https://github.com/NousResearch/hermes-agent/pull/15488))
|
||||
|
||||
- **Observability + achievements plugins** — bundled Langfuse observability plugin (salvage #16845) + bundled hermes-achievements plugin that scans full session history. ([#16917](https://github.com/NousResearch/hermes-agent/pull/16917), [#17754](https://github.com/NousResearch/hermes-agent/pull/17754))
|
||||
|
||||
- **TTS provider registry + Piper local TTS** — pluggable `tts.providers.<name>` registry; Piper ships as a native local TTS provider. (Closes #8508.) ([#17843](https://github.com/NousResearch/hermes-agent/pull/17843), [#17885](https://github.com/NousResearch/hermes-agent/pull/17885))
|
||||
|
||||
- **Vercel Sandbox backend** — Vercel sandboxes as an execute_code/terminal backend (@kshitijk4poor). ([#17445](https://github.com/NousResearch/hermes-agent/pull/17445))
|
||||
|
||||
- **Secret redaction off by default** — default flipped to off. Prevents the long-standing patch-corruption incidents where fake secret-shaped substrings mangled tool outputs. Opt in via `redaction.enabled: true` when you need it. ([#16794](https://github.com/NousResearch/hermes-agent/pull/16794))
|
||||
|
||||
- **Cold-start performance** — visible TUI cold start cut **~57%** via lazy agent init (@OutThisLife), lazy imports of OpenAI / Anthropic / Firecrawl / account_usage, mtime-cached `load_config()`, memoized `get_tool_definitions()` with TTL-cached `check_fn` results, precompiled dangerous-command patterns. ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190), [#17046](https://github.com/NousResearch/hermes-agent/pull/17046), [#17041](https://github.com/NousResearch/hermes-agent/pull/17041), [#17098](https://github.com/NousResearch/hermes-agent/pull/17098), [#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
|
||||
|
||||
- **Configurable prompt cache TTL** — `prompt_caching.cache_ttl` (5m default, 1h opt-in — cost savings for bursty sessions that keep cache warm). Salvage of #12659. ([#15065](https://github.com/NousResearch/hermes-agent/pull/15065))
|
||||
|
||||
---
|
||||
|
||||
## 🧠 Autonomous Curator & Self-Improvement Loop
|
||||
|
||||
### Curator — autonomous skill maintenance
|
||||
- **`hermes curator` as a background agent** — runs on the gateway's cron ticker, 7-day cycle by default, umbrella-first prompt, inherits parent config, unbounded iterations ([#17277](https://github.com/NousResearch/hermes-agent/pull/17277) — issue #7816)
|
||||
- **Per-run reports** — `logs/curator/run.json` + `REPORT.md` per cycle ([#17307](https://github.com/NousResearch/hermes-agent/pull/17307))
|
||||
- **Consolidated vs pruned classification** — archived skills split with model + heuristic ([#17941](https://github.com/NousResearch/hermes-agent/pull/17941))
|
||||
- **`hermes curator status`** — ranks skills by usage, shows most-used and least-used ([#18033](https://github.com/NousResearch/hermes-agent/pull/18033))
|
||||
- **Unified under `auxiliary.curator`** — pick the model in `hermes model`, configure from the dashboard ([#17868](https://github.com/NousResearch/hermes-agent/pull/17868))
|
||||
- **Documentation** — dedicated curator feature page on the docs site ([#17563](https://github.com/NousResearch/hermes-agent/pull/17563))
|
||||
- Fix: seed defaults on update, create `logs/curator/` directory, defer fire import ([#17927](https://github.com/NousResearch/hermes-agent/pull/17927))
|
||||
- Fix: scan nested archive subdirs in `restore_skill` (@0xDevNinja) ([#17951](https://github.com/NousResearch/hermes-agent/pull/17951))
|
||||
- Fix: use actual skill activity in curator status (@y0shua1ee) ([#17953](https://github.com/NousResearch/hermes-agent/pull/17953))
|
||||
- Fix: `skill_manage` refuses writes on pinned skills; pinning now blocks curator writes ([#17562](https://github.com/NousResearch/hermes-agent/pull/17562), [#17578](https://github.com/NousResearch/hermes-agent/pull/17578))
|
||||
- Fix: `bump_use()` wired into skill invocation + preload + skill_view (salvage #17782) ([#17932](https://github.com/NousResearch/hermes-agent/pull/17932))
|
||||
|
||||
### Self-improvement loop (background review fork)
|
||||
- **Class-first skill-review prompt** — rubric-based grading rather than free-form "should this update" ([#16026](https://github.com/NousResearch/hermes-agent/pull/16026))
|
||||
- **Active-update bias** — prefers updating skills the agent just loaded, handles `references/` + `templates/` sub-files ([#17213](https://github.com/NousResearch/hermes-agent/pull/17213))
|
||||
- **Fork inherits parent's live runtime** — provider, model, credentials actually propagate now ([#16099](https://github.com/NousResearch/hermes-agent/pull/16099))
|
||||
- **Scoped toolsets** — review fork restricted to memory + skills (no shell, no web) ([#16569](https://github.com/NousResearch/hermes-agent/pull/16569))
|
||||
- **Clean shutdown** — background review memory providers exit properly (salvage #15289) ([#16204](https://github.com/NousResearch/hermes-agent/pull/16204))
|
||||
- **Clean context** — prior-history tool messages excluded from review summary (salvage #14967) ([#15057](https://github.com/NousResearch/hermes-agent/pull/15057))
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Skills Ecosystem
|
||||
|
||||
### Skill integrations — newly bundled or promoted
|
||||
- **ComfyUI v5** — official CLI + REST + hardware-gated local install; **moved from optional to built-in** ([#17610](https://github.com/NousResearch/hermes-agent/pull/17610), [#17631](https://github.com/NousResearch/hermes-agent/pull/17631), [#17734](https://github.com/NousResearch/hermes-agent/pull/17734), [#17612](https://github.com/NousResearch/hermes-agent/pull/17612))
|
||||
- **TouchDesigner-MCP** — **bundled by default** ([#16753](https://github.com/NousResearch/hermes-agent/pull/16753) — @kshitijk4poor), expanded with GLSL, post-FX, audio, geometry references ([#16624](https://github.com/NousResearch/hermes-agent/pull/16624)), 9 new reference docs ([#16768](https://github.com/NousResearch/hermes-agent/pull/16768) — @SHL0MS)
|
||||
- **Humanizer** — strips AI-isms from text ([#16787](https://github.com/NousResearch/hermes-agent/pull/16787))
|
||||
- **claude-design** — HTML artifact skill with disambiguation from other design skills ([#16358](https://github.com/NousResearch/hermes-agent/pull/16358))
|
||||
- **design-md** — Google's DESIGN.md spec skill ([#14876](https://github.com/NousResearch/hermes-agent/pull/14876))
|
||||
- **airtable** — salvaged skill + skill API keys wired into `.env` (#15838) ([#16291](https://github.com/NousResearch/hermes-agent/pull/16291))
|
||||
- **pretext** — creative browser demos with @chenglou/pretext ([#17259](https://github.com/NousResearch/hermes-agent/pull/17259))
|
||||
- **spike** + **sketch** — throwaway experiments + HTML mockups, adapted from gsd-build ([#17421](https://github.com/NousResearch/hermes-agent/pull/17421))
|
||||
|
||||
### Skills UX
|
||||
- **Install skills from a direct HTTP(S) URL** — `hermes skills install <url>` ([#16323](https://github.com/NousResearch/hermes-agent/pull/16323))
|
||||
- **`/reload-skills`** slash command (salvage #17670) ([#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
|
||||
- **`hermes skills list`** shows enabled/disabled status ([#16129](https://github.com/NousResearch/hermes-agent/pull/16129))
|
||||
- **`skill_manage` refuses writes on pinned skills** ([#17562](https://github.com/NousResearch/hermes-agent/pull/17562))
|
||||
- **`skill_manage` edits external_dirs skills in place** (salvage #9966) ([#17512](https://github.com/NousResearch/hermes-agent/pull/17512), [#17289](https://github.com/NousResearch/hermes-agent/pull/17289))
|
||||
- Fix: inline-shell rendering in `skill_view` ([#15376](https://github.com/NousResearch/hermes-agent/pull/15376))
|
||||
- Fix: exclude `.archive/` from skill index walk (salvage #17639) ([#17931](https://github.com/NousResearch/hermes-agent/pull/17931))
|
||||
- Fix: dedicated docs page per bundled + optional skill ([#14929](https://github.com/NousResearch/hermes-agent/pull/14929))
|
||||
- Fix: `google-workspace` shared HERMES_HOME helper + ship deps as optional extra ([#15405](https://github.com/NousResearch/hermes-agent/pull/15405))
|
||||
- Fix: auto-wrap ASCII-art code blocks in generated skill pages ([#16497](https://github.com/NousResearch/hermes-agent/pull/16497))
|
||||
- Point agent at `hermes-agent` skill + docs site for Hermes questions ([#16535](https://github.com/NousResearch/hermes-agent/pull/16535))
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Core Agent & Architecture
|
||||
|
||||
### Provider & Model Support
|
||||
|
||||
#### New providers
|
||||
- **GMI Cloud** — first-class API-key provider on par with Arcee/Kilocode/Xiaomi (salvage of #11955 — @isaachuangGMICLOUD) ([#16663](https://github.com/NousResearch/hermes-agent/pull/16663))
|
||||
- **Azure AI Foundry** — auto-detection, full wiring ([#15845](https://github.com/NousResearch/hermes-agent/pull/15845))
|
||||
- **LM Studio** — upgraded from custom-endpoint alias to first-class provider: dedicated auth, doctor checks, reasoning transport, live `/models` (salvage of #17061 — @kshitijk4poor) ([#17102](https://github.com/NousResearch/hermes-agent/pull/17102))
|
||||
- **MiniMax OAuth** — PKCE browser flow with full OAuth integration (salvage #15203) ([#17524](https://github.com/NousResearch/hermes-agent/pull/17524))
|
||||
- **Tencent Tokenhub** — new provider (salvage of #16860) ([#16960](https://github.com/NousResearch/hermes-agent/pull/16960))
|
||||
|
||||
#### Model catalog
|
||||
- **Remote model catalog manifest** — OpenRouter + Nous Portal catalogs pulled from remote manifest so new models show up without a release ([#16033](https://github.com/NousResearch/hermes-agent/pull/16033))
|
||||
- `openai/gpt-5.5` and `gpt-5.5-pro` added to OpenRouter + Nous Portal ([#15343](https://github.com/NousResearch/hermes-agent/pull/15343))
|
||||
- `deepseek-v4-pro` and `deepseek-v4-flash` added ([#14934](https://github.com/NousResearch/hermes-agent/pull/14934))
|
||||
- `qwen3.6-plus` added to Alibaba-supported models ([#16896](https://github.com/NousResearch/hermes-agent/pull/16896))
|
||||
- Gemini free-tier keys blocked at setup with 429 guidance surfacing ([#15100](https://github.com/NousResearch/hermes-agent/pull/15100))
|
||||
|
||||
#### Model configuration
|
||||
- **Configurable `prompt_caching.cache_ttl`** — 5m default, 1h opt-in (salvage #12659) ([#15065](https://github.com/NousResearch/hermes-agent/pull/15065))
|
||||
- `/fast` whitelist broadened to all OpenAI + Anthropic models ([#16883](https://github.com/NousResearch/hermes-agent/pull/16883))
|
||||
- `auxiliary.extra_body.reasoning` translates into Codex Responses API ([#17004](https://github.com/NousResearch/hermes-agent/pull/17004))
|
||||
- `hermes fallback` command for managing fallback providers ([#16052](https://github.com/NousResearch/hermes-agent/pull/16052))
|
||||
|
||||
### Agent Loop & Conversation
|
||||
- **Native multimodal image routing** — based on model vision capability, not provider defaults ([#16506](https://github.com/NousResearch/hermes-agent/pull/16506))
|
||||
- **Delegate `child_timeout_seconds` default bumped to 600s** ([#14809](https://github.com/NousResearch/hermes-agent/pull/14809))
|
||||
- **Diagnostic dump when subagent times out with 0 API calls** ([#15105](https://github.com/NousResearch/hermes-agent/pull/15105))
|
||||
- **Gateway busts cached agent on compression/context_length config edits** ([#17008](https://github.com/NousResearch/hermes-agent/pull/17008))
|
||||
- **Opt-in runtime-metadata footer on final replies** ([#17026](https://github.com/NousResearch/hermes-agent/pull/17026))
|
||||
- `/reload-mcp` awareness — rebuild cached agents + prompt-cache cost confirmation ([#17729](https://github.com/NousResearch/hermes-agent/pull/17729))
|
||||
- Fix: repair CamelCase + `_tool` suffix tool-call emissions ([#15124](https://github.com/NousResearch/hermes-agent/pull/15124))
|
||||
- Fix: retry on `json.JSONDecodeError` instead of treating as local validation error ([#15107](https://github.com/NousResearch/hermes-agent/pull/15107))
|
||||
- Fix: handle unescaped control chars in `tool_call.arguments` ([#15356](https://github.com/NousResearch/hermes-agent/pull/15356))
|
||||
- Fix: ordering fix in `_copy_reasoning_content_for_api` — cross-provider reasoning isolation (@Zjianru) ([#15749](https://github.com/NousResearch/hermes-agent/pull/15749))
|
||||
- Fix: inject empty `reasoning_content` for DeepSeek/Kimi `tool_calls` unconditionally (@Zjianru) ([#15762](https://github.com/NousResearch/hermes-agent/pull/15762))
|
||||
- Fix: persist streamed `reasoning_content` on assistant turns (#16844) ([#16892](https://github.com/NousResearch/hermes-agent/pull/16892))
|
||||
- Fix: cancel coroutine on timeout so worker thread exits; full traceback on tool failure ([#17428](https://github.com/NousResearch/hermes-agent/pull/17428))
|
||||
- Fix: isolate `get_tool_definitions` quiet_mode cache + dedup LCM injection (#17335) ([#17889](https://github.com/NousResearch/hermes-agent/pull/17889))
|
||||
- Fix: serialize concurrent `hermes_tools` RPC calls from `execute_code` (#17770) ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
|
||||
- Fix: rename `[SYSTEM:` → `[IMPORTANT:` in all user-injected markers (dodges Azure content filter) ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
|
||||
|
||||
### Compression
|
||||
- **Retry summary on main model for unknown errors before giving up** ([#16774](https://github.com/NousResearch/hermes-agent/pull/16774))
|
||||
- **Notify users when configured aux model fails even if main-model fallback recovers** ([#16775](https://github.com/NousResearch/hermes-agent/pull/16775))
|
||||
- `/compress` wrapped in `_busy_command` to block input during compression ([#15388](https://github.com/NousResearch/hermes-agent/pull/15388))
|
||||
- Fix: reserve system + tools headroom when aux binds threshold ([#15631](https://github.com/NousResearch/hermes-agent/pull/15631))
|
||||
- Fix: use text-char sum for multimodal token estimation in `_find_tail_cut_by_tokens` ([#16369](https://github.com/NousResearch/hermes-agent/pull/16369))
|
||||
|
||||
### Session, Memory & State
|
||||
- **Trigram FTS5 index for CJK search, replace LIKE fallback** (@alt-glitch) ([#16651](https://github.com/NousResearch/hermes-agent/pull/16651))
|
||||
- **Index `tool_name` + `tool_calls` in FTS5, with repair + migration** (salvages #16866) ([#16914](https://github.com/NousResearch/hermes-agent/pull/16914))
|
||||
- **Checkpoints: auto-prune orphan and stale shadow repos at startup** ([#16303](https://github.com/NousResearch/hermes-agent/pull/16303))
|
||||
- **Memory providers notified on mid-process session_id rotation** (#6672) ([#17409](https://github.com/NousResearch/hermes-agent/pull/17409))
|
||||
- Fix: quote underscored terms in FTS5 query sanitization ([#16915](https://github.com/NousResearch/hermes-agent/pull/16915))
|
||||
- Fix: resolve viking_read 500/412 on file URIs + pseudo-summary URIs (salvage #5886) ([#17869](https://github.com/NousResearch/hermes-agent/pull/17869))
|
||||
- Fix: skip external-provider sync on interrupted turns ([#15395](https://github.com/NousResearch/hermes-agent/pull/15395))
|
||||
- Fix: close embedded Hindsight async client cleanly (salvage #14605) ([#16209](https://github.com/NousResearch/hermes-agent/pull/16209))
|
||||
- Fix: pass session transcript to `shutdown_memory_provider` on gateway + CLI (#15165) ([#16571](https://github.com/NousResearch/hermes-agent/pull/16571))
|
||||
- Fix: write-origin metadata seam ([#15346](https://github.com/NousResearch/hermes-agent/pull/15346))
|
||||
- Fix: preserve symlinks during atomic file writes ([#16980](https://github.com/NousResearch/hermes-agent/pull/16980))
|
||||
- Refactor: remove `flush_memories` entirely ([#15696](https://github.com/NousResearch/hermes-agent/pull/15696))
|
||||
|
||||
### Auxiliary models
|
||||
- Fix: surface auxiliary failures in UI (previously silent) ([#15324](https://github.com/NousResearch/hermes-agent/pull/15324))
|
||||
- Fix: surface title-gen auxiliary failures instead of silently dropping ([#16371](https://github.com/NousResearch/hermes-agent/pull/16371))
|
||||
- Fix: generalize unsupported-parameter detector and harden `max_tokens` retry ([#15633](https://github.com/NousResearch/hermes-agent/pull/15633))
|
||||
|
||||
---
|
||||
|
||||
## 📱 Messaging Platforms (Gateway)
|
||||
|
||||
### New Platforms
|
||||
- **Microsoft Teams (19th platform)** — as a plugin, + xdist collision guard ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
- **Yuanbao (Tencent 元宝, 18th platform)** — native adapter with text + media delivery ([#16298](https://github.com/NousResearch/hermes-agent/pull/16298), [#17424](https://github.com/NousResearch/hermes-agent/pull/17424), [#16880](https://github.com/NousResearch/hermes-agent/pull/16880))
|
||||
|
||||
### Pluggable Gateway Platforms
|
||||
- **Drop-in messaging adapters** — the gateway is now a plugin host for platforms (salvage of #17664) ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751))
|
||||
|
||||
### Telegram
|
||||
- **Chat allowlists for groups and forums** (@web3blind) ([#15027](https://github.com/NousResearch/hermes-agent/pull/15027))
|
||||
- **Send fresh finals for stale preview streams** (port openclaw#72038) ([#16261](https://github.com/NousResearch/hermes-agent/pull/16261))
|
||||
- **Render markdown tables as row-group bullets + prompt hint** ([#16997](https://github.com/NousResearch/hermes-agent/pull/16997))
|
||||
- Document fallback in centralized audio routing ([#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
|
||||
- Native multi-image sending ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
|
||||
### Discord
|
||||
- **Opt-in toolsets + ID injection + tool split + Feishu wiring** (salvage #15457, #15458) ([#15610](https://github.com/NousResearch/hermes-agent/pull/15610), [#15613](https://github.com/NousResearch/hermes-agent/pull/15613))
|
||||
- Fix: coerce `limit` parameter to int before `min()` call ([#16319](https://github.com/NousResearch/hermes-agent/pull/16319))
|
||||
|
||||
### Slack
|
||||
- **Register every gateway command as a native slash (Discord/Telegram parity)** ([#16164](https://github.com/NousResearch/hermes-agent/pull/16164))
|
||||
- **`strict_mention` config** — prevents thread auto-engagement ([#16193](https://github.com/NousResearch/hermes-agent/pull/16193))
|
||||
- **`channel_skill_bindings`** — bind specific skills to specific Slack channels ([#16283](https://github.com/NousResearch/hermes-agent/pull/16283))
|
||||
|
||||
### Signal
|
||||
- **Native formatting** — markdown → bodyRanges, reply quotes, reactions ([#17417](https://github.com/NousResearch/hermes-agent/pull/17417))
|
||||
- Native multi-image sending ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
|
||||
### Feishu / Mattermost / Email / Signal
|
||||
- All participate in **native multi-image sending** ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
|
||||
### Gateway Core
|
||||
- **Centralized audio routing + FLAC support + Telegram doc fallback** ([#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
|
||||
- **Native multi-image sending** across Telegram, Discord, Slack, Mattermost, Email, Signal ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
|
||||
- **Make hygiene hard message limit configurable** ([#17000](https://github.com/NousResearch/hermes-agent/pull/17000))
|
||||
- **Opt-in runtime-metadata footer on final replies** ([#17026](https://github.com/NousResearch/hermes-agent/pull/17026))
|
||||
- **`pre_gateway_dispatch` hook** — plugins can intercept before dispatch ([#15050](https://github.com/NousResearch/hermes-agent/pull/15050))
|
||||
- **`pre_approval_request` / `post_approval_response` hooks** ([#16776](https://github.com/NousResearch/hermes-agent/pull/16776))
|
||||
- Fix: timeouts — guard `load_config()` call against runtime exceptions ([#16318](https://github.com/NousResearch/hermes-agent/pull/16318))
|
||||
- Fix: support passing handler tools via registry ([#15613](https://github.com/NousResearch/hermes-agent/pull/15613))
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Tool System
|
||||
|
||||
### Plugin-first architecture
|
||||
- **Pluggable gateway platforms** — platforms can ship as plugins ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751))
|
||||
- **Microsoft Teams as first plugin-shipped platform** ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
- **`pre_gateway_dispatch` hook** ([#15050](https://github.com/NousResearch/hermes-agent/pull/15050))
|
||||
- **`pre_approval_request` + `post_approval_response` hooks** ([#16776](https://github.com/NousResearch/hermes-agent/pull/16776))
|
||||
- **`duration_ms` on `post_tool_call`** (inspired by Claude Code 2.1.119) ([#15429](https://github.com/NousResearch/hermes-agent/pull/15429))
|
||||
- **Bundled plugins**: Spotify ([#15174](https://github.com/NousResearch/hermes-agent/pull/15174)), Google Meet ([#16364](https://github.com/NousResearch/hermes-agent/pull/16364)), Langfuse observability ([#16917](https://github.com/NousResearch/hermes-agent/pull/16917)), hermes-achievements ([#17754](https://github.com/NousResearch/hermes-agent/pull/17754))
|
||||
- **Page-scoped plugin slots for built-in dashboard pages** ([#15658](https://github.com/NousResearch/hermes-agent/pull/15658))
|
||||
- **Declarative plugin installation for NixOS module** (@alt-glitch) ([#15953](https://github.com/NousResearch/hermes-agent/pull/15953))
|
||||
|
||||
### Browser
|
||||
- **CDP supervisor** — dialog detection + response + cross-origin iframe eval ([#14540](https://github.com/NousResearch/hermes-agent/pull/14540))
|
||||
- **Auto-spawn local Chromium for LAN/localhost URLs** when cloud provider is configured ([#16136](https://github.com/NousResearch/hermes-agent/pull/16136))
|
||||
|
||||
### Execute code / Terminal
|
||||
- **Vercel Sandbox backend** for `execute_code` / terminal (@kshitijk4poor) ([#17445](https://github.com/NousResearch/hermes-agent/pull/17445))
|
||||
- **Collapse subagent `task_id`s to shared container** ([#16177](https://github.com/NousResearch/hermes-agent/pull/16177))
|
||||
- **Docker: run container as host user** to avoid root-owned bind mounts (@benbarclay) ([#17305](https://github.com/NousResearch/hermes-agent/pull/17305))
|
||||
- Fix: safely quote `~/` subpaths in wrapped `cd` commands ([#15394](https://github.com/NousResearch/hermes-agent/pull/15394))
|
||||
- Fix: close file descriptor in `LocalEnvironment._update_cwd` ([#17300](https://github.com/NousResearch/hermes-agent/pull/17300))
|
||||
- Fix: SSH — prevent tar from overwriting remote home dir permissions ([#17898](https://github.com/NousResearch/hermes-agent/pull/17898), [#17867](https://github.com/NousResearch/hermes-agent/pull/17867))
|
||||
|
||||
### Image generation
|
||||
- See Provider section for updates; no new image providers this window.
|
||||
|
||||
### TTS / Voice
|
||||
- **Pluggable TTS provider registry** under `tts.providers.<name>` ([#17843](https://github.com/NousResearch/hermes-agent/pull/17843))
|
||||
- **Piper** as native local TTS provider (closes #8508) ([#17885](https://github.com/NousResearch/hermes-agent/pull/17885))
|
||||
- **Voice mode CLI parity in the TUI** — VAD loop + TTS + crash forensics ([#14810](https://github.com/NousResearch/hermes-agent/pull/14810))
|
||||
- Fix: vision — use HERMES_HOME-based cache dir instead of cwd ([#17719](https://github.com/NousResearch/hermes-agent/pull/17719))
|
||||
|
||||
### Cron
|
||||
- **Honor `hermes tools` config for the cron platform** ([#14798](https://github.com/NousResearch/hermes-agent/pull/14798))
|
||||
- **Per-job `workdir`** — project-aware cron runs ([#15110](https://github.com/NousResearch/hermes-agent/pull/15110))
|
||||
- **`context_from` field** — chain cron job outputs ([#15606](https://github.com/NousResearch/hermes-agent/pull/15606))
|
||||
- Fix: promote `croniter` to a core dependency ([#17577](https://github.com/NousResearch/hermes-agent/pull/17577))
|
||||
|
||||
### Web search
|
||||
- **Expose `limit` for `web_search`** ([#16934](https://github.com/NousResearch/hermes-agent/pull/16934))
|
||||
|
||||
### Maps
|
||||
- Fix: include seconds in timezone UTC offset output ([#16300](https://github.com/NousResearch/hermes-agent/pull/16300))
|
||||
|
||||
### Approvals
|
||||
- **Hardline blocklist for unrecoverable commands** ([#15878](https://github.com/NousResearch/hermes-agent/pull/15878))
|
||||
- Perf: precompile DANGEROUS_PATTERNS and HARDLINE_PATTERNS ([#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
|
||||
|
||||
### ACP
|
||||
- **Advertise and forward image prompts** ([#18030](https://github.com/NousResearch/hermes-agent/pull/18030))
|
||||
|
||||
### API Server
|
||||
- **POST `/v1/runs/{run_id}/stop`** (salvage of #15656) ([#15842](https://github.com/NousResearch/hermes-agent/pull/15842))
|
||||
- **Expose run status for external UIs** (#17085) ([#17458](https://github.com/NousResearch/hermes-agent/pull/17458))
|
||||
|
||||
### Nix
|
||||
- **Declarative plugin installation for NixOS module** (@alt-glitch) ([#15953](https://github.com/NousResearch/hermes-agent/pull/15953))
|
||||
- Fix: use `--rebuild` in fix-lockfiles to bypass cached FOD store paths ([#15444](https://github.com/NousResearch/hermes-agent/pull/15444))
|
||||
- Fix: `extraPackages` now actually works via per-user profile ([#17047](https://github.com/NousResearch/hermes-agent/pull/17047))
|
||||
- Fix: refresh web/ npm-deps hash to unblock main builds ([#17174](https://github.com/NousResearch/hermes-agent/pull/17174))
|
||||
- Fix: replace magic-nix-cache with Cachix ([#17928](https://github.com/NousResearch/hermes-agent/pull/17928))
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ TUI
|
||||
|
||||
### New features
|
||||
- **LaTeX rendering** (@austinpickett) ([#17175](https://github.com/NousResearch/hermes-agent/pull/17175))
|
||||
- **`/reload` .env hot-reload** — ported from the classic CLI ([#17286](https://github.com/NousResearch/hermes-agent/pull/17286))
|
||||
- **Pluggable busy-indicator styles** (@OutThisLife, #13610) ([#17150](https://github.com/NousResearch/hermes-agent/pull/17150))
|
||||
- **Opt-in auto-resume of the most recent session** (@OutThisLife) ([#17130](https://github.com/NousResearch/hermes-agent/pull/17130))
|
||||
- **Expanded light-terminal auto-detection** — `HERMES_TUI_THEME` + background hex (@OutThisLife) ([#17113](https://github.com/NousResearch/hermes-agent/pull/17113))
|
||||
- **Delete sessions from `/resume` picker with `d`** (@OutThisLife) ([#17668](https://github.com/NousResearch/hermes-agent/pull/17668))
|
||||
- **Line-by-line scroll on modified mouse wheel** (@OutThisLife) ([#17669](https://github.com/NousResearch/hermes-agent/pull/17669))
|
||||
- **Delete queued message while editing with ctrl-x / cancel with esc** (@OutThisLife) ([#16707](https://github.com/NousResearch/hermes-agent/pull/16707))
|
||||
- **Per-section visibility for the details accordion** (@OutThisLife) ([#14968](https://github.com/NousResearch/hermes-agent/pull/14968))
|
||||
- **Voice mode CLI parity** — VAD loop + TTS + crash forensics ([#14810](https://github.com/NousResearch/hermes-agent/pull/14810))
|
||||
- **Contextual first-touch hints ported to TUI** — `/busy`, `/verbose` ([#16054](https://github.com/NousResearch/hermes-agent/pull/16054))
|
||||
- **Mini help menu on `?` in the input field** (@ethernet8023) ([#18043](https://github.com/NousResearch/hermes-agent/pull/18043))
|
||||
|
||||
### Fixes
|
||||
- Fix: proactive mouse disable on ConPTY + `/mouse` toggle command (@kevin-ho, WSL2 ghost-mouse fix) ([#15488](https://github.com/NousResearch/hermes-agent/pull/15488))
|
||||
- Fix: restore skills search RPC ([#15870](https://github.com/NousResearch/hermes-agent/pull/15870))
|
||||
- Perf: cache text measurements across yoga flex re-passes ([#14818](https://github.com/NousResearch/hermes-agent/pull/14818))
|
||||
- Perf: stabilize long-session scrolling ([#15926](https://github.com/NousResearch/hermes-agent/pull/15926))
|
||||
- Perf: lazily seed virtual history heights ([#16523](https://github.com/NousResearch/hermes-agent/pull/16523))
|
||||
- Perf: cut visible cold start ~57% with lazy agent init ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190))
|
||||
|
||||
---
|
||||
|
||||
## 🖱️ CLI & User Experience
|
||||
|
||||
### New commands
|
||||
- **`hermes -z <prompt>`** — non-interactive one-shot mode ([#15702](https://github.com/NousResearch/hermes-agent/pull/15702))
|
||||
- **`hermes -z` with `--model` / `--provider` / `HERMES_INFERENCE_MODEL`** ([#15704](https://github.com/NousResearch/hermes-agent/pull/15704))
|
||||
- **`hermes update --check`** preflight flag ([#15841](https://github.com/NousResearch/hermes-agent/pull/15841))
|
||||
- **`hermes fallback`** command for managing fallback providers ([#16052](https://github.com/NousResearch/hermes-agent/pull/16052))
|
||||
- **`/busy`** slash command for busy input mode ([#15382](https://github.com/NousResearch/hermes-agent/pull/15382))
|
||||
- **`/busy` input mode 'steer'** as a third option ([#16279](https://github.com/NousResearch/hermes-agent/pull/16279))
|
||||
- **`/btw` as alias for `/background`** ([#16053](https://github.com/NousResearch/hermes-agent/pull/16053))
|
||||
- **`/reload-skills`** slash command (salvage #17670) ([#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
|
||||
- **Surface `/queue`, `/bg`, `/steer` in agent-running placeholder** ([#16118](https://github.com/NousResearch/hermes-agent/pull/16118))
|
||||
|
||||
### Setup / onboarding
|
||||
- **Auto-reconfigure on existing installs** ([#15879](https://github.com/NousResearch/hermes-agent/pull/15879))
|
||||
- **Contextual first-touch hints for `/busy` and `/verbose`** ([#16046](https://github.com/NousResearch/hermes-agent/pull/16046))
|
||||
- **Cost-saving tips from the April 30 tip-of-the-day** ([#17841](https://github.com/NousResearch/hermes-agent/pull/17841))
|
||||
- **Hyperlink startup banner title to the latest GitHub Release** ([#14945](https://github.com/NousResearch/hermes-agent/pull/14945))
|
||||
|
||||
### Update / backup
|
||||
- **Snapshot pairing data before `git pull`** ([#16383](https://github.com/NousResearch/hermes-agent/pull/16383))
|
||||
- **Auto-backup HERMES_HOME before `hermes update`** (opt-in, off by default) ([#16539](https://github.com/NousResearch/hermes-agent/pull/16539), [#16566](https://github.com/NousResearch/hermes-agent/pull/16566))
|
||||
- **Exclude `checkpoints/` from backups** ([#16572](https://github.com/NousResearch/hermes-agent/pull/16572))
|
||||
- **Exclude SQLite WAL/SHM/journal sidecars from backups** ([#16576](https://github.com/NousResearch/hermes-agent/pull/16576))
|
||||
- **Installer FHS layout for root installs on Linux** ([#15608](https://github.com/NousResearch/hermes-agent/pull/15608))
|
||||
- Fix: kill stale dashboards instead of warning ([#17832](https://github.com/NousResearch/hermes-agent/pull/17832))
|
||||
- Fix: show correct update status on nix-built hermes ([#17550](https://github.com/NousResearch/hermes-agent/pull/17550))
|
||||
|
||||
### Slash-command housekeeping
|
||||
- Refactor: drop `/provider`, `/plan` handler, and clean up slash registry ([#15047](https://github.com/NousResearch/hermes-agent/pull/15047))
|
||||
- Refactor: drop `persist_session` plumbing + fix broken `/btw` mid-turn bypass ([#16075](https://github.com/NousResearch/hermes-agent/pull/16075))
|
||||
|
||||
### OpenClaw migration (for folks coming from OpenClaw)
|
||||
- **Hardened OpenClaw import** — plan-first apply, redaction, pre-migration backup ([#16911](https://github.com/NousResearch/hermes-agent/pull/16911))
|
||||
- Fix: case-preserving brand rewrite + one-time `~/.openclaw` residue banner ([#16327](https://github.com/NousResearch/hermes-agent/pull/16327))
|
||||
- Fix: resolve `openclaw` workspace files from `agents.defaults.workspace` ([#16879](https://github.com/NousResearch/hermes-agent/pull/16879))
|
||||
- Fix: resolve model aliases against real OpenClaw catalog schema (salvage #16778) ([#16977](https://github.com/NousResearch/hermes-agent/pull/16977))
|
||||
|
||||
---
|
||||
|
||||
## 📊 Web Dashboard
|
||||
|
||||
- **Models tab** — rich per-model analytics ([#17745](https://github.com/NousResearch/hermes-agent/pull/17745))
|
||||
- **Configure main + auxiliary models from the Models page** ([#17802](https://github.com/NousResearch/hermes-agent/pull/17802))
|
||||
- **Dashboard Chat tab — xterm.js + JSON-RPC sidecar** (supersedes #12710 + #13379, @OutThisLife) ([#14890](https://github.com/NousResearch/hermes-agent/pull/14890))
|
||||
- **Dashboard layout refresh** (@austinpickett) ([#14899](https://github.com/NousResearch/hermes-agent/pull/14899))
|
||||
- **`--stop` and `--status` flags** on the dashboard CLI ([#17840](https://github.com/NousResearch/hermes-agent/pull/17840))
|
||||
- **Page-scoped plugin slots for built-in pages** ([#15658](https://github.com/NousResearch/hermes-agent/pull/15658))
|
||||
- Fix: replace all buttons for design system buttons ([#17007](https://github.com/NousResearch/hermes-agent/pull/17007))
|
||||
|
||||
---
|
||||
|
||||
## ⚡ Performance
|
||||
|
||||
- **TUI visible cold start cut ~57%** via lazy agent init ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190))
|
||||
- **Lazy-import OpenAI, Anthropic, Firecrawl, account_usage** ([#17046](https://github.com/NousResearch/hermes-agent/pull/17046))
|
||||
- **mtime-cache `load_config()` and `read_raw_config()`** ([#17041](https://github.com/NousResearch/hermes-agent/pull/17041))
|
||||
- **Memoize `get_tool_definitions()` + TTL-cache `check_fn` results** ([#17098](https://github.com/NousResearch/hermes-agent/pull/17098))
|
||||
- **Precompile DANGEROUS_PATTERNS and HARDLINE_PATTERNS** ([#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
|
||||
- **Cache Ink text measurements across yoga flex re-passes** ([#14818](https://github.com/NousResearch/hermes-agent/pull/14818))
|
||||
- **Stabilize long-session scrolling** ([#15926](https://github.com/NousResearch/hermes-agent/pull/15926))
|
||||
- **Lazily seed virtual history heights** ([#16523](https://github.com/NousResearch/hermes-agent/pull/16523))
|
||||
|
||||
---
|
||||
|
||||
## 🔒 Security & Reliability
|
||||
|
||||
- **Secret redaction off by default** — stops corrupting patches / API payloads with fake-key substitutions. Opt in via `redaction.enabled: true` ([#16794](https://github.com/NousResearch/hermes-agent/pull/16794))
|
||||
- **`[SYSTEM:` → `[IMPORTANT:`** in all user-injected markers (Azure content filter dodge) ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
|
||||
- **Hardline blocklist for unrecoverable commands** ([#15878](https://github.com/NousResearch/hermes-agent/pull/15878))
|
||||
- **Canonical `mask_secret` helper; fix status.py DIM drift** ([#17207](https://github.com/NousResearch/hermes-agent/pull/17207))
|
||||
- **Sweep expired paste.rs uploads on a real timer** ([#16431](https://github.com/NousResearch/hermes-agent/pull/16431))
|
||||
- **Preserve symlinks during atomic file writes** ([#16980](https://github.com/NousResearch/hermes-agent/pull/16980))
|
||||
- **Probe `/dev/tty` by opening it, not bare existence** ([#17024](https://github.com/NousResearch/hermes-agent/pull/17024))
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Notable Bug Fixes
|
||||
|
||||
This window includes 360 `fix:` PRs. Selected highlights from across the stack:
|
||||
|
||||
- **Background review fork inherits parent's live runtime** — provider/model/creds now propagate correctly ([#16099](https://github.com/NousResearch/hermes-agent/pull/16099))
|
||||
- **Hindsight configurable `HINDSIGHT_TIMEOUT` env var** ([#15077](https://github.com/NousResearch/hermes-agent/pull/15077))
|
||||
- **Tools: normalize numeric entries + clear stale `no_mcp` in `_save_platform_tools`** ([#15607](https://github.com/NousResearch/hermes-agent/pull/15607))
|
||||
- **MCP: rewrite `definitions` refs to `$defs` in input schemas** — closes provider-side 400s
|
||||
- **Azure content filter compatibility** — renamed `[SYSTEM:` markers so Azure's content filter stops flagging them ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
|
||||
- **Vision cache uses HERMES_HOME instead of cwd** ([#17719](https://github.com/NousResearch/hermes-agent/pull/17719))
|
||||
- **FTS5 search** — tool_name + tool_calls indexing with repair + migration ([#16914](https://github.com/NousResearch/hermes-agent/pull/16914))
|
||||
- **Streaming reasoning persists on assistant turns** ([#16892](https://github.com/NousResearch/hermes-agent/pull/16892))
|
||||
- **execute_code concurrent RPC serialization** (#17770) ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
|
||||
- **Background reviewer scoped to memory + skills toolsets** — no more accidental web/shell escapes ([#16569](https://github.com/NousResearch/hermes-agent/pull/16569))
|
||||
- **Compression recovery** — retry on main before giving up; notify user when aux fails ([#16774](https://github.com/NousResearch/hermes-agent/pull/16774), [#16775](https://github.com/NousResearch/hermes-agent/pull/16775))
|
||||
- **`croniter` promoted to a core dependency** ([#17577](https://github.com/NousResearch/hermes-agent/pull/17577))
|
||||
- **Discord tool `limit` parameter coerced to int** before `min()` call ([#16319](https://github.com/NousResearch/hermes-agent/pull/16319))
|
||||
- **Yuanbao messaging platform entrance fix** ([#16880](https://github.com/NousResearch/hermes-agent/pull/16880))
|
||||
- **ACP advertise and forward image prompts** ([#18030](https://github.com/NousResearch/hermes-agent/pull/18030))
|
||||
- **DeepSeek / Kimi reasoning content isolation** across cross-provider histories (@Zjianru) ([#15749](https://github.com/NousResearch/hermes-agent/pull/15749), [#15762](https://github.com/NousResearch/hermes-agent/pull/15762))
|
||||
- **Preserve reasoning_content replay on DeepSeek v4 + Kimi/Moonshot thinking** ([#18045](https://github.com/NousResearch/hermes-agent/pull/18045))
|
||||
|
||||
The vast majority of the 360 fixes landed in the streaming/compression/tool-calling paths across all providers — DeepSeek, Kimi, Moonshot, GLM, Qwen, MiniMax, Gemini, Anthropic, OpenAI — alongside TUI polish (resize, scroll, sticky-prompt) and gateway platform-specific edge cases.
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing & CI
|
||||
|
||||
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
|
||||
- **Microsoft Teams xdist collision guard** — prevents worker collisions when Teams platform tests run in parallel ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
|
||||
- Chore: remove unused imports and dead locals (ruff F401, F841) ([#17010](https://github.com/NousResearch/hermes-agent/pull/17010))
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
- **Curator feature page** added to docs site ([#17563](https://github.com/NousResearch/hermes-agent/pull/17563))
|
||||
- **Document pin also blocking `skill_manage` writes** ([#17578](https://github.com/NousResearch/hermes-agent/pull/17578))
|
||||
- **Direct-URL skill install documented** across features, reference, guide, and `hermes-agent` skill ([#16355](https://github.com/NousResearch/hermes-agent/pull/16355))
|
||||
- **Hooks tutorial — build a BOOT.md startup checklist** (replaces the removed built-in hook) ([#17202](https://github.com/NousResearch/hermes-agent/pull/17202))
|
||||
- **ComfyUI docs: ask local vs cloud FIRST before hardware check** ([#17612](https://github.com/NousResearch/hermes-agent/pull/17612))
|
||||
- **Obliteratus skill: link YouTube video guide in SKILL.md** ([#15808](https://github.com/NousResearch/hermes-agent/pull/15808))
|
||||
- Per-skill docs pages generated for bundled + optional skills; ASCII art code blocks auto-wrapped ([#14929](https://github.com/NousResearch/hermes-agent/pull/14929), [#16497](https://github.com/NousResearch/hermes-agent/pull/16497))
|
||||
|
||||
---
|
||||
|
||||
## ⚖️ Removed / Reverted
|
||||
|
||||
- **Kanban multi-profile collaboration board** — landed in #16081, reverted in ([#16098](https://github.com/NousResearch/hermes-agent/pull/16098)) while the design is reworked
|
||||
- **computer-use cua-driver** — 3 preparatory PRs landed then were reverted in ([#16927](https://github.com/NousResearch/hermes-agent/pull/16927))
|
||||
- **BOOT.md built-in hook** removed ([#17093](https://github.com/NousResearch/hermes-agent/pull/17093)); the hooks tutorial ([#17202](https://github.com/NousResearch/hermes-agent/pull/17202)) shows how to build the same workflow yourself with a shell hook
|
||||
- **`/provider` + `/plan` slash commands dropped** ([#15047](https://github.com/NousResearch/hermes-agent/pull/15047))
|
||||
- **`flush_memories` removed entirely** ([#15696](https://github.com/NousResearch/hermes-agent/pull/15696))
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
### Core
|
||||
- **@teknium1** (Teknium)
|
||||
|
||||
### Top Community Contributors (by merged PR count since v0.11.0)
|
||||
|
||||
- **@OutThisLife** (Brooklyn) — 52 PRs · TUI — light-terminal detection + pluggable busy styles + auto-resume + session-delete from /resume + mouse-wheel scrolling + xterm.js dashboard Chat tab + cold-start cut + accordion polish
|
||||
- **@kshitijk4poor** — 12 PRs · LM Studio first-class provider (salvage), Vercel Sandbox backend, GMI Cloud salvage, bundled-by-default touchdesigner-mcp, many tool-call / reasoning fixes
|
||||
- **@helix4u** — 10 PRs · MCP schema robustness, assorted stability fixes
|
||||
- **@alt-glitch** — 8 PRs · trigram FTS5 CJK search, declarative Nix plugin install, matrix/feishu hints and fixes
|
||||
- **@ethernet8023** — 4 PRs
|
||||
- **@austinpickett** — 4 PRs · LaTeX rendering in TUI, dashboard layout refresh
|
||||
- **@benbarclay** — 3 PRs · Docker run-as-host-user so bind mounts don't get root-owned
|
||||
- **@vominh1919** — 2 PRs
|
||||
- **@stephenschoettler** — 2 PRs
|
||||
- **@kevin-ho** — ConPTY mouse-injection fix (#15488)
|
||||
- **@Zjianru** — cross-provider reasoning_content isolation + DeepSeek/Kimi empty-reasoning injection (#15749, #15762)
|
||||
- **@web3blind** — Telegram chat allowlists for groups and forums (#15027)
|
||||
- **@SHL0MS** — 9 new TouchDesigner-MCP reference docs (#16768)
|
||||
- **@0xDevNinja** — curator `restore_skill` nested-archive fix (#17951)
|
||||
- **@y0shua1ee** — curator `use` activity fix (#17953)
|
||||
|
||||
### Also contributing
|
||||
Salvaged or co-authored work from **@isaachuangGMICLOUD** (GMI Cloud), earlier upstream PRs from the original author of each salvage chain, and a long tail of one-shot fixes, documentation nudges, and skill contributions from the community.
|
||||
|
||||
### All Contributors (alphabetical, excluding @teknium1)
|
||||
|
||||
@0xbyt4, @0xharryriddle, @0xDevNinja, @0z1-ghb, @5park1e, @A-FdL-Prog, @aj-nt, @akhater, @alblez, @alexg0bot,
|
||||
@alexzhu0, @AllardQuek, @alt-glitch, @amanning3390, @amanuel2, @AndreKurait, @andrewhosf, @Andy283, @andyylin,
|
||||
@angel12, @AntAISecurityLab, @ash, @austinpickett, @badgerbees, @BadTechBandit, @Bartok9, @beenherebefore,
|
||||
@beesrsj2500, @BeliefanX, @benbarclay, @benjaminsehl, @BlackishGreen33, @bloodcarter, @BlueBirdBack,
|
||||
@briandevans, @brooklynnicholson, @bsgdigital, @buray, @bwjoke, @camaragon, @cdanis, @cgarwood82,
|
||||
@charles-brooks, @chen1749144759, @chengoak, @ching-kaching, @Contentment003111, @crayfish-ai, @CruxExperts,
|
||||
@cyclingwithelephants, @dandaka, @danklynn, @ddupont808, @dhabibi, @difujia, @dimitrovi, @dlkakbs,
|
||||
@dontcallmejames, @EKKOLearnAI, @emozilla, @ericnicolaides, @Erosika, @ethernet8023, @exiao, @Feranmi10,
|
||||
@flobo3, @foxion37, @georgeglessner, @georgex8001, @ghostmfr, @H-Ali13381, @HangGlidersRule, @harryplusplus,
|
||||
@haru398801, @heathley, @hejuntt1014, @hekaru-agent, @helix4u, @Heltman, @HenkDz, @heyitsaamir, @hharry11,
|
||||
@hhhonzik, @hhuang91, @HiddenPuppy, @htsh, @iamagenius00, @in-liberty420, @innocarpe, @irispillars, @iRonin,
|
||||
@isaachuangGMICLOUD, @Ito-69, @j3ffffff, @jackjin1997, @jakubkrcmar, @Jason2031, @JayGwod, @jerome-benoit,
|
||||
@johnncenae, @Kailigithub, @keiravoss94, @kevin-ho, @knockyai, @konsisumer, @kshitijk4poor, @kunlabs, @l0hde,
|
||||
@Leihb, @leoneparise, @LeonSGP43, @liizfq, @liuhao1024, @loongzhao, @lsdsjy, @luyao618, @ma-pony, @Magaav,
|
||||
@MagicRay1217, @math0r-be, @MattMaximo, @maxims-oss, @MaxyMoos, @maymuneth, @mcndjxlefnd, @memosr,
|
||||
@MestreY0d4-Uninter, @mewwts, @Mirac1eSky, @MorAlekss, @mrhwick, @mrunmayee17, @mssteuer, @Nanako0129,
|
||||
@nazirulhafiy, @Nerijusas, @Nicecsh, @nicoloboschi, @nightq, @ningfangbin, @octo-patch, @Octopus,
|
||||
@OutThisLife, @Paperclip, @pein892, @perlowja, @prasadus92, @qike-ms, @qiyin-code, @Readon, @ReginaldasR,
|
||||
@revaraver, @rfilgueiras, @rmoen, @romanornr, @rugvedS07, @rylena, @samrusani, @Sanjays2402, @sasha-id,
|
||||
@Satoshi-agi, @scheidti, @scotttrinh, @season179, @SeeYangZhi, @sgaofen, @shamork, @shannonsands, @SHL0MS,
|
||||
@simbam99, @Societus, @socrates1024, @Sonoyunchu, @sprmn24, @stephenschoettler, @tangyuanjc, @TechPrototyper,
|
||||
@tekgnosis-net, @ThomassJonax, @tmimmanuel, @tochukwuada, @Tosko4, @Tranquil-Flow, @twozle, @txbxxx,
|
||||
@UgwujaGeorge, @Versun, @vlwkaos, @voidborne-d, @vominh1919, @Wang-tianhao, @Wangshengyang2004, @web3blind,
|
||||
@westers, @Wysie, @xandersbell, @xiahu88988, @XieNBi, @xinbenlv, @xnbi, @y0shua1ee, @yatesjalex, @yes999zc,
|
||||
@yeyitech, @Yoimex, @YueLich, @Yukipukii1, @zhiyanliu, @zicochaos, @Zjianru, @zkl2333, @zons-zhaozhy,
|
||||
@ztexydt-cqh.
|
||||
|
||||
Also: @Siddharth Balyan, @YuShu.
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.4.23...v2026.4.30](https://github.com/NousResearch/hermes-agent/compare/v2026.4.23...v2026.4.30)
|
||||
@@ -1,641 +0,0 @@
|
||||
# Hermes Agent v0.13.0 (v2026.5.7)
|
||||
|
||||
**Release Date:** May 7, 2026
|
||||
**Since v0.12.0:** 864 commits · 588 merged PRs · 829 files changed · 128,366 insertions · 282 issues closed (13 P0, 36 P1) · 295 community contributors (including co-authors)
|
||||
|
||||
> The Tenacity Release — Hermes Agent now finishes what it starts. Kanban ships as a durable multi-agent board (heartbeat, reclaim, zombie detection, auto-block on incomplete exit, per-task retries, hallucination recovery). `/goal` keeps the agent locked on a target across turns (Ralph loop). Checkpoints v2 rewrites state persistence with real pruning. Gateway auto-resumes interrupted sessions after restart. Cron grows a `no_agent` watchdog mode. A security wave closes 8 P0s — redaction is now ON by default, Discord role-allowlists are guild-scoped, WhatsApp rejects strangers by default, and TOCTOU windows close across auth.json and MCP OAuth. Google Chat becomes the 20th platform. Providers become a pluggable surface. Seven i18n locales ship.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Highlights
|
||||
|
||||
- **Multi-agent Kanban — delegate to an AI team that actually finishes** — Spin up a durable board, drop tasks on it, and let multiple Hermes workers pick them up, hand off, and close them out. Heartbeats, reclaim, zombie detection, retry budgets, and a hallucination gate keep the team honest. One install, many kanbans. ([#17805](https://github.com/NousResearch/hermes-agent/pull/17805), [#19653](https://github.com/NousResearch/hermes-agent/pull/19653), [#20232](https://github.com/NousResearch/hermes-agent/pull/20232), [#20332](https://github.com/NousResearch/hermes-agent/pull/20332), [#21330](https://github.com/NousResearch/hermes-agent/pull/21330), [#21183](https://github.com/NousResearch/hermes-agent/pull/21183), [#21214](https://github.com/NousResearch/hermes-agent/pull/21214))
|
||||
|
||||
- **`/goal` — the agent doesn't forget what you asked it to do** — Lock the agent onto a target and it stays on task across turns. The Ralph loop as a first-class primitive. ([#18262](https://github.com/NousResearch/hermes-agent/pull/18262), [#18275](https://github.com/NousResearch/hermes-agent/pull/18275), [#21287](https://github.com/NousResearch/hermes-agent/pull/21287))
|
||||
|
||||
- **Show it a video** — new `video_analyze` tool for native video understanding on Gemini and compatible multimodal models. (@alt-glitch) ([#19301](https://github.com/NousResearch/hermes-agent/pull/19301))
|
||||
|
||||
- **Clone a voice** — xAI Custom Voices lands as a TTS provider with voice cloning support. (@alt-glitch) ([#18776](https://github.com/NousResearch/hermes-agent/pull/18776))
|
||||
|
||||
- **Hermes speaks your language** — static gateway + CLI messages translate to 7 locales: Chinese, Japanese, German, Spanish, French, Ukrainian, and Turkish. Docs site gains a Chinese (zh-Hans) locale. ([#20231](https://github.com/NousResearch/hermes-agent/pull/20231), [#20329](https://github.com/NousResearch/hermes-agent/pull/20329), [#20467](https://github.com/NousResearch/hermes-agent/pull/20467), [#20474](https://github.com/NousResearch/hermes-agent/pull/20474), [#20430](https://github.com/NousResearch/hermes-agent/pull/20430), [#20431](https://github.com/NousResearch/hermes-agent/pull/20431))
|
||||
|
||||
- **Google Chat — the 20th messaging platform** — plus a generic platform-plugin hooks surface so third-party adapters drop in without touching core (IRC and Teams migrated). ([#21306](https://github.com/NousResearch/hermes-agent/pull/21306), [#21331](https://github.com/NousResearch/hermes-agent/pull/21331))
|
||||
|
||||
- **Sessions survive restarts** — gateway bounces mid-agent, `/update` restarts, source-file reloads — conversations auto-resume when the gateway comes back. ([#21192](https://github.com/NousResearch/hermes-agent/pull/21192))
|
||||
|
||||
- **Security wave — 8 P0 closures** — redaction ON by default, Discord role-allowlists guild-scoped (CVSS 8.1 cross-guild DM bypass closed), WhatsApp rejects strangers by default, TOCTOU windows closed across `auth.json` and MCP OAuth, browser enforces cloud-metadata SSRF floor, cron prompt-injection scans assembled skill content, `hermes debug share` redacts at upload. ([#21193](https://github.com/NousResearch/hermes-agent/pull/21193), [#21241](https://github.com/NousResearch/hermes-agent/pull/21241), [#21291](https://github.com/NousResearch/hermes-agent/pull/21291), [#21176](https://github.com/NousResearch/hermes-agent/pull/21176), [#21194](https://github.com/NousResearch/hermes-agent/pull/21194), [#21228](https://github.com/NousResearch/hermes-agent/pull/21228), [#21350](https://github.com/NousResearch/hermes-agent/pull/21350), [#19318](https://github.com/NousResearch/hermes-agent/pull/19318))
|
||||
|
||||
- **Checkpoints v2** — state persistence rewritten. Real pruning, disk guardrails, no more orphan shadow repos. ([#20709](https://github.com/NousResearch/hermes-agent/pull/20709))
|
||||
|
||||
- **The agent lints its own writes** — post-write delta lint on `write_file` + `patch`. Python, JSON, YAML, TOML. Syntax errors surface immediately instead of shipping downstream. ([#20191](https://github.com/NousResearch/hermes-agent/pull/20191))
|
||||
|
||||
- **`no_agent` cron mode — script-only watchdog** — cron jobs can now skip the agent entirely and just run a script. Empty stdout is silent, non-empty gets delivered verbatim. ([#19709](https://github.com/NousResearch/hermes-agent/pull/19709))
|
||||
|
||||
- **Platform allowlists everywhere** — `allowed_channels` / `allowed_chats` / `allowed_rooms` config across Slack, Telegram, Mattermost, Matrix, and DingTalk. ([#21251](https://github.com/NousResearch/hermes-agent/pull/21251))
|
||||
|
||||
- **Providers are now plugins** — `ProviderProfile` ABC + `plugins/model-providers/`. Drop in third-party providers without touching core. ([#20324](https://github.com/NousResearch/hermes-agent/pull/20324))
|
||||
|
||||
- **API server — long-term memory per session** — `X-Hermes-Session-Key` header gives memory providers a stable session identifier. ([#20199](https://github.com/NousResearch/hermes-agent/pull/20199))
|
||||
|
||||
- **MCP levels up** — SSE transport with OAuth forwarding, stale-pipe retries, image results surface as MEDIA tags instead of getting dropped, keepalive on long-lived lifecycle waits. ([#21227](https://github.com/NousResearch/hermes-agent/pull/21227), [#21323](https://github.com/NousResearch/hermes-agent/pull/21323), [#21289](https://github.com/NousResearch/hermes-agent/pull/21289), [#21328](https://github.com/NousResearch/hermes-agent/pull/21328), [#20209](https://github.com/NousResearch/hermes-agent/pull/20209))
|
||||
|
||||
- **Curator grows subcommands** — `hermes curator archive`, `prune`, `list-archived`. Manual `hermes curator run` is synchronous now — you see results without polling. ([#20200](https://github.com/NousResearch/hermes-agent/pull/20200), [#21236](https://github.com/NousResearch/hermes-agent/pull/21236), [#21216](https://github.com/NousResearch/hermes-agent/pull/21216))
|
||||
|
||||
- **ACP — `/steer` and `/queue`** — direct the in-flight agent or queue follow-ups from Zed, VS Code, or JetBrains. Plus atomic session persistence and reasoning-metadata preservation across restarts. (@HenkDz) ([#18114](https://github.com/NousResearch/hermes-agent/pull/18114), [#20279](https://github.com/NousResearch/hermes-agent/pull/20279), [#20296](https://github.com/NousResearch/hermes-agent/pull/20296), [#20433](https://github.com/NousResearch/hermes-agent/pull/20433))
|
||||
|
||||
- **TUI glow-up** — `/model` picker matches `hermes model` with inline auth (@austinpickett), collapsible startup banner sections (@kshitijk4poor), context-compression counter in the status bar. ([#18117](https://github.com/NousResearch/hermes-agent/pull/18117), [#20625](https://github.com/NousResearch/hermes-agent/pull/20625), [#21218](https://github.com/NousResearch/hermes-agent/pull/21218))
|
||||
|
||||
- **Dashboard grows up** — Plugins page (manage, enable/disable, auth status) (@austinpickett), Profiles management page (@vincez-hms-coder), sortable analytics tables, reverse-proxy support via `X-Forwarded-Prefix`, new `default-large` 18px theme. ([#18095](https://github.com/NousResearch/hermes-agent/pull/18095), [#16419](https://github.com/NousResearch/hermes-agent/pull/16419), [#18192](https://github.com/NousResearch/hermes-agent/pull/18192), [#21296](https://github.com/NousResearch/hermes-agent/pull/21296), [#20820](https://github.com/NousResearch/hermes-agent/pull/20820))
|
||||
|
||||
- **SearXNG + split web tools** — SearXNG ships as a native search-only backend; web tools now let you pick different backends per capability (search vs extract vs browse). (@kshitijk4poor) ([#20823](https://github.com/NousResearch/hermes-agent/pull/20823), [#20061](https://github.com/NousResearch/hermes-agent/pull/20061), [#20841](https://github.com/NousResearch/hermes-agent/pull/20841))
|
||||
|
||||
- **OpenRouter response caching** — explicit cache control for models that expose it. (@kshitijk4poor) ([#19132](https://github.com/NousResearch/hermes-agent/pull/19132))
|
||||
|
||||
- **`[[as_document]]` — skill media-routing directive** — skills can force the gateway to deliver output as a document on platforms that support it. ([#21210](https://github.com/NousResearch/hermes-agent/pull/21210))
|
||||
|
||||
- **`transform_llm_output` plugin hook** — new lifecycle hook that lets plugins reshape or filter LLM output before it hits the conversation. Useful for context-window reducers and content filters. ([#21235](https://github.com/NousResearch/hermes-agent/pull/21235))
|
||||
|
||||
- **Nous OAuth persists across profiles** — shared token store: sign in once, every profile inherits the session. ([#19712](https://github.com/NousResearch/hermes-agent/pull/19712))
|
||||
|
||||
- **QQBot — native approval keyboards** — feature parity with Telegram / Discord approval UX. Chunked upload, quoted attachments. ([#21342](https://github.com/NousResearch/hermes-agent/pull/21342), [#21353](https://github.com/NousResearch/hermes-agent/pull/21353))
|
||||
|
||||
- **6 new optional skills** — Shopify (Admin + Storefront GraphQL), here.now, shop-app personal shopping assistant, Anthropic financial-services bundle, kanban-video-orchestrator (@SHL0MS), searxng-search (@kshitijk4poor). ([#18116](https://github.com/NousResearch/hermes-agent/pull/18116), [#18170](https://github.com/NousResearch/hermes-agent/pull/18170), [#20702](https://github.com/NousResearch/hermes-agent/pull/20702), [#21180](https://github.com/NousResearch/hermes-agent/pull/21180), [#19281](https://github.com/NousResearch/hermes-agent/pull/19281), [#20841](https://github.com/NousResearch/hermes-agent/pull/20841))
|
||||
|
||||
- **New models** — `deepseek/deepseek-v4-pro`, `x-ai/grok-4.3`, `openrouter/owl-alpha` (free), `tencent/hy3-preview` (@Contentment003111), Arcee Trinity Large Thinking temperature + compression overrides. ([#20495](https://github.com/NousResearch/hermes-agent/pull/20495), [#20497](https://github.com/NousResearch/hermes-agent/pull/20497), [#18071](https://github.com/NousResearch/hermes-agent/pull/18071), [#21077](https://github.com/NousResearch/hermes-agent/pull/21077), [#20473](https://github.com/NousResearch/hermes-agent/pull/20473))
|
||||
|
||||
- **100 fresh CLI startup tips** — the random tip banner gets 100 new entries covering cron, kanban, curator, plugins, and lesser-known flags. ([#20168](https://github.com/NousResearch/hermes-agent/pull/20168))
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Multi-Agent Kanban (Durable)
|
||||
|
||||
### New — durable multi-profile collaboration board
|
||||
- **`feat(kanban): durable multi-profile collaboration board`** — post-revert reimplementation, multi-profile by design ([#17805](https://github.com/NousResearch/hermes-agent/pull/17805))
|
||||
- **Multi-project boards** — one install, many kanbans ([#19653](https://github.com/NousResearch/hermes-agent/pull/19653), [#19679](https://github.com/NousResearch/hermes-agent/pull/19679))
|
||||
- **Share board, workspaces, and worker logs across profiles** ([#19378](https://github.com/NousResearch/hermes-agent/pull/19378))
|
||||
- **Hallucination gate + recovery UX for worker-created-card claims** (closes #20017) ([#20232](https://github.com/NousResearch/hermes-agent/pull/20232))
|
||||
- **Generic diagnostics engine for task distress signals** ([#20332](https://github.com/NousResearch/hermes-agent/pull/20332))
|
||||
- **Per-task `max_retries` override** (supersedes #20972) ([#21330](https://github.com/NousResearch/hermes-agent/pull/21330))
|
||||
- **Multiline textarea for inline-create title** (salvage of #20970) ([#21243](https://github.com/NousResearch/hermes-agent/pull/21243))
|
||||
|
||||
### Kanban Dashboard
|
||||
- **Workspace kind + path inputs in inline create form** ([#19679](https://github.com/NousResearch/hermes-agent/pull/19679))
|
||||
- **Per-platform home-channel notification toggles** ([#19864](https://github.com/NousResearch/hermes-agent/pull/19864))
|
||||
- **Sharper home-channel toggle contrast + drop → running action** ([#19916](https://github.com/NousResearch/hermes-agent/pull/19916))
|
||||
- Fix: reject direct status transition to 'running' via dashboard API (salvage of #19554) ([#19705](https://github.com/NousResearch/hermes-agent/pull/19705))
|
||||
- Fix: dashboard board pin authoritative over server current file (#20879) ([#21230](https://github.com/NousResearch/hermes-agent/pull/21230))
|
||||
- Fix: treat dashboard event-stream cancellation as normal shutdown (#20790) ([#21222](https://github.com/NousResearch/hermes-agent/pull/21222))
|
||||
- Fix: filter dashboard board by selected tenant (#19817) ([#21349](https://github.com/NousResearch/hermes-agent/pull/21349))
|
||||
- Fix: code/pre styling theme-immune across all themes (#21086) ([#21247](https://github.com/NousResearch/hermes-agent/pull/21247))
|
||||
- Fix: reset `<code>` background inside dashboard board ([#20687](https://github.com/NousResearch/hermes-agent/pull/20687))
|
||||
- Fix: preserve dashboard completion summaries + add kanban edit (salvages #20016) ([#20195](https://github.com/NousResearch/hermes-agent/pull/20195))
|
||||
- Fix: avoid fragile failure-column renames (salvage #20848) (@kshitijk4poor) ([#20855](https://github.com/NousResearch/hermes-agent/pull/20855))
|
||||
|
||||
### Worker lifecycle + reliability
|
||||
- **Heartbeat + reclaim + zombie + retry-cap fixes** (#21147, #21141, #21169, #20881) ([#21183](https://github.com/NousResearch/hermes-agent/pull/21183))
|
||||
- **Auto-block workers that exit without completing + shutdown race** (#20894) ([#21214](https://github.com/NousResearch/hermes-agent/pull/21214))
|
||||
- **Detect darwin zombie workers** (salvages #20023) ([#20188](https://github.com/NousResearch/hermes-agent/pull/20188))
|
||||
- **Unify failure counter across spawn/timeout/crash outcomes** ([#20410](https://github.com/NousResearch/hermes-agent/pull/20410))
|
||||
- **Enforce worker task-ownership on destructive tool calls** ([#19713](https://github.com/NousResearch/hermes-agent/pull/19713))
|
||||
- **Drop worker identity claim from KANBAN_GUIDANCE** ([#19427](https://github.com/NousResearch/hermes-agent/pull/19427))
|
||||
- Fix: skip dispatch for tasks assigned to non-profile lanes (salvages #20105, #20134) ([#20165](https://github.com/NousResearch/hermes-agent/pull/20165))
|
||||
- Fix: include default profile in on-disk assignee enumeration (salvages #20123) ([#20170](https://github.com/NousResearch/hermes-agent/pull/20170))
|
||||
- Fix: ignore stale current board pointers (salvages #20063) ([#20183](https://github.com/NousResearch/hermes-agent/pull/20183))
|
||||
- Fix: profile discovery ignores HERMES_HOME in custom-root deployments (@jackey8616) ([#19020](https://github.com/NousResearch/hermes-agent/pull/19020))
|
||||
- Fix: allow orchestrator profiles to see kanban tools via toolsets config ([#19606](https://github.com/NousResearch/hermes-agent/pull/19606))
|
||||
|
||||
### Batch salvages
|
||||
- Tier-1 batch — metadata test, max_spawn config, run-id lifecycle guard (salvages #19522 #19556 #19829) ([#20440](https://github.com/NousResearch/hermes-agent/pull/20440))
|
||||
- Tier-2 batch — doctor, started_at, parent-guard, latest_summary, selects, linked-children ([#20448](https://github.com/NousResearch/hermes-agent/pull/20448))
|
||||
|
||||
### Documentation
|
||||
- Backfill multi-board refs in reference docs ([#19704](https://github.com/NousResearch/hermes-agent/pull/19704))
|
||||
- Document `/kanban` slash command ([#19584](https://github.com/NousResearch/hermes-agent/pull/19584))
|
||||
- Document recommended handoff evidence metadata (salvage #19512) ([#20415](https://github.com/NousResearch/hermes-agent/pull/20415))
|
||||
- Fix orchestrator + worker skill setup instructions (@helix4u) ([#20958](https://github.com/NousResearch/hermes-agent/pull/20958), [#20960](https://github.com/NousResearch/hermes-agent/pull/20960))
|
||||
|
||||
---
|
||||
|
||||
## 🎯 Persistent Goals, Checkpoints & Session Durability
|
||||
|
||||
### `/goal` — persistent cross-turn goals (Ralph loop)
|
||||
- **`feat: /goal — persistent cross-turn goals`** ([#18262](https://github.com/NousResearch/hermes-agent/pull/18262))
|
||||
- **Docs page — Persistent Goals (/goal)** ([#18275](https://github.com/NousResearch/hermes-agent/pull/18275))
|
||||
- Fix: honor configured goal turn budget (salvage #19423) ([#21287](https://github.com/NousResearch/hermes-agent/pull/21287))
|
||||
|
||||
### Checkpoints v2
|
||||
- **Single-store rewrite with real pruning + disk guardrails** ([#20709](https://github.com/NousResearch/hermes-agent/pull/20709))
|
||||
|
||||
### Session durability
|
||||
- **Auto-resume interrupted sessions after gateway restart** (salvage #20888) ([#21192](https://github.com/NousResearch/hermes-agent/pull/21192))
|
||||
- **Preserve pending update prompts across restarts** ([#20160](https://github.com/NousResearch/hermes-agent/pull/20160))
|
||||
- **Preserve home-channel thread targets across restart notifications** (salvage #18440) ([#19271](https://github.com/NousResearch/hermes-agent/pull/19271))
|
||||
- **Preserve thread routing from cached live session sources** ([#21206](https://github.com/NousResearch/hermes-agent/pull/21206))
|
||||
- **Preserve assistant metadata when branching sessions** ([#18222](https://github.com/NousResearch/hermes-agent/pull/18222))
|
||||
- **Preserve thread routing for /update progress and prompts** ([#18193](https://github.com/NousResearch/hermes-agent/pull/18193))
|
||||
- **Preserve document type when merging queued events** ([#18215](https://github.com/NousResearch/hermes-agent/pull/18215))
|
||||
|
||||
---
|
||||
|
||||
## 🛡️ Security & Reliability
|
||||
|
||||
### Security hardening (8 P0 closures)
|
||||
- **Enable secret redaction by default** (#17691, #20785) ([#21193](https://github.com/NousResearch/hermes-agent/pull/21193))
|
||||
- **Discord — scope `DISCORD_ALLOWED_ROLES` to originating guild** (#12136, CVSS 8.1) ([#21241](https://github.com/NousResearch/hermes-agent/pull/21241))
|
||||
- **WhatsApp — reject strangers by default, never respond in self-chat** (#8389) ([#21291](https://github.com/NousResearch/hermes-agent/pull/21291))
|
||||
- **MCP OAuth — close TOCTOU window when saving credentials** ([#21176](https://github.com/NousResearch/hermes-agent/pull/21176))
|
||||
- **`hermes_cli/auth.py` — close TOCTOU window in credential writers** ([#21194](https://github.com/NousResearch/hermes-agent/pull/21194))
|
||||
- **Browser — enforce cloud-metadata SSRF floor in hybrid routing** (#16234) ([#21228](https://github.com/NousResearch/hermes-agent/pull/21228))
|
||||
- **`hermes debug share` — redact log content at upload time** (@GodsBoy) ([#19318](https://github.com/NousResearch/hermes-agent/pull/19318))
|
||||
- **Cron — scan assembled prompt including skill content for prompt injection** (#3968) ([#21350](https://github.com/NousResearch/hermes-agent/pull/21350))
|
||||
- **Restore .env/auth.json/state.db with 0600 perms** ([#19699](https://github.com/NousResearch/hermes-agent/pull/19699))
|
||||
- **SRI integrity for dashboard plugin scripts** (salvage #19389) ([#21277](https://github.com/NousResearch/hermes-agent/pull/21277))
|
||||
- **Bind Meet node server to localhost, restrict token file to owner read** ([#19597](https://github.com/NousResearch/hermes-agent/pull/19597))
|
||||
- **Extend sensitive-write target to cover shell RC and credential files** ([#19282](https://github.com/NousResearch/hermes-agent/pull/19282))
|
||||
- **Harden YOLO mode env parsing against quoted-bool strings** ([#18214](https://github.com/NousResearch/hermes-agent/pull/18214))
|
||||
- **OSV-Scanner CI + Dependabot for github-actions only** ([#20037](https://github.com/NousResearch/hermes-agent/pull/20037))
|
||||
|
||||
### Reliability — critical bug closures
|
||||
- **CLI crash on startup — `Invalid key 'c-S-c'`** (P0, prompt_toolkit doesn't support Shift modifier) ([#19895](https://github.com/NousResearch/hermes-agent/pull/19895), [#19919](https://github.com/NousResearch/hermes-agent/pull/19919))
|
||||
- **CLOSE_WAIT fd leak audit** — httpx keepalive + WhatsApp aiohttp leak + Feishu hygiene (#18451) ([#18766](https://github.com/NousResearch/hermes-agent/pull/18766))
|
||||
- **Gateway creates AIAgent with empty OpenRouter API key when OPENROUTER_API_KEY is missing** (#20982) — fallback providers correctly honored
|
||||
- **Background review + curator protected from overwriting bundled/hub skills** (#20273) ([#20194](https://github.com/NousResearch/hermes-agent/pull/20194))
|
||||
- **TUI compression continuation — ghost sessions with incomplete metadata** (#20001)
|
||||
- **`hermes mcp add` silently launches chat instead of registering MCP server** (#19785) ([#21204](https://github.com/NousResearch/hermes-agent/pull/21204))
|
||||
- **Background review agent runtime propagation** — provider/model/credentials now actually inherit from parent
|
||||
- **Inbound document host paths translated to container paths for Docker backend** (salvage #19048) ([#21184](https://github.com/NousResearch/hermes-agent/pull/21184))
|
||||
- **Matrix gateway race between auto-redaction and message delivery with high-speed models** (#19075)
|
||||
- **`/new` during active agent session never sends response on Telegram** (#18912)
|
||||
|
||||
---
|
||||
|
||||
## 📱 Messaging Platforms (Gateway)
|
||||
|
||||
### New platform
|
||||
- **Google Chat — 20th platform** + generic `env_enablement_fn` / `cron_deliver_env_var` platform-plugin hooks (IRC + Teams migrated) ([#21306](https://github.com/NousResearch/hermes-agent/pull/21306), [#21331](https://github.com/NousResearch/hermes-agent/pull/21331))
|
||||
|
||||
### Cross-platform
|
||||
- **`allowed_{channels,chats,rooms}` whitelist** — Slack (salvage #7401), Telegram, Mattermost, Matrix, DingTalk ([#21251](https://github.com/NousResearch/hermes-agent/pull/21251))
|
||||
- **Per-platform `gateway_restart_notification` flag** ([#20892](https://github.com/NousResearch/hermes-agent/pull/20892))
|
||||
- **`busy_ack_enabled` config — suppress ack messages** ([#18194](https://github.com/NousResearch/hermes-agent/pull/18194))
|
||||
- **Auto-delete slash-command system notices after TTL** ([#18266](https://github.com/NousResearch/hermes-agent/pull/18266))
|
||||
- **Opt-in cleanup of temporary progress bubbles** ([#21186](https://github.com/NousResearch/hermes-agent/pull/21186))
|
||||
- **`[[as_document]]` directive — skill media routing** (salvage #19069) ([#21210](https://github.com/NousResearch/hermes-agent/pull/21210))
|
||||
- **`hermes gateway list` — cross-profile status** (salvage #19129) ([#21225](https://github.com/NousResearch/hermes-agent/pull/21225))
|
||||
- **Auto-resume interrupted sessions after restart** (salvage #20888) ([#21192](https://github.com/NousResearch/hermes-agent/pull/21192))
|
||||
- **Atomic restart markers + Windows runtime-lock offset** (#17842) ([#18179](https://github.com/NousResearch/hermes-agent/pull/18179))
|
||||
- Fix: `config.yaml` wins over `.env` for agent/display/timezone settings ([#18764](https://github.com/NousResearch/hermes-agent/pull/18764))
|
||||
- Fix: auto-restart when source files change out from under us (#17648) ([#18409](https://github.com/NousResearch/hermes-agent/pull/18409))
|
||||
- Fix: use git HEAD SHA for stale-code check, not file mtimes ([#19740](https://github.com/NousResearch/hermes-agent/pull/19740))
|
||||
- Fix: shutdown + restart hygiene — drain timeout, false-fatal, success log ([#18761](https://github.com/NousResearch/hermes-agent/pull/18761))
|
||||
- Fix: preserve max_turns after env reload (salvage #19183) ([#21240](https://github.com/NousResearch/hermes-agent/pull/21240))
|
||||
- Fix: exclude ancestor PIDs from gateway process scan ([#19586](https://github.com/NousResearch/hermes-agent/pull/19586))
|
||||
- Fix: move quick-command alias dispatch before built-ins ([#19588](https://github.com/NousResearch/hermes-agent/pull/19588))
|
||||
- Fix: show other profiles in 'gateway status' to prevent confusion ([#19582](https://github.com/NousResearch/hermes-agent/pull/19582))
|
||||
- Fix: include external_dirs skills in Telegram/Discord slash commands (salvage #8790) ([#18741](https://github.com/NousResearch/hermes-agent/pull/18741))
|
||||
- Fix: match disabled/optional skills by frontmatter slug, not dir name ([#18753](https://github.com/NousResearch/hermes-agent/pull/18753))
|
||||
- Fix: read /status token totals from SessionDB (#17158) ([#18206](https://github.com/NousResearch/hermes-agent/pull/18206))
|
||||
- Fix: snapshot callback generation after agent binds it, not before ([#18219](https://github.com/NousResearch/hermes-agent/pull/18219))
|
||||
- Fix: re-inject topic-bound skill after /new or /reset ([#18205](https://github.com/NousResearch/hermes-agent/pull/18205))
|
||||
- Fix: isolate pending native image paths by session ([#18202](https://github.com/NousResearch/hermes-agent/pull/18202))
|
||||
- Fix: clear queued reload skills notes on new/resume/branch ([#19431](https://github.com/NousResearch/hermes-agent/pull/19431))
|
||||
- Fix: hide required-arg commands from Telegram menu ([#19400](https://github.com/NousResearch/hermes-agent/pull/19400))
|
||||
- Fix: bridge top-level `require_mention` to Telegram config ([#19429](https://github.com/NousResearch/hermes-agent/pull/19429))
|
||||
- Fix: suppress duplicate voice transcripts ([#19428](https://github.com/NousResearch/hermes-agent/pull/19428))
|
||||
- Fix: show friendly error when service is not installed ([#19707](https://github.com/NousResearch/hermes-agent/pull/19707))
|
||||
- Fix: read context_length from custom_providers in session info header ([#19708](https://github.com/NousResearch/hermes-agent/pull/19708))
|
||||
- Fix: preserve WSL interop PATH in systemd units ([#19867](https://github.com/NousResearch/hermes-agent/pull/19867))
|
||||
- Fix: handle planned service stops (salvage #19876) ([#19936](https://github.com/NousResearch/hermes-agent/pull/19936))
|
||||
- Fix: keep DoH-confirmed Telegram IPs that match system DNS (salvage #17043) ([#20175](https://github.com/NousResearch/hermes-agent/pull/20175))
|
||||
- Fix: load `reply_to_mode` from config.yaml for Discord + Telegram (salvage #17117) ([#20171](https://github.com/NousResearch/hermes-agent/pull/20171))
|
||||
- Fix: tolerate malformed HERMES_HUMAN_DELAY_* env vars (salvage #16933) ([#20217](https://github.com/NousResearch/hermes-agent/pull/20217))
|
||||
- Fix: deterministic thread eviction preserves newest entries (salvage #13639) ([#20285](https://github.com/NousResearch/hermes-agent/pull/20285))
|
||||
- Fix: don't dead-end setup wizard when only system-scope unit is installed ([#20905](https://github.com/NousResearch/hermes-agent/pull/20905))
|
||||
- Fix: wait for systemd restart readiness + harden Discord slash-command sync ([#20949](https://github.com/NousResearch/hermes-agent/pull/20949))
|
||||
- Fix: avoid duplicated Responses history (salvage #18995) ([#21185](https://github.com/NousResearch/hermes-agent/pull/21185))
|
||||
- Fix: surface bootstrap failures to stderr (salvage #21157) ([#21278](https://github.com/NousResearch/hermes-agent/pull/21278))
|
||||
- Fix: log agent task failures instead of silently losing usage data (salvage #21159) ([#21274](https://github.com/NousResearch/hermes-agent/pull/21274))
|
||||
- Fix: log runtime-status write failures with rate-limiting (salvage #21158) ([#21285](https://github.com/NousResearch/hermes-agent/pull/21285))
|
||||
- Fix: reset-failed before every fallback restart so the gateway can't get stranded ([#21371](https://github.com/NousResearch/hermes-agent/pull/21371))
|
||||
- Fix: Telegram — preserve `thread_id=1` for forum General typing indicator ([#21390](https://github.com/NousResearch/hermes-agent/pull/21390))
|
||||
- Fix: batch critical fixes — session resume, /new race, HA WebSocket scheme (@kshitijk4poor) ([#19182](https://github.com/NousResearch/hermes-agent/pull/19182))
|
||||
|
||||
### Telegram
|
||||
- **DM user-managed multi-session topics** (salvage of #19185) ([#19206](https://github.com/NousResearch/hermes-agent/pull/19206))
|
||||
|
||||
### Discord
|
||||
- **Message deletion action** (salvage #19052) ([#21197](https://github.com/NousResearch/hermes-agent/pull/21197))
|
||||
- Fix: allow `free_response_channels` to override `DISCORD_IGNORE_NO_MENTION` ([#19629](https://github.com/NousResearch/hermes-agent/pull/19629))
|
||||
|
||||
### Slack
|
||||
- Fix: ephemeral slash-command ack, private notice delivery, format_message fixes (@kshitijk4poor) ([#18198](https://github.com/NousResearch/hermes-agent/pull/18198))
|
||||
|
||||
### WhatsApp
|
||||
- Fix: load WhatsApp home channel from env overrides ([#18190](https://github.com/NousResearch/hermes-agent/pull/18190))
|
||||
|
||||
### Feishu
|
||||
- **Operator-configurable bot admission and mention policy** ([#18208](https://github.com/NousResearch/hermes-agent/pull/18208))
|
||||
- Fix: force text mode for markdown tables (salvage of #13723 by @WuTianyi123) ([#20275](https://github.com/NousResearch/hermes-agent/pull/20275))
|
||||
|
||||
### Matrix + Email
|
||||
- Fix: `/sethome` on Matrix and Email now persists across restarts ([#18272](https://github.com/NousResearch/hermes-agent/pull/18272))
|
||||
|
||||
### Teams
|
||||
- **Docs + feat: sidebar + threading with group-chat fallback** ([#20042](https://github.com/NousResearch/hermes-agent/pull/20042))
|
||||
|
||||
### Weixin
|
||||
- Fix: deduplicate Weixin messages by content fingerprint ([#19742](https://github.com/NousResearch/hermes-agent/pull/19742))
|
||||
|
||||
### QQBot
|
||||
- **Port SDK improvements in-tree — chunked upload, approval keyboards, quoted attachments** ([#21342](https://github.com/NousResearch/hermes-agent/pull/21342))
|
||||
- **Wire native tool-approval UX via inline keyboards** ([#21353](https://github.com/NousResearch/hermes-agent/pull/21353))
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Core Agent & Architecture
|
||||
|
||||
### Provider & Model Support
|
||||
|
||||
#### Pluggable providers
|
||||
- **ProviderProfile ABC + `plugins/model-providers/`** — inference providers are now a pluggable surface (salvage of #14424) ([#20324](https://github.com/NousResearch/hermes-agent/pull/20324))
|
||||
- **`list_picker_providers`** — credential-filtered picker (salvage #13561) ([#20298](https://github.com/NousResearch/hermes-agent/pull/20298))
|
||||
- **Remove `/provider` alias for `/model`** ([#20358](https://github.com/NousResearch/hermes-agent/pull/20358))
|
||||
- **Shared Hermes dotenv loader across CLI + plugins** (salvage #13660) ([#20281](https://github.com/NousResearch/hermes-agent/pull/20281))
|
||||
- **Nous OAuth persisted across profiles via shared token store** ([#19712](https://github.com/NousResearch/hermes-agent/pull/19712))
|
||||
|
||||
#### New models
|
||||
- `deepseek/deepseek-v4-pro` added to OpenRouter + Nous Portal ([#20495](https://github.com/NousResearch/hermes-agent/pull/20495))
|
||||
- `x-ai/grok-4.3` added to OpenRouter + Nous Portal ([#20497](https://github.com/NousResearch/hermes-agent/pull/20497))
|
||||
- `openrouter/owl-alpha` (free tier) added to curated OpenRouter list ([#18071](https://github.com/NousResearch/hermes-agent/pull/18071))
|
||||
- `tencent/hy3-preview` paid route on OpenRouter (@Contentment003111) ([#21077](https://github.com/NousResearch/hermes-agent/pull/21077))
|
||||
- Arcee Trinity Large Thinking — temperature + compression overrides ([#20473](https://github.com/NousResearch/hermes-agent/pull/20473))
|
||||
- Rename `x-ai/grok-4.20-beta` to `x-ai/grok-4.20` ([#19640](https://github.com/NousResearch/hermes-agent/pull/19640))
|
||||
- Demote Vercel AI Gateway to bottom of provider picker ([#18112](https://github.com/NousResearch/hermes-agent/pull/18112))
|
||||
|
||||
#### Provider configuration
|
||||
- **OpenRouter — response caching support** (@kshitijk4poor) ([#19132](https://github.com/NousResearch/hermes-agent/pull/19132))
|
||||
- **`image_gen.model` from config.yaml honored** (salvage #19376) ([#21273](https://github.com/NousResearch/hermes-agent/pull/21273))
|
||||
- Fix: honor runtime default model during delegate provider resolution (@johnncenae) ([#17587](https://github.com/NousResearch/hermes-agent/pull/17587))
|
||||
- Fix: avoid Bedrock credential probe in provider picker (@helix4u) ([#18998](https://github.com/NousResearch/hermes-agent/pull/18998))
|
||||
- Fix: drop stale env-var override of persisted provider for cron ([#19627](https://github.com/NousResearch/hermes-agent/pull/19627))
|
||||
- Fix: auxiliary curator api_key/base_url into runtime resolution ([#19421](https://github.com/NousResearch/hermes-agent/pull/19421))
|
||||
|
||||
### Agent Loop & Conversation
|
||||
- **`video_analyze` — native video understanding tool** (@alt-glitch) ([#19301](https://github.com/NousResearch/hermes-agent/pull/19301))
|
||||
- **Show context compression count in status bar** (CLI + TUI) ([#21218](https://github.com/NousResearch/hermes-agent/pull/21218))
|
||||
- **Isolate `get_tool_definitions` quiet_mode cache + dedup LCM injection** (#17335) ([#17889](https://github.com/NousResearch/hermes-agent/pull/17889))
|
||||
- Fix: warning-first tool-call loop guardrails ([#18227](https://github.com/NousResearch/hermes-agent/pull/18227))
|
||||
- Fix: break permanent empty-response loop from orphan tool-tail ([#21385](https://github.com/NousResearch/hermes-agent/pull/21385))
|
||||
- Fix: propagate ContextVars to concurrent tool worker threads (salvage #16660) ([#18123](https://github.com/NousResearch/hermes-agent/pull/18123))
|
||||
- Fix: surface self-improvement review summaries across CLI, TUI, and gateway ([#18073](https://github.com/NousResearch/hermes-agent/pull/18073))
|
||||
- Fix: serialize concurrent `hermes_tools` RPC calls from `execute_code` ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
|
||||
- Fix: include system prompt + tool schemas in token estimates for compression ([#18265](https://github.com/NousResearch/hermes-agent/pull/18265))
|
||||
|
||||
### Compression
|
||||
- Fix: skip non-string tool content in dedup pass to prevent AttributeError ([#19398](https://github.com/NousResearch/hermes-agent/pull/19398))
|
||||
- Fix: reset `_summary_failure_cooldown_until` on session reset ([#19622](https://github.com/NousResearch/hermes-agent/pull/19622))
|
||||
- Fix: trigger fallback on timeout errors alongside model-unavailable errors ([#19665](https://github.com/NousResearch/hermes-agent/pull/19665))
|
||||
- Fix: `_prune_old_tool_results` boundary direction ([#19725](https://github.com/NousResearch/hermes-agent/pull/19725))
|
||||
- Fix: soften summary prompt for content filters (salvage #19456) ([#21302](https://github.com/NousResearch/hermes-agent/pull/21302))
|
||||
|
||||
### Delegate
|
||||
- Fix: inherit parent fallback_chain in `_build_child_agent` ([#19601](https://github.com/NousResearch/hermes-agent/pull/19601))
|
||||
- Fix: guard `_load_config()` against `delegation: null` in config.yaml ([#19662](https://github.com/NousResearch/hermes-agent/pull/19662))
|
||||
- Fix: inherit parent api_key when `delegation.base_url` set without `delegation.api_key` ([#19741](https://github.com/NousResearch/hermes-agent/pull/19741))
|
||||
- Fix: expand composite toolsets before intersection (salvage #19455) ([#21300](https://github.com/NousResearch/hermes-agent/pull/21300))
|
||||
- Fix: correct ACP docs — Claude Code CLI has no --acp flag (salvage #19058) ([#21201](https://github.com/NousResearch/hermes-agent/pull/21201))
|
||||
|
||||
### Session & Memory
|
||||
- **Hindsight — probe API for `update_mode='append'` to dedupe across processes** (@nicoloboschi) ([#20222](https://github.com/NousResearch/hermes-agent/pull/20222))
|
||||
|
||||
### Curator
|
||||
- **`hermes curator archive` and `prune` subcommands** ([#20200](https://github.com/NousResearch/hermes-agent/pull/20200))
|
||||
- **`hermes curator list-archived`** (#20651) ([#21236](https://github.com/NousResearch/hermes-agent/pull/21236))
|
||||
- **Synchronous manual `hermes curator run`** (#20555) ([#21216](https://github.com/NousResearch/hermes-agent/pull/21216))
|
||||
- Fix: preserve `last_report_path` in state ([#18169](https://github.com/NousResearch/hermes-agent/pull/18169))
|
||||
- Fix: rewrite cron job skill refs after consolidation ([#18253](https://github.com/NousResearch/hermes-agent/pull/18253))
|
||||
- Fix: defer first run + `--dry-run` preview (#18373) ([#18389](https://github.com/NousResearch/hermes-agent/pull/18389))
|
||||
- Fix: authoritative `absorbed_into` on delete + restore cron skill links on rollback (#18671) ([#18731](https://github.com/NousResearch/hermes-agent/pull/18731))
|
||||
- Fix: prevent false-positive consolidation from substring matching ([#19573](https://github.com/NousResearch/hermes-agent/pull/19573))
|
||||
- Fix: only mark agent-created for background-review sediment ([#19621](https://github.com/NousResearch/hermes-agent/pull/19621))
|
||||
- Fix: protect hub skills by frontmatter name ([#20194](https://github.com/NousResearch/hermes-agent/pull/20194))
|
||||
|
||||
---
|
||||
|
||||
## 🔧 Tool System
|
||||
|
||||
### File tools
|
||||
- **Post-write delta lint on `write_file` + `patch`** — in-proc linters for Python, JSON, YAML, TOML ([#20191](https://github.com/NousResearch/hermes-agent/pull/20191))
|
||||
|
||||
### Cron
|
||||
- **`no_agent` mode — script-only cron jobs (watchdog pattern)** ([#19709](https://github.com/NousResearch/hermes-agent/pull/19709))
|
||||
- **`context_from` chaining docs** (salvage #15724) ([#20394](https://github.com/NousResearch/hermes-agent/pull/20394))
|
||||
- Fix: treat non-dict origin as missing instead of crashing tick ([#19283](https://github.com/NousResearch/hermes-agent/pull/19283))
|
||||
- Fix: bump skill usage when cron jobs load skills ([#19433](https://github.com/NousResearch/hermes-agent/pull/19433))
|
||||
- Fix: recover null `next_run_at` jobs ([#19576](https://github.com/NousResearch/hermes-agent/pull/19576))
|
||||
- Fix: skip AI call when prerun script produces no output ([#19628](https://github.com/NousResearch/hermes-agent/pull/19628))
|
||||
- Fix: expand config.yaml refs during job execution ([#19872](https://github.com/NousResearch/hermes-agent/pull/19872))
|
||||
- Fix: serialize `get_due_jobs` writes to prevent parallel state corruption ([#19874](https://github.com/NousResearch/hermes-agent/pull/19874))
|
||||
- Fix: initialize MCP servers before constructing the cron AIAgent ([#21354](https://github.com/NousResearch/hermes-agent/pull/21354))
|
||||
|
||||
### MCP
|
||||
- **SSE transport support** (salvage #19135) ([#21227](https://github.com/NousResearch/hermes-agent/pull/21227))
|
||||
- **Forward OAuth auth + bump `sse_read_timeout` on SSE transport** ([#21323](https://github.com/NousResearch/hermes-agent/pull/21323))
|
||||
- **Retry stale pipe transport failures as session-expired** ([#21289](https://github.com/NousResearch/hermes-agent/pull/21289))
|
||||
- **Surface image tool results as MEDIA tags instead of dropping them** ([#21328](https://github.com/NousResearch/hermes-agent/pull/21328))
|
||||
- **Periodic keepalive to `_wait_for_lifecycle_event`** (salvage #17016) ([#20209](https://github.com/NousResearch/hermes-agent/pull/20209))
|
||||
- Fix: reconnect on terminated sessions ([#19380](https://github.com/NousResearch/hermes-agent/pull/19380))
|
||||
- Fix: decouple AnyUrl import from mcp dependency ([#19695](https://github.com/NousResearch/hermes-agent/pull/19695))
|
||||
- Fix: `mcp add --command` gets distinct argparse dest ([#21204](https://github.com/NousResearch/hermes-agent/pull/21204))
|
||||
- Fix: clear stale thread interrupt before MCP discovery ([#21276](https://github.com/NousResearch/hermes-agent/pull/21276))
|
||||
- Fix: report configured timeout in MCP call errors ([#21281](https://github.com/NousResearch/hermes-agent/pull/21281))
|
||||
- Fix: include exception type in error messages when str(exc) is empty (salvage #19425) ([#21292](https://github.com/NousResearch/hermes-agent/pull/21292))
|
||||
- Fix: re-raise CancelledError explicitly in `MCPServerTask.run` ([#21318](https://github.com/NousResearch/hermes-agent/pull/21318))
|
||||
- Fix: coerce numeric tool args defensively in `mcp_serve` ([#21329](https://github.com/NousResearch/hermes-agent/pull/21329))
|
||||
- Fix: gate utility stubs on server-advertised capabilities ([#21347](https://github.com/NousResearch/hermes-agent/pull/21347))
|
||||
|
||||
### Browser
|
||||
- Fix: allow explicit CDP override without local agent-browser ([#19670](https://github.com/NousResearch/hermes-agent/pull/19670))
|
||||
- Fix: inject `--no-sandbox` for root + AppArmor userns restrictions ([#19747](https://github.com/NousResearch/hermes-agent/pull/19747))
|
||||
- Fix: tighten Lightpanda fallback edge cases (@kshitijk4poor) ([#20672](https://github.com/NousResearch/hermes-agent/pull/20672))
|
||||
|
||||
### Web tools
|
||||
- **Per-capability backend selection — search/extract split** (@kshitijk4poor) ([#20061](https://github.com/NousResearch/hermes-agent/pull/20061))
|
||||
- **SearXNG native search-only backend** (@kshitijk4poor) ([#20823](https://github.com/NousResearch/hermes-agent/pull/20823))
|
||||
|
||||
### Approval / Tool gating
|
||||
- Fix: wake blocked gateway approvals on session cleanup ([#18171](https://github.com/NousResearch/hermes-agent/pull/18171))
|
||||
- Fix: harden YOLO mode env parsing against quoted-bool strings ([#18214](https://github.com/NousResearch/hermes-agent/pull/18214))
|
||||
- Fix: extend sensitive write target to cover shell RC and credential files ([#19282](https://github.com/NousResearch/hermes-agent/pull/19282))
|
||||
|
||||
---
|
||||
|
||||
## 🔌 Plugin System
|
||||
|
||||
- **`transform_llm_output` plugin hook** (salvage of #20813) ([#21235](https://github.com/NousResearch/hermes-agent/pull/21235))
|
||||
- **Document `env_enablement_fn` + `cron_deliver_env_var` platform-plugin hooks** ([#21331](https://github.com/NousResearch/hermes-agent/pull/21331))
|
||||
- **Pluggable surfaces coverage — model-provider guide, full plugin map, opt-in fix** ([#20749](https://github.com/NousResearch/hermes-agent/pull/20749))
|
||||
- **Plugin-authoring gaps — image-gen provider guide + publishing a skill tap** ([#20800](https://github.com/NousResearch/hermes-agent/pull/20800))
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Skills Ecosystem
|
||||
|
||||
### New optional skills
|
||||
- **Shopify** — Admin + Storefront GraphQL optional skill ([#18116](https://github.com/NousResearch/hermes-agent/pull/18116))
|
||||
- **here.now** — optional skill ([#18170](https://github.com/NousResearch/hermes-agent/pull/18170))
|
||||
- **shop-app** — personal shopping assistant (optional) ([#20702](https://github.com/NousResearch/hermes-agent/pull/20702))
|
||||
- **Anthropic financial-services bundle** — ported as optional finance skills ([#21180](https://github.com/NousResearch/hermes-agent/pull/21180))
|
||||
- **kanban-video-orchestrator** — creative optional skill (@SHL0MS) ([#19281](https://github.com/NousResearch/hermes-agent/pull/19281))
|
||||
- **searxng-search** — optional skill + Web Search + Extract docs page (@kshitijk4poor) ([#20841](https://github.com/NousResearch/hermes-agent/pull/20841), [#20844](https://github.com/NousResearch/hermes-agent/pull/20844))
|
||||
|
||||
### Skill UX
|
||||
- **Linear skill — add Documents support + Python helper script** ([#20752](https://github.com/NousResearch/hermes-agent/pull/20752))
|
||||
- **Modernize Obsidian skill to use file tools** (salvage #19332) ([#20413](https://github.com/NousResearch/hermes-agent/pull/20413))
|
||||
- **Default custom tool creation to plugins** (@kshitijk4poor) ([#19755](https://github.com/NousResearch/hermes-agent/pull/19755))
|
||||
- **skill_commands cache — rescan on platform scope changes** (salvage #14570 by @LeonSGP43) ([#18739](https://github.com/NousResearch/hermes-agent/pull/18739))
|
||||
- **Skills — additional rescan paths in skill_commands cache** (salvage #19042) ([#21181](https://github.com/NousResearch/hermes-agent/pull/21181))
|
||||
- Fix: regression tests for non-dict metadata in `extract_skill_conditions` ([#18213](https://github.com/NousResearch/hermes-agent/pull/18213))
|
||||
- Docs: explain restoring bundled skills (salvage #19254) ([#20404](https://github.com/NousResearch/hermes-agent/pull/20404))
|
||||
- Docs: document `hermes skills reset` subcommand (salvage #11544) ([#20395](https://github.com/NousResearch/hermes-agent/pull/20395))
|
||||
- Docs: himalaya v1.2.0 `folder.aliases` syntax ([#19882](https://github.com/NousResearch/hermes-agent/pull/19882))
|
||||
- Point agent at `hermes-agent` skill + docs site sync ([#20390](https://github.com/NousResearch/hermes-agent/pull/20390))
|
||||
|
||||
---
|
||||
|
||||
## 🖥️ CLI & User Experience
|
||||
|
||||
### CLI
|
||||
- **`/new` accepts optional session name argument** (salvage of #19555) ([#19637](https://github.com/NousResearch/hermes-agent/pull/19637))
|
||||
- **100 new CLI startup tips** ([#20168](https://github.com/NousResearch/hermes-agent/pull/20168))
|
||||
- **`display.language` — static message translation** (zh/ja/de/es) ([#20231](https://github.com/NousResearch/hermes-agent/pull/20231))
|
||||
- **French (fr) locale** (@Foolafroos) ([#20329](https://github.com/NousResearch/hermes-agent/pull/20329))
|
||||
- **Ukrainian (uk) locale** ([#20467](https://github.com/NousResearch/hermes-agent/pull/20467))
|
||||
- **Turkish (tr) locale** ([#20474](https://github.com/NousResearch/hermes-agent/pull/20474))
|
||||
- Fix: recover classic CLI output after resize (@helix4u) ([#20444](https://github.com/NousResearch/hermes-agent/pull/20444))
|
||||
- Fix: complete absolute paths as paths (@helix4u) ([#19930](https://github.com/NousResearch/hermes-agent/pull/19930))
|
||||
- Fix: resolve lazy session creation regressions (#18370 fallout) (@alt-glitch) ([#20363](https://github.com/NousResearch/hermes-agent/pull/20363))
|
||||
- Fix: local backend CLI always uses launch directory (@alt-glitch) ([#19334](https://github.com/NousResearch/hermes-agent/pull/19334))
|
||||
- Refactor: drop dead c-S-c key binding (follow-up to #19895) ([#19919](https://github.com/NousResearch/hermes-agent/pull/19919))
|
||||
|
||||
### TUI (Ink)
|
||||
- **`/model` picker overhaul to match `hermes model` with inline auth** (@austinpickett) ([#18117](https://github.com/NousResearch/hermes-agent/pull/18117))
|
||||
- **Collapsible sections in startup banner** — skills, system prompt, MCP (@kshitijk4poor) ([#20625](https://github.com/NousResearch/hermes-agent/pull/20625))
|
||||
- **Show context compression count in status bar** ([#21218](https://github.com/NousResearch/hermes-agent/pull/21218))
|
||||
- Perf: reduce overlay render churn with focused selectors (@OutThisLife) ([#20393](https://github.com/NousResearch/hermes-agent/pull/20393))
|
||||
- Fix: restore voice push-to-talk parity (salvage of #16189 by @Montbra) (@OutThisLife) ([#20897](https://github.com/NousResearch/hermes-agent/pull/20897))
|
||||
- Fix: kanban button (@austinpickett) ([#18358](https://github.com/NousResearch/hermes-agent/pull/18358))
|
||||
|
||||
### Dashboard
|
||||
- **Plugins page — manage, enable/disable, auth status** (@austinpickett) ([#18095](https://github.com/NousResearch/hermes-agent/pull/18095))
|
||||
- **Profiles management page** (@vincez-hms-coder) ([#16419](https://github.com/NousResearch/hermes-agent/pull/16419))
|
||||
- **Interactive column sorting in analytics tables** ([#18192](https://github.com/NousResearch/hermes-agent/pull/18192))
|
||||
- **`default-large` built-in theme with 18px base size** ([#20820](https://github.com/NousResearch/hermes-agent/pull/20820))
|
||||
- **Support serving under URL prefix via `X-Forwarded-Prefix`** (salvage #19450) ([#21296](https://github.com/NousResearch/hermes-agent/pull/21296))
|
||||
- **Launch dashboard as side-process via `HERMES_DASHBOARD=1` in Docker** (@benbarclay) ([#19540](https://github.com/NousResearch/hermes-agent/pull/19540))
|
||||
- Fix: dashboard theme layout shift (@AllardQuek) ([#17232](https://github.com/NousResearch/hermes-agent/pull/17232))
|
||||
- Fix: gateway model picker current context (@helix4u) ([#20513](https://github.com/NousResearch/hermes-agent/pull/20513))
|
||||
|
||||
### Update + setup
|
||||
- **`hermes update --yes/-y` to skip interactive prompts** ([#18261](https://github.com/NousResearch/hermes-agent/pull/18261))
|
||||
- **Restart manual profile gateways after update** ([#18178](https://github.com/NousResearch/hermes-agent/pull/18178))
|
||||
|
||||
### Profiles
|
||||
- **`--no-skills` flag for empty profile creation** ([#20986](https://github.com/NousResearch/hermes-agent/pull/20986))
|
||||
|
||||
---
|
||||
|
||||
## 🎵 Voice, Image & Media
|
||||
|
||||
- **xAI Custom Voices — voice cloning** (@alt-glitch) ([#18776](https://github.com/NousResearch/hermes-agent/pull/18776))
|
||||
- **Achievements — share card render on unlocked badges** ([#19657](https://github.com/NousResearch/hermes-agent/pull/19657))
|
||||
- **Refresh systemd unit on gateway boot (not just start/restart)** (@alt-glitch) ([#19684](https://github.com/NousResearch/hermes-agent/pull/19684))
|
||||
|
||||
---
|
||||
|
||||
## 🔗 API Server & Remote Access
|
||||
|
||||
- **`X-Hermes-Session-Key` header for long-term memory scoping** (closes #20060) ([#20199](https://github.com/NousResearch/hermes-agent/pull/20199))
|
||||
|
||||
---
|
||||
|
||||
## 🧰 ACP Adapter (VS Code / Zed / JetBrains)
|
||||
|
||||
- **`/steer` and `/queue` slash commands** (@HenkDz) ([#18114](https://github.com/NousResearch/hermes-agent/pull/18114))
|
||||
- Fix: translate Windows cwd for WSL sessions (salvage #18128) ([#18233](https://github.com/NousResearch/hermes-agent/pull/18233))
|
||||
- Fix: run `/steer` as a regular prompt on idle sessions ([#18258](https://github.com/NousResearch/hermes-agent/pull/18258))
|
||||
- Fix: route Zed thoughts to reasoning + polish tool/context rendering ([#19139](https://github.com/NousResearch/hermes-agent/pull/19139))
|
||||
- Fix: atomic session persistence via `replace_messages` (salvage #13675) ([#20279](https://github.com/NousResearch/hermes-agent/pull/20279))
|
||||
- Fix: preserve assistant reasoning metadata in session persistence (salvage #13575) ([#20296](https://github.com/NousResearch/hermes-agent/pull/20296))
|
||||
- Docs: update VS Code setup for ACP Client extension (salvage #12495) ([#20433](https://github.com/NousResearch/hermes-agent/pull/20433))
|
||||
|
||||
---
|
||||
|
||||
## 🐳 Docker
|
||||
|
||||
- **Launch dashboard as side-process via `HERMES_DASHBOARD=1`** (@benbarclay) ([#19540](https://github.com/NousResearch/hermes-agent/pull/19540))
|
||||
- **Refuse root gateway runs in official image** (salvage #19215) ([#21250](https://github.com/NousResearch/hermes-agent/pull/21250))
|
||||
- **Chown runtime `node_modules` trees to hermes user** (salvage #19303) ([#21267](https://github.com/NousResearch/hermes-agent/pull/21267))
|
||||
- Fix: exclude compose/profile runtime state from build context ([#19626](https://github.com/NousResearch/hermes-agent/pull/19626))
|
||||
- CI: don't cancel overlapping builds, guard `:latest` (@ethernet8023) ([#20890](https://github.com/NousResearch/hermes-agent/pull/20890))
|
||||
- Test: align Dockerfile contract tests with simplified TUI flow (salvage #19024) ([#21174](https://github.com/NousResearch/hermes-agent/pull/21174))
|
||||
- Docs: connect to local inference servers (vLLM, Ollama) (salvage #12335) ([#20407](https://github.com/NousResearch/hermes-agent/pull/20407))
|
||||
- Docs: document `API_SERVER_*` env vars (salvage #11758) ([#20409](https://github.com/NousResearch/hermes-agent/pull/20409))
|
||||
- Docs: clarify Docker terminal backend is a single persistent container ([#20003](https://github.com/NousResearch/hermes-agent/pull/20003))
|
||||
|
||||
---
|
||||
|
||||
## 🐛 Notable Bug Fixes
|
||||
|
||||
### Agent
|
||||
- Fix: recover lazy session creation regressions (#18370 fallout) (@alt-glitch) ([#20363](https://github.com/NousResearch/hermes-agent/pull/20363))
|
||||
- Fix: propagate ContextVars to concurrent tool worker threads (salvage #16660) ([#18123](https://github.com/NousResearch/hermes-agent/pull/18123))
|
||||
- Fix: warning-first tool-call loop guardrails ([#18227](https://github.com/NousResearch/hermes-agent/pull/18227))
|
||||
- Fix: surface self-improvement review summaries across CLI, TUI, and gateway ([#18073](https://github.com/NousResearch/hermes-agent/pull/18073))
|
||||
|
||||
### Gateway streaming
|
||||
- Fix: harden StreamingConfig bool and numeric coercion (@simbam99) ([#16463](https://github.com/NousResearch/hermes-agent/pull/16463))
|
||||
|
||||
### Model
|
||||
- Fix: avoid Bedrock credential probe in provider picker (@helix4u) ([#18998](https://github.com/NousResearch/hermes-agent/pull/18998))
|
||||
|
||||
### Doctor
|
||||
- Fix: check global agent-browser when local install not found ([#19671](https://github.com/NousResearch/hermes-agent/pull/19671))
|
||||
- Test: kimi-coding-cn provider validation regression ([#19734](https://github.com/NousResearch/hermes-agent/pull/19734))
|
||||
|
||||
### Update
|
||||
- Fix: patch `isatty` on real streams to fix xdist-flaky `--yes` tests (salvage #19026) ([#21175](https://github.com/NousResearch/hermes-agent/pull/21175))
|
||||
- Fix: teach restart-mocks about the post-update survivor sweep (salvage #19031) ([#21177](https://github.com/NousResearch/hermes-agent/pull/21177))
|
||||
|
||||
### Auth
|
||||
- Fix: acp preserve assistant reasoning metadata ([#20296](https://github.com/NousResearch/hermes-agent/pull/20296))
|
||||
|
||||
### Redact
|
||||
- Fix: add `code_file` param to skip false-positive ENV/JSON patterns ([#19715](https://github.com/NousResearch/hermes-agent/pull/19715))
|
||||
|
||||
### Email
|
||||
- Fix: quoted-relative file-drop paths + Date header on tool email path ([#19646](https://github.com/NousResearch/hermes-agent/pull/19646))
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing
|
||||
|
||||
- **ACP — accept prompt persistence kwargs in MCP E2E mocks** (@stephenschoettler) ([#18047](https://github.com/NousResearch/hermes-agent/pull/18047))
|
||||
- **Toolsets — include kanban in expected post-#17805 toolset assertions** (@briandevans) ([#18122](https://github.com/NousResearch/hermes-agent/pull/18122))
|
||||
- **Agent — cover max-iterations summary message sanitization** ([#19580](https://github.com/NousResearch/hermes-agent/pull/19580))
|
||||
- **run_agent — `-inf` and `nan` regression coverage for `_coerce_number`** ([#19703](https://github.com/NousResearch/hermes-agent/pull/19703))
|
||||
|
||||
---
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
### Major docs additions
|
||||
- **`llms.txt` + `llms-full.txt` — agent-friendly ingestion** ([#18276](https://github.com/NousResearch/hermes-agent/pull/18276))
|
||||
- **User Stories and Use Cases collage page** ([#18282](https://github.com/NousResearch/hermes-agent/pull/18282))
|
||||
- **Persistent Goals (/goal) feature page** ([#18275](https://github.com/NousResearch/hermes-agent/pull/18275))
|
||||
- **Windows (WSL2) guide expansion** — filesystem, networking, services, pitfalls ([#20748](https://github.com/NousResearch/hermes-agent/pull/20748))
|
||||
- **Chinese (zh-CN) README translation** (salvage #13508) ([#20431](https://github.com/NousResearch/hermes-agent/pull/20431))
|
||||
- **zh-Hans Docusaurus locale** + Tool Gateway / image-gen / WSL quickstart translations (salvage #11728) ([#20430](https://github.com/NousResearch/hermes-agent/pull/20430))
|
||||
- **Tool Gateway docs restructure** — lead with what it does, config moved to bottom ([#20827](https://github.com/NousResearch/hermes-agent/pull/20827))
|
||||
- **Quickstart — Onchain AI Garage Hermes tutorials playlist** ([#20192](https://github.com/NousResearch/hermes-agent/pull/20192))
|
||||
- **Open WebUI bootstrap script** (salvage #9566) ([#20427](https://github.com/NousResearch/hermes-agent/pull/20427))
|
||||
- **Local Ollama setup guide** (salvage #5842) ([#20426](https://github.com/NousResearch/hermes-agent/pull/20426))
|
||||
- **Google Gemini guide** (salvage #17450) ([#20401](https://github.com/NousResearch/hermes-agent/pull/20401))
|
||||
- **Custom model aliases for /model command** ([#20475](https://github.com/NousResearch/hermes-agent/pull/20475))
|
||||
- **Together/Groq/Perplexity cookbook via `custom_providers`** (salvage #15214) ([#20400](https://github.com/NousResearch/hermes-agent/pull/20400))
|
||||
- **Doubao speech integration examples** (TTS + STT) (salvage #18065) ([#20418](https://github.com/NousResearch/hermes-agent/pull/20418))
|
||||
- **WSL-to-Windows Chrome MCP bridge** (salvage #8313) ([#20428](https://github.com/NousResearch/hermes-agent/pull/20428))
|
||||
- **Hermes skills docs sync** — slash commands + durable-systems section ([#20390](https://github.com/NousResearch/hermes-agent/pull/20390))
|
||||
- **AGENTS.md — curator/cron/delegation/toolsets + fix plugin tree** ([#20226](https://github.com/NousResearch/hermes-agent/pull/20226))
|
||||
- **Bedrock quickstart entry + fallback comment + deployment link** (salvage #11093) ([#20397](https://github.com/NousResearch/hermes-agent/pull/20397))
|
||||
|
||||
### Docs polish
|
||||
- Collapse exploding skills tree to a single Skills node ([#18259](https://github.com/NousResearch/hermes-agent/pull/18259))
|
||||
- Clarify `session_search` auxiliary model docs ([#19593](https://github.com/NousResearch/hermes-agent/pull/19593))
|
||||
- Open WebUI Quick Setup gap fill ([#19654](https://github.com/NousResearch/hermes-agent/pull/19654))
|
||||
- Default custom tool creation to plugins (@kshitijk4poor) ([#19755](https://github.com/NousResearch/hermes-agent/pull/19755))
|
||||
- Clarify Telegram group chat troubleshooting (salvage #18672) ([#20416](https://github.com/NousResearch/hermes-agent/pull/20416))
|
||||
- Codex OAuth auth prerequisite clarification (salvage #18688) ([#20417](https://github.com/NousResearch/hermes-agent/pull/20417))
|
||||
- Discord Server Members Intent + SSRC-mapping drift + /voice join slash Choice (salvage #11350) ([#20411](https://github.com/NousResearch/hermes-agent/pull/20411))
|
||||
- Document `ctx.dispatch_tool()` (salvage #10955) ([#20391](https://github.com/NousResearch/hermes-agent/pull/20391))
|
||||
- Document `hermes webhook subscribe --deliver-only` (salvage #12612) ([#20392](https://github.com/NousResearch/hermes-agent/pull/20392))
|
||||
- Document `hermes import` reference (salvage #14711) ([#20396](https://github.com/NousResearch/hermes-agent/pull/20396))
|
||||
- Document per-provider TTS `max_text_length` caps (salvage #13825) ([#20389](https://github.com/NousResearch/hermes-agent/pull/20389))
|
||||
- Clarify supported prompt customization surfaces (salvage #19987) ([#20383](https://github.com/NousResearch/hermes-agent/pull/20383))
|
||||
- Correct `web_extract` summarizer timeout comment (salvage #20051) ([#20381](https://github.com/NousResearch/hermes-agent/pull/20381))
|
||||
- Fix fallback provider config paths (salvage #20033) ([#20382](https://github.com/NousResearch/hermes-agent/pull/20382))
|
||||
- Fix misleading RL install-extras claim (salvage #19080) ([#21213](https://github.com/NousResearch/hermes-agent/pull/21213))
|
||||
- Clarify API server tool execution locality (salvage #19117) ([#21223](https://github.com/NousResearch/hermes-agent/pull/21223))
|
||||
- Prefer `.venv` to match AGENTS.md and scripts/run_tests.sh (@xxxigm) ([#21334](https://github.com/NousResearch/hermes-agent/pull/21334))
|
||||
- Align tool discovery + test runner with AGENTS.md (@xxxigm) ([#20791](https://github.com/NousResearch/hermes-agent/pull/20791))
|
||||
- Align terminal-backend count and naming across docs and code (salvage #19044) ([#20402](https://github.com/NousResearch/hermes-agent/pull/20402))
|
||||
- Refresh stale platform counts (salvage #19053) ([#20403](https://github.com/NousResearch/hermes-agent/pull/20403))
|
||||
|
||||
---
|
||||
|
||||
## 👥 Contributors
|
||||
|
||||
### Core
|
||||
- **@teknium1** — salvage, triage, review, feature work, and release management
|
||||
|
||||
### Top Community Contributors
|
||||
|
||||
- **@kshitijk4poor** (21 PRs) — SearXNG native search backend, per-capability backend selection, collapsible TUI startup banner, Slack ephemeral ack + format fixes, Lightpanda fallback hardening, searxng-search optional skill + Web Search + Extract docs, default custom tool creation to plugins, kanban failure-column fix
|
||||
- **@alt-glitch** (13 PRs) — video_analyze tool, xAI Custom Voices (voice cloning), local-backend CLI launch-directory fix, lazy-session creation regression recovery, systemd unit refresh on gateway boot
|
||||
- **@OutThisLife** (9 PRs) — TUI perf — overlay render churn reduction, voice push-to-talk parity restoration (salvaging @Montbra)
|
||||
- **@helix4u** (6 PRs) — Classic CLI output recovery after resize, absolute-path TUI completion, gateway model picker current-context fix, Bedrock credential probe avoidance, kanban docs fixes
|
||||
- **@ethernet8023** (3 PRs) — Docker CI — don't cancel overlapping builds, :latest guard
|
||||
- **@benbarclay** (3 PRs) — Docker — launch dashboard as side-process via HERMES_DASHBOARD=1
|
||||
- **@austinpickett** (3 PRs) — Dashboard Plugins page, TUI /model picker overhaul with inline auth, kanban button fix
|
||||
- **@sprmn24** (2 PRs) — Contributor (2 PRs)
|
||||
- **@asheriif** (2 PRs) — Contributor (2 PRs)
|
||||
- **@xxxigm** (2 PRs) — Contributing docs — .venv preference and test runner alignment with AGENTS.md
|
||||
- **@stephenschoettler** (1 PR) — ACP — MCP E2E mock kwargs
|
||||
- **@vincez-hms-coder** (1 PR) — Dashboard — Profiles management page
|
||||
- **@cdanis** (1 PR) — Contributor
|
||||
- **@briandevans** (1 PR) — Toolsets test — kanban assertions post-#17805
|
||||
- **@heyitsaamir** (1 PR) — Contributor
|
||||
|
||||
### All Contributors
|
||||
|
||||
Thanks to everyone who contributed to v0.13.0 — commits, co-authored work, and salvaged PRs. 295 contributors in one week.
|
||||
|
||||
@0oAstro, @0xDevNinja, @0xharryriddle, @0xKingBack, @0xsir0000, @0xyg3n, @0z1-ghb, @abhinav11082001-stack,
|
||||
@acc001k, @acesjohnny, @adamludwin, @adybag14-cyber, @agentlinker, @agilejava, @ai-ag2026, @AJV20,
|
||||
@alanxchen85, @albert748, @AllardQuek, @alt-glitch, @altmazza0-star, @ambition0802, @amitgaur, @amroessam,
|
||||
@andrewhosf, @Asce66, @asheriif, @ashermorse, @asimons81, @Aslaaen, @Asunfly, @atongrun, @austinpickett,
|
||||
@banditburai, @barteqpl, @Bartok9, @Beandon13, @beardthelion, @beibi9966, @benbarclay, @binhnt92, @bjianhang,
|
||||
@BlackJulySnow, @bobashopcashier, @bogerman1, @Bongulielmi, @Brecht-H, @briandevans, @brooklynnicholson,
|
||||
@c3115644151, @camaragon, @CashWilliams, @CCClelo, @cdanis, @CES4751, @cg2aigc, @changchun989, @ChanlerDev,
|
||||
@CharlieKerfoot, @chengoak, @chenyunbo411, @chinadbo, @CIRWEL, @cixuuz, @cmcgrabby-hue, @colorcross,
|
||||
@Contentment003111, @CoreyNoDream, @counterposition, @curiouscleo, @DaniuXie, @deep-name, @dengtaoyuan450-a11y,
|
||||
@discodirector, @donramon77, @dpaluy, @ee-blog, @ehz0ah, @el-analista, @elmatadorgh, @EmelyanenkoK,
|
||||
@Emidomenge, @emozilla, @Es1la, @EthanGuo-coder, @etherman-os, @ethernet8023, @EvilDrag0n, @exxmen, @Fearvox,
|
||||
@Feranmi10, @firefly, @flobo3, @fmercurio, @Foolafroos, @formulahendry, @franksong2702, @ggnnggez, @GinWU05,
|
||||
@giwaov, @glesperance, @gnanirahulnutakki, @GodsBoy, @Gosuj, @Grey0202, @guillaumemeyer, @Gutslabs, @h0tp-ftw,
|
||||
@haidao1919, @halmisen, @happy5318, @hedirman, @helix4u, @hendrixfreire, @HenkDz, @hex-clawd, @heyitsaamir,
|
||||
@hharry11, @Hinotoi-agent, @holynn-q, @hrkzogw, @Hypn0sis, @Hypnus-Yuan, @ideathinklab01-source, @IMHaoyan,
|
||||
@Interstellar-code, @ishardo, @jacdevos, @jackey8616, @JanCong, @jasonoutland, @jatingodnani, @JayGwod,
|
||||
@jethac, @JezzaHehn, @JiaDe-Wu, @jjjojoj, @jkausel-ai, @John-tip, @johnncenae, @jrusso1020, @jslizar,
|
||||
@JTroyerOvermatch, @julysir, @Junass1, @JustinUssuri, @Kailigithub, @keepcalmqqf, @kiala9, @konsisumer,
|
||||
@kowenhaoai, @Krionex, @kshitijk4poor, @kyan12, @leavrcn, @leon7609, @LeonSGP43, @leprincep35700, @lhysdl,
|
||||
@likejudy, @lisanhu, @liu-collab, @liuguangyong93, @liuhao1024, @LucianoSP, @luoyuctl, @luyao618, @M3RCUR2Y,
|
||||
@maciekczech, @Magicray1217, @magicray1217, @MaHaoHao-ch, @malaiwah, @manateelazycat, @masonjames, @megastary,
|
||||
@memosr, @MichaelWDanko, @mikeyobrien, @millerc79, @Mind-Dragon, @mioimotoai-lgtm, @misery-hl, @molvikar,
|
||||
@momowind, @Montbra, @MottledShadow, @mrbob-git, @mrcharlesiv, @mrcoferland, @ms-alan, @mwnickerson,
|
||||
@nazirulhafiy, @nftpoetrist, @nicoloboschi, @nightq, @nikolay-bratanov, @NikolayGusev-astra, @nocturnum91,
|
||||
@noOne-list, @nouseman666, @novax635, @npmisantosh, @nudiltoys-cmyk, @olisikh, @oluwadareab12, @Oxidane-bot,
|
||||
@pama0227, @pander, @pasevin, @paul-tian, @pdonizete, @perlowja, @pingchesu, @PratikRai0101, @priveperfumes,
|
||||
@probepark, @QifengKuang, @quocanh261997, @qWaitCrypto, @qxxaa, @r266-tech, @rames-jusso, @revaraver,
|
||||
@Ricardo-M-L, @rob-maron, @Roy-oss1, @rxdxxxx, @SandroHub013, @Sanjays2402, @Sertug17, @shashwatgokhe,
|
||||
@shellybotmoyer, @SHL0MS, @SimbaKingjoe, @simbam99, @simplenamebox-ops, @socrates1024, @sonic-netizen,
|
||||
@sprmn24, @steezkelly, @stephen0110, @stephenschoettler, @stevenchanin, @stevenchouai, @stormhierta,
|
||||
@subtract0, @suncokret12, @swithek, @taeng0204, @TakeshiSawaguchi, @tangyuanjc, @TheEpTic, @thelumiereguy,
|
||||
@Tkander1715, @tmdgusya, @Tranquil-Flow, @TruaShamu, @UgwujaGeorge, @valda, @vincez-hms-coder, @VinVC,
|
||||
@vominh1919, @wabrent, @WadydX, @wanazhar, @WanderWang, @warabe1122, @web-dev0521, @WideLee, @willy-scr,
|
||||
@wmagev, @WuTianyi123, @wxst, @wysie, @Wysie, @xsfX20, @xxxigm, @xyiy001, @YanzhongSu, @ygd58, @Yoimex,
|
||||
@yuehei, @Yukipukii1, @yuqianma, @YX234, @zeejaytan, @zhanggttry, @zhao0112, @zng8418, @zons-zhaozhy, @Zyproth
|
||||
|
||||
---
|
||||
|
||||
**Full Changelog**: [v2026.4.30...v2026.5.7](https://github.com/NousResearch/hermes-agent/compare/v2026.4.30...v2026.5.7)
|
||||
357
SECURITY.md
357
SECURITY.md
@@ -1,331 +1,84 @@
|
||||
# Hermes Agent Security Policy
|
||||
|
||||
This document describes Hermes Agent's trust model, names the one
|
||||
security boundary the project treats as load-bearing, and defines the
|
||||
scope for vulnerability reports.
|
||||
This document outlines the security protocols, trust model, and deployment hardening guidelines for the **Hermes Agent** project.
|
||||
|
||||
## 1. Reporting a Vulnerability
|
||||
## 1. Vulnerability Reporting
|
||||
|
||||
Report privately via [GitHub Security Advisories](https://github.com/NousResearch/hermes-agent/security/advisories/new)
|
||||
or **security@nousresearch.com**. Do not open public issues for
|
||||
security vulnerabilities. **Hermes Agent does not operate a bug
|
||||
bounty program.**
|
||||
Hermes Agent does **not** operate a bug bounty program. Security issues should be reported via [GitHub Security Advisories (GHSA)](https://github.com/NousResearch/hermes-agent/security/advisories/new) or by emailing **security@nousresearch.com**. Do not open public issues for security vulnerabilities.
|
||||
|
||||
A useful report includes:
|
||||
|
||||
- A concise description and severity assessment.
|
||||
- The affected component, identified by file path and line range
|
||||
(e.g. `path/to/file.py:120-145`).
|
||||
- Environment details (`hermes version`, commit SHA, OS, Python
|
||||
version).
|
||||
- A reproduction against `main` or the latest release.
|
||||
- A statement of which trust boundary in §2 is crossed.
|
||||
|
||||
Please read §2 and §3 before submitting. Reports that demonstrate
|
||||
limits of an in-process heuristic this policy does not treat as a
|
||||
boundary will be closed as out-of-scope under §3 — but see §3.2:
|
||||
they are still welcome as regular issues or pull requests, just not
|
||||
through the private security channel.
|
||||
### Required Submission Details
|
||||
- **Title & Severity:** Concise description and CVSS score/rating.
|
||||
- **Affected Component:** Exact file path and line range (e.g., `tools/approval.py:120-145`).
|
||||
- **Environment:** Output of `hermes version`, commit SHA, OS, and Python version.
|
||||
- **Reproduction:** Step-by-step Proof-of-Concept (PoC) against `main` or the latest release.
|
||||
- **Impact:** Explanation of what trust boundary was crossed.
|
||||
|
||||
---
|
||||
|
||||
## 2. Trust Model
|
||||
|
||||
Hermes Agent is a single-tenant personal agent. Its posture is
|
||||
layered, and the layers are not equally load-bearing. Reporters and
|
||||
operators should reason about them in the same terms.
|
||||
The core assumption is that Hermes is a **personal agent** with one trusted operator.
|
||||
|
||||
### 2.1 Definitions
|
||||
### Operator & Session Trust
|
||||
- **Single Tenant:** The system protects the operator from LLM actions, not from malicious co-tenants. Multi-user isolation must happen at the OS/host level.
|
||||
- **Gateway Security:** Authorized callers (Telegram, Discord, Slack, etc.) receive equal trust. Session keys are used for routing, not as authorization boundaries.
|
||||
- **Execution:** Defaults to `terminal.backend: local` (direct host execution). Container isolation (Docker, Modal, Daytona) is opt-in for sandboxing.
|
||||
|
||||
- **Agent process.** The Python interpreter running Hermes Agent,
|
||||
including any Python modules it has loaded (skills, plugins,
|
||||
hook handlers).
|
||||
- **Terminal backend.** A pluggable execution target for the
|
||||
`terminal()` tool. The default runs commands directly on the host.
|
||||
Other backends run commands inside a container, cloud sandbox, or
|
||||
remote host.
|
||||
- **Input surface.** Any channel through which content enters the
|
||||
agent's context: operator input, web fetches, email, gateway
|
||||
messages, file reads, MCP server responses, tool results.
|
||||
- **Trust envelope.** The set of resources an operator has implicitly
|
||||
granted Hermes Agent access to by running it — typically, whatever
|
||||
the operator's own user account can reach on the host.
|
||||
- **Stance.** An explicit statement in Hermes Agent's documentation
|
||||
or code about how a consuming layer (adapter, UI, file writer,
|
||||
shell) should treat agent output — e.g. "the dashboard renders
|
||||
agent output as inert HTML."
|
||||
### Dangerous Command Approval
|
||||
The approval system (`tools/approval.py`) is a core security boundary. Terminal commands, file operations, and other potentially destructive actions are gated behind explicit user confirmation before execution. The approval mode is configurable via `approvals.mode` in `config.yaml`:
|
||||
- `"on"` (default) — prompts the user to approve dangerous commands.
|
||||
- `"auto"` — auto-approves after a configurable delay.
|
||||
- `"off"` — disables the gate entirely (break-glass; see Section 3).
|
||||
|
||||
### 2.2 The Boundary: OS-Level Isolation
|
||||
### Output Redaction
|
||||
`agent/redact.py` strips secret-like patterns (API keys, tokens, credentials) from all display output before it reaches the terminal or gateway platform. This prevents accidental credential leakage in chat logs, tool previews, and response text. Redaction operates on the display layer only — underlying values remain intact for internal agent operations.
|
||||
|
||||
**The only security boundary against an adversarial LLM is the
|
||||
operating system.** Nothing inside the agent process constitutes
|
||||
containment — not the approval gate, not output redaction, not any
|
||||
pattern scanner, not any tool allowlist. Any in-process component
|
||||
that screens LLM output is a heuristic operating on an
|
||||
attacker-influenced string, and this policy treats it as such.
|
||||
### Skills vs. MCP Servers
|
||||
- **Installed Skills:** High trust. Equivalent to local host code; skills can read environment variables and run arbitrary commands.
|
||||
- **MCP Servers:** Lower trust. MCP subprocesses receive a filtered environment (`_build_safe_env()` in `tools/mcp_tool.py`) — only safe baseline variables (`PATH`, `HOME`, `XDG_*`) plus variables explicitly declared in the server's `env` config block are passed through. Host credentials are stripped by default. Additionally, packages invoked via `npx`/`uvx` are checked against the OSV malware database before spawning.
|
||||
|
||||
Hermes Agent supports two OS-level isolation postures. They address
|
||||
different threats and an operator should choose deliberately.
|
||||
### Code Execution Sandbox
|
||||
The `execute_code` tool (`tools/code_execution_tool.py`) runs LLM-generated Python scripts in a child process with API keys and tokens stripped from the environment to prevent credential exfiltration. Only environment variables explicitly declared by loaded skills (via `env_passthrough`) or by the user in `config.yaml` (`terminal.env_passthrough`) are passed through. The child accesses Hermes tools via RPC, not direct API calls.
|
||||
|
||||
#### Terminal-backend isolation
|
||||
|
||||
A non-default terminal backend runs LLM-emitted shell commands
|
||||
inside a container, remote host, or cloud sandbox. The file tools
|
||||
(`read_file`, `write_file`, `patch`) also run through this backend,
|
||||
since they are implemented on top of the shell contract — they
|
||||
cannot reach paths the backend doesn't expose.
|
||||
|
||||
What this confines: anything the agent does by issuing shell or
|
||||
file operations. What this does **not** confine: everything the
|
||||
agent does in its own Python process. That includes the
|
||||
code-execution tool (spawned as a host subprocess), MCP subprocesses
|
||||
(spawned from the agent's environment), plugin loading, hook
|
||||
dispatch, and skill loading (all imported into the agent
|
||||
interpreter).
|
||||
|
||||
Terminal-backend isolation is the right posture when the concern is
|
||||
LLM-emitted destructive shell or unwanted file-tool writes, and the
|
||||
operator is otherwise trusted.
|
||||
|
||||
#### Whole-process wrapping
|
||||
|
||||
Whole-process wrapping runs the entire agent process tree inside a
|
||||
sandbox. Every code path — shell, code-execution, MCP, file tools,
|
||||
plugins, hooks, skill loading — is subject to the same filesystem,
|
||||
network, process, and (where applicable) inference policy.
|
||||
|
||||
Hermes Agent supports this in two ways:
|
||||
|
||||
- **Hermes Agent's own Docker image and Compose setup.** Lighter-
|
||||
weight; the agent runs in a standard container with operator-
|
||||
configured mounts and network policy.
|
||||
- **[NVIDIA OpenShell](https://github.com/NVIDIA/OpenShell)**.
|
||||
OpenShell provides per-session sandboxes with declarative policy
|
||||
across filesystem, network (L7 egress), process/syscall, and
|
||||
inference-routing layers. Network and inference policies are
|
||||
hot-reloadable. Credentials are injected from a Provider store
|
||||
and never touch the sandbox filesystem.
|
||||
|
||||
Under a whole-process wrapper, Hermes Agent's in-process heuristics
|
||||
(§2.4) function as accident-prevention layered on top of a real
|
||||
boundary. This is the supported posture when the agent ingests
|
||||
content from surfaces the operator does not control — the open web,
|
||||
inbound email, multi-user channels, untrusted MCP servers — and for
|
||||
production or shared deployments.
|
||||
|
||||
Operators running the default local backend with untrusted input
|
||||
surfaces, or running a terminal-backend sandbox and expecting it to
|
||||
contain code paths that don't go through the shell, are operating
|
||||
outside the supported security posture.
|
||||
|
||||
### 2.3 Credential Scoping
|
||||
|
||||
Hermes Agent filters the environment it passes to its lower-trust
|
||||
in-process components: shell subprocesses, MCP subprocesses, and
|
||||
the code-execution child. Credentials like provider API keys and
|
||||
gateway tokens are stripped by default; variables explicitly
|
||||
declared by the operator or by a loaded skill are passed through.
|
||||
|
||||
This reduces casual exfiltration. It is not containment. Any
|
||||
component running inside the agent process (skills, plugins, hook
|
||||
handlers) can read whatever the agent itself can read, including
|
||||
in-memory credentials. The mitigation against a compromised
|
||||
in-process component is operator review before install (§2.4,
|
||||
§2.5), not environment scrubbing.
|
||||
|
||||
### 2.4 In-Process Heuristics
|
||||
|
||||
The following components screen or warn about LLM behavior. They
|
||||
are useful. They are not boundaries.
|
||||
|
||||
- The **approval gate** detects common destructive shell patterns
|
||||
and prompts the operator before execution. Shell is Turing-
|
||||
complete; a denylist over shell strings is structurally
|
||||
incomplete. The gate catches cooperative-mode mistakes, not
|
||||
adversarial output.
|
||||
- **Output redaction** strips secret-like patterns from display.
|
||||
A motivated output producer will defeat it.
|
||||
- **Skills Guard** scans installable skill content for injection
|
||||
patterns. It is a review aid; the boundary for third-party skills
|
||||
is operator review before install. Reviewing a skill means
|
||||
reading its Python code and scripts, not just its SKILL.md
|
||||
description — skills execute arbitrary Python at import time.
|
||||
|
||||
### 2.5 Plugin Trust Model
|
||||
|
||||
Plugins load into the agent process and run with full agent
|
||||
privileges: they can read the same credentials, call the same
|
||||
tools, register the same hooks, and import the same modules as
|
||||
anything shipped in-tree. The boundary for third-party plugins is
|
||||
operator review before install — the same rule as skills (§2.4),
|
||||
called out separately because plugins are architecturally heavier
|
||||
and often ship their own background services, network listeners,
|
||||
and dependencies.
|
||||
|
||||
A malicious or buggy plugin is not a vulnerability in Hermes Agent
|
||||
itself. Bugs in Hermes Agent's plugin-install or plugin-discovery
|
||||
path that prevent the operator from seeing what they're installing
|
||||
are in scope under §3.1.
|
||||
|
||||
### 2.6 External Surfaces
|
||||
|
||||
An **external surface** is any channel outside the local agent
|
||||
process through which a caller can dispatch agent work, resolve
|
||||
approvals, or receive agent output. Each surface has its own
|
||||
authorization model, but the rules below apply uniformly.
|
||||
|
||||
**Surfaces in Hermes Agent:**
|
||||
|
||||
- **Gateway platform adapters.** Messaging integrations in
|
||||
`gateway/platforms/` (Telegram, Discord, Slack, email, SMS, etc.)
|
||||
and analogous adapters shipped as plugins.
|
||||
- **Network-exposed HTTP surfaces.** The API server adapter, the
|
||||
dashboard plugin, the kanban plugin's HTTP endpoints, and any
|
||||
other plugin that binds a listening socket.
|
||||
- **Editor / IDE adapters.** The ACP adapter (`acp_adapter/`) and
|
||||
equivalent integrations that accept requests from a local client
|
||||
process.
|
||||
- **The TUI gateway (`tui_gateway/`).** JSON-RPC backend for the
|
||||
Ink terminal UI, reached over local IPC.
|
||||
|
||||
**Uniform rules:**
|
||||
|
||||
1. **Authorization is required at every surface that crosses a
|
||||
trust boundary.** For messaging and network HTTP surfaces, the
|
||||
boundary is the network: authorization means an operator-
|
||||
configured caller allowlist. For editor and local-IPC surfaces
|
||||
(ACP, TUI gateway), the boundary is the host's user account:
|
||||
authorization means relying on OS-level access control (file
|
||||
permissions, loopback-only binds) and not exposing the surface
|
||||
beyond the local user without an explicit network auth layer.
|
||||
2. **An allowlist is required for every enabled network-exposed
|
||||
adapter.** Adapters must refuse to dispatch agent work, resolve
|
||||
approvals, or relay output until an allowlist is set. Code paths
|
||||
that fail open when no allowlist is configured are code bugs in
|
||||
scope under §3.1.
|
||||
3. **Session identifiers are routing handles, not authorization
|
||||
boundaries.** Knowing another caller's session ID does not grant
|
||||
access to their approvals or output; authorization is always
|
||||
re-checked against the allowlist (or OS-level equivalent).
|
||||
4. **Within the authorized set, all callers are equally trusted.**
|
||||
Hermes Agent does not model per-caller capabilities inside a
|
||||
single adapter. Operators who need capability separation should
|
||||
run separate agent instances with separate allowlists.
|
||||
5. **Binding a local-only surface to a non-loopback interface is a
|
||||
break-glass operator decision (§3.2).** The dashboard and other
|
||||
plugin HTTP servers default to loopback; exposing them via
|
||||
`--host 0.0.0.0` or equivalent makes public-exposure hardening
|
||||
(§4) the operator's responsibility.
|
||||
### Subagents
|
||||
- **No recursive delegation:** The `delegate_task` tool is disabled for child agents.
|
||||
- **Depth limit:** `MAX_DEPTH = 2` — parent (depth 0) can spawn a child (depth 1); grandchildren are rejected.
|
||||
- **Memory isolation:** Subagents run with `skip_memory=True` and do not have access to the parent's persistent memory provider. The parent receives only the task prompt and final response as an observation.
|
||||
|
||||
---
|
||||
|
||||
## 3. Scope
|
||||
## 3. Out of Scope (Non-Vulnerabilities)
|
||||
|
||||
### 3.1 In Scope
|
||||
|
||||
- Escape from a declared OS-level isolation posture (§2.2): an
|
||||
attacker-controlled code path reaching state that the posture
|
||||
claimed to confine.
|
||||
- Unauthorized external-surface access: a caller outside the
|
||||
configured authorization set (allowlist, or OS-level equivalent
|
||||
for local-IPC surfaces) dispatching work, receiving output, or
|
||||
resolving approvals (§2.6).
|
||||
- Credential exfiltration: leakage of operator credentials or
|
||||
session authorization material to a destination outside the
|
||||
trust envelope, via a mechanism that should have prevented it
|
||||
(environment scrubbing bug, adapter logging, transport error
|
||||
that flushes credentials to an upstream, etc.).
|
||||
- Trust-model documentation violations: code behaving contrary to
|
||||
what this policy, Hermes Agent's own documentation, or reasonable
|
||||
operator expectations would predict — including cases where
|
||||
Hermes Agent has documented a stance about how its output should
|
||||
be rendered by a consuming layer (dashboard, gateway adapter,
|
||||
file writer, shell) and a code path breaks that stance.
|
||||
|
||||
### 3.2 Out of Scope
|
||||
|
||||
"Out of scope" here means "not a security vulnerability under this
|
||||
policy." It does not mean "not worth reporting." Improvements to the
|
||||
in-process heuristics, hardening ideas, and UX fixes are welcome as
|
||||
regular issues or pull requests — the approval gate can always catch
|
||||
more patterns, redaction can always get smarter, adapter behavior
|
||||
can always be tightened. These items just don't go through the
|
||||
private-disclosure channel and don't receive advisories.
|
||||
|
||||
- **Bypasses of in-process heuristics (§2.4)** — approval-gate regex
|
||||
bypasses, redaction bypasses, Skills Guard pattern bypasses, and
|
||||
analogous reports against future heuristics. These components are
|
||||
not boundaries; defeating them is not a vulnerability under this
|
||||
policy.
|
||||
- **Prompt injection per se.** Getting the LLM to emit unusual
|
||||
output — via injected content, hallucination, training artifacts,
|
||||
or any other cause — is not itself a vulnerability. "I achieved
|
||||
prompt injection" without a chained §3.1 outcome is not an
|
||||
actionable report under this policy.
|
||||
- **Consequences of a chosen isolation posture.** Reports that a
|
||||
code path operating within its posture's scope can do what that
|
||||
posture permits are not vulnerabilities. Examples: shell or file
|
||||
tools reaching host state under the local backend; code-execution
|
||||
or MCP subprocesses reaching host state under terminal-backend
|
||||
isolation that only sandboxes shell; reports whose preconditions
|
||||
require pre-existing write access to operator-owned configuration
|
||||
or credential files (those are already inside the trust envelope).
|
||||
- **Documented break-glass settings.** Operator-selected trade-offs
|
||||
that explicitly disable protections: `--insecure` and equivalent
|
||||
flags on the dashboard or other components, disabled approvals,
|
||||
local backend in production, development profiles that bypass
|
||||
hermes-home security, and similar. Reports against those
|
||||
configurations are not vulnerabilities — that's the flag's job.
|
||||
- **Community-contributed skills and plugins.** Third-party skills
|
||||
(including the community skills repository) and third-party
|
||||
plugins are in the operator's review surface, not Hermes Agent's
|
||||
trust surface (§2.4, §2.5). A skill or plugin doing something
|
||||
malicious is the expected failure mode of one that wasn't
|
||||
reviewed, not a vulnerability in Hermes Agent. Bugs in Hermes
|
||||
Agent's skill-install or plugin-install path that prevent the
|
||||
operator from seeing what they're installing are in scope under
|
||||
§3.1.
|
||||
- **Public exposure without external controls.** Exposing the
|
||||
gateway or API to the public internet without authentication,
|
||||
VPN, or firewall.
|
||||
- **Tool-level read/write restrictions on a posture where shell is
|
||||
permitted.** If a path is reachable via the terminal tool, reports
|
||||
that other file tools can reach it add nothing.
|
||||
The following scenarios are **not** considered security breaches:
|
||||
- **Prompt Injection:** Unless it results in a concrete bypass of the approval system, toolset restrictions, or container sandbox.
|
||||
- **Public Exposure:** Deploying the gateway to the public internet without external authentication or network protection.
|
||||
- **Trusted State Access:** Reports that require pre-existing write access to `~/.hermes/`, `.env`, or `config.yaml` (these are operator-owned files).
|
||||
- **Default Behavior:** Host-level command execution when `terminal.backend` is set to `local` — this is the documented default, not a vulnerability.
|
||||
- **Configuration Trade-offs:** Intentional break-glass settings such as `approvals.mode: "off"` or `terminal.backend: local` in production.
|
||||
- **Tool-level read/access restrictions:** The agent has unrestricted shell access via the `terminal` tool by design. Reports that a specific tool (e.g., `read_file`) can access a resource are not vulnerabilities if the same access is available through `terminal`. Tool-level deny lists only constitute a meaningful security boundary when paired with equivalent restrictions on the terminal side (as with write operations, where `WRITE_DENIED_PATHS` is paired with the dangerous command approval system).
|
||||
|
||||
---
|
||||
|
||||
## 4. Deployment Hardening
|
||||
## 4. Deployment Hardening & Best Practices
|
||||
|
||||
The single most important hardening decision is matching isolation
|
||||
(§2.2) to the trust of the content the agent will ingest. Beyond
|
||||
that:
|
||||
### Filesystem & Network
|
||||
- **Production sandboxing:** Use container backends (`docker`, `modal`, `daytona`) instead of `local` for untrusted workloads.
|
||||
- **File permissions:** Run as non-root (the Docker image uses UID 10000); protect credentials with `chmod 600 ~/.hermes/.env` on local installs.
|
||||
- **Network exposure:** Do not expose the gateway or API server to the public internet without VPN, Tailscale, or firewall protection. SSRF protection is enabled by default across all gateway platform adapters (Telegram, Discord, Slack, Matrix, Mattermost, etc.) with redirect validation. Note: the local terminal backend does not apply SSRF filtering, as it operates within the trusted operator's environment.
|
||||
|
||||
- Run the agent as a non-root user. The supplied container image
|
||||
does this by default.
|
||||
- Keep credentials in the operator credential file with tight
|
||||
permissions, never in the main config, never in version control.
|
||||
Under OpenShell, use the Provider store rather than an on-disk
|
||||
credential file.
|
||||
- Do not expose the gateway or API to the public internet without
|
||||
VPN, Tailscale, or firewall protection. Under OpenShell, use the
|
||||
network policy layer to restrict egress.
|
||||
- Configure a caller allowlist for every network-exposed adapter
|
||||
you enable (§2.6).
|
||||
- Review third-party skills and plugins before install (§2.4,
|
||||
§2.5). For skills, this means reading the Python and scripts,
|
||||
not just SKILL.md. Skills Guard reports and the install audit
|
||||
log are the review surface.
|
||||
- Hermes Agent includes supply-chain guards for MCP server
|
||||
launches and for dependency / bundled-package changes in CI; see
|
||||
`CONTRIBUTING.md` for specifics.
|
||||
### Skills & Supply Chain
|
||||
- **Skill installation:** Review Skills Guard reports (`tools/skills_guard.py`) before installing third-party skills. The audit log at `~/.hermes/skills/.hub/audit.log` tracks every install and removal.
|
||||
- **MCP safety:** OSV malware checking runs automatically for `npx`/`uvx` packages before MCP server processes are spawned.
|
||||
- **CI/CD:** GitHub Actions are pinned to full commit SHAs. The `supply-chain-audit.yml` workflow blocks PRs containing `.pth` files or suspicious `base64`+`exec` patterns.
|
||||
|
||||
### Credential Storage
|
||||
- API keys and tokens belong exclusively in `~/.hermes/.env` — never in `config.yaml` or checked into version control.
|
||||
- The credential pool system (`agent/credential_pool.py`) handles key rotation and fallback. Credentials are resolved from environment variables, not stored in plaintext databases.
|
||||
|
||||
---
|
||||
|
||||
## 5. Disclosure
|
||||
## 5. Disclosure Process
|
||||
|
||||
- **Coordinated disclosure window:** 90 days from report, or until a
|
||||
fix is released, whichever comes first.
|
||||
- **Channel:** the GHSA thread or email correspondence with
|
||||
security@nousresearch.com.
|
||||
- **Credit:** reporters are credited in release notes unless
|
||||
anonymity is requested.
|
||||
- **Coordinated Disclosure:** 90-day window or until a fix is released, whichever comes first.
|
||||
- **Communication:** All updates occur via the GHSA thread or email correspondence with security@nousresearch.com.
|
||||
- **Credits:** Reporters are credited in release notes unless anonymity is requested.
|
||||
|
||||
@@ -1,141 +0,0 @@
|
||||
"""ACP permission bridging for Hermes dangerous-command approvals."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from concurrent.futures import TimeoutError as FutureTimeout
|
||||
from itertools import count
|
||||
from typing import Callable
|
||||
|
||||
from acp.schema import (
|
||||
AllowedOutcome,
|
||||
PermissionOption,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Maps ACP permission option ids to Hermes approval result strings.
|
||||
# Option ids are stable across both the ``allow_permanent=True`` and
|
||||
# ``allow_permanent=False`` paths even though the option list differs.
|
||||
_OPTION_ID_TO_HERMES = {
|
||||
"allow_once": "once",
|
||||
"allow_session": "session",
|
||||
"allow_always": "always",
|
||||
"deny": "deny",
|
||||
}
|
||||
|
||||
_PERMISSION_REQUEST_IDS = count(1)
|
||||
|
||||
|
||||
def _build_permission_options(*, allow_permanent: bool) -> list[PermissionOption]:
|
||||
"""Return ACP options that match Hermes approval semantics."""
|
||||
options = [
|
||||
PermissionOption(option_id="allow_once", kind="allow_once", name="Allow once"),
|
||||
PermissionOption(
|
||||
option_id="allow_session",
|
||||
# ACP has no session-scoped kind, so use the closest persistent
|
||||
# hint while keeping Hermes semantics in the option id.
|
||||
kind="allow_always",
|
||||
name="Allow for session",
|
||||
),
|
||||
]
|
||||
if allow_permanent:
|
||||
options.append(
|
||||
PermissionOption(
|
||||
option_id="allow_always",
|
||||
kind="allow_always",
|
||||
name="Allow always",
|
||||
),
|
||||
)
|
||||
options.append(PermissionOption(option_id="deny", kind="reject_once", name="Deny"))
|
||||
return options
|
||||
|
||||
|
||||
def _build_permission_tool_call(command: str, description: str):
|
||||
"""Return the ACP tool-call update attached to a permission request.
|
||||
|
||||
``request_permission`` expects a ``ToolCallUpdate`` payload — produced
|
||||
by ``_acp.update_tool_call`` — not a ``ToolCallStart``. Each request
|
||||
gets a unique ``perm-check-N`` id so concurrent requests don't collide.
|
||||
"""
|
||||
import acp as _acp
|
||||
|
||||
tool_call_id = f"perm-check-{next(_PERMISSION_REQUEST_IDS)}"
|
||||
return _acp.update_tool_call(
|
||||
tool_call_id,
|
||||
title=description,
|
||||
kind="execute",
|
||||
status="pending",
|
||||
content=[_acp.tool_content(_acp.text_block(f"$ {command}"))],
|
||||
raw_input={"command": command, "description": description},
|
||||
)
|
||||
|
||||
|
||||
def _map_outcome_to_hermes(outcome: object, *, allowed_option_ids: set[str]) -> str:
|
||||
"""Map an ACP permission outcome into Hermes approval strings."""
|
||||
if not isinstance(outcome, AllowedOutcome):
|
||||
return "deny"
|
||||
|
||||
option_id = outcome.option_id
|
||||
if option_id not in allowed_option_ids:
|
||||
logger.warning("Permission request returned unknown option_id: %s", option_id)
|
||||
return "deny"
|
||||
return _OPTION_ID_TO_HERMES.get(option_id, "deny")
|
||||
|
||||
|
||||
def make_approval_callback(
|
||||
request_permission_fn: Callable,
|
||||
loop: asyncio.AbstractEventLoop,
|
||||
session_id: str,
|
||||
timeout: float = 60.0,
|
||||
) -> Callable[..., str]:
|
||||
"""
|
||||
Return a Hermes-compatible approval callback that bridges to ACP.
|
||||
|
||||
The callback accepts ``command`` and ``description`` plus optional
|
||||
keyword arguments such as ``allow_permanent`` used by
|
||||
``tools.approval.prompt_dangerous_approval()``.
|
||||
|
||||
Args:
|
||||
request_permission_fn: The ACP connection's ``request_permission`` coroutine.
|
||||
loop: The event loop on which the ACP connection lives.
|
||||
session_id: Current ACP session id.
|
||||
timeout: Seconds to wait for a response before auto-denying.
|
||||
"""
|
||||
|
||||
def _callback(
|
||||
command: str,
|
||||
description: str,
|
||||
*,
|
||||
allow_permanent: bool = True,
|
||||
**_: object,
|
||||
) -> str:
|
||||
options = _build_permission_options(allow_permanent=allow_permanent)
|
||||
|
||||
future = None
|
||||
try:
|
||||
tool_call = _build_permission_tool_call(command, description)
|
||||
coro = request_permission_fn(
|
||||
session_id=session_id,
|
||||
tool_call=tool_call,
|
||||
options=options,
|
||||
)
|
||||
future = asyncio.run_coroutine_threadsafe(coro, loop)
|
||||
response = future.result(timeout=timeout)
|
||||
except (FutureTimeout, Exception) as exc:
|
||||
if future is not None:
|
||||
future.cancel()
|
||||
logger.warning("Permission request timed out or failed: %s", exc)
|
||||
return "deny"
|
||||
|
||||
if response is None:
|
||||
return "deny"
|
||||
|
||||
allowed_option_ids = {option.option_id for option in options}
|
||||
return _map_outcome_to_hermes(
|
||||
response.outcome,
|
||||
allowed_option_ids=allowed_option_ids,
|
||||
)
|
||||
|
||||
return _callback
|
||||
File diff suppressed because it is too large
Load Diff
1180
acp_adapter/tools.py
1180
acp_adapter/tools.py
File diff suppressed because it is too large
Load Diff
1781
agent/curator.py
1781
agent/curator.py
File diff suppressed because it is too large
Load Diff
@@ -1,693 +0,0 @@
|
||||
"""Curator snapshot + rollback.
|
||||
|
||||
A pre-run snapshot of ``~/.hermes/skills/`` (excluding ``.curator_backups/``
|
||||
itself) is taken before any mutating curator pass. Snapshots are tar.gz
|
||||
files under ``~/.hermes/skills/.curator_backups/<utc-iso>/`` with a
|
||||
companion ``manifest.json`` describing the snapshot (reason, time, size,
|
||||
counted skill files). Rollback picks a snapshot, moves the current
|
||||
``skills/`` tree aside into another snapshot so even the rollback itself
|
||||
is undoable, then extracts the chosen snapshot into place.
|
||||
|
||||
The snapshot does NOT include:
|
||||
- ``.curator_backups/`` (would recurse)
|
||||
- ``.hub/`` (hub-installed skills — managed by the hub, not us)
|
||||
|
||||
It DOES include:
|
||||
- all SKILL.md files + their directories (``scripts/``, ``references/``,
|
||||
``templates/``, ``assets/``)
|
||||
- ``.usage.json`` (usage telemetry — needed to rehydrate state cleanly)
|
||||
- ``.archive/`` (so rollback restores previously-archived skills too)
|
||||
- ``.curator_state`` (so rolling back also restores the last-run-at
|
||||
pointer — otherwise the curator would immediately re-fire on the next
|
||||
tick)
|
||||
- ``.bundled_manifest`` (so protection markers stay consistent)
|
||||
|
||||
Alongside the skills tarball, each snapshot also captures a copy of
|
||||
``~/.hermes/cron/jobs.json`` as ``cron-jobs.json`` when it exists. Cron
|
||||
jobs reference skills by name in their ``skills``/``skill`` fields; the
|
||||
curator's consolidation pass rewrites those in place via
|
||||
``cron.jobs.rewrite_skill_refs()``. Without capturing the pre-run state,
|
||||
rolling back the skills tree would leave cron jobs pointing at the
|
||||
umbrella skills even though the narrow skills they were originally
|
||||
configured with have been restored. We store the whole jobs.json for
|
||||
fidelity but rollback only touches the ``skills``/``skill`` fields — the
|
||||
rest (schedule, next_run_at, enabled, prompt, etc.) is live state and
|
||||
we leave it alone.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tarfile
|
||||
import tempfile
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
DEFAULT_KEEP = 5
|
||||
|
||||
# Entries under skills/ that should NEVER be rolled up into a snapshot.
|
||||
# .hub/ is managed by the skills hub; rolling it back would break lockfile
|
||||
# invariants. .curator_backups is the backup dir itself — recursion bomb.
|
||||
_EXCLUDE_TOP_LEVEL = {".curator_backups", ".hub"}
|
||||
|
||||
# Snapshot id regex: UTC ISO with colons replaced by dashes so the filename
|
||||
# is portable (Windows-safe). An optional ``-NN`` suffix handles two
|
||||
# snapshots landing in the same wallclock second.
|
||||
_ID_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}Z(-\d{2})?$")
|
||||
|
||||
|
||||
def _backups_dir() -> Path:
|
||||
return get_hermes_home() / "skills" / ".curator_backups"
|
||||
|
||||
|
||||
def _skills_dir() -> Path:
|
||||
return get_hermes_home() / "skills"
|
||||
|
||||
|
||||
def _cron_jobs_file() -> Path:
|
||||
"""Source path for the live cron jobs store (``~/.hermes/cron/jobs.json``)."""
|
||||
return get_hermes_home() / "cron" / "jobs.json"
|
||||
|
||||
|
||||
CRON_JOBS_FILENAME = "cron-jobs.json"
|
||||
|
||||
|
||||
def _backup_cron_jobs_into(dest: Path) -> Dict[str, Any]:
|
||||
"""Copy the live cron jobs.json into ``dest`` as ``cron-jobs.json``.
|
||||
|
||||
Returns a small dict describing what was captured so the caller can
|
||||
fold it into the manifest. Never raises — if the cron file is missing
|
||||
or unreadable, the return dict has ``backed_up=False`` and the reason,
|
||||
and the snapshot proceeds without cron data (the snapshot is still
|
||||
useful for rolling back skills).
|
||||
"""
|
||||
src = _cron_jobs_file()
|
||||
info: Dict[str, Any] = {"backed_up": False, "jobs_count": 0}
|
||||
if not src.exists():
|
||||
info["reason"] = "no cron/jobs.json present"
|
||||
return info
|
||||
try:
|
||||
raw = src.read_text(encoding="utf-8")
|
||||
except OSError as e:
|
||||
logger.debug("Failed to read cron/jobs.json for backup: %s", e)
|
||||
info["reason"] = f"read error: {e}"
|
||||
return info
|
||||
# Count jobs as a nice diagnostic — but don't fail the snapshot if the
|
||||
# file is unparseable; just store the raw text and let rollback deal
|
||||
# with it (or not, if it's corrupted). jobs.json wraps the list as
|
||||
# `{"jobs": [...], "updated_at": ...}` — we count via that shape, and
|
||||
# fall back to bare-list shape just in case the format ever changes.
|
||||
try:
|
||||
parsed = json.loads(raw)
|
||||
if isinstance(parsed, dict):
|
||||
inner = parsed.get("jobs")
|
||||
if isinstance(inner, list):
|
||||
info["jobs_count"] = len(inner)
|
||||
elif isinstance(parsed, list):
|
||||
info["jobs_count"] = len(parsed)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
info["jobs_count"] = 0
|
||||
info["parse_warning"] = "jobs.json was not valid JSON at snapshot time"
|
||||
try:
|
||||
(dest / CRON_JOBS_FILENAME).write_text(raw, encoding="utf-8")
|
||||
except OSError as e:
|
||||
logger.debug("Failed to write cron backup file: %s", e)
|
||||
info["reason"] = f"write error: {e}"
|
||||
return info
|
||||
info["backed_up"] = True
|
||||
return info
|
||||
|
||||
|
||||
def _utc_id(now: Optional[datetime] = None) -> str:
|
||||
"""UTC ISO-ish filesystem-safe timestamp: ``2026-05-01T13-05-42Z``."""
|
||||
if now is None:
|
||||
now = datetime.now(timezone.utc)
|
||||
# isoformat → "2026-05-01T13:05:42.123456+00:00"; strip subseconds and tz.
|
||||
s = now.replace(microsecond=0).isoformat()
|
||||
if s.endswith("+00:00"):
|
||||
s = s[:-6]
|
||||
return s.replace(":", "-") + "Z"
|
||||
|
||||
|
||||
def _load_config() -> Dict[str, Any]:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
except Exception as e:
|
||||
logger.debug("Failed to load config for curator backup: %s", e)
|
||||
return {}
|
||||
if not isinstance(cfg, dict):
|
||||
return {}
|
||||
cur = cfg.get("curator") or {}
|
||||
if not isinstance(cur, dict):
|
||||
return {}
|
||||
bk = cur.get("backup") or {}
|
||||
return bk if isinstance(bk, dict) else {}
|
||||
|
||||
|
||||
def is_enabled() -> bool:
|
||||
"""Default ON — the whole point of the backup is safety by default."""
|
||||
return bool(_load_config().get("enabled", True))
|
||||
|
||||
|
||||
def get_keep() -> int:
|
||||
cfg = _load_config()
|
||||
try:
|
||||
n = int(cfg.get("keep", DEFAULT_KEEP))
|
||||
except (TypeError, ValueError):
|
||||
n = DEFAULT_KEEP
|
||||
return max(1, n)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Snapshot
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _count_skill_files(base: Path) -> int:
|
||||
try:
|
||||
return sum(1 for _ in base.rglob("SKILL.md"))
|
||||
except OSError:
|
||||
return 0
|
||||
|
||||
|
||||
def _write_manifest(dest: Path, reason: str, archive_path: Path,
|
||||
skills_counted: int,
|
||||
cron_info: Optional[Dict[str, Any]] = None) -> None:
|
||||
manifest = {
|
||||
"id": dest.name,
|
||||
"reason": reason,
|
||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||
"archive": archive_path.name,
|
||||
"archive_bytes": archive_path.stat().st_size,
|
||||
"skill_files": skills_counted,
|
||||
}
|
||||
if cron_info is not None:
|
||||
manifest["cron_jobs"] = {
|
||||
"backed_up": bool(cron_info.get("backed_up", False)),
|
||||
"jobs_count": int(cron_info.get("jobs_count", 0)),
|
||||
}
|
||||
if not cron_info.get("backed_up"):
|
||||
manifest["cron_jobs"]["reason"] = cron_info.get("reason", "not captured")
|
||||
if cron_info.get("parse_warning"):
|
||||
manifest["cron_jobs"]["parse_warning"] = cron_info["parse_warning"]
|
||||
(dest / "manifest.json").write_text(
|
||||
json.dumps(manifest, indent=2, sort_keys=True), encoding="utf-8"
|
||||
)
|
||||
|
||||
|
||||
def snapshot_skills(reason: str = "manual") -> Optional[Path]:
|
||||
"""Create a tar.gz snapshot of ``~/.hermes/skills/`` and prune old ones.
|
||||
|
||||
Returns the snapshot directory path, or ``None`` if the snapshot was
|
||||
skipped (backup disabled, skills dir missing, or an IO error occurred —
|
||||
in which case we log at debug and return None so the curator never
|
||||
aborts a pass because of a backup failure).
|
||||
"""
|
||||
if not is_enabled():
|
||||
logger.debug("Curator backup disabled by config; skipping snapshot")
|
||||
return None
|
||||
|
||||
skills = _skills_dir()
|
||||
if not skills.exists():
|
||||
logger.debug("No ~/.hermes/skills/ directory — nothing to back up")
|
||||
return None
|
||||
|
||||
backups = _backups_dir()
|
||||
try:
|
||||
backups.mkdir(parents=True, exist_ok=True)
|
||||
except OSError as e:
|
||||
logger.debug("Failed to create backups dir %s: %s", backups, e)
|
||||
return None
|
||||
|
||||
# Uniquify: if a snapshot with the same second already exists (can
|
||||
# happen if two curator runs fire in the same second), append a short
|
||||
# counter. Avoids clobbering and avoids timestamp collisions.
|
||||
base_id = _utc_id()
|
||||
snap_id = base_id
|
||||
counter = 1
|
||||
while (backups / snap_id).exists():
|
||||
snap_id = f"{base_id}-{counter:02d}"
|
||||
counter += 1
|
||||
|
||||
dest = backups / snap_id
|
||||
try:
|
||||
dest.mkdir(parents=True, exist_ok=False)
|
||||
except OSError as e:
|
||||
logger.debug("Failed to create snapshot dir %s: %s", dest, e)
|
||||
return None
|
||||
|
||||
archive = dest / "skills.tar.gz"
|
||||
try:
|
||||
# Stream into the tarball — no tempdir copy needed.
|
||||
with tarfile.open(archive, "w:gz", compresslevel=6) as tf:
|
||||
for entry in sorted(skills.iterdir()):
|
||||
if entry.name in _EXCLUDE_TOP_LEVEL:
|
||||
continue
|
||||
# arcname: store paths relative to skills/ so extraction
|
||||
# drops cleanly back into the skills dir.
|
||||
tf.add(str(entry), arcname=entry.name, recursive=True)
|
||||
# Capture cron/jobs.json alongside the tarball. Never fails the
|
||||
# snapshot — the skills side is the core guarantee; cron is
|
||||
# additive. We still record in the manifest whether it was
|
||||
# captured so rollback can surface "no cron data in this snapshot".
|
||||
cron_info = _backup_cron_jobs_into(dest)
|
||||
_write_manifest(dest, reason, archive,
|
||||
_count_skill_files(skills),
|
||||
cron_info=cron_info)
|
||||
except (OSError, tarfile.TarError) as e:
|
||||
logger.debug("Curator snapshot failed: %s", e, exc_info=True)
|
||||
# Clean up partial snapshot
|
||||
try:
|
||||
shutil.rmtree(dest, ignore_errors=True)
|
||||
except OSError:
|
||||
pass
|
||||
return None
|
||||
|
||||
_prune_old(keep=get_keep())
|
||||
logger.info("Curator snapshot created: %s (%s)", snap_id, reason)
|
||||
return dest
|
||||
|
||||
|
||||
def _prune_old(keep: int) -> List[str]:
|
||||
"""Delete regular snapshots beyond the newest *keep*. Returns deleted
|
||||
ids. Staging dirs (``.rollback-staging-*``) are implementation detail
|
||||
and pruned independently on every call."""
|
||||
backups = _backups_dir()
|
||||
if not backups.exists():
|
||||
return []
|
||||
entries: List[Tuple[str, Path]] = []
|
||||
stale_staging: List[Path] = []
|
||||
for child in backups.iterdir():
|
||||
if not child.is_dir():
|
||||
continue
|
||||
if child.name.startswith(".rollback-staging-"):
|
||||
# Staging dirs are only supposed to exist briefly during a
|
||||
# rollback. If we find one here (e.g. from a crashed rollback),
|
||||
# clean it up opportunistically.
|
||||
stale_staging.append(child)
|
||||
continue
|
||||
if _ID_RE.match(child.name):
|
||||
entries.append((child.name, child))
|
||||
# Newest first (lexicographic works because the id is UTC ISO).
|
||||
entries.sort(key=lambda t: t[0], reverse=True)
|
||||
deleted: List[str] = []
|
||||
for _, path in entries[keep:]:
|
||||
try:
|
||||
shutil.rmtree(path)
|
||||
deleted.append(path.name)
|
||||
except OSError as e:
|
||||
logger.debug("Failed to prune %s: %s", path, e)
|
||||
for path in stale_staging:
|
||||
try:
|
||||
shutil.rmtree(path)
|
||||
except OSError as e:
|
||||
logger.debug("Failed to clean stale staging dir %s: %s", path, e)
|
||||
return deleted
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# List + rollback
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _read_manifest(snap_dir: Path) -> Dict[str, Any]:
|
||||
mf = snap_dir / "manifest.json"
|
||||
if not mf.exists():
|
||||
return {}
|
||||
try:
|
||||
return json.loads(mf.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
return {}
|
||||
|
||||
|
||||
def list_backups() -> List[Dict[str, Any]]:
|
||||
"""Return all restorable snapshots, newest first. Only entries with a
|
||||
real ``skills.tar.gz`` tarball are listed — transient
|
||||
``.rollback-staging-*`` directories created mid-rollback are
|
||||
implementation detail and not shown."""
|
||||
backups = _backups_dir()
|
||||
if not backups.exists():
|
||||
return []
|
||||
out: List[Dict[str, Any]] = []
|
||||
for child in sorted(backups.iterdir(), reverse=True):
|
||||
if not child.is_dir():
|
||||
continue
|
||||
if not _ID_RE.match(child.name):
|
||||
continue
|
||||
if not (child / "skills.tar.gz").exists():
|
||||
continue
|
||||
mf = _read_manifest(child)
|
||||
mf.setdefault("id", child.name)
|
||||
mf.setdefault("path", str(child))
|
||||
if "archive_bytes" not in mf:
|
||||
arc = child / "skills.tar.gz"
|
||||
try:
|
||||
mf["archive_bytes"] = arc.stat().st_size
|
||||
except OSError:
|
||||
mf["archive_bytes"] = 0
|
||||
out.append(mf)
|
||||
return out
|
||||
|
||||
|
||||
def _resolve_backup(backup_id: Optional[str]) -> Optional[Path]:
|
||||
"""Return the path of the requested backup, or the newest one if
|
||||
*backup_id* is None. Returns None if no match."""
|
||||
backups = _backups_dir()
|
||||
if not backups.exists():
|
||||
return None
|
||||
if backup_id:
|
||||
target = backups / backup_id
|
||||
if (
|
||||
target.is_dir()
|
||||
and _ID_RE.match(backup_id)
|
||||
and (target / "skills.tar.gz").exists()
|
||||
):
|
||||
return target
|
||||
return None
|
||||
candidates = [
|
||||
c for c in sorted(backups.iterdir(), reverse=True)
|
||||
if c.is_dir() and _ID_RE.match(c.name) and (c / "skills.tar.gz").exists()
|
||||
]
|
||||
return candidates[0] if candidates else None
|
||||
|
||||
|
||||
def _restore_cron_skill_links(snapshot_dir: Path) -> Dict[str, Any]:
|
||||
"""Reconcile backed-up cron skill links into the live ``cron/jobs.json``.
|
||||
|
||||
We do NOT overwrite the whole cron file. Only the ``skills`` and
|
||||
``skill`` fields are restored, and only on jobs that still exist in the
|
||||
current file (matched by ``id``). Everything else about the job —
|
||||
schedule, next_run_at, last_run_at, enabled, prompt, workdir, hooks —
|
||||
is live state that the user/scheduler has modified since the snapshot;
|
||||
overwriting it would regress unrelated cron activity.
|
||||
|
||||
Rules:
|
||||
- Jobs present in backup AND live, with differing skills → skills restored.
|
||||
- Jobs present in backup AND live, with matching skills → no-op.
|
||||
- Jobs present in backup but gone from live (user deleted the job
|
||||
after the snapshot) → skipped, noted in the return report.
|
||||
- Jobs present in live but not in backup (user created a new cron
|
||||
job after the snapshot) → left untouched.
|
||||
|
||||
Never raises; failures are captured in the return dict. Writes through
|
||||
``cron.jobs`` to pick up the same lock + atomic-write path that tick()
|
||||
uses, so we don't race the scheduler.
|
||||
"""
|
||||
report: Dict[str, Any] = {
|
||||
"attempted": False,
|
||||
"restored": [],
|
||||
"skipped_missing": [],
|
||||
"unchanged": 0,
|
||||
"error": None,
|
||||
}
|
||||
backup_file = snapshot_dir / CRON_JOBS_FILENAME
|
||||
if not backup_file.exists():
|
||||
report["error"] = f"snapshot has no {CRON_JOBS_FILENAME}"
|
||||
return report
|
||||
|
||||
try:
|
||||
backup_text = backup_file.read_text(encoding="utf-8")
|
||||
backup_parsed = json.loads(backup_text)
|
||||
except (OSError, json.JSONDecodeError) as e:
|
||||
report["error"] = f"failed to load backed-up jobs: {e}"
|
||||
return report
|
||||
# jobs.json on disk is `{"jobs": [...], "updated_at": ...}`; accept both
|
||||
# that shape and a bare list for forward compat.
|
||||
if isinstance(backup_parsed, dict):
|
||||
backup_jobs = backup_parsed.get("jobs")
|
||||
elif isinstance(backup_parsed, list):
|
||||
backup_jobs = backup_parsed
|
||||
else:
|
||||
backup_jobs = None
|
||||
if not isinstance(backup_jobs, list):
|
||||
report["error"] = "backed-up cron-jobs.json has no jobs list"
|
||||
return report
|
||||
|
||||
# Build a lookup of the backed-up skill state keyed by job id.
|
||||
# We only need the two skill-ish fields (legacy single and modern list).
|
||||
backup_by_id: Dict[str, Dict[str, Any]] = {}
|
||||
for job in backup_jobs:
|
||||
if not isinstance(job, dict):
|
||||
continue
|
||||
jid = job.get("id")
|
||||
if not isinstance(jid, str) or not jid:
|
||||
continue
|
||||
backup_by_id[jid] = {
|
||||
"skills": job.get("skills"),
|
||||
"skill": job.get("skill"),
|
||||
"name": job.get("name") or jid,
|
||||
}
|
||||
|
||||
if not backup_by_id:
|
||||
report["attempted"] = True # we tried but there was nothing to do
|
||||
return report
|
||||
|
||||
# Load and rewrite the live jobs under the scheduler's lock.
|
||||
try:
|
||||
from cron.jobs import load_jobs, save_jobs, _jobs_file_lock
|
||||
except ImportError as e:
|
||||
report["error"] = f"cron module unavailable: {e}"
|
||||
return report
|
||||
|
||||
report["attempted"] = True
|
||||
try:
|
||||
with _jobs_file_lock:
|
||||
live_jobs = load_jobs()
|
||||
changed = False
|
||||
|
||||
live_ids = set()
|
||||
for live in live_jobs:
|
||||
if not isinstance(live, dict):
|
||||
continue
|
||||
jid = live.get("id")
|
||||
if not isinstance(jid, str) or not jid:
|
||||
continue
|
||||
live_ids.add(jid)
|
||||
|
||||
backup = backup_by_id.get(jid)
|
||||
if backup is None:
|
||||
continue # live job didn't exist at snapshot time
|
||||
|
||||
cur_skills = live.get("skills")
|
||||
cur_skill = live.get("skill")
|
||||
bkp_skills = backup.get("skills")
|
||||
bkp_skill = backup.get("skill")
|
||||
|
||||
if cur_skills == bkp_skills and cur_skill == bkp_skill:
|
||||
report["unchanged"] += 1
|
||||
continue
|
||||
|
||||
# Restore. Preserve absence (don't force the key to appear
|
||||
# if the backup didn't have it either).
|
||||
if bkp_skills is None:
|
||||
live.pop("skills", None)
|
||||
else:
|
||||
live["skills"] = bkp_skills
|
||||
if bkp_skill is None:
|
||||
live.pop("skill", None)
|
||||
else:
|
||||
live["skill"] = bkp_skill
|
||||
|
||||
report["restored"].append({
|
||||
"job_id": jid,
|
||||
"job_name": backup.get("name") or jid,
|
||||
"from": {"skills": cur_skills, "skill": cur_skill},
|
||||
"to": {"skills": bkp_skills, "skill": bkp_skill},
|
||||
})
|
||||
changed = True
|
||||
|
||||
# Jobs in backup but not in live = user deleted them after snapshot
|
||||
for jid, backup in backup_by_id.items():
|
||||
if jid not in live_ids:
|
||||
report["skipped_missing"].append({
|
||||
"job_id": jid,
|
||||
"job_name": backup.get("name") or jid,
|
||||
})
|
||||
|
||||
if changed:
|
||||
save_jobs(live_jobs)
|
||||
except Exception as e: # noqa: BLE001 — rollback must not die mid-restore
|
||||
logger.debug("Cron skill-link restore failed: %s", e, exc_info=True)
|
||||
report["error"] = f"restore failed mid-flight: {e}"
|
||||
|
||||
return report
|
||||
|
||||
|
||||
|
||||
def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path]]:
|
||||
"""Restore ``~/.hermes/skills/`` from a snapshot.
|
||||
|
||||
Strategy:
|
||||
1. Resolve the target snapshot (explicit id or newest regular).
|
||||
2. Take a safety snapshot of the CURRENT skills tree under
|
||||
``.curator_backups/pre-rollback-<ts>/`` so the rollback itself is
|
||||
undoable.
|
||||
3. Move all current top-level entries (except ``.curator_backups``
|
||||
and ``.hub``) into a tempdir.
|
||||
4. Extract the chosen snapshot into ``~/.hermes/skills/``.
|
||||
5. On failure during 4, move the tempdir contents back (best-effort)
|
||||
and return failure.
|
||||
|
||||
Returns ``(ok, message, snapshot_path)``.
|
||||
"""
|
||||
target = _resolve_backup(backup_id)
|
||||
if target is None:
|
||||
return (
|
||||
False,
|
||||
f"no matching backup found"
|
||||
+ (f" for id '{backup_id}'" if backup_id else "")
|
||||
+ " (use `hermes curator rollback --list` to see available snapshots)",
|
||||
None,
|
||||
)
|
||||
archive = target / "skills.tar.gz"
|
||||
if not archive.exists():
|
||||
return (False, f"snapshot {target.name} has no skills.tar.gz — corrupted?", None)
|
||||
|
||||
skills = _skills_dir()
|
||||
skills.mkdir(parents=True, exist_ok=True)
|
||||
backups = _backups_dir()
|
||||
backups.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Step 2: safety snapshot of current state FIRST. If this fails we bail
|
||||
# out before touching anything — otherwise a failed extract could leave
|
||||
# the user with no skills.
|
||||
try:
|
||||
snapshot_skills(reason=f"pre-rollback to {target.name}")
|
||||
except Exception as e:
|
||||
return (False, f"pre-rollback safety snapshot failed: {e}", None)
|
||||
|
||||
# Additionally move current entries into an internal staging dir so
|
||||
# the extract happens into an empty skills tree (predictable result).
|
||||
# This dir is implementation detail — not listed as a restorable
|
||||
# backup. The safety snapshot above is the user-facing undo handle.
|
||||
staged = backups / f".rollback-staging-{_utc_id()}"
|
||||
try:
|
||||
staged.mkdir(parents=True, exist_ok=False)
|
||||
except OSError as e:
|
||||
return (False, f"failed to create staging dir: {e}", None)
|
||||
|
||||
moved: List[Tuple[Path, Path]] = []
|
||||
try:
|
||||
for entry in list(skills.iterdir()):
|
||||
if entry.name in _EXCLUDE_TOP_LEVEL:
|
||||
continue
|
||||
dest = staged / entry.name
|
||||
shutil.move(str(entry), str(dest))
|
||||
moved.append((entry, dest))
|
||||
except OSError as e:
|
||||
# Best-effort rollback of the move
|
||||
for orig, dest in moved:
|
||||
try:
|
||||
shutil.move(str(dest), str(orig))
|
||||
except OSError:
|
||||
pass
|
||||
try:
|
||||
shutil.rmtree(staged, ignore_errors=True)
|
||||
except OSError:
|
||||
pass
|
||||
return (False, f"failed to stage current skills: {e}", None)
|
||||
|
||||
# Step 4: extract the snapshot into skills/
|
||||
try:
|
||||
with tarfile.open(archive, "r:gz") as tf:
|
||||
# Python 3.12+ supports filter='data' for safer extraction.
|
||||
# Fall back to the unfiltered call for older interpreters but
|
||||
# still reject absolute paths and .. components defensively.
|
||||
for member in tf.getmembers():
|
||||
name = member.name
|
||||
if name.startswith("/") or ".." in Path(name).parts:
|
||||
raise tarfile.TarError(
|
||||
f"refusing to extract unsafe path: {name!r}"
|
||||
)
|
||||
try:
|
||||
tf.extractall(str(skills), filter="data") # type: ignore[call-arg]
|
||||
except TypeError:
|
||||
# Python < 3.12 — no filter kwarg
|
||||
tf.extractall(str(skills))
|
||||
except (OSError, tarfile.TarError) as e:
|
||||
# Best-effort recover: move staged contents back
|
||||
for orig, dest in moved:
|
||||
try:
|
||||
shutil.move(str(dest), str(orig))
|
||||
except OSError:
|
||||
pass
|
||||
try:
|
||||
shutil.rmtree(staged, ignore_errors=True)
|
||||
except OSError:
|
||||
pass
|
||||
return (False, f"snapshot extract failed (state restored): {e}", None)
|
||||
|
||||
# Extract succeeded — the staging dir has served its purpose. The
|
||||
# user's undo handle is the safety snapshot tarball we took earlier.
|
||||
try:
|
||||
shutil.rmtree(staged, ignore_errors=True)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
# Reconcile cron skill-links. Surgical: only the skills/skill fields
|
||||
# on jobs matched by id. Everything else in jobs.json is live state
|
||||
# (schedule, next_run_at, enabled, prompt, etc.) and we leave it
|
||||
# alone. Failures here don't fail the overall rollback — the skills
|
||||
# tree is already restored, which is the main guarantee.
|
||||
cron_report = _restore_cron_skill_links(target)
|
||||
|
||||
summary_bits = [f"restored from snapshot {target.name}"]
|
||||
if cron_report.get("attempted"):
|
||||
restored_n = len(cron_report.get("restored") or [])
|
||||
skipped_n = len(cron_report.get("skipped_missing") or [])
|
||||
if cron_report.get("error"):
|
||||
summary_bits.append(f"cron links: error — {cron_report['error']}")
|
||||
elif restored_n == 0 and skipped_n == 0 and cron_report.get("unchanged", 0) == 0:
|
||||
# Attempted but nothing matched — empty snapshot or no overlapping ids.
|
||||
pass
|
||||
else:
|
||||
parts = []
|
||||
if restored_n:
|
||||
parts.append(f"{restored_n} job(s) had skill links restored")
|
||||
if skipped_n:
|
||||
parts.append(f"{skipped_n} backed-up job(s) no longer exist (skipped)")
|
||||
if cron_report.get("unchanged"):
|
||||
parts.append(f"{cron_report['unchanged']} already matched")
|
||||
summary_bits.append("cron links: " + ", ".join(parts))
|
||||
|
||||
logger.info("Curator rollback: restored from %s (cron_report=%s)",
|
||||
target.name, cron_report)
|
||||
return (True, "; ".join(summary_bits), target)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Human-readable summary for CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def format_size(n: int) -> str:
|
||||
for unit in ("B", "KB", "MB", "GB"):
|
||||
if n < 1024 or unit == "GB":
|
||||
return f"{n:.1f} {unit}" if unit != "B" else f"{n} B"
|
||||
n /= 1024
|
||||
return f"{n:.1f} GB"
|
||||
|
||||
|
||||
def summarize_backups() -> str:
|
||||
rows = list_backups()
|
||||
if not rows:
|
||||
return "No curator snapshots yet."
|
||||
lines = [f"{'id':<24} {'reason':<40} {'skills':>6} {'size':>8}"]
|
||||
lines.append("─" * len(lines[0]))
|
||||
for r in rows:
|
||||
lines.append(
|
||||
f"{r.get('id','?'):<24} "
|
||||
f"{(r.get('reason','?') or '?')[:40]:<40} "
|
||||
f"{r.get('skill_files', 0):>6} "
|
||||
f"{format_size(int(r.get('archive_bytes', 0))):>8}"
|
||||
)
|
||||
return "\n".join(lines)
|
||||
258
agent/i18n.py
258
agent/i18n.py
@@ -1,258 +0,0 @@
|
||||
"""Lightweight internationalization (i18n) for Hermes static user-facing messages.
|
||||
|
||||
Scope (thin slice, by design): only the highest-impact static strings shown
|
||||
to the user by Hermes itself -- approval prompts, a handful of gateway slash
|
||||
command replies, restart-drain notices. Agent-generated output, log lines,
|
||||
error tracebacks, tool outputs, and slash-command descriptions all stay in
|
||||
English.
|
||||
|
||||
Catalog files live under ``locales/<lang>.yaml`` at the repo root. Each
|
||||
catalog is a flat dict keyed by dotted paths (e.g. ``approval.choose`` or
|
||||
``gateway.approval_expired``). Missing keys fall back to English; if English
|
||||
is missing too, the key path itself is returned so a broken catalog never
|
||||
crashes the agent.
|
||||
|
||||
Usage::
|
||||
|
||||
from agent.i18n import t
|
||||
print(t("approval.choose_long")) # current lang
|
||||
print(t("gateway.draining", count=3)) # {count} formatted
|
||||
print(t("approval.choose_long", lang="zh")) # explicit override
|
||||
|
||||
Language resolution order:
|
||||
1. Explicit ``lang=`` argument passed to :func:`t`
|
||||
2. ``HERMES_LANGUAGE`` environment variable (for tests / quick override)
|
||||
3. ``display.language`` from config.yaml
|
||||
4. ``"en"`` (baseline)
|
||||
|
||||
Supported languages: en, zh, ja, de, es, fr, tr, uk. Unknown values fall back to en.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SUPPORTED_LANGUAGES: tuple[str, ...] = (
|
||||
"en", "zh", "zh-hant", "ja", "de", "es", "fr", "tr", "uk",
|
||||
"af", "ko", "it", "ga", "pt", "ru", "hu",
|
||||
)
|
||||
DEFAULT_LANGUAGE = "en"
|
||||
|
||||
# Accept a few natural aliases so users who type "chinese" / "zh-CN" / "jp"
|
||||
# get the right catalog instead of silently falling back to English.
|
||||
_LANGUAGE_ALIASES: dict[str, str] = {
|
||||
"english": "en", "en-us": "en", "en-gb": "en",
|
||||
# Simplified Chinese — explicit codes route here; bare "chinese" / "mandarin"
|
||||
# also default to Simplified since that's the larger user base.
|
||||
"chinese": "zh", "mandarin": "zh", "zh-cn": "zh", "zh-hans": "zh", "zh-sg": "zh",
|
||||
# Traditional Chinese — distinct catalog. Cover Taiwan / Hong Kong / Macau
|
||||
# locale tags plus the common "traditional" alias.
|
||||
"traditional-chinese": "zh-hant", "traditional_chinese": "zh-hant",
|
||||
"zh-tw": "zh-hant", "zh-hk": "zh-hant", "zh-mo": "zh-hant",
|
||||
"japanese": "ja", "jp": "ja", "ja-jp": "ja",
|
||||
"german": "de", "deutsch": "de", "de-de": "de", "de-at": "de", "de-ch": "de",
|
||||
"spanish": "es", "español": "es", "espanol": "es", "es-es": "es", "es-mx": "es", "es-ar": "es",
|
||||
"french": "fr", "français": "fr", "france": "fr", "fr-fr": "fr", "fr-be": "fr", "fr-ca": "fr", "fr-ch": "fr",
|
||||
"ukrainian": "uk", "ukrainisch": "uk", "українська": "uk", "uk-ua": "uk", "ua": "uk",
|
||||
"turkish": "tr", "türkçe": "tr", "tr-tr": "tr",
|
||||
# Afrikaans — South African Dutch-derived language; "af-ZA" is the common BCP-47 tag.
|
||||
"afrikaans": "af", "af-za": "af",
|
||||
# Korean
|
||||
"korean": "ko", "한국어": "ko", "ko-kr": "ko",
|
||||
# Italian
|
||||
"italian": "it", "italiano": "it", "it-it": "it", "it-ch": "it",
|
||||
# Irish (Gaeilge) — ga is the BCP-47 code
|
||||
"irish": "ga", "gaeilge": "ga", "ga-ie": "ga",
|
||||
# Portuguese — bare "portuguese" routes to European Portuguese; pt-br
|
||||
# is in the same family but rendered identically here (no separate br catalog).
|
||||
"portuguese": "pt", "português": "pt", "portugues": "pt",
|
||||
"pt-pt": "pt", "pt-br": "pt", "brazilian": "pt", "brasileiro": "pt",
|
||||
# Russian
|
||||
"russian": "ru", "русский": "ru", "ru-ru": "ru",
|
||||
# Hungarian
|
||||
"hungarian": "hu", "magyar": "hu", "hu-hu": "hu",
|
||||
}
|
||||
|
||||
_catalog_cache: dict[str, dict[str, str]] = {}
|
||||
_catalog_lock = threading.Lock()
|
||||
|
||||
|
||||
def _locales_dir() -> Path:
|
||||
"""Return the directory containing locale YAML files.
|
||||
|
||||
Lives next to the repo root so both the bundled install and editable
|
||||
checkouts find it without PYTHONPATH gymnastics.
|
||||
"""
|
||||
# agent/i18n.py -> agent/ -> repo root
|
||||
return Path(__file__).resolve().parent.parent / "locales"
|
||||
|
||||
|
||||
def _normalize_lang(value: Any) -> str:
|
||||
"""Normalize a user-supplied language value to a supported code.
|
||||
|
||||
Accepts supported codes directly, common aliases (``chinese`` -> ``zh``),
|
||||
and case-insensitive regional tags (``zh-CN`` -> ``zh``). Returns the
|
||||
default language for unknown values.
|
||||
"""
|
||||
if not isinstance(value, str):
|
||||
return DEFAULT_LANGUAGE
|
||||
key = value.strip().lower()
|
||||
if not key:
|
||||
return DEFAULT_LANGUAGE
|
||||
if key in SUPPORTED_LANGUAGES:
|
||||
return key
|
||||
if key in _LANGUAGE_ALIASES:
|
||||
return _LANGUAGE_ALIASES[key]
|
||||
# Try stripping a region suffix (e.g. "pt-br" -> "pt" won't be supported,
|
||||
# but "zh-CN" -> "zh" will).
|
||||
base = key.split("-", 1)[0]
|
||||
if base in SUPPORTED_LANGUAGES:
|
||||
return base
|
||||
return DEFAULT_LANGUAGE
|
||||
|
||||
|
||||
def _load_catalog(lang: str) -> dict[str, str]:
|
||||
"""Load and flatten one locale YAML file into a dotted-key dict.
|
||||
|
||||
YAML files can be nested for human readability; this produces the flat
|
||||
key space :func:`t` expects. Cached per-language for the process.
|
||||
"""
|
||||
with _catalog_lock:
|
||||
cached = _catalog_cache.get(lang)
|
||||
if cached is not None:
|
||||
return cached
|
||||
|
||||
path = _locales_dir() / f"{lang}.yaml"
|
||||
if not path.is_file():
|
||||
logger.debug("i18n catalog missing for %s at %s", lang, path)
|
||||
with _catalog_lock:
|
||||
_catalog_cache[lang] = {}
|
||||
return {}
|
||||
|
||||
try:
|
||||
import yaml # PyYAML is already a hermes dependency
|
||||
with path.open("r", encoding="utf-8") as f:
|
||||
raw = yaml.safe_load(f) or {}
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to load i18n catalog %s: %s", path, exc)
|
||||
with _catalog_lock:
|
||||
_catalog_cache[lang] = {}
|
||||
return {}
|
||||
|
||||
flat: dict[str, str] = {}
|
||||
_flatten_into(raw, "", flat)
|
||||
with _catalog_lock:
|
||||
_catalog_cache[lang] = flat
|
||||
return flat
|
||||
|
||||
|
||||
def _flatten_into(node: Any, prefix: str, out: dict[str, str]) -> None:
|
||||
if isinstance(node, dict):
|
||||
for key, value in node.items():
|
||||
child_key = f"{prefix}.{key}" if prefix else str(key)
|
||||
_flatten_into(value, child_key, out)
|
||||
elif isinstance(node, str):
|
||||
out[prefix] = node
|
||||
# Non-string, non-dict leaves are ignored -- catalogs are text-only.
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def _config_language_cached() -> str | None:
|
||||
"""Read ``display.language`` from config.yaml once per process.
|
||||
|
||||
Cached because ``t()`` is called in hot paths (every approval prompt,
|
||||
every gateway reply) and re-reading YAML each call would be wasteful.
|
||||
``reset_language_cache()`` clears this when config changes at runtime
|
||||
(e.g. after the setup wizard).
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
lang = (cfg.get("display") or {}).get("language")
|
||||
if lang:
|
||||
return _normalize_lang(lang)
|
||||
except Exception as exc:
|
||||
logger.debug("Could not read display.language from config: %s", exc)
|
||||
return None
|
||||
|
||||
|
||||
def reset_language_cache() -> None:
|
||||
"""Invalidate cached language resolution and catalogs.
|
||||
|
||||
Call after :func:`hermes_cli.config.save_config` if a running process
|
||||
needs to pick up a changed ``display.language`` without restart.
|
||||
"""
|
||||
_config_language_cached.cache_clear()
|
||||
with _catalog_lock:
|
||||
_catalog_cache.clear()
|
||||
|
||||
|
||||
def get_language() -> str:
|
||||
"""Resolve the active language using env > config > default order."""
|
||||
env_lang = os.environ.get("HERMES_LANGUAGE")
|
||||
if env_lang:
|
||||
return _normalize_lang(env_lang)
|
||||
cfg_lang = _config_language_cached()
|
||||
if cfg_lang:
|
||||
return cfg_lang
|
||||
return DEFAULT_LANGUAGE
|
||||
|
||||
|
||||
def t(key: str, lang: str | None = None, **format_kwargs: Any) -> str:
|
||||
"""Translate a dotted key to the active language.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
key
|
||||
Dotted path into the catalog, e.g. ``"approval.choose_long"``.
|
||||
lang
|
||||
Explicit language override. Takes precedence over env + config.
|
||||
**format_kwargs
|
||||
``str.format`` substitution arguments (``t("gateway.drain", count=3)``
|
||||
expects a catalog entry with a ``{count}`` placeholder).
|
||||
|
||||
Returns
|
||||
-------
|
||||
The translated string, or the English fallback if the key is missing in
|
||||
the target language, or the bare key if English is also missing.
|
||||
"""
|
||||
target = _normalize_lang(lang) if lang else get_language()
|
||||
catalog = _load_catalog(target)
|
||||
value = catalog.get(key)
|
||||
|
||||
if value is None and target != DEFAULT_LANGUAGE:
|
||||
# Fall through to English rather than showing a key path to the user.
|
||||
value = _load_catalog(DEFAULT_LANGUAGE).get(key)
|
||||
|
||||
if value is None:
|
||||
# Last-ditch: return the key itself. A broken catalog should not
|
||||
# crash anything; it just looks ugly until someone fixes it.
|
||||
logger.debug("i18n miss: key=%r lang=%r", key, target)
|
||||
value = key
|
||||
|
||||
if format_kwargs:
|
||||
try:
|
||||
return value.format(**format_kwargs)
|
||||
except (KeyError, IndexError, ValueError) as exc:
|
||||
logger.warning(
|
||||
"i18n format failed for key=%r lang=%r kwargs=%r: %s",
|
||||
key, target, format_kwargs, exc,
|
||||
)
|
||||
return value
|
||||
return value
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SUPPORTED_LANGUAGES",
|
||||
"DEFAULT_LANGUAGE",
|
||||
"t",
|
||||
"get_language",
|
||||
"reset_language_cache",
|
||||
]
|
||||
@@ -1,301 +0,0 @@
|
||||
"""Routing helpers for inbound user-attached images.
|
||||
|
||||
Two modes:
|
||||
|
||||
native — attach images as OpenAI-style ``image_url`` content parts on the
|
||||
user turn. Provider adapters (Anthropic, Gemini, Bedrock, Codex,
|
||||
OpenAI chat.completions) already translate these into their
|
||||
vendor-specific multimodal formats.
|
||||
|
||||
text — run ``vision_analyze`` on each image up-front and prepend the
|
||||
description to the user's text. The model never sees the pixels;
|
||||
it only sees a lossy text summary. This is the pre-existing
|
||||
behaviour and still the right choice for non-vision models.
|
||||
|
||||
The decision is made once per message turn by :func:`decide_image_input_mode`.
|
||||
It reads ``agent.image_input_mode`` from config.yaml (``auto`` | ``native``
|
||||
| ``text``, default ``auto``) and the active model's capability metadata.
|
||||
|
||||
In ``auto`` mode:
|
||||
- If the user has explicitly configured ``auxiliary.vision.provider``
|
||||
(i.e. not ``auto`` and not empty), we assume they want the text pipeline
|
||||
regardless of the main model — they've opted in to a specific vision
|
||||
backend for a reason (cost, quality, local-only, etc.).
|
||||
- Otherwise, if the active model reports ``supports_vision=True`` in its
|
||||
models.dev metadata, we attach natively.
|
||||
- Otherwise (non-vision model, no explicit override), we fall back to text.
|
||||
|
||||
This keeps ``vision_analyze`` surfaced as a tool in every session — skills
|
||||
and agent flows that chain it (browser screenshots, deeper inspection of
|
||||
URL-referenced images, style-gating loops) keep working. The routing only
|
||||
affects *how user-attached images on the current turn* are presented to the
|
||||
main model.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import logging
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_VALID_MODES = frozenset({"auto", "native", "text"})
|
||||
|
||||
|
||||
def _coerce_mode(raw: Any) -> str:
|
||||
"""Normalize a config value into one of the valid modes."""
|
||||
if not isinstance(raw, str):
|
||||
return "auto"
|
||||
val = raw.strip().lower()
|
||||
if val in _VALID_MODES:
|
||||
return val
|
||||
return "auto"
|
||||
|
||||
|
||||
def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
|
||||
"""True when the user configured a specific auxiliary vision backend.
|
||||
|
||||
An explicit override means the user *wants* the text pipeline (they're
|
||||
paying for a dedicated vision model), so we don't silently bypass it.
|
||||
"""
|
||||
if not isinstance(cfg, dict):
|
||||
return False
|
||||
aux = cfg.get("auxiliary") or {}
|
||||
if not isinstance(aux, dict):
|
||||
return False
|
||||
vision = aux.get("vision") or {}
|
||||
if not isinstance(vision, dict):
|
||||
return False
|
||||
|
||||
provider = str(vision.get("provider") or "").strip().lower()
|
||||
model = str(vision.get("model") or "").strip()
|
||||
base_url = str(vision.get("base_url") or "").strip()
|
||||
|
||||
# "auto" / "" / blank = not explicit
|
||||
if provider in {"", "auto"} and not model and not base_url:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _lookup_supports_vision(provider: str, model: str) -> Optional[bool]:
|
||||
"""Return True/False if we can resolve caps, None if unknown."""
|
||||
if not provider or not model:
|
||||
return None
|
||||
try:
|
||||
from agent.models_dev import get_model_capabilities
|
||||
caps = get_model_capabilities(provider, model)
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logger.debug("image_routing: caps lookup failed for %s:%s — %s", provider, model, exc)
|
||||
return None
|
||||
if caps is None:
|
||||
return None
|
||||
return bool(caps.supports_vision)
|
||||
|
||||
|
||||
def decide_image_input_mode(
|
||||
provider: str,
|
||||
model: str,
|
||||
cfg: Optional[Dict[str, Any]],
|
||||
) -> str:
|
||||
"""Return ``"native"`` or ``"text"`` for the given turn.
|
||||
|
||||
Args:
|
||||
provider: active inference provider ID (e.g. ``"anthropic"``, ``"openrouter"``).
|
||||
model: active model slug as it would be sent to the provider.
|
||||
cfg: loaded config.yaml dict, or None. When None, behaves as auto.
|
||||
"""
|
||||
mode_cfg = "auto"
|
||||
if isinstance(cfg, dict):
|
||||
agent_cfg = cfg.get("agent") or {}
|
||||
if isinstance(agent_cfg, dict):
|
||||
mode_cfg = _coerce_mode(agent_cfg.get("image_input_mode"))
|
||||
|
||||
if mode_cfg == "native":
|
||||
return "native"
|
||||
if mode_cfg == "text":
|
||||
return "text"
|
||||
|
||||
# auto
|
||||
if _explicit_aux_vision_override(cfg):
|
||||
return "text"
|
||||
|
||||
supports = _lookup_supports_vision(provider, model)
|
||||
if supports is True:
|
||||
return "native"
|
||||
return "text"
|
||||
|
||||
|
||||
# Image size handling is REACTIVE rather than proactive: we attempt native
|
||||
# attachment at full size regardless of provider, and rely on
|
||||
# ``run_agent._try_shrink_image_parts_in_messages`` to shrink + retry if
|
||||
# the provider rejects the request (e.g. Anthropic's hard 5 MB per-image
|
||||
# ceiling returned as HTTP 400 "image exceeds 5 MB maximum").
|
||||
#
|
||||
# Why reactive: our knowledge of provider ceilings is partial and evolving
|
||||
# (OpenAI accepts 49 MB+, Anthropic 5 MB, Gemini 100 MB, others unknown).
|
||||
# A proactive per-provider table would be stale the moment a provider raises
|
||||
# or lowers its limit, and silently degrading quality for users on providers
|
||||
# that would have accepted the full image is the worse failure mode.
|
||||
# The shrink-on-reject path loses 1 API call + maybe 1s of Pillow work when
|
||||
# it fires, which is cheaper than permanent quality loss.
|
||||
|
||||
|
||||
def _sniff_mime_from_bytes(raw: bytes) -> Optional[str]:
|
||||
"""Detect image MIME from magic bytes. Returns None if unrecognised.
|
||||
|
||||
Filename-based detection (``mimetypes.guess_type``) is unreliable when
|
||||
upstream platforms lie about content-type. Discord, for example, can
|
||||
serve a PNG with ``content_type=image/webp`` for proxied/animated
|
||||
stickers, custom emoji previews, or images uploaded via certain bots.
|
||||
Anthropic strictly validates that declared media_type matches the
|
||||
actual bytes and returns HTTP 400 on mismatch, so we sniff to be safe.
|
||||
"""
|
||||
if not raw:
|
||||
return None
|
||||
# PNG: 89 50 4E 47 0D 0A 1A 0A
|
||||
if raw.startswith(b"\x89PNG\r\n\x1a\n"):
|
||||
return "image/png"
|
||||
# JPEG: FF D8 FF
|
||||
if raw.startswith(b"\xff\xd8\xff"):
|
||||
return "image/jpeg"
|
||||
# GIF87a / GIF89a
|
||||
if raw[:6] in {b"GIF87a", b"GIF89a"}:
|
||||
return "image/gif"
|
||||
# WEBP: "RIFF" .... "WEBP"
|
||||
if len(raw) >= 12 and raw[:4] == b"RIFF" and raw[8:12] == b"WEBP":
|
||||
return "image/webp"
|
||||
# BMP: "BM"
|
||||
if raw.startswith(b"BM"):
|
||||
return "image/bmp"
|
||||
# HEIC/HEIF: ftypheic / ftypheix / ftypmif1 / ftypmsf1 etc.
|
||||
if len(raw) >= 12 and raw[4:8] == b"ftyp" and raw[8:12] in {
|
||||
b"heic", b"heix", b"hevc", b"hevx", b"mif1", b"msf1", b"heim", b"heis",
|
||||
}:
|
||||
return "image/heic"
|
||||
return None
|
||||
|
||||
|
||||
def _guess_mime(path: Path, raw: Optional[bytes] = None) -> str:
|
||||
"""Return image MIME type for *path*.
|
||||
|
||||
If *raw* bytes are provided, magic-byte sniffing wins (authoritative).
|
||||
Otherwise we fall back to ``mimetypes`` then suffix-based defaults.
|
||||
"""
|
||||
if raw is not None:
|
||||
sniffed = _sniff_mime_from_bytes(raw)
|
||||
if sniffed:
|
||||
return sniffed
|
||||
mime, _ = mimetypes.guess_type(str(path))
|
||||
if mime and mime.startswith("image/"):
|
||||
return mime
|
||||
# mimetypes on some Linux distros mis-maps .jpg; default to jpeg when
|
||||
# the suffix looks imagey.
|
||||
suffix = path.suffix.lower()
|
||||
return {
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".png": "image/png",
|
||||
".gif": "image/gif",
|
||||
".webp": "image/webp",
|
||||
".bmp": "image/bmp",
|
||||
}.get(suffix, "image/jpeg")
|
||||
|
||||
|
||||
def _file_to_data_url(path: Path) -> Optional[str]:
|
||||
"""Encode a local image as a base64 data URL at its native size.
|
||||
|
||||
Size limits are NOT enforced here — the agent retry loop
|
||||
(``run_agent._try_shrink_image_parts_in_messages``) shrinks on the
|
||||
provider's first rejection. Keeping this simple means providers that
|
||||
accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
|
||||
quality tax just because one other provider is stricter.
|
||||
|
||||
Returns None only if the file can't be read (missing, permission
|
||||
denied, etc.); the caller reports those paths in ``skipped``.
|
||||
"""
|
||||
try:
|
||||
raw = path.read_bytes()
|
||||
except Exception as exc:
|
||||
logger.warning("image_routing: failed to read %s — %s", path, exc)
|
||||
return None
|
||||
mime = _guess_mime(path, raw=raw)
|
||||
b64 = base64.b64encode(raw).decode("ascii")
|
||||
return f"data:{mime};base64,{b64}"
|
||||
|
||||
|
||||
def build_native_content_parts(
|
||||
user_text: str,
|
||||
image_paths: List[str],
|
||||
) -> Tuple[List[Dict[str, Any]], List[str]]:
|
||||
"""Build an OpenAI-style ``content`` list for a user turn.
|
||||
|
||||
Shape:
|
||||
[{"type": "text", "text": "...\\n\\n[Image attached at: /local/path]"},
|
||||
{"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
|
||||
...]
|
||||
|
||||
The local path of each successfully attached image is appended to the
|
||||
text part as ``[Image attached at: <path>]``. The model still sees the
|
||||
pixels via the ``image_url`` part (full native vision); the path note
|
||||
just gives it a string handle so MCP/skill tools that take an image
|
||||
path or URL argument can be invoked on the same image without an
|
||||
extra round-trip. This parallels the text-mode hint produced by
|
||||
``Runner._enrich_message_with_vision`` (``vision_analyze using image_url:
|
||||
<path>``) so behaviour is consistent across both image input modes.
|
||||
|
||||
Images are attached at their native size. If a provider rejects the
|
||||
request because an image is too large (e.g. Anthropic's 5 MB per-image
|
||||
ceiling), the agent's retry loop transparently shrinks and retries
|
||||
once — see ``run_agent._try_shrink_image_parts_in_messages``.
|
||||
|
||||
Returns (content_parts, skipped_paths). Skipped paths are files that
|
||||
couldn't be read from disk and are NOT advertised in the path hints.
|
||||
"""
|
||||
skipped: List[str] = []
|
||||
image_parts: List[Dict[str, Any]] = []
|
||||
attached_paths: List[str] = []
|
||||
|
||||
for raw_path in image_paths:
|
||||
p = Path(raw_path)
|
||||
if not p.exists() or not p.is_file():
|
||||
skipped.append(str(raw_path))
|
||||
continue
|
||||
data_url = _file_to_data_url(p)
|
||||
if not data_url:
|
||||
skipped.append(str(raw_path))
|
||||
continue
|
||||
image_parts.append({
|
||||
"type": "image_url",
|
||||
"image_url": {"url": data_url},
|
||||
})
|
||||
attached_paths.append(str(raw_path))
|
||||
|
||||
text = (user_text or "").strip()
|
||||
|
||||
# If at least one image attached, build a single text part that combines
|
||||
# the user's caption (or a neutral default) with one path hint per image.
|
||||
if attached_paths:
|
||||
base_text = text or "What do you see in this image?"
|
||||
path_hints = "\n".join(
|
||||
f"[Image attached at: {p}]" for p in attached_paths
|
||||
)
|
||||
combined_text = f"{base_text}\n\n{path_hints}"
|
||||
parts: List[Dict[str, Any]] = [{"type": "text", "text": combined_text}]
|
||||
parts.extend(image_parts)
|
||||
return parts, skipped
|
||||
|
||||
# No images successfully attached — fall back to plain text-only behaviour.
|
||||
parts = []
|
||||
if text:
|
||||
parts.append({"type": "text", "text": text})
|
||||
return parts, skipped
|
||||
|
||||
|
||||
__all__ = [
|
||||
"decide_image_input_mode",
|
||||
"build_native_content_parts",
|
||||
]
|
||||
@@ -1,48 +0,0 @@
|
||||
"""LM Studio reasoning-effort resolution shared by the chat-completions
|
||||
transport and run_agent's iteration-limit summary path.
|
||||
|
||||
LM Studio publishes per-model ``capabilities.reasoning.allowed_options`` (e.g.
|
||||
``["off","on"]`` for toggle-style models, ``["off","minimal","low"]`` for
|
||||
graduated models). We map the user's ``reasoning_config`` onto LM Studio's
|
||||
OpenAI-compatible vocabulary, then clamp against the model's allowed set so
|
||||
the server doesn't 400 on an unsupported effort.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import List, Optional
|
||||
|
||||
# LM Studio accepts these top-level reasoning_effort values via its
|
||||
# OpenAI-compatible chat.completions endpoint.
|
||||
_LM_VALID_EFFORTS = {"none", "minimal", "low", "medium", "high", "xhigh"}
|
||||
|
||||
# Toggle-style models publish allowed_options as ["off","on"] in /api/v1/models.
|
||||
# Map them onto the OpenAI-compatible request vocabulary.
|
||||
_LM_EFFORT_ALIASES = {"off": "none", "on": "medium"}
|
||||
|
||||
|
||||
def resolve_lmstudio_effort(
|
||||
reasoning_config: Optional[dict],
|
||||
allowed_options: Optional[List[str]],
|
||||
) -> Optional[str]:
|
||||
"""Return the ``reasoning_effort`` string to send to LM Studio, or ``None``.
|
||||
|
||||
``None`` means "omit the field": the user picked a level the model can't
|
||||
honor, so let LM Studio fall back to the model's declared default rather
|
||||
than silently substituting a different effort. When ``allowed_options`` is
|
||||
falsy (probe failed), skip clamping and send the resolved effort anyway.
|
||||
"""
|
||||
effort = "medium"
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
if reasoning_config.get("enabled") is False:
|
||||
effort = "none"
|
||||
else:
|
||||
raw = (reasoning_config.get("effort") or "").strip().lower()
|
||||
raw = _LM_EFFORT_ALIASES.get(raw, raw)
|
||||
if raw in _LM_VALID_EFFORTS:
|
||||
effort = raw
|
||||
if allowed_options:
|
||||
allowed = {_LM_EFFORT_ALIASES.get(opt, opt) for opt in allowed_options}
|
||||
if effort not in allowed:
|
||||
return None
|
||||
return effort
|
||||
@@ -1,106 +0,0 @@
|
||||
"""Language Server Protocol (LSP) integration for Hermes Agent.
|
||||
|
||||
Hermes runs full language servers (pyright, gopls, rust-analyzer,
|
||||
typescript-language-server, etc.) as subprocesses and pipes their
|
||||
``textDocument/publishDiagnostics`` output into the post-write lint
|
||||
delta filter used by ``write_file`` and ``patch``.
|
||||
|
||||
LSP is **gated on git workspace detection** — if the agent's cwd is
|
||||
inside a git repository, LSP runs against that workspace; otherwise the
|
||||
file_operations layer falls back to its existing in-process syntax
|
||||
checks. This keeps users on user-home cwd's (e.g. Telegram gateway
|
||||
chats) from spawning daemons they don't need.
|
||||
|
||||
Public API:
|
||||
|
||||
from agent.lsp import get_service
|
||||
|
||||
svc = get_service()
|
||||
if svc and svc.enabled_for(path):
|
||||
await svc.touch_file(path)
|
||||
diags = svc.diagnostics_for(path)
|
||||
|
||||
The bulk of the wiring is internal — most callers only need the layer
|
||||
in :func:`tools.file_operations.FileOperations._check_lint_delta`,
|
||||
which is already wired (see that module).
|
||||
|
||||
Architecture is documented in ``website/docs/user-guide/features/lsp.md``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import atexit
|
||||
import logging
|
||||
import threading
|
||||
from typing import Optional
|
||||
|
||||
from agent.lsp.manager import LSPService
|
||||
|
||||
logger = logging.getLogger("agent.lsp")
|
||||
|
||||
_service: Optional[LSPService] = None
|
||||
_atexit_registered = False
|
||||
_service_lock = threading.Lock()
|
||||
|
||||
|
||||
def get_service() -> Optional[LSPService]:
|
||||
"""Return the process-wide LSP service singleton, or None when disabled.
|
||||
|
||||
The service is created lazily on first call. ``None`` is returned
|
||||
when LSP is disabled in config, when no workspace can be detected,
|
||||
or when the platform doesn't support subprocess-based LSP servers.
|
||||
|
||||
On first creation, registers an :mod:`atexit` handler that tears
|
||||
down spawned language servers on Python exit so a long-running
|
||||
CLI or gateway session doesn't leak pyright/gopls/etc. processes
|
||||
when it terminates.
|
||||
"""
|
||||
global _service, _atexit_registered
|
||||
if _service is not None:
|
||||
return _service if _service.is_active() else None
|
||||
with _service_lock:
|
||||
if _service is not None:
|
||||
return _service if _service.is_active() else None
|
||||
_service = LSPService.create_from_config()
|
||||
if not _atexit_registered:
|
||||
# ``atexit`` handlers run in LIFO order on normal Python
|
||||
# exit and on SystemExit, but NOT on os._exit() or
|
||||
# uncaught signals. Language servers are stateless
|
||||
# subprocesses — losing them on SIGKILL is fine; they'll
|
||||
# be reaped by the kernel along with their parent. We
|
||||
# care about clean exits where Python flushes stdio
|
||||
# before terminating; without this hook every
|
||||
# ``hermes chat`` exit would leak pyright processes that
|
||||
# outlive the parent for a few seconds while their
|
||||
# stdout buffers drain.
|
||||
atexit.register(_atexit_shutdown)
|
||||
_atexit_registered = True
|
||||
return _service if (_service is not None and _service.is_active()) else None
|
||||
|
||||
|
||||
def shutdown_service() -> None:
|
||||
"""Tear down the LSP service if one was started.
|
||||
|
||||
Safe to call multiple times; safe to call when no service was created.
|
||||
"""
|
||||
global _service
|
||||
with _service_lock:
|
||||
svc = _service
|
||||
_service = None
|
||||
if svc is not None:
|
||||
try:
|
||||
svc.shutdown()
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("LSP shutdown error: %s", e)
|
||||
|
||||
|
||||
def _atexit_shutdown() -> None:
|
||||
"""atexit-registered wrapper. Logs at debug because by the time
|
||||
atexit fires the user has already seen the agent's final output —
|
||||
a noisy shutdown line on top of that is just clutter."""
|
||||
try:
|
||||
shutdown_service()
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("atexit LSP shutdown failed: %s", e)
|
||||
|
||||
|
||||
__all__ = ["get_service", "shutdown_service", "LSPService"]
|
||||
308
agent/lsp/cli.py
308
agent/lsp/cli.py
@@ -1,308 +0,0 @@
|
||||
"""``hermes lsp`` CLI subcommand.
|
||||
|
||||
Subcommands:
|
||||
|
||||
- ``status`` — show service state, configured servers, install status.
|
||||
- ``install <server_id>`` — eagerly install one server's binary.
|
||||
- ``install-all`` — try to install every server with a known recipe.
|
||||
- ``restart`` — tear down running clients so the next edit re-spawns.
|
||||
- ``which <server_id>`` — print the resolved binary path for one server.
|
||||
- ``list`` — print the registry of supported servers.
|
||||
|
||||
The handlers are kept here (rather than in
|
||||
``hermes_cli/main.py``) so the LSP module ships self-contained.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def register_subparser(subparsers: argparse._SubParsersAction) -> None:
|
||||
"""Wire the ``hermes lsp`` subcommand tree into the main argparse."""
|
||||
parser = subparsers.add_parser(
|
||||
"lsp",
|
||||
help="Language Server Protocol management",
|
||||
description=(
|
||||
"Manage the LSP layer that powers post-write semantic "
|
||||
"diagnostics in write_file/patch."
|
||||
),
|
||||
)
|
||||
sub = parser.add_subparsers(dest="lsp_command")
|
||||
|
||||
sub_status = sub.add_parser("status", help="Show LSP service status")
|
||||
sub_status.add_argument(
|
||||
"--json", action="store_true", help="Emit machine-readable JSON"
|
||||
)
|
||||
|
||||
sub_list = sub.add_parser("list", help="List supported language servers")
|
||||
sub_list.add_argument(
|
||||
"--installed-only",
|
||||
action="store_true",
|
||||
help="Only show servers whose binary is currently available",
|
||||
)
|
||||
|
||||
sub_install = sub.add_parser("install", help="Install a server binary")
|
||||
sub_install.add_argument("server", help="Server id (e.g. pyright, gopls)")
|
||||
|
||||
sub_install_all = sub.add_parser(
|
||||
"install-all",
|
||||
help="Install every server with a known auto-install recipe",
|
||||
)
|
||||
sub_install_all.add_argument(
|
||||
"--include-manual",
|
||||
action="store_true",
|
||||
help="Even attempt servers marked manual-install (best effort)",
|
||||
)
|
||||
|
||||
sub_restart = sub.add_parser(
|
||||
"restart",
|
||||
help="Tear down running LSP clients (next edit re-spawns)",
|
||||
)
|
||||
|
||||
sub_which = sub.add_parser("which", help="Print binary path for a server")
|
||||
sub_which.add_argument("server", help="Server id")
|
||||
|
||||
parser.set_defaults(func=run_lsp_command)
|
||||
|
||||
|
||||
def run_lsp_command(args: argparse.Namespace) -> int:
|
||||
"""Top-level dispatcher for ``hermes lsp <subcommand>``."""
|
||||
sub = getattr(args, "lsp_command", None) or "status"
|
||||
try:
|
||||
if sub == "status":
|
||||
return _cmd_status(getattr(args, "json", False))
|
||||
if sub == "list":
|
||||
return _cmd_list(getattr(args, "installed_only", False))
|
||||
if sub == "install":
|
||||
return _cmd_install(args.server)
|
||||
if sub == "install-all":
|
||||
return _cmd_install_all(getattr(args, "include_manual", False))
|
||||
if sub == "restart":
|
||||
return _cmd_restart()
|
||||
if sub == "which":
|
||||
return _cmd_which(args.server)
|
||||
sys.stderr.write(f"unknown lsp subcommand: {sub}\n")
|
||||
return 2
|
||||
except KeyboardInterrupt:
|
||||
return 130
|
||||
|
||||
|
||||
def _cmd_status(emit_json: bool) -> int:
|
||||
from agent.lsp import get_service
|
||||
from agent.lsp.servers import SERVERS
|
||||
from agent.lsp.install import detect_status
|
||||
|
||||
svc = get_service()
|
||||
service_active = svc is not None
|
||||
info = svc.get_status() if svc is not None else {"enabled": False}
|
||||
|
||||
if emit_json:
|
||||
import json
|
||||
payload = {
|
||||
"service": info,
|
||||
"registry": [
|
||||
{
|
||||
"server_id": s.server_id,
|
||||
"extensions": list(s.extensions),
|
||||
"description": s.description,
|
||||
"binary_status": detect_status(_recipe_pkg_for(s.server_id)),
|
||||
}
|
||||
for s in SERVERS
|
||||
],
|
||||
}
|
||||
sys.stdout.write(json.dumps(payload, indent=2) + "\n")
|
||||
return 0
|
||||
|
||||
out = []
|
||||
out.append("LSP Service")
|
||||
out.append("===========")
|
||||
out.append(f" enabled: {info.get('enabled', False)}")
|
||||
if service_active:
|
||||
out.append(f" wait_mode: {info.get('wait_mode')}")
|
||||
out.append(f" wait_timeout: {info.get('wait_timeout')}s")
|
||||
out.append(f" install_strategy:{info.get('install_strategy')}")
|
||||
clients = info.get("clients") or []
|
||||
if clients:
|
||||
out.append(f" active clients: {len(clients)}")
|
||||
for c in clients:
|
||||
out.append(
|
||||
f" - {c['server_id']:20s} state={c['state']:10s} root={c['workspace_root']}"
|
||||
)
|
||||
else:
|
||||
out.append(" active clients: none")
|
||||
broken = info.get("broken") or []
|
||||
if broken:
|
||||
out.append(f" broken pairs: {len(broken)}")
|
||||
for b in broken:
|
||||
out.append(f" - {b}")
|
||||
disabled = info.get("disabled_servers") or []
|
||||
if disabled:
|
||||
out.append(f" disabled in cfg: {', '.join(disabled)}")
|
||||
|
||||
# Surface backend-tool gaps that aren't visible in the registry table:
|
||||
# some servers spawn fine but emit no diagnostics without a sidecar
|
||||
# binary (bash-language-server -> shellcheck).
|
||||
backend_warnings = _backend_warnings()
|
||||
if backend_warnings:
|
||||
out.append("")
|
||||
out.append("Backend warnings")
|
||||
out.append("================")
|
||||
for line in backend_warnings:
|
||||
out.append(f" ! {line}")
|
||||
out.append("")
|
||||
out.append("Registered Servers")
|
||||
out.append("==================")
|
||||
for s in SERVERS:
|
||||
pkg = _recipe_pkg_for(s.server_id)
|
||||
status = detect_status(pkg)
|
||||
marker = {
|
||||
"installed": "✓",
|
||||
"missing": "·",
|
||||
"manual-only": "?",
|
||||
}.get(status, " ")
|
||||
ext_summary = ", ".join(list(s.extensions)[:5])
|
||||
if len(s.extensions) > 5:
|
||||
ext_summary += f", … (+{len(s.extensions) - 5})"
|
||||
out.append(
|
||||
f" {marker} {s.server_id:24s} [{status:11s}] {ext_summary}"
|
||||
)
|
||||
if s.description:
|
||||
out.append(f" {s.description}")
|
||||
sys.stdout.write("\n".join(out) + "\n")
|
||||
return 0
|
||||
|
||||
|
||||
def _cmd_list(installed_only: bool) -> int:
|
||||
from agent.lsp.servers import SERVERS
|
||||
from agent.lsp.install import detect_status
|
||||
|
||||
for s in SERVERS:
|
||||
pkg = _recipe_pkg_for(s.server_id)
|
||||
status = detect_status(pkg)
|
||||
if installed_only and status != "installed":
|
||||
continue
|
||||
sys.stdout.write(
|
||||
f"{s.server_id:24s} [{status:11s}] {','.join(s.extensions)}\n"
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
def _cmd_install(server_id: str) -> int:
|
||||
from agent.lsp.install import try_install, INSTALL_RECIPES, detect_status
|
||||
pkg = _recipe_pkg_for(server_id)
|
||||
pre_status = detect_status(pkg)
|
||||
if pre_status == "installed":
|
||||
sys.stdout.write(f"{server_id} already installed\n")
|
||||
return 0
|
||||
sys.stdout.write(f"installing {server_id} (pkg={pkg}) ...\n")
|
||||
sys.stdout.flush()
|
||||
bin_path = try_install(pkg, "auto")
|
||||
if bin_path is None:
|
||||
recipe = INSTALL_RECIPES.get(pkg)
|
||||
if recipe and recipe.get("strategy") == "manual":
|
||||
sys.stderr.write(
|
||||
f"{server_id}: this server requires a manual install. "
|
||||
f"See documentation.\n"
|
||||
)
|
||||
else:
|
||||
sys.stderr.write(f"{server_id}: install failed (see logs).\n")
|
||||
return 1
|
||||
sys.stdout.write(f"installed: {bin_path}\n")
|
||||
return 0
|
||||
|
||||
|
||||
def _cmd_install_all(include_manual: bool) -> int:
|
||||
from agent.lsp.servers import SERVERS
|
||||
from agent.lsp.install import try_install, INSTALL_RECIPES, detect_status
|
||||
|
||||
rc = 0
|
||||
for s in SERVERS:
|
||||
pkg = _recipe_pkg_for(s.server_id)
|
||||
recipe = INSTALL_RECIPES.get(pkg)
|
||||
if recipe is None:
|
||||
continue
|
||||
if recipe.get("strategy") == "manual" and not include_manual:
|
||||
continue
|
||||
if detect_status(pkg) == "installed":
|
||||
sys.stdout.write(f" {s.server_id:24s} already installed\n")
|
||||
continue
|
||||
sys.stdout.write(f" installing {s.server_id} (pkg={pkg}) ... ")
|
||||
sys.stdout.flush()
|
||||
path = try_install(pkg, "auto")
|
||||
if path:
|
||||
sys.stdout.write(f"ok ({path})\n")
|
||||
else:
|
||||
sys.stdout.write("FAILED\n")
|
||||
rc = 1
|
||||
return rc
|
||||
|
||||
|
||||
def _cmd_restart() -> int:
|
||||
from agent.lsp import shutdown_service
|
||||
|
||||
shutdown_service()
|
||||
sys.stdout.write("LSP service shut down. Next edit will respawn clients.\n")
|
||||
return 0
|
||||
|
||||
|
||||
def _cmd_which(server_id: str) -> int:
|
||||
from agent.lsp.install import INSTALL_RECIPES, hermes_lsp_bin_dir
|
||||
import os
|
||||
import shutil as _shutil
|
||||
|
||||
recipe = INSTALL_RECIPES.get(server_id)
|
||||
bin_name = (recipe or {}).get("bin", server_id)
|
||||
staged = hermes_lsp_bin_dir() / bin_name
|
||||
if staged.exists():
|
||||
sys.stdout.write(str(staged) + "\n")
|
||||
return 0
|
||||
on_path = _shutil.which(bin_name)
|
||||
if on_path:
|
||||
sys.stdout.write(on_path + "\n")
|
||||
return 0
|
||||
sys.stderr.write(f"{server_id}: not installed\n")
|
||||
return 1
|
||||
|
||||
|
||||
def _recipe_pkg_for(server_id: str) -> str:
|
||||
"""Map a registry ``server_id`` to its install-recipe package key."""
|
||||
# The mapping lives here (not in install.py) because it's a CLI
|
||||
# convenience layer. Most server_ids are also their own recipe
|
||||
# key, but a few differ (e.g. ``vue-language-server`` →
|
||||
# ``@vue/language-server``).
|
||||
aliases = {
|
||||
"vue-language-server": "@vue/language-server",
|
||||
"astro-language-server": "@astrojs/language-server",
|
||||
"dockerfile-ls": "dockerfile-language-server-nodejs",
|
||||
"typescript": "typescript-language-server",
|
||||
}
|
||||
return aliases.get(server_id, server_id)
|
||||
|
||||
|
||||
def _backend_warnings() -> list:
|
||||
"""Return human-readable notes about LSP backend tools that are missing
|
||||
in a way that won't surface elsewhere.
|
||||
|
||||
Some language servers ship as thin wrappers around an external CLI for
|
||||
actual diagnostics — they spawn cleanly but never emit any errors when
|
||||
the sidecar binary isn't on PATH. bash-language-server / shellcheck
|
||||
is the load-bearing example.
|
||||
|
||||
Returned strings are short, actionable, and include the install
|
||||
suggestion across common platforms.
|
||||
"""
|
||||
import shutil as _shutil
|
||||
from agent.lsp.install import hermes_lsp_bin_dir
|
||||
notes: list = []
|
||||
bash_installed = _shutil.which("bash-language-server") is not None or (
|
||||
(hermes_lsp_bin_dir() / "bash-language-server").exists()
|
||||
)
|
||||
if bash_installed and _shutil.which("shellcheck") is None:
|
||||
notes.append(
|
||||
"bash-language-server is installed but shellcheck is missing — "
|
||||
"diagnostics will be empty (apt: shellcheck, brew: shellcheck, "
|
||||
"scoop: shellcheck)."
|
||||
)
|
||||
return notes
|
||||
@@ -1,930 +0,0 @@
|
||||
"""Async LSP client over stdin/stdout.
|
||||
|
||||
One :class:`LSPClient` corresponds to one ``(language_server, workspace_root)``
|
||||
pair — exactly what OpenCode keys clients on, and the same shape Claude
|
||||
Code uses. The client owns a child process, drives the JSON-RPC
|
||||
exchange, and exposes:
|
||||
|
||||
- :meth:`open_file` / :meth:`change_file` — text document sync
|
||||
- :meth:`wait_for_diagnostics` — block until the server emits fresh
|
||||
diagnostics for a specific file (or a timeout fires)
|
||||
- :meth:`diagnostics_for` — read the current per-file diagnostic store
|
||||
- :meth:`shutdown` — graceful close + SIGTERM/SIGKILL fallback
|
||||
|
||||
The class is designed for async use from a single asyncio event loop.
|
||||
The :class:`agent.lsp.manager.LSPService` runs an event loop in a
|
||||
background thread so the synchronous file_operations layer can call
|
||||
into it via :func:`agent.lsp.manager.LSPService.touch_file`.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Push diagnostics are stored per-URI in :attr:`_push_diagnostics` from
|
||||
``textDocument/publishDiagnostics`` notifications. Pull diagnostics
|
||||
go in :attr:`_pull_diagnostics`. The merged view dedupes by content.
|
||||
|
||||
- Whole-document sync. Even when the server advertises incremental
|
||||
sync, we send a single ``contentChanges`` entry replacing the
|
||||
entire document. Pretending to be incremental while sending a
|
||||
full replacement is well-tolerated by every major server and saves
|
||||
range bookkeeping. See OpenCode's ``client.ts:584-659`` for the
|
||||
same trick.
|
||||
|
||||
- The "touch-file dance": every ``open_file`` call also fires a
|
||||
``workspace/didChangeWatchedFiles`` notification (CREATED on the
|
||||
first open, CHANGED thereafter). Some servers (clangd, eslint)
|
||||
only re-scan when this notification fires, even though the LSP spec
|
||||
doesn't strictly require it.
|
||||
|
||||
- ``ContentModified`` (-32801) errors get retried with exponential
|
||||
backoff up to 3 times. This matches Claude Code's
|
||||
``LSPServerInstance.sendRequest``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Awaitable, Callable, Dict, List, Optional, Set
|
||||
from urllib.parse import quote, unquote
|
||||
|
||||
from agent.lsp.protocol import (
|
||||
ERROR_CONTENT_MODIFIED,
|
||||
ERROR_METHOD_NOT_FOUND,
|
||||
LSPProtocolError,
|
||||
LSPRequestError,
|
||||
classify_message,
|
||||
encode_message,
|
||||
make_error_response,
|
||||
make_notification,
|
||||
make_request,
|
||||
make_response,
|
||||
read_message,
|
||||
)
|
||||
|
||||
logger = logging.getLogger("agent.lsp.client")
|
||||
|
||||
# Timeouts (seconds) — mirror OpenCode's constants, scaled to seconds.
|
||||
INITIALIZE_TIMEOUT = 45.0
|
||||
DIAGNOSTICS_DOCUMENT_WAIT = 5.0
|
||||
DIAGNOSTICS_FULL_WAIT = 10.0
|
||||
DIAGNOSTICS_REQUEST_TIMEOUT = 3.0
|
||||
PUSH_DEBOUNCE = 0.15
|
||||
SHUTDOWN_GRACE = 1.0 # seconds between SIGTERM and SIGKILL
|
||||
|
||||
# Retry policy for transient ContentModified errors.
|
||||
MAX_CONTENT_MODIFIED_RETRIES = 3
|
||||
RETRY_BASE_DELAY = 0.5 # 0.5, 1.0, 2.0 — exponential
|
||||
|
||||
|
||||
def file_uri(path: str) -> str:
|
||||
"""Return ``file://`` URI for an absolute filesystem path.
|
||||
|
||||
Mirrors Node's ``pathToFileURL`` — handles spaces, unicode, and
|
||||
Windows drive letters (``C:\\foo`` → ``file:///C:/foo``).
|
||||
"""
|
||||
abs_path = os.path.abspath(path)
|
||||
if os.name == "nt":
|
||||
# Windows: backslash → forward slash, prepend extra slash so
|
||||
# the drive letter shows up as part of the path component.
|
||||
abs_path = abs_path.replace("\\", "/")
|
||||
if not abs_path.startswith("/"):
|
||||
abs_path = "/" + abs_path
|
||||
return "file://" + quote(abs_path, safe="/:")
|
||||
|
||||
|
||||
def uri_to_path(uri: str) -> str:
|
||||
"""Inverse of :func:`file_uri`."""
|
||||
if not uri.startswith("file://"):
|
||||
return uri
|
||||
raw = uri[len("file://"):]
|
||||
if os.name == "nt" and raw.startswith("/") and len(raw) > 2 and raw[2] == ":":
|
||||
raw = raw[1:] # strip leading slash before drive letter
|
||||
return os.path.normpath(unquote(raw))
|
||||
|
||||
|
||||
def _end_position(text: str) -> Dict[str, int]:
|
||||
"""Return the LSP Position at the end of ``text``.
|
||||
|
||||
Used to construct a single-range "replace whole document" change
|
||||
for ``textDocument/didChange`` regardless of the server's declared
|
||||
sync mode.
|
||||
"""
|
||||
if not text:
|
||||
return {"line": 0, "character": 0}
|
||||
lines = text.splitlines(keepends=False)
|
||||
last_line = len(lines) - 1
|
||||
last_col = len(lines[-1]) if lines else 0
|
||||
# If the text ends with a trailing newline, ``splitlines`` won't
|
||||
# represent it. The end position is then the start of the next
|
||||
# (empty) line — line index is len(lines), column 0.
|
||||
if text.endswith(("\n", "\r")):
|
||||
return {"line": last_line + 1, "character": 0}
|
||||
return {"line": last_line, "character": last_col}
|
||||
|
||||
|
||||
class LSPClient:
|
||||
"""Async LSP client tied to one server process and one workspace root.
|
||||
|
||||
Lifecycle:
|
||||
|
||||
c = LSPClient(server_id, workspace_root, command, args, init_options)
|
||||
await c.start() # spawn + initialize
|
||||
ver = await c.open_file("/path/to/foo.py")
|
||||
await c.wait_for_diagnostics("/path/to/foo.py", ver)
|
||||
diags = c.diagnostics_for("/path/to/foo.py")
|
||||
await c.shutdown()
|
||||
"""
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# construction + lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
server_id: str,
|
||||
workspace_root: str,
|
||||
command: List[str],
|
||||
env: Optional[Dict[str, str]] = None,
|
||||
cwd: Optional[str] = None,
|
||||
initialization_options: Optional[Dict[str, Any]] = None,
|
||||
seed_diagnostics_on_first_push: bool = False,
|
||||
) -> None:
|
||||
self.server_id = server_id
|
||||
self.workspace_root = workspace_root
|
||||
self._command = list(command)
|
||||
self._env = env
|
||||
self._cwd = cwd or workspace_root
|
||||
self._init_options = initialization_options or {}
|
||||
self._seed_first_push = seed_diagnostics_on_first_push
|
||||
|
||||
# Process + streams
|
||||
self._proc: Optional[asyncio.subprocess.Process] = None
|
||||
self._stderr_task: Optional[asyncio.Task] = None
|
||||
self._reader_task: Optional[asyncio.Task] = None
|
||||
|
||||
# Request/response correlation
|
||||
self._next_id: int = 0
|
||||
self._pending: Dict[int, asyncio.Future] = {}
|
||||
|
||||
# Server-side request handlers (server → client requests).
|
||||
# Kept small and explicit; everything else returns method-not-found.
|
||||
self._request_handlers: Dict[str, Callable[[Any], Awaitable[Any]]] = {
|
||||
"window/workDoneProgress/create": self._handle_work_done_create,
|
||||
"workspace/configuration": self._handle_workspace_configuration,
|
||||
"client/registerCapability": self._handle_register_capability,
|
||||
"client/unregisterCapability": self._handle_unregister_capability,
|
||||
"workspace/workspaceFolders": self._handle_workspace_folders,
|
||||
"workspace/diagnostic/refresh": self._handle_diagnostic_refresh,
|
||||
}
|
||||
# Notifications (server → client) we care about.
|
||||
self._notification_handlers: Dict[str, Callable[[Any], None]] = {
|
||||
"textDocument/publishDiagnostics": self._handle_publish_diagnostics,
|
||||
# Everything else (window/showMessage, $/progress, etc.)
|
||||
# is silently dropped by default.
|
||||
}
|
||||
|
||||
# Tracked file state — required for didChange version bumps.
|
||||
self._files: Dict[str, Dict[str, Any]] = {}
|
||||
# Diagnostic stores, keyed by file path (NOT URI).
|
||||
self._push_diagnostics: Dict[str, List[Dict[str, Any]]] = {}
|
||||
self._pull_diagnostics: Dict[str, List[Dict[str, Any]]] = {}
|
||||
# Per-path "last published" time so wait-for-fresh logic works.
|
||||
self._published: Dict[str, float] = {}
|
||||
# Per-path version of the latest push (matches our didChange
|
||||
# version when the server respects it).
|
||||
self._published_version: Dict[str, int] = {}
|
||||
# First-push seen flag, for typescript-style seed-on-first-push.
|
||||
self._first_push_seen: Set[str] = set()
|
||||
# Capability registrations — only diagnostic ones are tracked.
|
||||
self._diagnostic_registrations: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
# State machine
|
||||
self._state: str = "stopped"
|
||||
self._initialize_result: Optional[Dict[str, Any]] = None
|
||||
self._sync_kind: int = 1 # 1=Full, 2=Incremental
|
||||
self._stopping: bool = False
|
||||
|
||||
# Push event for waiters.
|
||||
self._push_event = asyncio.Event()
|
||||
# Monotonic counter incremented on every publishDiagnostics push.
|
||||
# Waiters snapshot it on entry and treat any increase as
|
||||
# "something happened, recheck the predicate". Avoids the
|
||||
# asyncio.Event sticky-state trap.
|
||||
self._push_counter = 0
|
||||
# Registration change event so wait_for_diagnostics can re-loop
|
||||
# when the server announces a new dynamic provider.
|
||||
self._registration_event = asyncio.Event()
|
||||
|
||||
@property
|
||||
def is_running(self) -> bool:
|
||||
return self._state == "running" and self._proc is not None and self._proc.returncode is None
|
||||
|
||||
@property
|
||||
def state(self) -> str:
|
||||
return self._state
|
||||
|
||||
async def start(self) -> None:
|
||||
"""Spawn the server and complete the initialize handshake.
|
||||
|
||||
Raises any exception encountered during spawn/init. On failure
|
||||
the process is killed and the client is left in state
|
||||
``"error"`` — re-call ``start()`` to retry.
|
||||
"""
|
||||
if self._state in ("running", "starting"):
|
||||
return
|
||||
self._state = "starting"
|
||||
try:
|
||||
await self._spawn()
|
||||
await self._initialize()
|
||||
self._state = "running"
|
||||
except Exception:
|
||||
self._state = "error"
|
||||
await self._cleanup_process()
|
||||
raise
|
||||
|
||||
async def _spawn(self) -> None:
|
||||
env = dict(os.environ)
|
||||
if self._env:
|
||||
env.update(self._env)
|
||||
|
||||
try:
|
||||
self._proc = await asyncio.create_subprocess_exec(
|
||||
self._command[0],
|
||||
*self._command[1:],
|
||||
stdin=asyncio.subprocess.PIPE,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
env=env,
|
||||
cwd=self._cwd,
|
||||
)
|
||||
except FileNotFoundError as e:
|
||||
raise LSPProtocolError(
|
||||
f"LSP server binary not found: {self._command[0]} ({e})"
|
||||
) from e
|
||||
|
||||
# Drain stderr at debug level — if we don't, the pipe buffer
|
||||
# fills and the server hangs.
|
||||
self._stderr_task = asyncio.create_task(self._drain_stderr())
|
||||
# Start the reader loop.
|
||||
self._reader_task = asyncio.create_task(self._reader_loop())
|
||||
|
||||
async def _drain_stderr(self) -> None:
|
||||
if self._proc is None or self._proc.stderr is None:
|
||||
return
|
||||
try:
|
||||
while True:
|
||||
line = await self._proc.stderr.readline()
|
||||
if not line:
|
||||
break
|
||||
text = line.decode("utf-8", errors="replace").rstrip()
|
||||
if text:
|
||||
logger.debug("[%s] stderr: %s", self.server_id, text[:1000])
|
||||
except (asyncio.CancelledError, OSError):
|
||||
pass
|
||||
|
||||
async def _reader_loop(self) -> None:
|
||||
if self._proc is None or self._proc.stdout is None:
|
||||
return
|
||||
try:
|
||||
while True:
|
||||
msg = await read_message(self._proc.stdout)
|
||||
if msg is None:
|
||||
logger.debug("[%s] server closed stdout cleanly", self.server_id)
|
||||
break
|
||||
kind, key = classify_message(msg)
|
||||
if kind == "response":
|
||||
self._dispatch_response(key, msg)
|
||||
elif kind == "request":
|
||||
asyncio.create_task(self._dispatch_request(key, msg))
|
||||
elif kind == "notification":
|
||||
self._dispatch_notification(key, msg)
|
||||
else:
|
||||
logger.warning("[%s] dropping invalid message: %r", self.server_id, msg)
|
||||
except LSPProtocolError as e:
|
||||
logger.warning("[%s] protocol error in reader loop: %s", self.server_id, e)
|
||||
except (asyncio.CancelledError, OSError):
|
||||
pass
|
||||
finally:
|
||||
# Wake up any pending requests so they can fail fast.
|
||||
for fut in list(self._pending.values()):
|
||||
if not fut.done():
|
||||
fut.set_exception(LSPProtocolError("server connection closed"))
|
||||
self._pending.clear()
|
||||
|
||||
async def _initialize(self) -> None:
|
||||
params = {
|
||||
"rootUri": file_uri(self.workspace_root),
|
||||
"rootPath": self.workspace_root,
|
||||
"processId": os.getpid(),
|
||||
"workspaceFolders": [
|
||||
{"name": "workspace", "uri": file_uri(self.workspace_root)}
|
||||
],
|
||||
"initializationOptions": self._init_options,
|
||||
"capabilities": {
|
||||
"window": {"workDoneProgress": True},
|
||||
"workspace": {
|
||||
"configuration": True,
|
||||
"workspaceFolders": True,
|
||||
"didChangeWatchedFiles": {"dynamicRegistration": True},
|
||||
"diagnostics": {"refreshSupport": False},
|
||||
},
|
||||
"textDocument": {
|
||||
"synchronization": {
|
||||
"dynamicRegistration": False,
|
||||
"didOpen": True,
|
||||
"didChange": True,
|
||||
"didSave": True,
|
||||
"willSave": False,
|
||||
"willSaveWaitUntil": False,
|
||||
},
|
||||
"diagnostic": {
|
||||
"dynamicRegistration": True,
|
||||
"relatedDocumentSupport": True,
|
||||
},
|
||||
"publishDiagnostics": {
|
||||
"relatedInformation": True,
|
||||
"tagSupport": {"valueSet": [1, 2]},
|
||||
"versionSupport": True,
|
||||
"codeDescriptionSupport": True,
|
||||
"dataSupport": False,
|
||||
},
|
||||
"hover": {"contentFormat": ["markdown", "plaintext"]},
|
||||
"definition": {"linkSupport": True},
|
||||
"references": {},
|
||||
"documentSymbol": {"hierarchicalDocumentSymbolSupport": True},
|
||||
},
|
||||
"general": {"positionEncodings": ["utf-16"]},
|
||||
},
|
||||
}
|
||||
|
||||
result = await asyncio.wait_for(
|
||||
self._send_request("initialize", params),
|
||||
timeout=INITIALIZE_TIMEOUT,
|
||||
)
|
||||
self._initialize_result = result
|
||||
self._sync_kind = self._extract_sync_kind(result.get("capabilities") or {})
|
||||
|
||||
await self._send_notification("initialized", {})
|
||||
if self._init_options:
|
||||
# Some servers (vtsls, eslint) want config pushed via
|
||||
# didChangeConfiguration even if it was sent in
|
||||
# initializationOptions.
|
||||
await self._send_notification(
|
||||
"workspace/didChangeConfiguration",
|
||||
{"settings": self._init_options},
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _extract_sync_kind(capabilities: dict) -> int:
|
||||
sync = capabilities.get("textDocumentSync")
|
||||
if isinstance(sync, int):
|
||||
return sync
|
||||
if isinstance(sync, dict):
|
||||
change = sync.get("change")
|
||||
if isinstance(change, int):
|
||||
return change
|
||||
return 1 # default to Full
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
"""Best-effort graceful shutdown.
|
||||
|
||||
Sends ``shutdown`` + ``exit``, then SIGTERMs/SIGKILLs the
|
||||
process if it doesn't exit cleanly. Idempotent.
|
||||
"""
|
||||
if self._stopping:
|
||||
return
|
||||
self._stopping = True
|
||||
try:
|
||||
if self.is_running:
|
||||
try:
|
||||
await asyncio.wait_for(self._send_request("shutdown", None), timeout=2.0)
|
||||
except (asyncio.TimeoutError, LSPRequestError, LSPProtocolError):
|
||||
pass
|
||||
try:
|
||||
await self._send_notification("exit", None)
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
self._state = "stopped"
|
||||
await self._cleanup_process()
|
||||
|
||||
async def _cleanup_process(self) -> None:
|
||||
if self._reader_task is not None and not self._reader_task.done():
|
||||
self._reader_task.cancel()
|
||||
try:
|
||||
await self._reader_task
|
||||
except (asyncio.CancelledError, Exception): # noqa: BLE001
|
||||
pass
|
||||
if self._stderr_task is not None and not self._stderr_task.done():
|
||||
self._stderr_task.cancel()
|
||||
try:
|
||||
await self._stderr_task
|
||||
except (asyncio.CancelledError, Exception): # noqa: BLE001
|
||||
pass
|
||||
proc = self._proc
|
||||
self._proc = None
|
||||
if proc is None:
|
||||
return
|
||||
if proc.returncode is None:
|
||||
try:
|
||||
proc.terminate()
|
||||
try:
|
||||
await asyncio.wait_for(proc.wait(), timeout=SHUTDOWN_GRACE)
|
||||
except asyncio.TimeoutError:
|
||||
try:
|
||||
proc.kill()
|
||||
await proc.wait()
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# request / notification plumbing
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _send_request(self, method: str, params: Any) -> Any:
|
||||
if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
|
||||
raise LSPProtocolError(f"cannot send {method!r}: stdin closed")
|
||||
loop = asyncio.get_running_loop()
|
||||
req_id = self._next_id
|
||||
self._next_id += 1
|
||||
fut: asyncio.Future = loop.create_future()
|
||||
self._pending[req_id] = fut
|
||||
try:
|
||||
self._proc.stdin.write(encode_message(make_request(req_id, method, params)))
|
||||
await self._proc.stdin.drain()
|
||||
except (BrokenPipeError, ConnectionResetError, OSError) as e:
|
||||
self._pending.pop(req_id, None)
|
||||
raise LSPProtocolError(f"send failed for {method!r}: {e}") from e
|
||||
try:
|
||||
return await fut
|
||||
finally:
|
||||
self._pending.pop(req_id, None)
|
||||
|
||||
async def _send_request_with_retry(self, method: str, params: Any, *, timeout: float) -> Any:
|
||||
"""Send a request, retrying on ``ContentModified`` (-32801).
|
||||
|
||||
Other errors propagate. The retry policy matches Claude Code's
|
||||
``LSPServerInstance.sendRequest`` — 3 attempts with delays
|
||||
0.5s, 1.0s, 2.0s.
|
||||
"""
|
||||
for attempt in range(MAX_CONTENT_MODIFIED_RETRIES + 1):
|
||||
try:
|
||||
return await asyncio.wait_for(self._send_request(method, params), timeout=timeout)
|
||||
except LSPRequestError as e:
|
||||
if e.code == ERROR_CONTENT_MODIFIED and attempt < MAX_CONTENT_MODIFIED_RETRIES:
|
||||
await asyncio.sleep(RETRY_BASE_DELAY * (2 ** attempt))
|
||||
continue
|
||||
raise
|
||||
|
||||
async def _send_notification(self, method: str, params: Any) -> None:
|
||||
if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
|
||||
return
|
||||
try:
|
||||
self._proc.stdin.write(encode_message(make_notification(method, params)))
|
||||
await self._proc.stdin.drain()
|
||||
except (BrokenPipeError, ConnectionResetError, OSError) as e:
|
||||
logger.debug("[%s] notify %s failed: %s", self.server_id, method, e)
|
||||
|
||||
async def _send_response(self, req_id: Any, result: Any) -> None:
|
||||
if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
|
||||
return
|
||||
try:
|
||||
self._proc.stdin.write(encode_message(make_response(req_id, result)))
|
||||
await self._proc.stdin.drain()
|
||||
except (BrokenPipeError, ConnectionResetError, OSError):
|
||||
pass
|
||||
|
||||
async def _send_error_response(self, req_id: Any, code: int, message: str) -> None:
|
||||
if self._proc is None or self._proc.stdin is None or self._proc.stdin.is_closing():
|
||||
return
|
||||
try:
|
||||
self._proc.stdin.write(encode_message(make_error_response(req_id, code, message)))
|
||||
await self._proc.stdin.drain()
|
||||
except (BrokenPipeError, ConnectionResetError, OSError):
|
||||
pass
|
||||
|
||||
def _dispatch_response(self, req_id: int, msg: dict) -> None:
|
||||
fut = self._pending.get(req_id)
|
||||
if fut is None or fut.done():
|
||||
return
|
||||
if "error" in msg:
|
||||
err = msg["error"] or {}
|
||||
fut.set_exception(
|
||||
LSPRequestError(
|
||||
code=int(err.get("code", -32000)),
|
||||
message=str(err.get("message", "unknown")),
|
||||
data=err.get("data"),
|
||||
)
|
||||
)
|
||||
else:
|
||||
fut.set_result(msg.get("result"))
|
||||
|
||||
async def _dispatch_request(self, req_id: Any, msg: dict) -> None:
|
||||
method = msg.get("method", "")
|
||||
params = msg.get("params")
|
||||
handler = self._request_handlers.get(method)
|
||||
if handler is None:
|
||||
await self._send_error_response(req_id, ERROR_METHOD_NOT_FOUND, f"method not found: {method}")
|
||||
return
|
||||
try:
|
||||
result = await handler(params)
|
||||
except Exception as e: # noqa: BLE001 — protocol must not blow up
|
||||
logger.warning("[%s] request handler %s failed: %s", self.server_id, method, e)
|
||||
await self._send_error_response(req_id, -32000, f"handler failed: {e}")
|
||||
return
|
||||
await self._send_response(req_id, result)
|
||||
|
||||
def _dispatch_notification(self, method: str, msg: dict) -> None:
|
||||
handler = self._notification_handlers.get(method)
|
||||
if handler is None:
|
||||
return
|
||||
try:
|
||||
handler(msg.get("params"))
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("[%s] notification handler %s failed: %s", self.server_id, method, e)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# built-in server-→-client request handlers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _handle_work_done_create(self, params: Any) -> Any:
|
||||
# Acknowledge progress tokens — required by some servers.
|
||||
return None
|
||||
|
||||
async def _handle_workspace_configuration(self, params: Any) -> Any:
|
||||
# Walk dotted sections through initializationOptions. Mirrors
|
||||
# OpenCode's `client.ts:198-220` — return null when missing.
|
||||
if not isinstance(params, dict):
|
||||
return [None]
|
||||
items = params.get("items") or []
|
||||
out: List[Any] = []
|
||||
for item in items:
|
||||
if not isinstance(item, dict):
|
||||
out.append(None)
|
||||
continue
|
||||
section = item.get("section")
|
||||
if not section or not self._init_options:
|
||||
out.append(self._init_options or None)
|
||||
continue
|
||||
cur: Any = self._init_options
|
||||
for part in str(section).split("."):
|
||||
if isinstance(cur, dict) and part in cur:
|
||||
cur = cur[part]
|
||||
else:
|
||||
cur = None
|
||||
break
|
||||
out.append(cur)
|
||||
return out
|
||||
|
||||
async def _handle_register_capability(self, params: Any) -> Any:
|
||||
if not isinstance(params, dict):
|
||||
return None
|
||||
for reg in params.get("registrations") or []:
|
||||
if not isinstance(reg, dict):
|
||||
continue
|
||||
method = reg.get("method")
|
||||
reg_id = reg.get("id")
|
||||
if method == "textDocument/diagnostic" and reg_id:
|
||||
self._diagnostic_registrations[str(reg_id)] = reg
|
||||
self._registration_event.set()
|
||||
return None
|
||||
|
||||
async def _handle_unregister_capability(self, params: Any) -> Any:
|
||||
if not isinstance(params, dict):
|
||||
return None
|
||||
for unreg in params.get("unregisterations") or []:
|
||||
if not isinstance(unreg, dict):
|
||||
continue
|
||||
reg_id = unreg.get("id")
|
||||
if reg_id:
|
||||
self._diagnostic_registrations.pop(str(reg_id), None)
|
||||
return None
|
||||
|
||||
async def _handle_workspace_folders(self, params: Any) -> Any:
|
||||
return [{"name": "workspace", "uri": file_uri(self.workspace_root)}]
|
||||
|
||||
async def _handle_diagnostic_refresh(self, params: Any) -> Any:
|
||||
# We don't honour refresh — we re-pull on every touchFile.
|
||||
return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# publishDiagnostics handler
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _handle_publish_diagnostics(self, params: Any) -> None:
|
||||
if not isinstance(params, dict):
|
||||
return
|
||||
uri = params.get("uri")
|
||||
if not isinstance(uri, str):
|
||||
return
|
||||
path = uri_to_path(uri)
|
||||
diagnostics = params.get("diagnostics") or []
|
||||
if not isinstance(diagnostics, list):
|
||||
diagnostics = []
|
||||
version = params.get("version")
|
||||
loop_time = asyncio.get_event_loop().time()
|
||||
|
||||
if self._seed_first_push and path not in self._first_push_seen:
|
||||
# First push: seed without firing the event so a waiter
|
||||
# doesn't resolve on the very first push (which arrives
|
||||
# before the user-triggered didChange could've produced
|
||||
# fresh diagnostics).
|
||||
self._first_push_seen.add(path)
|
||||
self._push_diagnostics[path] = diagnostics
|
||||
self._published[path] = loop_time
|
||||
if isinstance(version, int):
|
||||
self._published_version[path] = version
|
||||
return
|
||||
|
||||
self._push_diagnostics[path] = diagnostics
|
||||
self._published[path] = loop_time
|
||||
if isinstance(version, int):
|
||||
self._published_version[path] = version
|
||||
self._first_push_seen.add(path)
|
||||
# Bump the monotonic push counter and wake every waiter. We
|
||||
# keep the Event sticky-set so any wait already in progress
|
||||
# resolves; waiters re-check their predicate after waking and
|
||||
# decide whether to keep waiting. ``_push_counter`` is what
|
||||
# they actually compare against to detect a fresh event.
|
||||
self._push_counter += 1
|
||||
self._push_event.set()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# public file-sync API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def open_file(self, path: str, *, language_id: str = "plaintext") -> int:
|
||||
"""Send didOpen (first time) or didChange (subsequent) for ``path``.
|
||||
|
||||
Returns the new document version number that the agent's
|
||||
``wait_for_diagnostics`` should match against.
|
||||
"""
|
||||
if not self.is_running:
|
||||
raise LSPProtocolError("client not running")
|
||||
|
||||
abs_path = os.path.abspath(path)
|
||||
try:
|
||||
text = Path(abs_path).read_text(encoding="utf-8", errors="replace")
|
||||
except OSError as e:
|
||||
raise LSPProtocolError(f"cannot read {abs_path}: {e}") from e
|
||||
|
||||
uri = file_uri(abs_path)
|
||||
existing = self._files.get(abs_path)
|
||||
|
||||
if existing is not None:
|
||||
# Re-open: bump version, fire didChangeWatchedFiles + didChange.
|
||||
await self._send_notification(
|
||||
"workspace/didChangeWatchedFiles",
|
||||
{"changes": [{"uri": uri, "type": 2}]}, # 2 = CHANGED
|
||||
)
|
||||
new_version = existing["version"] + 1
|
||||
old_text = existing["text"]
|
||||
content_changes: List[Dict[str, Any]]
|
||||
if self._sync_kind == 2:
|
||||
content_changes = [
|
||||
{
|
||||
"range": {
|
||||
"start": {"line": 0, "character": 0},
|
||||
"end": _end_position(old_text),
|
||||
},
|
||||
"text": text,
|
||||
}
|
||||
]
|
||||
else:
|
||||
content_changes = [{"text": text}]
|
||||
await self._send_notification(
|
||||
"textDocument/didChange",
|
||||
{
|
||||
"textDocument": {"uri": uri, "version": new_version},
|
||||
"contentChanges": content_changes,
|
||||
},
|
||||
)
|
||||
self._files[abs_path] = {"version": new_version, "text": text}
|
||||
return new_version
|
||||
|
||||
# First open: didChangeWatchedFiles CREATED + didOpen.
|
||||
await self._send_notification(
|
||||
"workspace/didChangeWatchedFiles",
|
||||
{"changes": [{"uri": uri, "type": 1}]}, # 1 = CREATED
|
||||
)
|
||||
# Clear any stale push/pull entries — fresh open should start
|
||||
# from scratch.
|
||||
self._push_diagnostics.pop(abs_path, None)
|
||||
self._pull_diagnostics.pop(abs_path, None)
|
||||
self._published.pop(abs_path, None)
|
||||
self._published_version.pop(abs_path, None)
|
||||
await self._send_notification(
|
||||
"textDocument/didOpen",
|
||||
{
|
||||
"textDocument": {
|
||||
"uri": uri,
|
||||
"languageId": language_id,
|
||||
"version": 0,
|
||||
"text": text,
|
||||
}
|
||||
},
|
||||
)
|
||||
self._files[abs_path] = {"version": 0, "text": text}
|
||||
return 0
|
||||
|
||||
async def save_file(self, path: str) -> None:
|
||||
"""Send didSave for ``path``. Some linters re-scan only on save."""
|
||||
if not self.is_running:
|
||||
return
|
||||
abs_path = os.path.abspath(path)
|
||||
await self._send_notification(
|
||||
"textDocument/didSave",
|
||||
{"textDocument": {"uri": file_uri(abs_path)}},
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# diagnostics: pull + wait
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _pull_document_diagnostics(self, path: str) -> None:
|
||||
"""Send ``textDocument/diagnostic`` for one file.
|
||||
|
||||
Stores results into :attr:`_pull_diagnostics`. Silently
|
||||
no-ops on errors (server may not support the pull endpoint).
|
||||
"""
|
||||
try:
|
||||
params: Dict[str, Any] = {
|
||||
"textDocument": {"uri": file_uri(os.path.abspath(path))}
|
||||
}
|
||||
result = await self._send_request_with_retry(
|
||||
"textDocument/diagnostic",
|
||||
params,
|
||||
timeout=DIAGNOSTICS_REQUEST_TIMEOUT,
|
||||
)
|
||||
except (LSPRequestError, LSPProtocolError, asyncio.TimeoutError) as e:
|
||||
logger.debug("[%s] document diagnostic pull failed: %s", self.server_id, e)
|
||||
return
|
||||
if not isinstance(result, dict):
|
||||
return
|
||||
items = result.get("items")
|
||||
if isinstance(items, list):
|
||||
self._pull_diagnostics[os.path.abspath(path)] = items
|
||||
related = result.get("relatedDocuments")
|
||||
if isinstance(related, dict):
|
||||
for uri, sub in related.items():
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
sub_items = sub.get("items")
|
||||
if isinstance(sub_items, list):
|
||||
self._pull_diagnostics[uri_to_path(uri)] = sub_items
|
||||
|
||||
async def wait_for_diagnostics(
|
||||
self,
|
||||
path: str,
|
||||
version: int,
|
||||
*,
|
||||
mode: str = "document",
|
||||
) -> None:
|
||||
"""Wait for the server to publish diagnostics for ``path`` at ``version``.
|
||||
|
||||
``mode`` is ``"document"`` (5s budget, document pulls) or
|
||||
``"full"`` (10s budget, also workspace pulls). Best-effort —
|
||||
returns silently on timeout. Does NOT throw if the server
|
||||
doesn't support pull diagnostics; we still get the push side.
|
||||
"""
|
||||
budget = DIAGNOSTICS_FULL_WAIT if mode == "full" else DIAGNOSTICS_DOCUMENT_WAIT
|
||||
deadline = asyncio.get_event_loop().time() + budget
|
||||
abs_path = os.path.abspath(path)
|
||||
|
||||
while True:
|
||||
remaining = deadline - asyncio.get_event_loop().time()
|
||||
if remaining <= 0:
|
||||
return
|
||||
|
||||
# Concurrent: document pull + push wait.
|
||||
pull_task = asyncio.create_task(self._pull_document_diagnostics(abs_path))
|
||||
push_task = asyncio.create_task(self._wait_for_fresh_push(abs_path, version, remaining))
|
||||
done, pending = await asyncio.wait(
|
||||
{pull_task, push_task},
|
||||
timeout=remaining,
|
||||
return_when=asyncio.FIRST_COMPLETED,
|
||||
)
|
||||
for t in pending:
|
||||
t.cancel()
|
||||
for t in pending:
|
||||
try:
|
||||
await t
|
||||
except (asyncio.CancelledError, Exception): # noqa: BLE001
|
||||
pass
|
||||
|
||||
# If we got a fresh push for our version, we're done.
|
||||
current_v = self._published_version.get(abs_path)
|
||||
if abs_path in self._published and (
|
||||
current_v is None or current_v >= version
|
||||
):
|
||||
return
|
||||
|
||||
# Pull may have populated _pull_diagnostics — that's also
|
||||
# success.
|
||||
if abs_path in self._pull_diagnostics:
|
||||
return
|
||||
|
||||
# Loop until budget runs out.
|
||||
|
||||
async def _wait_for_fresh_push(self, path: str, version: int, timeout: float) -> None:
|
||||
"""Wait until a publishDiagnostics arrives for ``path`` at ``version``+."""
|
||||
deadline = asyncio.get_event_loop().time() + timeout
|
||||
baseline = self._push_counter
|
||||
while True:
|
||||
current_v = self._published_version.get(path)
|
||||
if path in self._published and (current_v is None or current_v >= version):
|
||||
# Debounce — wait a tick in case more diagnostics arrive
|
||||
# immediately after. TS often emits in pairs. We
|
||||
# snapshot the counter so we wake on a *new* push, not
|
||||
# on the one that satisfied us a moment ago.
|
||||
debounce_baseline = self._push_counter
|
||||
debounce_deadline = asyncio.get_event_loop().time() + PUSH_DEBOUNCE
|
||||
while self._push_counter == debounce_baseline:
|
||||
remaining = debounce_deadline - asyncio.get_event_loop().time()
|
||||
if remaining <= 0:
|
||||
break
|
||||
self._push_event.clear()
|
||||
try:
|
||||
await asyncio.wait_for(self._push_event.wait(), timeout=remaining)
|
||||
except asyncio.TimeoutError:
|
||||
break
|
||||
return
|
||||
remaining = deadline - asyncio.get_event_loop().time()
|
||||
if remaining <= 0:
|
||||
return
|
||||
if self._push_counter > baseline:
|
||||
# New event arrived but predicate still false — re-check
|
||||
# immediately without waiting again.
|
||||
baseline = self._push_counter
|
||||
continue
|
||||
self._push_event.clear()
|
||||
try:
|
||||
await asyncio.wait_for(self._push_event.wait(), timeout=min(remaining, 0.5))
|
||||
except asyncio.TimeoutError:
|
||||
continue
|
||||
|
||||
def diagnostics_for(self, path: str) -> List[Dict[str, Any]]:
|
||||
"""Return current merged + deduped diagnostics for one file.
|
||||
|
||||
Diagnostics from push and pull stores are concatenated and
|
||||
deduplicated by ``(severity, code, message, range)`` content
|
||||
key. Empty list if the server hasn't published anything.
|
||||
"""
|
||||
abs_path = os.path.abspath(path)
|
||||
push = self._push_diagnostics.get(abs_path) or []
|
||||
pull = self._pull_diagnostics.get(abs_path) or []
|
||||
return _dedupe(push, pull)
|
||||
|
||||
|
||||
def _dedupe(*lists: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
seen: Set[str] = set()
|
||||
out: List[Dict[str, Any]] = []
|
||||
for lst in lists:
|
||||
for d in lst:
|
||||
if not isinstance(d, dict):
|
||||
continue
|
||||
key = _diagnostic_key(d)
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
out.append(d)
|
||||
return out
|
||||
|
||||
|
||||
def _diagnostic_key(d: Dict[str, Any]) -> str:
|
||||
"""Content-equality key for a diagnostic.
|
||||
|
||||
Matches the structural-equality used in claude-code's
|
||||
``areDiagnosticsEqual`` — message + severity + source + code +
|
||||
range coords. The range is reduced to a tuple to keep the key
|
||||
stable across dict orderings.
|
||||
"""
|
||||
rng = d.get("range") or {}
|
||||
start = rng.get("start") or {}
|
||||
end = rng.get("end") or {}
|
||||
code = d.get("code")
|
||||
if code is not None and not isinstance(code, str):
|
||||
code = str(code)
|
||||
return "\x00".join(
|
||||
[
|
||||
str(d.get("severity") or 1),
|
||||
str(code or ""),
|
||||
str(d.get("source") or ""),
|
||||
str(d.get("message") or "").strip(),
|
||||
f"{start.get('line', 0)}:{start.get('character', 0)}-{end.get('line', 0)}:{end.get('character', 0)}",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"LSPClient",
|
||||
"file_uri",
|
||||
"uri_to_path",
|
||||
"INITIALIZE_TIMEOUT",
|
||||
"DIAGNOSTICS_DOCUMENT_WAIT",
|
||||
"DIAGNOSTICS_FULL_WAIT",
|
||||
]
|
||||
@@ -1,213 +0,0 @@
|
||||
"""Structured logging with steady-state silence for the LSP layer.
|
||||
|
||||
The LSP layer fires on every write_file/patch. In a busy session
|
||||
that's hundreds of events. We want users to be able to ``rg`` the
|
||||
log for "did LSP fire on that edit?" without drowning in noise.
|
||||
|
||||
The level model:
|
||||
|
||||
- ``DEBUG`` for steady-state events that have no novel signal:
|
||||
``clean``, ``feature off``, ``extension not mapped``, ``no project
|
||||
root for already-announced file``, ``server unavailable for
|
||||
already-announced binary``. These never reach ``agent.log`` at the
|
||||
default INFO threshold.
|
||||
|
||||
- ``INFO`` for state transitions worth surfacing exactly once per
|
||||
session: ``active for <root>`` the first time a (server_id,
|
||||
workspace_root) client starts, ``no project root for <path>``
|
||||
the first time we see that file. Plus every diagnostic event
|
||||
(those are inherently rare and per-edit, exactly what users grep
|
||||
for).
|
||||
|
||||
- ``WARNING`` for action-required failures: ``server unavailable``
|
||||
(binary not on PATH) the first time per (server_id, binary),
|
||||
``no server configured`` once per language. Per-call WARNING for
|
||||
timeouts and unexpected bridge exceptions.
|
||||
|
||||
The dedup is in-process module-level sets. Each set grows at most by
|
||||
the number of distinct (server_id, root) and (server_id, binary)
|
||||
pairs touched in one Python process — bytes of memory in even an
|
||||
aggressive monorepo session. Bounded LRU was rejected: evicting an
|
||||
entry would risk re-firing the WARNING/INFO line we explicitly want
|
||||
to suppress.
|
||||
|
||||
Grep recipe::
|
||||
|
||||
tail -f ~/.hermes/logs/agent.log | rg 'lsp\\['
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from typing import Tuple
|
||||
|
||||
# Dedicated logger name so the documented grep recipe survives a
|
||||
# ``logging.getLogger(__name__)`` rename of any internal module.
|
||||
event_log = logging.getLogger("hermes.lint.lsp")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Once-per-X dedup sets
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_announce_lock = threading.Lock()
|
||||
_announced_active: set = set() # keys: (server_id, workspace_root)
|
||||
_announced_unavailable: set = set() # keys: (server_id, binary_path_or_name)
|
||||
_announced_no_root: set = set() # keys: (server_id, file_path)
|
||||
_announced_no_server: set = set() # keys: (server_id,)
|
||||
|
||||
|
||||
def _short_path(file_path: str) -> str:
|
||||
"""Render *file_path* relative to the cwd when sensible, else absolute.
|
||||
|
||||
Keeps log lines readable for the common case (the user is inside
|
||||
the project they're editing) without emitting brittle ``../../..``
|
||||
chains for the cross-tree case.
|
||||
"""
|
||||
if not file_path:
|
||||
return file_path
|
||||
try:
|
||||
rel = os.path.relpath(file_path)
|
||||
except ValueError:
|
||||
return file_path
|
||||
if rel.startswith(".." + os.sep) or rel == "..":
|
||||
return file_path
|
||||
return rel
|
||||
|
||||
|
||||
def _emit(server_id: str, level: int, message: str) -> None:
|
||||
event_log.log(level, "lsp[%s] %s", server_id, message)
|
||||
|
||||
|
||||
def _announce_once(bucket: set, key: Tuple) -> bool:
|
||||
"""Return True if *key* has not been announced for *bucket* yet.
|
||||
|
||||
Atomically marks the key as announced so concurrent callers
|
||||
cannot both win the race and double-log.
|
||||
"""
|
||||
with _announce_lock:
|
||||
if key in bucket:
|
||||
return False
|
||||
bucket.add(key)
|
||||
return True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public event helpers — call these from the LSP layer.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def log_clean(server_id: str, file_path: str) -> None:
|
||||
"""No diagnostics emitted for *file_path*. DEBUG (silent at default)."""
|
||||
_emit(server_id, logging.DEBUG, f"clean ({_short_path(file_path)})")
|
||||
|
||||
|
||||
def log_disabled(server_id: str, file_path: str, reason: str) -> None:
|
||||
"""LSP intentionally skipped for this file (feature off, ext unmapped,
|
||||
backend not local, etc.). DEBUG."""
|
||||
_emit(server_id, logging.DEBUG, f"skipped: {reason} ({_short_path(file_path)})")
|
||||
|
||||
|
||||
def log_active(server_id: str, workspace_root: str) -> None:
|
||||
"""A new LSP client started for (server_id, workspace_root).
|
||||
|
||||
INFO once per (server_id, workspace_root); DEBUG thereafter.
|
||||
Lets users verify "is LSP actually running?" with a single grep.
|
||||
"""
|
||||
key = (server_id, workspace_root)
|
||||
if _announce_once(_announced_active, key):
|
||||
_emit(server_id, logging.INFO, f"active for {workspace_root}")
|
||||
else:
|
||||
_emit(server_id, logging.DEBUG, f"reused client for {workspace_root}")
|
||||
|
||||
|
||||
def log_diagnostics(server_id: str, file_path: str, count: int) -> None:
|
||||
"""Diagnostics arrived for a file. INFO every time — these are the
|
||||
failure signals users actually want to grep for, and they are
|
||||
inherently rare per edit."""
|
||||
_emit(server_id, logging.INFO, f"{count} diags ({_short_path(file_path)})")
|
||||
|
||||
|
||||
def log_no_project_root(server_id: str, file_path: str) -> None:
|
||||
"""File had no recognised project marker. INFO once per file,
|
||||
DEBUG thereafter."""
|
||||
key = (server_id, file_path)
|
||||
if _announce_once(_announced_no_root, key):
|
||||
_emit(server_id, logging.INFO, f"no project root for {_short_path(file_path)}")
|
||||
else:
|
||||
_emit(server_id, logging.DEBUG, f"no project root for {_short_path(file_path)}")
|
||||
|
||||
|
||||
def log_server_unavailable(server_id: str, binary_or_pkg: str) -> None:
|
||||
"""The server binary couldn't be resolved. WARNING once per
|
||||
(server_id, binary), DEBUG thereafter so a hundred subsequent
|
||||
.py edits don't spam the log."""
|
||||
key = (server_id, binary_or_pkg)
|
||||
if _announce_once(_announced_unavailable, key):
|
||||
_emit(
|
||||
server_id,
|
||||
logging.WARNING,
|
||||
f"server unavailable: {binary_or_pkg} not found "
|
||||
"(install via `hermes lsp install <id>` or set lsp.servers.<id>.command)",
|
||||
)
|
||||
else:
|
||||
_emit(server_id, logging.DEBUG, f"server still unavailable: {binary_or_pkg}")
|
||||
|
||||
|
||||
def log_no_server_configured(server_id: str) -> None:
|
||||
"""No spawn recipe for this language. WARNING once."""
|
||||
if _announce_once(_announced_no_server, (server_id,)):
|
||||
_emit(server_id, logging.WARNING, "no server configured")
|
||||
|
||||
|
||||
def log_timeout(server_id: str, file_path: str, kind: str = "diagnostics") -> None:
|
||||
"""A request to the server timed out. WARNING every time — these are
|
||||
inherently novel events worth surfacing on each occurrence."""
|
||||
_emit(
|
||||
server_id,
|
||||
logging.WARNING,
|
||||
f"{kind} timed out for {_short_path(file_path)}",
|
||||
)
|
||||
|
||||
|
||||
def log_server_error(server_id: str, file_path: str, exc: BaseException) -> None:
|
||||
"""An unexpected exception bubbled out of the LSP layer. WARNING."""
|
||||
_emit(
|
||||
server_id,
|
||||
logging.WARNING,
|
||||
f"unexpected error for {_short_path(file_path)}: {type(exc).__name__}: {exc}",
|
||||
)
|
||||
|
||||
|
||||
def log_spawn_failed(server_id: str, workspace_root: str, exc: BaseException) -> None:
|
||||
"""The LSP server failed to spawn or initialize. WARNING."""
|
||||
_emit(
|
||||
server_id,
|
||||
logging.WARNING,
|
||||
f"spawn/initialize failed for {workspace_root}: {type(exc).__name__}: {exc}",
|
||||
)
|
||||
|
||||
|
||||
def reset_announce_caches() -> None:
|
||||
"""Test-only: clear the dedup caches. Production code never calls this."""
|
||||
with _announce_lock:
|
||||
_announced_active.clear()
|
||||
_announced_unavailable.clear()
|
||||
_announced_no_root.clear()
|
||||
_announced_no_server.clear()
|
||||
|
||||
|
||||
__all__ = [
|
||||
"event_log",
|
||||
"log_clean",
|
||||
"log_disabled",
|
||||
"log_active",
|
||||
"log_diagnostics",
|
||||
"log_no_project_root",
|
||||
"log_server_unavailable",
|
||||
"log_no_server_configured",
|
||||
"log_timeout",
|
||||
"log_server_error",
|
||||
"log_spawn_failed",
|
||||
"reset_announce_caches",
|
||||
]
|
||||
@@ -1,376 +0,0 @@
|
||||
"""Auto-installation of LSP server binaries.
|
||||
|
||||
Tries to install missing servers using whatever package manager is
|
||||
appropriate. All installs go to a Hermes-owned bin staging dir,
|
||||
``<HERMES_HOME>/lsp/bin/``, so we don't pollute the user's global
|
||||
toolchain.
|
||||
|
||||
Strategies:
|
||||
|
||||
- ``auto`` — attempt to install with the best available package
|
||||
manager. This is the default.
|
||||
- ``manual`` — never install; if a binary is missing, the server is
|
||||
silently skipped and the user is told about it via ``hermes lsp
|
||||
status``.
|
||||
- ``off`` — same as ``manual`` for now (kept distinct so we can
|
||||
evolve behavior later, e.g. logging differently).
|
||||
|
||||
The actual installs happen synchronously the first time a server is
|
||||
needed and concurrent calls to :func:`try_install` for the same
|
||||
package are deduplicated via a per-package lock.
|
||||
|
||||
Failure modes are non-fatal: every install path is wrapped in
|
||||
try/except and returns ``None`` on failure. The tool layer then
|
||||
falls back to its in-process syntax checker, exactly as if the user
|
||||
hadn't enabled LSP at all.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger("agent.lsp.install")
|
||||
|
||||
# Package-name → install-strategy hint registry. Each entry is a
|
||||
# tuple of strategy name + package name + executable name. When the
|
||||
# install completes, we look for the executable in
|
||||
# ``<HERMES_HOME>/lsp/bin/`` first, then on PATH.
|
||||
#
|
||||
# Optional fields:
|
||||
# - ``extra_pkgs``: list of sibling packages to install alongside
|
||||
# ``pkg`` in the same node_modules tree. Used when an LSP server
|
||||
# has a runtime peer dependency that npm doesn't auto-pull (e.g.
|
||||
# typescript-language-server needs ``typescript``).
|
||||
INSTALL_RECIPES: Dict[str, Dict[str, Any]] = {
|
||||
# Python
|
||||
"pyright": {"strategy": "npm", "pkg": "pyright", "bin": "pyright-langserver"},
|
||||
# JS/TS family
|
||||
"typescript-language-server": {
|
||||
"strategy": "npm",
|
||||
"pkg": "typescript-language-server",
|
||||
"bin": "typescript-language-server",
|
||||
# typescript-language-server requires the `typescript` SDK
|
||||
# (tsserver) to be importable from the same node_modules tree;
|
||||
# otherwise initialize() fails with "Could not find a valid
|
||||
# TypeScript installation". Install them together.
|
||||
"extra_pkgs": ["typescript"],
|
||||
},
|
||||
"@vue/language-server": {
|
||||
"strategy": "npm",
|
||||
"pkg": "@vue/language-server",
|
||||
"bin": "vue-language-server",
|
||||
},
|
||||
"svelte-language-server": {
|
||||
"strategy": "npm",
|
||||
"pkg": "svelte-language-server",
|
||||
"bin": "svelteserver",
|
||||
},
|
||||
"@astrojs/language-server": {
|
||||
"strategy": "npm",
|
||||
"pkg": "@astrojs/language-server",
|
||||
"bin": "astro-ls",
|
||||
},
|
||||
"yaml-language-server": {
|
||||
"strategy": "npm",
|
||||
"pkg": "yaml-language-server",
|
||||
"bin": "yaml-language-server",
|
||||
},
|
||||
"bash-language-server": {
|
||||
"strategy": "npm",
|
||||
"pkg": "bash-language-server",
|
||||
"bin": "bash-language-server",
|
||||
},
|
||||
"intelephense": {"strategy": "npm", "pkg": "intelephense", "bin": "intelephense"},
|
||||
"dockerfile-language-server-nodejs": {
|
||||
"strategy": "npm",
|
||||
"pkg": "dockerfile-language-server-nodejs",
|
||||
"bin": "docker-langserver",
|
||||
},
|
||||
# Go
|
||||
"gopls": {"strategy": "go", "pkg": "golang.org/x/tools/gopls@latest", "bin": "gopls"},
|
||||
# Rust — too heavy (hundreds of MB to bootstrap). We do NOT
|
||||
# auto-install rust-analyzer; users install via rustup.
|
||||
"rust-analyzer": {"strategy": "manual", "pkg": "", "bin": "rust-analyzer"},
|
||||
# C/C++ — manual (clangd ships with LLVM, very heavy)
|
||||
"clangd": {"strategy": "manual", "pkg": "", "bin": "clangd"},
|
||||
# Lua — manual (LuaLS is platform-specific binaries from GitHub
|
||||
# releases; complex enough that we punt to the user)
|
||||
"lua-language-server": {"strategy": "manual", "pkg": "", "bin": "lua-language-server"},
|
||||
}
|
||||
|
||||
|
||||
_install_locks: Dict[str, threading.Lock] = {}
|
||||
_install_results: Dict[str, Optional[str]] = {}
|
||||
_install_lock_meta = threading.Lock()
|
||||
|
||||
|
||||
def hermes_lsp_bin_dir() -> Path:
|
||||
"""Return the Hermes-owned bin staging dir for LSP servers."""
|
||||
home = os.environ.get("HERMES_HOME")
|
||||
if home is None:
|
||||
home = os.path.join(os.path.expanduser("~"), ".hermes")
|
||||
p = Path(home) / "lsp" / "bin"
|
||||
p.mkdir(parents=True, exist_ok=True)
|
||||
return p
|
||||
|
||||
|
||||
def _existing_binary(name: str) -> Optional[str]:
|
||||
"""Probe the staging dir + PATH for a binary named ``name``."""
|
||||
staged = hermes_lsp_bin_dir() / name
|
||||
if staged.exists() and os.access(staged, os.X_OK):
|
||||
return str(staged)
|
||||
on_path = shutil.which(name)
|
||||
if on_path:
|
||||
return on_path
|
||||
return None
|
||||
|
||||
|
||||
def _get_lock(pkg: str) -> threading.Lock:
|
||||
with _install_lock_meta:
|
||||
lock = _install_locks.get(pkg)
|
||||
if lock is None:
|
||||
lock = threading.Lock()
|
||||
_install_locks[pkg] = lock
|
||||
return lock
|
||||
|
||||
|
||||
def try_install(pkg: str, strategy: str = "auto") -> Optional[str]:
|
||||
"""Try to install ``pkg`` and return the binary path if successful.
|
||||
|
||||
``strategy`` is ``"auto"``, ``"manual"``, or ``"off"``. In
|
||||
``manual``/``off`` mode, this function only probes for an
|
||||
existing binary and returns ``None`` if not found.
|
||||
|
||||
The install is cached per-package — a second call returns the
|
||||
same path (or ``None``) without reinstalling. Concurrent calls
|
||||
are serialized.
|
||||
"""
|
||||
if strategy not in ("auto",):
|
||||
# Only ``auto`` triggers an actual install. In manual/off,
|
||||
# we still check whether the binary already exists.
|
||||
recipe = INSTALL_RECIPES.get(pkg, {})
|
||||
bin_name = recipe.get("bin", pkg)
|
||||
return _existing_binary(bin_name)
|
||||
|
||||
if pkg in _install_results:
|
||||
return _install_results[pkg]
|
||||
|
||||
lock = _get_lock(pkg)
|
||||
with lock:
|
||||
# Double-check after acquiring lock.
|
||||
if pkg in _install_results:
|
||||
return _install_results[pkg]
|
||||
result = _do_install(pkg)
|
||||
_install_results[pkg] = result
|
||||
return result
|
||||
|
||||
|
||||
def _do_install(pkg: str) -> Optional[str]:
|
||||
recipe = INSTALL_RECIPES.get(pkg)
|
||||
if recipe is None:
|
||||
# Not in our registry — best-effort: just probe PATH.
|
||||
return shutil.which(pkg)
|
||||
|
||||
strategy = recipe.get("strategy", "manual")
|
||||
bin_name = recipe.get("bin", pkg)
|
||||
|
||||
# Check if already present (shutil.which or staging dir)
|
||||
existing = _existing_binary(bin_name)
|
||||
if existing:
|
||||
return existing
|
||||
|
||||
if strategy == "manual":
|
||||
logger.debug("[install] %s requires manual install (recipe=%s)", pkg, recipe)
|
||||
return None
|
||||
|
||||
if strategy == "npm":
|
||||
return _install_npm(
|
||||
recipe.get("pkg", pkg),
|
||||
bin_name,
|
||||
extra_pkgs=recipe.get("extra_pkgs") or [],
|
||||
)
|
||||
if strategy == "go":
|
||||
return _install_go(recipe.get("pkg", pkg), bin_name)
|
||||
if strategy == "pip":
|
||||
return _install_pip(recipe.get("pkg", pkg), bin_name)
|
||||
|
||||
logger.warning("[install] unknown strategy %r for %s", strategy, pkg)
|
||||
return None
|
||||
|
||||
|
||||
def _install_npm(
|
||||
pkg: str,
|
||||
bin_name: str,
|
||||
extra_pkgs: Optional[list] = None,
|
||||
) -> Optional[str]:
|
||||
"""Install an npm package into our staging dir.
|
||||
|
||||
Uses ``npm install --prefix`` so the binaries land in
|
||||
``<staging>/node_modules/.bin/<bin_name>`` and we symlink them up
|
||||
one level for direct PATH-style access.
|
||||
|
||||
``extra_pkgs`` is a list of sibling packages to install in the
|
||||
same ``node_modules`` tree. Used for LSP servers with runtime
|
||||
peer deps that npm doesn't auto-pull (typescript-language-server
|
||||
needs ``typescript`` next to it; intelephense ships standalone).
|
||||
"""
|
||||
npm = shutil.which("npm")
|
||||
if npm is None:
|
||||
logger.info("[install] cannot install %s: npm not on PATH", pkg)
|
||||
return None
|
||||
staging = hermes_lsp_bin_dir().parent # <HERMES_HOME>/lsp/
|
||||
install_targets = [pkg] + list(extra_pkgs or [])
|
||||
try:
|
||||
logger.info(
|
||||
"[install] npm install --prefix %s %s",
|
||||
staging,
|
||||
" ".join(install_targets),
|
||||
)
|
||||
proc = subprocess.run(
|
||||
[npm, "install", "--prefix", str(staging), "--silent", "--no-fund", "--no-audit", *install_targets],
|
||||
check=False,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=300,
|
||||
)
|
||||
if proc.returncode != 0:
|
||||
logger.warning(
|
||||
"[install] npm install failed for %s: %s", pkg, proc.stderr.strip()[:500]
|
||||
)
|
||||
return None
|
||||
except (subprocess.TimeoutExpired, OSError) as e:
|
||||
logger.warning("[install] npm install errored for %s: %s", pkg, e)
|
||||
return None
|
||||
|
||||
# Find the bin
|
||||
nm_bin = staging / "node_modules" / ".bin" / bin_name
|
||||
if os.name == "nt":
|
||||
# On Windows npm sometimes drops `.cmd` shims
|
||||
candidates = [nm_bin, nm_bin.with_suffix(".cmd")]
|
||||
else:
|
||||
candidates = [nm_bin]
|
||||
for c in candidates:
|
||||
if c.exists():
|
||||
# Symlink into our `lsp/bin/` for stable PATH access.
|
||||
link = hermes_lsp_bin_dir() / c.name
|
||||
if not link.exists():
|
||||
try:
|
||||
link.symlink_to(c)
|
||||
except (OSError, NotImplementedError):
|
||||
# Symlinks fail on some Windows setups — copy instead.
|
||||
try:
|
||||
shutil.copy2(c, link)
|
||||
except OSError:
|
||||
return str(c)
|
||||
return str(link if link.exists() else c)
|
||||
logger.warning("[install] npm install for %s succeeded but bin %s not found", pkg, bin_name)
|
||||
return None
|
||||
|
||||
|
||||
def _install_go(pkg: str, bin_name: str) -> Optional[str]:
|
||||
"""Install a Go module to GOBIN=<staging>."""
|
||||
go = shutil.which("go")
|
||||
if go is None:
|
||||
logger.info("[install] cannot install %s: go not on PATH", pkg)
|
||||
return None
|
||||
staging = hermes_lsp_bin_dir()
|
||||
env = dict(os.environ)
|
||||
env["GOBIN"] = str(staging)
|
||||
try:
|
||||
logger.info("[install] go install %s (GOBIN=%s)", pkg, staging)
|
||||
proc = subprocess.run(
|
||||
[go, "install", pkg],
|
||||
check=False,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=600,
|
||||
env=env,
|
||||
)
|
||||
if proc.returncode != 0:
|
||||
logger.warning(
|
||||
"[install] go install failed for %s: %s", pkg, proc.stderr.strip()[:500]
|
||||
)
|
||||
return None
|
||||
except (subprocess.TimeoutExpired, OSError) as e:
|
||||
logger.warning("[install] go install errored for %s: %s", pkg, e)
|
||||
return None
|
||||
bin_path = staging / bin_name
|
||||
if os.name == "nt":
|
||||
bin_path = bin_path.with_suffix(".exe")
|
||||
if bin_path.exists():
|
||||
return str(bin_path)
|
||||
logger.warning("[install] go install for %s succeeded but bin %s not found", pkg, bin_name)
|
||||
return None
|
||||
|
||||
|
||||
def _install_pip(pkg: str, bin_name: str) -> Optional[str]:
|
||||
"""Install a Python package into a hermes-owned target dir.
|
||||
|
||||
We avoid polluting the user's site-packages by using
|
||||
``pip install --target``. Bins go into
|
||||
``<staging>/python-packages/bin/`` which we symlink into
|
||||
``<staging>/bin``. Note: this only works for packages that ship a
|
||||
console script.
|
||||
"""
|
||||
pip_target = hermes_lsp_bin_dir().parent / "python-packages"
|
||||
pip_target.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
logger.info("[install] pip install --target %s %s", pip_target, pkg)
|
||||
proc = subprocess.run(
|
||||
[sys.executable, "-m", "pip", "install", "--target", str(pip_target), "--quiet", pkg],
|
||||
check=False,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=300,
|
||||
)
|
||||
if proc.returncode != 0:
|
||||
logger.warning(
|
||||
"[install] pip install failed for %s: %s", pkg, proc.stderr.strip()[:500]
|
||||
)
|
||||
return None
|
||||
except (subprocess.TimeoutExpired, OSError) as e:
|
||||
logger.warning("[install] pip install errored for %s: %s", pkg, e)
|
||||
return None
|
||||
# Look for the script
|
||||
bin_path = pip_target / "bin" / bin_name
|
||||
if bin_path.exists():
|
||||
link = hermes_lsp_bin_dir() / bin_name
|
||||
if not link.exists():
|
||||
try:
|
||||
link.symlink_to(bin_path)
|
||||
except (OSError, NotImplementedError):
|
||||
try:
|
||||
shutil.copy2(bin_path, link)
|
||||
except OSError:
|
||||
return str(bin_path)
|
||||
return str(link if link.exists() else bin_path)
|
||||
return None
|
||||
|
||||
|
||||
def detect_status(pkg: str) -> str:
|
||||
"""Return ``installed``, ``missing``, or ``manual-only`` for a package.
|
||||
|
||||
Used by the ``hermes lsp status`` CLI to give users a quick
|
||||
overview of what's available without spawning anything.
|
||||
"""
|
||||
recipe = INSTALL_RECIPES.get(pkg)
|
||||
bin_name = recipe.get("bin", pkg) if recipe else pkg
|
||||
if _existing_binary(bin_name):
|
||||
return "installed"
|
||||
if recipe and recipe.get("strategy") == "manual":
|
||||
return "manual-only"
|
||||
return "missing"
|
||||
|
||||
|
||||
__all__ = [
|
||||
"INSTALL_RECIPES",
|
||||
"try_install",
|
||||
"detect_status",
|
||||
"hermes_lsp_bin_dir",
|
||||
]
|
||||
@@ -1,607 +0,0 @@
|
||||
"""Service-level orchestration for LSP clients.
|
||||
|
||||
The :class:`LSPService` is the bridge between the synchronous
|
||||
file_operations layer and the async :class:`agent.lsp.client.LSPClient`.
|
||||
|
||||
Design choices:
|
||||
|
||||
- A **single asyncio event loop** runs in a background thread. All
|
||||
client work happens on that loop. Synchronous callers from
|
||||
``tools/file_operations.py`` use :meth:`get_diagnostics_sync` to
|
||||
open + wait + drain in one blocking call.
|
||||
|
||||
- One client per ``(server_id, workspace_root)`` key. Lazy spawn:
|
||||
the first request for a key spawns the client; subsequent requests
|
||||
re-use it.
|
||||
|
||||
- A **broken-set** records ``(server_id, workspace_root)`` pairs that
|
||||
failed to spawn or initialize. These are never retried for the
|
||||
life of the service. Mirrors OpenCode's design.
|
||||
|
||||
- A **delta baseline** map keeps "diagnostics-as-of-the-last-snapshot"
|
||||
per file. ``snapshot_baseline()`` is called BEFORE a write; the
|
||||
next ``get_diagnostics_sync()`` returns only diagnostics that
|
||||
weren't in the baseline. This is the lift from Claude Code's
|
||||
``beforeFileEdited`` / ``getNewDiagnostics`` pattern, except wired
|
||||
to the local LSP layer instead of MCP IDE RPC.
|
||||
|
||||
The service is **off by default** — call :meth:`is_active` to check
|
||||
whether it's actually doing anything. When LSP is disabled in
|
||||
config, when no git workspace can be detected, when all configured
|
||||
servers are missing binaries and auto-install is off, ``is_active``
|
||||
returns False and the file_operations layer falls through to the
|
||||
in-process syntax check.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from concurrent.futures import Future as ConcurrentFuture
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from agent.lsp import eventlog
|
||||
from agent.lsp.client import (
|
||||
DIAGNOSTICS_DOCUMENT_WAIT,
|
||||
LSPClient,
|
||||
file_uri,
|
||||
)
|
||||
from agent.lsp.servers import (
|
||||
ServerContext,
|
||||
ServerDef,
|
||||
SpawnSpec,
|
||||
find_server_for_file,
|
||||
language_id_for,
|
||||
)
|
||||
from agent.lsp.workspace import (
|
||||
clear_cache,
|
||||
is_inside_workspace,
|
||||
resolve_workspace_for_file,
|
||||
)
|
||||
|
||||
logger = logging.getLogger("agent.lsp.manager")
|
||||
|
||||
DEFAULT_IDLE_TIMEOUT = 600 # seconds; servers idle for >10min get reaped
|
||||
|
||||
|
||||
class _BackgroundLoop:
|
||||
"""A daemon thread that owns one asyncio event loop.
|
||||
|
||||
Provides :meth:`run` for synchronous callers — submits a coroutine
|
||||
to the loop and blocks until it finishes (or a timeout fires).
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._loop: Optional[asyncio.AbstractEventLoop] = None
|
||||
self._thread: Optional[threading.Thread] = None
|
||||
self._ready = threading.Event()
|
||||
|
||||
def start(self) -> None:
|
||||
if self._thread is not None:
|
||||
return
|
||||
self._thread = threading.Thread(
|
||||
target=self._run_forever,
|
||||
name="hermes-lsp-loop",
|
||||
daemon=True,
|
||||
)
|
||||
self._thread.start()
|
||||
self._ready.wait(timeout=5.0)
|
||||
|
||||
def _run_forever(self) -> None:
|
||||
loop = asyncio.new_event_loop()
|
||||
self._loop = loop
|
||||
asyncio.set_event_loop(loop)
|
||||
self._ready.set()
|
||||
try:
|
||||
loop.run_forever()
|
||||
finally:
|
||||
try:
|
||||
loop.close()
|
||||
except Exception: # noqa: BLE001
|
||||
pass
|
||||
|
||||
def run(self, coro, *, timeout: Optional[float] = None) -> Any:
|
||||
"""Submit a coroutine to the loop and block until done.
|
||||
|
||||
Returns the coroutine's result, or raises its exception.
|
||||
"""
|
||||
if self._loop is None:
|
||||
raise RuntimeError("background loop not started")
|
||||
fut: ConcurrentFuture = asyncio.run_coroutine_threadsafe(coro, self._loop)
|
||||
try:
|
||||
return fut.result(timeout=timeout)
|
||||
except Exception:
|
||||
fut.cancel()
|
||||
raise
|
||||
|
||||
def stop(self) -> None:
|
||||
loop = self._loop
|
||||
if loop is None:
|
||||
return
|
||||
try:
|
||||
loop.call_soon_threadsafe(loop.stop)
|
||||
except RuntimeError:
|
||||
pass
|
||||
if self._thread is not None:
|
||||
self._thread.join(timeout=2.0)
|
||||
self._loop = None
|
||||
self._thread = None
|
||||
|
||||
|
||||
class LSPService:
|
||||
"""The process-wide LSP service.
|
||||
|
||||
Created once via :meth:`create_from_config`; the
|
||||
:func:`agent.lsp.get_service` accessor manages the singleton.
|
||||
Most callers should use that accessor rather than constructing
|
||||
:class:`LSPService` directly.
|
||||
"""
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# construction + factory
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
enabled: bool,
|
||||
wait_mode: str,
|
||||
wait_timeout: float,
|
||||
install_strategy: str,
|
||||
binary_overrides: Optional[Dict[str, List[str]]] = None,
|
||||
env_overrides: Optional[Dict[str, Dict[str, str]]] = None,
|
||||
init_overrides: Optional[Dict[str, Dict[str, Any]]] = None,
|
||||
disabled_servers: Optional[List[str]] = None,
|
||||
idle_timeout: float = DEFAULT_IDLE_TIMEOUT,
|
||||
) -> None:
|
||||
self._enabled = enabled
|
||||
self._wait_mode = wait_mode if wait_mode in ("document", "full") else "document"
|
||||
self._wait_timeout = wait_timeout
|
||||
self._install_strategy = install_strategy
|
||||
self._binary_overrides = binary_overrides or {}
|
||||
self._env_overrides = env_overrides or {}
|
||||
self._init_overrides = init_overrides or {}
|
||||
self._disabled_servers = set(disabled_servers or [])
|
||||
self._idle_timeout = idle_timeout
|
||||
|
||||
self._loop = _BackgroundLoop()
|
||||
if self._enabled:
|
||||
self._loop.start()
|
||||
|
||||
# Per-(server_id, workspace_root) state
|
||||
self._clients: Dict[Tuple[str, str], LSPClient] = {}
|
||||
self._broken: set = set()
|
||||
self._spawning: Dict[Tuple[str, str], asyncio.Future] = {}
|
||||
self._last_used: Dict[Tuple[str, str], float] = {}
|
||||
self._state_lock = threading.Lock()
|
||||
|
||||
# Delta baseline: file path → snapshot of diagnostics taken
|
||||
# immediately before a write. ``get_diagnostics_sync`` filters
|
||||
# out anything in the baseline so the agent only sees errors
|
||||
# introduced by the current edit.
|
||||
self._delta_baseline: Dict[str, List[Dict[str, Any]]] = {}
|
||||
|
||||
@classmethod
|
||||
def create_from_config(cls) -> Optional["LSPService"]:
|
||||
"""Build a service from ``hermes_cli.config`` settings.
|
||||
|
||||
Returns ``None`` if the config can't be loaded. The service
|
||||
itself returns ``is_active()`` False when LSP is disabled.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("LSP config load failed: %s", e)
|
||||
return None
|
||||
|
||||
lsp_cfg = (cfg.get("lsp") or {}) if isinstance(cfg, dict) else {}
|
||||
if not isinstance(lsp_cfg, dict):
|
||||
lsp_cfg = {}
|
||||
|
||||
enabled = bool(lsp_cfg.get("enabled", True))
|
||||
wait_mode = lsp_cfg.get("wait_mode", "document")
|
||||
wait_timeout = float(lsp_cfg.get("wait_timeout", DIAGNOSTICS_DOCUMENT_WAIT))
|
||||
install_strategy = lsp_cfg.get("install_strategy", "auto")
|
||||
servers_cfg = lsp_cfg.get("servers") or {}
|
||||
disabled = []
|
||||
binary_overrides: Dict[str, List[str]] = {}
|
||||
env_overrides: Dict[str, Dict[str, str]] = {}
|
||||
init_overrides: Dict[str, Dict[str, Any]] = {}
|
||||
if isinstance(servers_cfg, dict):
|
||||
for name, sub in servers_cfg.items():
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
if sub.get("disabled"):
|
||||
disabled.append(name)
|
||||
cmd = sub.get("command")
|
||||
if isinstance(cmd, list) and cmd:
|
||||
binary_overrides[name] = cmd
|
||||
env = sub.get("env")
|
||||
if isinstance(env, dict):
|
||||
env_overrides[name] = {k: str(v) for k, v in env.items()}
|
||||
init = sub.get("initialization_options")
|
||||
if isinstance(init, dict):
|
||||
init_overrides[name] = init
|
||||
|
||||
return cls(
|
||||
enabled=enabled,
|
||||
wait_mode=wait_mode,
|
||||
wait_timeout=wait_timeout,
|
||||
install_strategy=install_strategy,
|
||||
binary_overrides=binary_overrides,
|
||||
env_overrides=env_overrides,
|
||||
init_overrides=init_overrides,
|
||||
disabled_servers=disabled,
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# public API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def is_active(self) -> bool:
|
||||
"""Return True iff this service should be consulted at all."""
|
||||
return self._enabled
|
||||
|
||||
def enabled_for(self, file_path: str) -> bool:
|
||||
"""Return True iff LSP should run for this specific file.
|
||||
|
||||
Gates on workspace detection (file or cwd inside a git worktree),
|
||||
on whether any registered server matches the extension, and
|
||||
on whether the (server_id, workspace_root) pair is in the
|
||||
broken-set from a previous spawn failure.
|
||||
|
||||
Files in already-broken pairs return False so the file_operations
|
||||
layer skips the LSP path entirely — no spawn attempts, no
|
||||
timeout cost — until the service is restarted (``hermes lsp
|
||||
restart``) or the process exits.
|
||||
"""
|
||||
if not self._enabled:
|
||||
return False
|
||||
srv = find_server_for_file(file_path)
|
||||
if srv is None or srv.server_id in self._disabled_servers:
|
||||
return False
|
||||
ws_root, gated_in = resolve_workspace_for_file(file_path)
|
||||
if not (ws_root and gated_in):
|
||||
return False
|
||||
# Broken-set short-circuit. Use the per-server root if we can
|
||||
# compute one cheaply; otherwise fall back to the workspace
|
||||
# root as the broken key (which is what _get_or_spawn would
|
||||
# have used anyway when it failed).
|
||||
try:
|
||||
per_server_root = srv.resolve_root(file_path, ws_root) or ws_root
|
||||
except Exception: # noqa: BLE001
|
||||
per_server_root = ws_root
|
||||
if (srv.server_id, per_server_root) in self._broken:
|
||||
return False
|
||||
return True
|
||||
|
||||
def snapshot_baseline(self, file_path: str) -> None:
|
||||
"""Snapshot current diagnostics for ``file_path`` as the delta baseline.
|
||||
|
||||
Called BEFORE a write so the next ``get_diagnostics_sync()``
|
||||
can filter out pre-existing errors. Best-effort — failures
|
||||
are silently swallowed so a flaky server can't break a write.
|
||||
|
||||
Outer timeouts (e.g. server hangs during initialize) mark the
|
||||
(server_id, workspace_root) pair as broken so subsequent edits
|
||||
skip it instantly instead of re-paying the timeout cost.
|
||||
"""
|
||||
if not self.enabled_for(file_path):
|
||||
return
|
||||
try:
|
||||
diags = self._loop.run(self._snapshot_async(file_path), timeout=8.0)
|
||||
self._delta_baseline[os.path.abspath(file_path)] = diags or []
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("baseline snapshot failed for %s: %s", file_path, e)
|
||||
self._mark_broken_for_file(file_path, e)
|
||||
self._delta_baseline[os.path.abspath(file_path)] = []
|
||||
|
||||
def get_diagnostics_sync(
|
||||
self,
|
||||
file_path: str,
|
||||
*,
|
||||
delta: bool = True,
|
||||
timeout: Optional[float] = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Synchronously open ``file_path`` in the right server, wait for
|
||||
diagnostics, return them.
|
||||
|
||||
If ``delta`` is True (default), the result is filtered against
|
||||
any baseline previously captured via :meth:`snapshot_baseline`.
|
||||
Diagnostics present in the baseline are removed so the caller
|
||||
only sees errors introduced by the current edit.
|
||||
|
||||
Returns an empty list when LSP is disabled, when no workspace
|
||||
can be detected, when no server matches, or when the server
|
||||
can't be spawned. Never raises.
|
||||
"""
|
||||
if not self.enabled_for(file_path):
|
||||
return []
|
||||
|
||||
# Resolve server_id eagerly so we can emit structured logs even
|
||||
# when the request errors out below.
|
||||
srv = find_server_for_file(file_path)
|
||||
server_id = srv.server_id if srv else "?"
|
||||
|
||||
try:
|
||||
t = timeout if timeout is not None else self._wait_timeout + 2.0
|
||||
diags = self._loop.run(self._open_and_wait_async(file_path), timeout=t) or []
|
||||
except asyncio.TimeoutError as e:
|
||||
eventlog.log_timeout(server_id, file_path)
|
||||
logger.debug("LSP diagnostics timeout for %s: %s", file_path, e)
|
||||
self._mark_broken_for_file(file_path, e)
|
||||
return []
|
||||
except Exception as e: # noqa: BLE001
|
||||
eventlog.log_server_error(server_id, file_path, e)
|
||||
logger.debug("LSP diagnostics fetch failed for %s: %s", file_path, e)
|
||||
self._mark_broken_for_file(file_path, e)
|
||||
return []
|
||||
|
||||
abs_path = os.path.abspath(file_path)
|
||||
if delta:
|
||||
baseline = self._delta_baseline.get(abs_path) or []
|
||||
if baseline:
|
||||
seen = {_diag_key(d) for d in baseline}
|
||||
diags = [d for d in diags if _diag_key(d) not in seen]
|
||||
# Roll baseline forward — next call returns deltas relative
|
||||
# to the just-emitted state, mirroring claude-code's
|
||||
# diagnosticTracking.
|
||||
try:
|
||||
fresh = self._loop.run(self._current_diags_async(file_path), timeout=2.0) or []
|
||||
except Exception: # noqa: BLE001
|
||||
fresh = []
|
||||
if fresh:
|
||||
self._delta_baseline[abs_path] = fresh
|
||||
|
||||
if diags:
|
||||
eventlog.log_diagnostics(server_id, file_path, len(diags))
|
||||
else:
|
||||
eventlog.log_clean(server_id, file_path)
|
||||
return diags
|
||||
|
||||
def _mark_broken_for_file(self, file_path: str, exc: BaseException) -> None:
|
||||
"""Mark the (server_id, workspace_root) pair as broken so subsequent
|
||||
edits skip it instantly instead of re-paying timeout cost.
|
||||
|
||||
Called when the outer ``_loop.run`` timeout cancels an in-flight
|
||||
spawn/initialize that the inner ``_get_or_spawn`` task was still
|
||||
holding open. Without this, every subsequent write would re-enter
|
||||
the spawn path and re-pay the full ``snapshot_baseline``
|
||||
timeout (8s) until the binary is fixed.
|
||||
|
||||
Also kills any orphan client process that survived the cancelled
|
||||
future, and emits a single eventlog WARNING so the user knows
|
||||
which server gave up.
|
||||
|
||||
``exc`` is whatever exception the outer wrapper caught — used
|
||||
only for logging, never re-raised.
|
||||
"""
|
||||
srv = find_server_for_file(file_path)
|
||||
if srv is None:
|
||||
return
|
||||
ws_root, gated = resolve_workspace_for_file(file_path)
|
||||
if not (ws_root and gated):
|
||||
return
|
||||
try:
|
||||
per_server_root = srv.resolve_root(file_path, ws_root) or ws_root
|
||||
except Exception: # noqa: BLE001
|
||||
per_server_root = ws_root
|
||||
key = (srv.server_id, per_server_root)
|
||||
already_broken = key in self._broken
|
||||
self._broken.add(key)
|
||||
|
||||
# Kill any client we managed to spawn before the timeout. The
|
||||
# cancelled future never reached the broken-set add inside
|
||||
# ``_get_or_spawn`` so the client may still be hanging in
|
||||
# ``_clients`` with a half-initialized state.
|
||||
with self._state_lock:
|
||||
client = self._clients.pop(key, None)
|
||||
if client is not None:
|
||||
try:
|
||||
# Fire-and-forget shutdown — give it a second to cleanup,
|
||||
# but don't block. We're already on a slow path.
|
||||
self._loop.run(client.shutdown(), timeout=1.0)
|
||||
except Exception: # noqa: BLE001
|
||||
pass
|
||||
|
||||
if not already_broken:
|
||||
eventlog.log_spawn_failed(srv.server_id, per_server_root, exc)
|
||||
|
||||
def shutdown(self) -> None:
|
||||
"""Tear down all clients and stop the background loop."""
|
||||
if not self._enabled:
|
||||
return
|
||||
try:
|
||||
self._loop.run(self._shutdown_async(), timeout=10.0)
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("LSP shutdown error: %s", e)
|
||||
self._loop.stop()
|
||||
clear_cache()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# async internals
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _snapshot_async(self, file_path: str) -> List[Dict[str, Any]]:
|
||||
client = await self._get_or_spawn(file_path)
|
||||
if client is None:
|
||||
return []
|
||||
try:
|
||||
version = await client.open_file(file_path, language_id=language_id_for(file_path))
|
||||
await client.wait_for_diagnostics(file_path, version, mode=self._wait_mode)
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("snapshot open/wait failed: %s", e)
|
||||
return []
|
||||
self._last_used[(client.server_id, client.workspace_root)] = time.time()
|
||||
return list(client.diagnostics_for(file_path))
|
||||
|
||||
async def _open_and_wait_async(self, file_path: str) -> List[Dict[str, Any]]:
|
||||
client = await self._get_or_spawn(file_path)
|
||||
if client is None:
|
||||
return []
|
||||
try:
|
||||
version = await client.open_file(file_path, language_id=language_id_for(file_path))
|
||||
await client.save_file(file_path)
|
||||
await client.wait_for_diagnostics(file_path, version, mode=self._wait_mode)
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("open/wait failed for %s: %s", file_path, e)
|
||||
return []
|
||||
self._last_used[(client.server_id, client.workspace_root)] = time.time()
|
||||
return list(client.diagnostics_for(file_path))
|
||||
|
||||
async def _current_diags_async(self, file_path: str) -> List[Dict[str, Any]]:
|
||||
ws, gated = resolve_workspace_for_file(file_path)
|
||||
srv = find_server_for_file(file_path)
|
||||
if not (ws and gated and srv):
|
||||
return []
|
||||
with self._state_lock:
|
||||
client = self._clients.get((srv.server_id, ws))
|
||||
if client is None:
|
||||
return []
|
||||
return list(client.diagnostics_for(file_path))
|
||||
|
||||
async def _get_or_spawn(self, file_path: str) -> Optional[LSPClient]:
|
||||
srv = find_server_for_file(file_path)
|
||||
if srv is None:
|
||||
return None
|
||||
if srv.server_id in self._disabled_servers:
|
||||
eventlog.log_disabled(srv.server_id, file_path, "disabled in config")
|
||||
return None
|
||||
ws_root, gated = resolve_workspace_for_file(file_path)
|
||||
if not (ws_root and gated):
|
||||
eventlog.log_no_project_root(srv.server_id, file_path)
|
||||
return None
|
||||
per_server_root = srv.resolve_root(file_path, ws_root)
|
||||
if per_server_root is None:
|
||||
eventlog.log_disabled(
|
||||
srv.server_id, file_path, "exclude marker hit (server gated off)"
|
||||
)
|
||||
return None # exclude marker hit, server gated off
|
||||
|
||||
key = (srv.server_id, per_server_root)
|
||||
if key in self._broken:
|
||||
return None
|
||||
with self._state_lock:
|
||||
client = self._clients.get(key)
|
||||
if client is not None and client.is_running:
|
||||
eventlog.log_active(srv.server_id, per_server_root)
|
||||
return client
|
||||
spawning = self._spawning.get(key)
|
||||
if spawning is not None:
|
||||
try:
|
||||
return await spawning
|
||||
except Exception: # noqa: BLE001
|
||||
return None
|
||||
|
||||
# Begin spawn
|
||||
loop = asyncio.get_running_loop()
|
||||
spawn_future: asyncio.Future = loop.create_future()
|
||||
with self._state_lock:
|
||||
self._spawning[key] = spawn_future
|
||||
try:
|
||||
ctx = ServerContext(
|
||||
workspace_root=per_server_root,
|
||||
install_strategy=self._install_strategy,
|
||||
binary_overrides=self._binary_overrides,
|
||||
env_overrides=self._env_overrides,
|
||||
init_overrides=self._init_overrides,
|
||||
)
|
||||
spec = srv.build_spawn(per_server_root, ctx)
|
||||
if spec is None:
|
||||
# ``build_spawn`` returns None when the binary can't be
|
||||
# located (auto-install disabled, manual-only server,
|
||||
# or install attempt failed). Surface this once via
|
||||
# the structured logger so the user can act on it.
|
||||
eventlog.log_server_unavailable(srv.server_id, srv.server_id)
|
||||
self._broken.add(key)
|
||||
spawn_future.set_result(None)
|
||||
return None
|
||||
client = LSPClient(
|
||||
server_id=srv.server_id,
|
||||
workspace_root=spec.workspace_root,
|
||||
command=spec.command,
|
||||
env=spec.env,
|
||||
cwd=spec.cwd,
|
||||
initialization_options=spec.initialization_options,
|
||||
seed_diagnostics_on_first_push=spec.seed_diagnostics_on_first_push or srv.seed_first_push,
|
||||
)
|
||||
try:
|
||||
await client.start()
|
||||
except Exception as e: # noqa: BLE001
|
||||
eventlog.log_spawn_failed(srv.server_id, per_server_root, e)
|
||||
self._broken.add(key)
|
||||
spawn_future.set_result(None)
|
||||
return None
|
||||
with self._state_lock:
|
||||
self._clients[key] = client
|
||||
self._last_used[key] = time.time()
|
||||
eventlog.log_active(srv.server_id, per_server_root)
|
||||
spawn_future.set_result(client)
|
||||
return client
|
||||
finally:
|
||||
with self._state_lock:
|
||||
self._spawning.pop(key, None)
|
||||
|
||||
async def _shutdown_async(self) -> None:
|
||||
with self._state_lock:
|
||||
clients = list(self._clients.values())
|
||||
self._clients.clear()
|
||||
self._broken.clear()
|
||||
self._last_used.clear()
|
||||
await asyncio.gather(
|
||||
*(c.shutdown() for c in clients),
|
||||
return_exceptions=True,
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# status / introspection (used by ``hermes lsp status``)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_status(self) -> Dict[str, Any]:
|
||||
"""Return a snapshot of the service for the CLI status command."""
|
||||
with self._state_lock:
|
||||
clients = [
|
||||
{
|
||||
"server_id": k[0],
|
||||
"workspace_root": k[1],
|
||||
"state": c.state,
|
||||
"running": c.is_running,
|
||||
}
|
||||
for k, c in self._clients.items()
|
||||
]
|
||||
broken = list(self._broken)
|
||||
return {
|
||||
"enabled": self._enabled,
|
||||
"wait_mode": self._wait_mode,
|
||||
"wait_timeout": self._wait_timeout,
|
||||
"install_strategy": self._install_strategy,
|
||||
"clients": clients,
|
||||
"broken": broken,
|
||||
"disabled_servers": sorted(self._disabled_servers),
|
||||
}
|
||||
|
||||
|
||||
def _diag_key(d: Dict[str, Any]) -> str:
|
||||
"""Content equality key used for delta filtering. Mirrors
|
||||
:func:`agent.lsp.client._diagnostic_key`."""
|
||||
rng = d.get("range") or {}
|
||||
start = rng.get("start") or {}
|
||||
end = rng.get("end") or {}
|
||||
code = d.get("code")
|
||||
if code is not None and not isinstance(code, str):
|
||||
code = str(code)
|
||||
return "\x00".join(
|
||||
[
|
||||
str(d.get("severity") or 1),
|
||||
str(code or ""),
|
||||
str(d.get("source") or ""),
|
||||
str(d.get("message") or "").strip(),
|
||||
f"{start.get('line', 0)}:{start.get('character', 0)}-{end.get('line', 0)}:{end.get('character', 0)}",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
__all__ = ["LSPService"]
|
||||
@@ -1,196 +0,0 @@
|
||||
"""Minimal LSP JSON-RPC 2.0 framer over async streams.
|
||||
|
||||
LSP wire format:
|
||||
|
||||
Content-Length: <bytes>\\r\\n
|
||||
\\r\\n
|
||||
<utf-8 JSON body>
|
||||
|
||||
The body is a JSON-RPC 2.0 envelope: request, response, or notification.
|
||||
|
||||
This module replaces what ``vscode-jsonrpc/node`` would do in a
|
||||
TypeScript implementation. We keep it deliberately small — just the
|
||||
framer + envelope helpers — so :class:`agent.lsp.client.LSPClient` can
|
||||
focus on protocol semantics.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger("agent.lsp.protocol")
|
||||
|
||||
# LSP error codes we care about. Full list in
|
||||
# https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#errorCodes
|
||||
ERROR_CONTENT_MODIFIED = -32801
|
||||
ERROR_REQUEST_CANCELLED = -32800
|
||||
ERROR_METHOD_NOT_FOUND = -32601
|
||||
|
||||
|
||||
class LSPProtocolError(Exception):
|
||||
"""Raised when the wire protocol is violated.
|
||||
|
||||
Distinct from :class:`LSPRequestError` which represents a server
|
||||
returning a JSON-RPC error response — that's protocol-conformant.
|
||||
This exception means the framing or envelope itself is broken.
|
||||
"""
|
||||
|
||||
|
||||
class LSPRequestError(Exception):
|
||||
"""Raised when an LSP request returns an error response.
|
||||
|
||||
Carries the JSON-RPC ``code``, ``message``, and optional ``data``.
|
||||
"""
|
||||
|
||||
def __init__(self, code: int, message: str, data: Any = None) -> None:
|
||||
super().__init__(f"LSP error {code}: {message}")
|
||||
self.code = code
|
||||
self.message = message
|
||||
self.data = data
|
||||
|
||||
|
||||
def encode_message(obj: dict) -> bytes:
|
||||
"""Encode a JSON-RPC envelope as a Content-Length framed byte string.
|
||||
|
||||
The body is encoded as compact UTF-8 JSON (no spaces between
|
||||
separators) — matches what ``vscode-jsonrpc`` emits and keeps the
|
||||
Content-Length count exact.
|
||||
"""
|
||||
body = json.dumps(obj, separators=(",", ":"), ensure_ascii=False).encode("utf-8")
|
||||
header = f"Content-Length: {len(body)}\r\n\r\n".encode("ascii")
|
||||
return header + body
|
||||
|
||||
|
||||
async def read_message(reader: asyncio.StreamReader) -> Optional[dict]:
|
||||
"""Read one Content-Length framed JSON-RPC message from the stream.
|
||||
|
||||
Returns ``None`` on clean EOF (server closed stdout cleanly between
|
||||
messages — typical shutdown). Raises :class:`LSPProtocolError` on
|
||||
malformed framing.
|
||||
|
||||
The reader is advanced to just past the JSON body on success.
|
||||
"""
|
||||
headers: dict = {}
|
||||
header_bytes = 0
|
||||
while True:
|
||||
try:
|
||||
line = await reader.readuntil(b"\r\n")
|
||||
except asyncio.IncompleteReadError as e:
|
||||
# EOF while reading headers. If we hadn't started a header
|
||||
# block, treat as clean EOF; otherwise the framing is bad.
|
||||
if not e.partial and not headers:
|
||||
return None
|
||||
raise LSPProtocolError(
|
||||
f"unexpected EOF while reading LSP headers (partial={e.partial!r})"
|
||||
) from e
|
||||
# Defensive cap against a server streaming headers without ever
|
||||
# emitting CRLF-CRLF. Caps total header bytes at 8 KiB — a
|
||||
# well-behaved server fits in well under 200 bytes.
|
||||
header_bytes += len(line)
|
||||
if header_bytes > 8192:
|
||||
raise LSPProtocolError(
|
||||
f"LSP header block exceeded 8 KiB without terminator"
|
||||
)
|
||||
line = line[:-2] # strip CRLF
|
||||
if not line:
|
||||
break # blank line ends header block
|
||||
try:
|
||||
key, _, value = line.decode("ascii").partition(":")
|
||||
except UnicodeDecodeError as e:
|
||||
raise LSPProtocolError(f"non-ASCII LSP header: {line!r}") from e
|
||||
if not key:
|
||||
raise LSPProtocolError(f"malformed LSP header line: {line!r}")
|
||||
headers[key.strip().lower()] = value.strip()
|
||||
|
||||
cl = headers.get("content-length")
|
||||
if cl is None:
|
||||
raise LSPProtocolError(f"LSP message missing Content-Length: {headers!r}")
|
||||
try:
|
||||
n = int(cl)
|
||||
except ValueError as e:
|
||||
raise LSPProtocolError(f"non-integer Content-Length: {cl!r}") from e
|
||||
if n < 0 or n > 64 * 1024 * 1024: # 64 MiB sanity cap
|
||||
raise LSPProtocolError(f"unreasonable Content-Length: {n}")
|
||||
|
||||
try:
|
||||
body = await reader.readexactly(n)
|
||||
except asyncio.IncompleteReadError as e:
|
||||
raise LSPProtocolError(
|
||||
f"truncated LSP body: expected {n} bytes, got {len(e.partial)}"
|
||||
) from e
|
||||
|
||||
try:
|
||||
return json.loads(body.decode("utf-8"))
|
||||
except json.JSONDecodeError as e:
|
||||
raise LSPProtocolError(f"invalid JSON in LSP body: {e}") from e
|
||||
except UnicodeDecodeError as e:
|
||||
raise LSPProtocolError(f"non-UTF-8 LSP body: {e}") from e
|
||||
|
||||
|
||||
def make_request(req_id: int, method: str, params: Any) -> dict:
|
||||
"""Build a JSON-RPC 2.0 request envelope."""
|
||||
msg: dict = {"jsonrpc": "2.0", "id": req_id, "method": method}
|
||||
if params is not None:
|
||||
msg["params"] = params
|
||||
return msg
|
||||
|
||||
|
||||
def make_notification(method: str, params: Any) -> dict:
|
||||
"""Build a JSON-RPC 2.0 notification envelope (no ``id``)."""
|
||||
msg: dict = {"jsonrpc": "2.0", "method": method}
|
||||
if params is not None:
|
||||
msg["params"] = params
|
||||
return msg
|
||||
|
||||
|
||||
def make_response(req_id: Any, result: Any) -> dict:
|
||||
"""Build a JSON-RPC 2.0 success response envelope."""
|
||||
return {"jsonrpc": "2.0", "id": req_id, "result": result}
|
||||
|
||||
|
||||
def make_error_response(req_id: Any, code: int, message: str, data: Any = None) -> dict:
|
||||
"""Build a JSON-RPC 2.0 error response envelope."""
|
||||
err: dict = {"code": code, "message": message}
|
||||
if data is not None:
|
||||
err["data"] = data
|
||||
return {"jsonrpc": "2.0", "id": req_id, "error": err}
|
||||
|
||||
|
||||
def classify_message(msg: dict) -> Tuple[str, Any]:
|
||||
"""Return ``(kind, key)`` where kind is one of ``request``,
|
||||
``response``, ``notification``, ``invalid``.
|
||||
|
||||
The key is the request id for request/response, the method name
|
||||
for notifications, and ``None`` for invalid messages.
|
||||
"""
|
||||
if not isinstance(msg, dict):
|
||||
return "invalid", None
|
||||
if msg.get("jsonrpc") != "2.0":
|
||||
return "invalid", None
|
||||
has_id = "id" in msg
|
||||
has_method = "method" in msg
|
||||
if has_id and has_method:
|
||||
return "request", msg["id"]
|
||||
if has_id and ("result" in msg or "error" in msg):
|
||||
return "response", msg["id"]
|
||||
if has_method and not has_id:
|
||||
return "notification", msg["method"]
|
||||
return "invalid", None
|
||||
|
||||
|
||||
__all__ = [
|
||||
"ERROR_CONTENT_MODIFIED",
|
||||
"ERROR_REQUEST_CANCELLED",
|
||||
"ERROR_METHOD_NOT_FOUND",
|
||||
"LSPProtocolError",
|
||||
"LSPRequestError",
|
||||
"encode_message",
|
||||
"read_message",
|
||||
"make_request",
|
||||
"make_notification",
|
||||
"make_response",
|
||||
"make_error_response",
|
||||
"classify_message",
|
||||
]
|
||||
@@ -1,78 +0,0 @@
|
||||
"""Format LSP diagnostics for inclusion in tool output.
|
||||
|
||||
The model sees a compact, severity-filtered, line-bounded summary of
|
||||
diagnostics introduced by the latest edit. Format matches what
|
||||
OpenCode's ``lsp/diagnostic.ts`` and Claude Code's
|
||||
``formatDiagnosticsSummary`` produce — ``<diagnostics>`` blocks with
|
||||
1-indexed line/column, capped at ``MAX_PER_FILE`` errors.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Severity-1 only by default — warnings/info/hints would flood the
|
||||
# agent. Lift this in config under ``lsp.severities`` if needed.
|
||||
SEVERITY_NAMES = {1: "ERROR", 2: "WARN", 3: "INFO", 4: "HINT"}
|
||||
DEFAULT_SEVERITIES = frozenset({1}) # ERROR only
|
||||
|
||||
MAX_PER_FILE = 20
|
||||
MAX_TOTAL_CHARS = 4000
|
||||
|
||||
|
||||
def format_diagnostic(d: Dict[str, Any]) -> str:
|
||||
"""One-line representation of a single diagnostic."""
|
||||
sev = SEVERITY_NAMES.get(d.get("severity") or 1, "ERROR")
|
||||
rng = d.get("range") or {}
|
||||
start = rng.get("start") or {}
|
||||
line = int(start.get("line", 0)) + 1
|
||||
col = int(start.get("character", 0)) + 1
|
||||
msg = str(d.get("message") or "").rstrip()
|
||||
code = d.get("code")
|
||||
code_part = f" [{code}]" if code not in (None, "") else ""
|
||||
source = d.get("source")
|
||||
source_part = f" ({source})" if source else ""
|
||||
return f"{sev} [{line}:{col}] {msg}{code_part}{source_part}"
|
||||
|
||||
|
||||
def report_for_file(
|
||||
file_path: str,
|
||||
diagnostics: List[Dict[str, Any]],
|
||||
*,
|
||||
severities: frozenset = DEFAULT_SEVERITIES,
|
||||
max_per_file: int = MAX_PER_FILE,
|
||||
) -> str:
|
||||
"""Build a ``<diagnostics file=...>`` block for one file.
|
||||
|
||||
Returns an empty string when no diagnostics pass the severity
|
||||
filter, so callers can do ``if block:`` to skip empty cases.
|
||||
"""
|
||||
if not diagnostics:
|
||||
return ""
|
||||
filtered = [d for d in diagnostics if (d.get("severity") or 1) in severities]
|
||||
if not filtered:
|
||||
return ""
|
||||
limited = filtered[:max_per_file]
|
||||
extra = len(filtered) - len(limited)
|
||||
lines = [format_diagnostic(d) for d in limited]
|
||||
body = "\n".join(lines)
|
||||
if extra > 0:
|
||||
body += f"\n... and {extra} more"
|
||||
return f"<diagnostics file=\"{file_path}\">\n{body}\n</diagnostics>"
|
||||
|
||||
|
||||
def truncate(s: str, *, limit: int = MAX_TOTAL_CHARS) -> str:
|
||||
"""Hard-cap a formatted summary string."""
|
||||
if len(s) <= limit:
|
||||
return s
|
||||
marker = "\n…[truncated]"
|
||||
return s[: limit - len(marker)] + marker
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SEVERITY_NAMES",
|
||||
"DEFAULT_SEVERITIES",
|
||||
"MAX_PER_FILE",
|
||||
"format_diagnostic",
|
||||
"report_for_file",
|
||||
"truncate",
|
||||
]
|
||||
1040
agent/lsp/servers.py
1040
agent/lsp/servers.py
File diff suppressed because it is too large
Load Diff
@@ -1,223 +0,0 @@
|
||||
"""Workspace and project-root resolution for LSP.
|
||||
|
||||
Two concerns live here:
|
||||
|
||||
1. **Workspace gate** — the upper-level "is this directory a project?"
|
||||
check. Hermes only runs LSP when the cwd (or the file being edited)
|
||||
sits inside a git worktree. Files outside any git root never
|
||||
trigger LSP, even if a server is configured. This keeps Telegram
|
||||
gateway users on user-home cwd's from spawning daemons.
|
||||
|
||||
2. **NearestRoot** — the per-server project-root walk. Each language
|
||||
server cares about a different marker (``pyproject.toml`` for
|
||||
Python, ``Cargo.toml`` for Rust, ``go.mod`` for Go, etc.) and
|
||||
wants the directory containing that marker. ``nearest_root()``
|
||||
walks up from a starting path looking for any of a list of marker
|
||||
files, optionally bailing if an exclude marker shows up first.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger("agent.lsp.workspace")
|
||||
|
||||
# Cache: cwd → (worktree_root, is_git) so repeated calls don't re-stat.
|
||||
# Cleared on shutdown. Keyed by absolute resolved path so symlink
|
||||
# folds collapse to one entry.
|
||||
_workspace_cache: dict = {}
|
||||
|
||||
|
||||
def normalize_path(path: str) -> str:
|
||||
"""Normalize a path for use as a stable map key.
|
||||
|
||||
Resolves ``~``, makes absolute, and collapses ``.``/``..``. We do
|
||||
NOT resolve symlinks here — symlink stability matters for some
|
||||
LSP servers (rust-analyzer cares about Cargo workspace identity)
|
||||
and we want the canonical path the user typed when possible.
|
||||
"""
|
||||
return os.path.abspath(os.path.expanduser(path))
|
||||
|
||||
|
||||
def find_git_worktree(start: str) -> Optional[str]:
|
||||
"""Walk up from ``start`` looking for a ``.git`` entry (file or dir).
|
||||
|
||||
Returns the directory containing ``.git``, or ``None`` if no git
|
||||
root is found before hitting the filesystem root.
|
||||
|
||||
A ``.git`` *file* (not directory) means we're inside a git
|
||||
worktree set up via ``git worktree add`` — both forms count.
|
||||
"""
|
||||
try:
|
||||
start_path = Path(normalize_path(start))
|
||||
if start_path.is_file():
|
||||
start_path = start_path.parent
|
||||
except (OSError, RuntimeError, ValueError):
|
||||
# Pathological input (loop in symlinks, encoding error, etc.) —
|
||||
# bail out rather than crash the lint hook.
|
||||
return None
|
||||
|
||||
# Cache check
|
||||
cached = _workspace_cache.get(str(start_path))
|
||||
if cached is not None:
|
||||
root, _is_git = cached
|
||||
return root
|
||||
|
||||
cur = start_path
|
||||
# Defensive cap: the deepest reasonable monorepo is well under 64
|
||||
# levels. Caps the walk so a pathological cwd or a symlink cycle
|
||||
# we somehow traverse can't keep us looping.
|
||||
for _ in range(64):
|
||||
git_marker = cur / ".git"
|
||||
try:
|
||||
if git_marker.exists():
|
||||
resolved = str(cur)
|
||||
_workspace_cache[str(start_path)] = (resolved, True)
|
||||
return resolved
|
||||
except OSError:
|
||||
# Permission error on a parent dir — bail out cleanly.
|
||||
break
|
||||
parent = cur.parent
|
||||
if parent == cur:
|
||||
break
|
||||
cur = parent
|
||||
|
||||
_workspace_cache[str(start_path)] = (None, False)
|
||||
return None
|
||||
|
||||
|
||||
def is_inside_workspace(path: str, workspace_root: str) -> bool:
|
||||
"""Return True iff ``path`` is inside (or equal to) ``workspace_root``.
|
||||
|
||||
Uses absolute paths but does not resolve symlinks — a file accessed
|
||||
via a symlink that points outside the workspace still counts as
|
||||
outside. This is the conservative interpretation; matches LSP
|
||||
behaviour where servers reject didOpen for unrelated files.
|
||||
"""
|
||||
p = normalize_path(path)
|
||||
root = normalize_path(workspace_root)
|
||||
if p == root:
|
||||
return True
|
||||
# Use os.path.commonpath to handle case-insensitive filesystems
|
||||
# correctly on macOS/Windows.
|
||||
try:
|
||||
common = os.path.commonpath([p, root])
|
||||
except ValueError:
|
||||
# Different drives on Windows.
|
||||
return False
|
||||
return common == root
|
||||
|
||||
|
||||
def nearest_root(
|
||||
start: str,
|
||||
markers: Iterable[str],
|
||||
*,
|
||||
excludes: Optional[Iterable[str]] = None,
|
||||
ceiling: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
"""Walk up from ``start`` looking for any of the given marker files.
|
||||
|
||||
Returns the **directory containing** the first matched marker, or
|
||||
``None`` if no marker is found before hitting ``ceiling`` (or the
|
||||
filesystem root if no ceiling).
|
||||
|
||||
If ``excludes`` is provided and an exclude marker matches *first*
|
||||
in the upward walk, returns ``None`` — the server is gated off
|
||||
for that file. Mirrors OpenCode's NearestRoot exclude semantics
|
||||
(e.g. typescript skips deno projects when ``deno.json`` is found
|
||||
before ``package.json``).
|
||||
"""
|
||||
start_path = Path(normalize_path(start))
|
||||
try:
|
||||
if start_path.is_file():
|
||||
start_path = start_path.parent
|
||||
except (OSError, RuntimeError, ValueError):
|
||||
return None
|
||||
ceiling_path = Path(normalize_path(ceiling)) if ceiling else None
|
||||
|
||||
markers_list = list(markers)
|
||||
excludes_list = list(excludes) if excludes else []
|
||||
|
||||
cur = start_path
|
||||
# Defensive cap matching ``find_git_worktree``. Bounded walk
|
||||
# protects against pathological inputs even though the
|
||||
# parent-equality stop normally terminates within ~10 steps.
|
||||
for _ in range(64):
|
||||
# Check excludes first — if an exclude is found at this level,
|
||||
# the server is gated off for this file.
|
||||
for exc in excludes_list:
|
||||
try:
|
||||
if (cur / exc).exists():
|
||||
return None
|
||||
except OSError:
|
||||
continue
|
||||
# Then check markers.
|
||||
for marker in markers_list:
|
||||
try:
|
||||
if (cur / marker).exists():
|
||||
return str(cur)
|
||||
except OSError:
|
||||
continue
|
||||
# Stop conditions.
|
||||
if ceiling_path is not None and cur == ceiling_path:
|
||||
return None
|
||||
parent = cur.parent
|
||||
if parent == cur:
|
||||
return None
|
||||
cur = parent
|
||||
return None
|
||||
|
||||
|
||||
def resolve_workspace_for_file(
|
||||
file_path: str,
|
||||
*,
|
||||
cwd: Optional[str] = None,
|
||||
) -> Tuple[Optional[str], bool]:
|
||||
"""Resolve the workspace root for a file.
|
||||
|
||||
Returns ``(workspace_root, gated_in)`` where ``gated_in`` is True
|
||||
iff LSP should run for this file at all. Currently the gate is
|
||||
"file is inside a git worktree found by walking up from cwd OR
|
||||
from the file itself".
|
||||
|
||||
The cwd path takes precedence — if the agent was launched in a
|
||||
git project, that worktree is the workspace, and any edit inside
|
||||
it (regardless of where the file lives) is in-scope. If the cwd
|
||||
isn't in a git worktree, we try the file's own location as a
|
||||
fallback.
|
||||
|
||||
Returns ``(None, False)`` when neither path is in a git worktree.
|
||||
"""
|
||||
cwd = cwd or os.getcwd()
|
||||
cwd_root = find_git_worktree(cwd)
|
||||
if cwd_root is not None:
|
||||
if is_inside_workspace(file_path, cwd_root):
|
||||
return cwd_root, True
|
||||
# File is outside the cwd's worktree — try the file's own
|
||||
# location as a secondary anchor. Useful for monorepos where
|
||||
# the user opens an unrelated checkout.
|
||||
file_root = find_git_worktree(file_path)
|
||||
if file_root is not None:
|
||||
return file_root, True
|
||||
return None, False
|
||||
|
||||
|
||||
def clear_cache() -> None:
|
||||
"""Clear the workspace-resolution cache.
|
||||
|
||||
Called on service shutdown so a subsequent re-init doesn't pick
|
||||
up stale results from a previous session.
|
||||
"""
|
||||
_workspace_cache.clear()
|
||||
|
||||
|
||||
__all__ = [
|
||||
"find_git_worktree",
|
||||
"is_inside_workspace",
|
||||
"nearest_root",
|
||||
"normalize_path",
|
||||
"resolve_workspace_for_file",
|
||||
"clear_cache",
|
||||
]
|
||||
@@ -1,309 +0,0 @@
|
||||
"""CJK/wide-character-aware re-alignment of model-emitted markdown tables.
|
||||
|
||||
Models pad markdown tables assuming each character occupies one terminal
|
||||
cell. CJK glyphs and most emoji render as two cells, so the model's
|
||||
spacing collapses into drift the moment a table reaches a real terminal —
|
||||
header pipes line up, every body row drifts right by N cells per CJK
|
||||
char.
|
||||
|
||||
This module rebuilds row padding using ``wcwidth.wcswidth`` (display
|
||||
columns), preserving the table's pipes and dashes so it still reads as a
|
||||
plain-text table in ``strip`` / unrendered display modes. Standard Rich
|
||||
markdown rendering already aligns CJK correctly inside a wide enough
|
||||
panel; this helper is for the paths that print the model's text more or
|
||||
less verbatim.
|
||||
|
||||
The helper is deliberately conservative:
|
||||
|
||||
* Only contiguous ``| ... |`` blocks with a divider line are rewritten.
|
||||
* Anything that does not look like a table is passed through unchanged.
|
||||
* Single-line / mid-stream fragments are left alone — callers buffer
|
||||
table rows and flush them once the block is complete.
|
||||
|
||||
There is a small, intentional caveat: ``wcwidth`` returns ``-1`` for some
|
||||
emoji-with-variation-selector sequences (e.g. ``⚠️``); we clamp those to
|
||||
0 so they do not corrupt the column width math. The 1-cell drift on
|
||||
those specific glyphs is preferable to silently widening every table
|
||||
that contains one.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import List
|
||||
|
||||
from wcwidth import wcswidth
|
||||
|
||||
__all__ = [
|
||||
"is_table_divider",
|
||||
"looks_like_table_row",
|
||||
"realign_markdown_tables",
|
||||
"split_table_row",
|
||||
]
|
||||
|
||||
|
||||
_DIVIDER_CELL_RE = re.compile(r"^\s*:?-{3,}:?\s*$")
|
||||
_MIN_COL_WIDTH = 3 # matches the divider's minimum dash run.
|
||||
|
||||
|
||||
def _disp_width(s: str) -> int:
|
||||
"""``wcswidth`` clamped to a non-negative integer.
|
||||
|
||||
``wcswidth`` returns ``-1`` when it encounters a control char or an
|
||||
unknown sequence; treat those as zero-width rather than letting a
|
||||
negative number flow into ``max`` and break the column-width math.
|
||||
"""
|
||||
|
||||
w = wcswidth(s)
|
||||
return w if w > 0 else 0
|
||||
|
||||
|
||||
def _pad_to_width(s: str, target: int) -> str:
|
||||
return s + " " * max(0, target - _disp_width(s))
|
||||
|
||||
|
||||
def split_table_row(row: str) -> List[str]:
|
||||
"""Split ``| a | b | c |`` into ``["a", "b", "c"]`` with trims."""
|
||||
|
||||
s = row.strip()
|
||||
if s.startswith("|"):
|
||||
s = s[1:]
|
||||
if s.endswith("|"):
|
||||
s = s[:-1]
|
||||
return [c.strip() for c in s.split("|")]
|
||||
|
||||
|
||||
def is_table_divider(row: str) -> bool:
|
||||
"""True when ``row`` is a markdown table separator line."""
|
||||
|
||||
cells = split_table_row(row)
|
||||
return len(cells) > 1 and all(_DIVIDER_CELL_RE.match(c) for c in cells)
|
||||
|
||||
|
||||
def looks_like_table_row(row: str) -> bool:
|
||||
"""True when ``row`` could plausibly be a markdown table row.
|
||||
|
||||
Used by streaming callers to decide whether to buffer an in-flight
|
||||
line. We are intentionally permissive here — the realigner itself
|
||||
only rewrites blocks that are accompanied by a divider, so a false
|
||||
positive here at most delays the print of one line.
|
||||
"""
|
||||
|
||||
if "|" not in row:
|
||||
return False
|
||||
stripped = row.strip()
|
||||
if not stripped:
|
||||
return False
|
||||
# A leading pipe is the strongest signal; without it we still allow
|
||||
# rows with at least two pipes so models that omit the leading pipe
|
||||
# don't slip past us.
|
||||
if stripped.startswith("|"):
|
||||
return True
|
||||
return stripped.count("|") >= 2
|
||||
|
||||
|
||||
def _render_block(rows: List[List[str]], available_width: int | None = None) -> List[str]:
|
||||
"""Render ``rows`` (header + body, divider implied) at uniform widths.
|
||||
|
||||
If ``available_width`` is given and the rebuilt horizontal table
|
||||
would exceed it, fall back to a vertical key-value rendering so
|
||||
rows do not soft-wrap mid-cell — terminal soft-wrap destroys
|
||||
column alignment visually even when the underlying bytes are
|
||||
perfectly padded, which is exactly the "tables look broken"
|
||||
user report this code path is meant to address.
|
||||
"""
|
||||
|
||||
ncols = max(len(r) for r in rows)
|
||||
rows = [r + [""] * (ncols - len(r)) for r in rows]
|
||||
|
||||
widths = [
|
||||
max(_MIN_COL_WIDTH, *(_disp_width(r[c]) for r in rows))
|
||||
for c in range(ncols)
|
||||
]
|
||||
|
||||
# Total horizontal width for the rendered row:
|
||||
# `| ` + cell + ` ` for each column, plus the final closing `|`.
|
||||
horizontal_width = sum(widths) + 3 * ncols + 1
|
||||
|
||||
if available_width is not None and horizontal_width > max(available_width, 20):
|
||||
return _render_vertical(rows, ncols, available_width)
|
||||
|
||||
def _row(cells: List[str]) -> str:
|
||||
return (
|
||||
"| "
|
||||
+ " | ".join(_pad_to_width(c, widths[k]) for k, c in enumerate(cells))
|
||||
+ " |"
|
||||
)
|
||||
|
||||
out = [_row(rows[0])]
|
||||
out.append("|" + "|".join("-" * (w + 2) for w in widths) + "|")
|
||||
for r in rows[1:]:
|
||||
out.append(_row(r))
|
||||
return out
|
||||
|
||||
|
||||
def _wrap_to_width(text: str, width: int) -> List[str]:
|
||||
"""Soft-wrap ``text`` at word boundaries to fit ``width`` display cells.
|
||||
|
||||
Falls back to hard-breaking the longest word if a single token is
|
||||
wider than ``width``. Empty input yields a single empty string so
|
||||
the caller's row count stays predictable.
|
||||
"""
|
||||
|
||||
if width <= 0 or not text:
|
||||
return [text]
|
||||
|
||||
words = text.split()
|
||||
if not words:
|
||||
return [""]
|
||||
|
||||
lines: List[str] = []
|
||||
current = ""
|
||||
current_w = 0
|
||||
|
||||
def _hard_break(word: str, w: int) -> List[str]:
|
||||
out: List[str] = []
|
||||
buf = ""
|
||||
bw = 0
|
||||
for ch in word:
|
||||
cw = _disp_width(ch) or 1
|
||||
if bw + cw > w and buf:
|
||||
out.append(buf)
|
||||
buf = ch
|
||||
bw = cw
|
||||
else:
|
||||
buf += ch
|
||||
bw += cw
|
||||
if buf:
|
||||
out.append(buf)
|
||||
return out
|
||||
|
||||
for word in words:
|
||||
ww = _disp_width(word)
|
||||
if not current:
|
||||
if ww <= width:
|
||||
current = word
|
||||
current_w = ww
|
||||
else:
|
||||
pieces = _hard_break(word, width)
|
||||
lines.extend(pieces[:-1])
|
||||
current = pieces[-1] if pieces else ""
|
||||
current_w = _disp_width(current)
|
||||
continue
|
||||
if current_w + 1 + ww <= width:
|
||||
current += " " + word
|
||||
current_w += 1 + ww
|
||||
else:
|
||||
lines.append(current)
|
||||
if ww <= width:
|
||||
current = word
|
||||
current_w = ww
|
||||
else:
|
||||
pieces = _hard_break(word, width)
|
||||
lines.extend(pieces[:-1])
|
||||
current = pieces[-1] if pieces else ""
|
||||
current_w = _disp_width(current)
|
||||
if current:
|
||||
lines.append(current)
|
||||
return lines or [""]
|
||||
|
||||
|
||||
def _render_vertical(
|
||||
rows: List[List[str]], ncols: int, available_width: int
|
||||
) -> List[str]:
|
||||
"""Render a too-wide table as vertical ``Header: value`` rows.
|
||||
|
||||
Mirrors Claude Code's narrow-terminal fallback in
|
||||
``MarkdownTable.tsx``: each body row becomes a small block of
|
||||
``Header: cell-value`` lines (continuation lines indented two
|
||||
spaces) separated by a thin ``─`` divider between rows. Keeps
|
||||
every line narrower than ``available_width`` so the terminal does
|
||||
not soft-wrap mid-cell.
|
||||
"""
|
||||
|
||||
if not rows:
|
||||
return []
|
||||
|
||||
headers = rows[0] + [""] * (ncols - len(rows[0]))
|
||||
body = rows[1:]
|
||||
|
||||
labels = [h or f"Column {i + 1}" for i, h in enumerate(headers)]
|
||||
|
||||
sep_width = max(20, min(40, available_width - 2)) if available_width else 30
|
||||
separator = "─" * sep_width
|
||||
indent = " "
|
||||
indent_w = _disp_width(indent)
|
||||
|
||||
out: List[str] = []
|
||||
for ri, row in enumerate(body):
|
||||
if ri > 0:
|
||||
out.append(separator)
|
||||
for ci in range(ncols):
|
||||
label = labels[ci]
|
||||
value = row[ci] if ci < len(row) else ""
|
||||
label_w = _disp_width(label)
|
||||
first_budget = max(10, available_width - label_w - 2)
|
||||
cont_budget = max(10, available_width - indent_w)
|
||||
if not value:
|
||||
out.append(f"{label}:")
|
||||
continue
|
||||
wrapped = _wrap_to_width(value, first_budget)
|
||||
out.append(f"{label}: {wrapped[0]}")
|
||||
if len(wrapped) > 1:
|
||||
# Re-flow continuation text at the wider continuation
|
||||
# budget — words split across the narrower first-line
|
||||
# budget should re-pack greedily for the rest.
|
||||
cont_text = " ".join(wrapped[1:])
|
||||
for cl in _wrap_to_width(cont_text, cont_budget):
|
||||
if cl.strip():
|
||||
out.append(f"{indent}{cl}")
|
||||
return out
|
||||
|
||||
|
||||
def realign_markdown_tables(text: str, available_width: int | None = None) -> str:
|
||||
"""Rewrite every ``| ... |`` + divider block with wcwidth-aware padding.
|
||||
|
||||
Lines that are not part of a recognised table are returned verbatim,
|
||||
so this is safe to apply to arbitrary assistant prose.
|
||||
|
||||
If ``available_width`` is given (terminal cells available for the
|
||||
rendered table), tables wider than that are rendered as vertical
|
||||
key-value pairs instead of a horizontal pipe-bordered grid. This
|
||||
avoids the terminal soft-wrapping mid-cell, which destroys column
|
||||
alignment visually even when the bytes are perfectly padded.
|
||||
"""
|
||||
|
||||
if "|" not in text:
|
||||
return text
|
||||
|
||||
lines = text.split("\n")
|
||||
out: List[str] = []
|
||||
i = 0
|
||||
n = len(lines)
|
||||
|
||||
while i < n:
|
||||
line = lines[i]
|
||||
# A table starts with a header row whose next line is a divider.
|
||||
if (
|
||||
"|" in line
|
||||
and i + 1 < n
|
||||
and is_table_divider(lines[i + 1])
|
||||
):
|
||||
header = split_table_row(line)
|
||||
body: List[List[str]] = []
|
||||
j = i + 2
|
||||
while j < n and "|" in lines[j] and lines[j].strip():
|
||||
if is_table_divider(lines[j]):
|
||||
j += 1
|
||||
continue
|
||||
body.append(split_table_row(lines[j]))
|
||||
j += 1
|
||||
|
||||
if any(c for c in header) or body:
|
||||
out.extend(_render_block([header] + body, available_width))
|
||||
i = j
|
||||
continue
|
||||
out.append(line)
|
||||
i += 1
|
||||
|
||||
return "\n".join(out)
|
||||
@@ -1,231 +0,0 @@
|
||||
"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset.
|
||||
|
||||
Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI
|
||||
tool calling. Requests that violate it fail with HTTP 400:
|
||||
|
||||
tools.function.parameters is not a valid moonshot flavored json schema,
|
||||
details: <...>
|
||||
|
||||
Known rejection modes documented at
|
||||
https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102
|
||||
and MoonshotAI/kimi-cli#1595:
|
||||
|
||||
1. Every property schema must carry a ``type``. Standard JSON Schema allows
|
||||
type to be omitted (the value is then unconstrained); Moonshot refuses.
|
||||
2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
|
||||
the parent. Presence of both causes "type should be defined in anyOf
|
||||
items instead of the parent schema".
|
||||
|
||||
The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
|
||||
handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
|
||||
applies at MCP registration time for all providers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Keys whose values are maps of name → schema (not schemas themselves).
|
||||
# When we recurse, we walk the values of these maps as schemas, but we do
|
||||
# NOT apply the missing-type repair to the map itself.
|
||||
_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"})
|
||||
|
||||
# Keys whose values are lists of schemas.
|
||||
_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"})
|
||||
|
||||
# Keys whose values are a single nested schema.
|
||||
_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"})
|
||||
|
||||
|
||||
def _repair_schema(node: Any, is_schema: bool = True) -> Any:
|
||||
"""Recursively apply Moonshot repairs to a schema node.
|
||||
|
||||
``is_schema=True`` means this dict is a JSON Schema node and gets the
|
||||
missing-type + anyOf-parent repairs applied. ``is_schema=False`` means
|
||||
it's a container map (e.g. the value of ``properties``) and we only
|
||||
recurse into its values.
|
||||
"""
|
||||
if isinstance(node, list):
|
||||
# Lists only show up under schema-list keys (anyOf/oneOf/allOf), so
|
||||
# every element is itself a schema.
|
||||
return [_repair_schema(item, is_schema=True) for item in node]
|
||||
if not isinstance(node, dict):
|
||||
return node
|
||||
|
||||
# Walk the dict, deciding per-key whether recursion is into a schema
|
||||
# node, a container map, or a scalar.
|
||||
repaired: Dict[str, Any] = {}
|
||||
for key, value in node.items():
|
||||
if key in _SCHEMA_MAP_KEYS and isinstance(value, dict):
|
||||
# Map of name → schema. Don't treat the map itself as a schema
|
||||
# (it has no type / properties of its own), but each value is.
|
||||
repaired[key] = {
|
||||
sub_key: _repair_schema(sub_val, is_schema=True)
|
||||
for sub_key, sub_val in value.items()
|
||||
}
|
||||
elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
|
||||
repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
|
||||
elif key in _SCHEMA_NODE_KEYS:
|
||||
# items / not / additionalProperties: single nested schema.
|
||||
# additionalProperties can also be a bool — leave those alone.
|
||||
if isinstance(value, dict):
|
||||
repaired[key] = _repair_schema(value, is_schema=True)
|
||||
else:
|
||||
repaired[key] = value
|
||||
else:
|
||||
# Scalars (description, title, format, enum values, etc.) pass through.
|
||||
repaired[key] = value
|
||||
|
||||
if not is_schema:
|
||||
return repaired
|
||||
|
||||
# Rule 2: when anyOf is present, type belongs only on the children.
|
||||
# Additionally, Moonshot rejects null-type branches inside anyOf
|
||||
# (enum value (<nil>) does not match any type in [string]).
|
||||
# Collapse the anyOf to the first non-null branch and infer its type.
|
||||
if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
|
||||
repaired.pop("type", None)
|
||||
non_null = [b for b in repaired["anyOf"]
|
||||
if isinstance(b, dict) and b.get("type") != "null"]
|
||||
if non_null and len(non_null) < len(repaired["anyOf"]):
|
||||
# Drop the anyOf wrapper — keep only the non-null branch.
|
||||
# If there's a single non-null branch, promote it and fall
|
||||
# through to Rules 1/3 so nullable/enum cleanup still applies
|
||||
# to the merged node.
|
||||
if len(non_null) == 1:
|
||||
merge = {k: v for k, v in repaired.items() if k != "anyOf"}
|
||||
merge.update(non_null[0])
|
||||
repaired = merge
|
||||
else:
|
||||
repaired["anyOf"] = non_null
|
||||
return repaired
|
||||
else:
|
||||
# Nothing to collapse — parent type stripped, children already
|
||||
# repaired by the recursive walk above.
|
||||
return repaired
|
||||
|
||||
# Moonshot also rejects non-standard keywords like ``nullable`` on
|
||||
# parameter schemas — strip it.
|
||||
repaired.pop("nullable", None)
|
||||
|
||||
# Rule 1: property schemas without type need one. $ref nodes are exempt
|
||||
# — their type comes from the referenced definition.
|
||||
# Fill missing type BEFORE Rule 3 so enum cleanup can check the type.
|
||||
if "$ref" not in repaired:
|
||||
repaired = _fill_missing_type(repaired)
|
||||
|
||||
# Rule 3: Moonshot rejects null/empty-string values inside enum arrays
|
||||
# when the parent type is a scalar (string, integer, etc.). The error:
|
||||
# "enum value (<nil>) does not match any type in [string]"
|
||||
# Strip null and empty-string from enum values, and if the enum becomes
|
||||
# empty, drop it entirely.
|
||||
if "enum" in repaired and isinstance(repaired["enum"], list):
|
||||
node_type = repaired.get("type")
|
||||
if node_type in {"string", "integer", "number", "boolean"}:
|
||||
cleaned = [v for v in repaired["enum"]
|
||||
if v is not None and v != ""]
|
||||
if cleaned:
|
||||
repaired["enum"] = cleaned
|
||||
else:
|
||||
repaired.pop("enum")
|
||||
|
||||
return repaired
|
||||
|
||||
|
||||
def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Infer a reasonable ``type`` if this schema node has none."""
|
||||
if "type" in node and node["type"] not in {None, ""}:
|
||||
return node
|
||||
|
||||
# Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
|
||||
# → type of first enum value, else fall back to ``string`` (safest scalar).
|
||||
if "properties" in node or "required" in node or "additionalProperties" in node:
|
||||
inferred = "object"
|
||||
elif "items" in node or "prefixItems" in node:
|
||||
inferred = "array"
|
||||
elif "enum" in node and isinstance(node["enum"], list) and node["enum"]:
|
||||
sample = node["enum"][0]
|
||||
if isinstance(sample, bool):
|
||||
inferred = "boolean"
|
||||
elif isinstance(sample, int):
|
||||
inferred = "integer"
|
||||
elif isinstance(sample, float):
|
||||
inferred = "number"
|
||||
else:
|
||||
inferred = "string"
|
||||
else:
|
||||
inferred = "string"
|
||||
|
||||
return {**node, "type": inferred}
|
||||
|
||||
|
||||
def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]:
|
||||
"""Normalize tool parameters to a Moonshot-compatible object schema.
|
||||
|
||||
Returns a deep-copied schema with the two flavored-JSON-Schema repairs
|
||||
applied. Input is not mutated.
|
||||
"""
|
||||
if not isinstance(parameters, dict):
|
||||
return {"type": "object", "properties": {}}
|
||||
|
||||
repaired = _repair_schema(copy.deepcopy(parameters), is_schema=True)
|
||||
if not isinstance(repaired, dict):
|
||||
return {"type": "object", "properties": {}}
|
||||
|
||||
# Top-level must be an object schema
|
||||
if repaired.get("type") != "object":
|
||||
repaired["type"] = "object"
|
||||
if "properties" not in repaired:
|
||||
repaired["properties"] = {}
|
||||
|
||||
return repaired
|
||||
|
||||
|
||||
def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters."""
|
||||
if not tools:
|
||||
return tools
|
||||
|
||||
sanitized: List[Dict[str, Any]] = []
|
||||
any_change = False
|
||||
for tool in tools:
|
||||
if not isinstance(tool, dict):
|
||||
sanitized.append(tool)
|
||||
continue
|
||||
fn = tool.get("function")
|
||||
if not isinstance(fn, dict):
|
||||
sanitized.append(tool)
|
||||
continue
|
||||
params = fn.get("parameters")
|
||||
repaired = sanitize_moonshot_tool_parameters(params)
|
||||
if repaired is not params:
|
||||
any_change = True
|
||||
new_fn = {**fn, "parameters": repaired}
|
||||
sanitized.append({**tool, "function": new_fn})
|
||||
else:
|
||||
sanitized.append(tool)
|
||||
|
||||
return sanitized if any_change else tools
|
||||
|
||||
|
||||
def is_moonshot_model(model: str | None) -> bool:
|
||||
"""True for any Kimi / Moonshot model slug, regardless of aggregator prefix.
|
||||
|
||||
Matches bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) and aggregator-
|
||||
prefixed slugs (``nous/moonshotai/kimi-k2.6``, ``openrouter/moonshotai/...``).
|
||||
Detection by model name covers Nous / OpenRouter / other aggregators that
|
||||
route to Moonshot's inference, where the base URL is the aggregator's, not
|
||||
``api.moonshot.ai``.
|
||||
"""
|
||||
if not model:
|
||||
return False
|
||||
bare = model.strip().lower()
|
||||
# Last path segment (covers aggregator-prefixed slugs)
|
||||
tail = bare.rsplit("/", 1)[-1]
|
||||
if tail.startswith("kimi-") or tail == "kimi":
|
||||
return True
|
||||
# Vendor-prefixed forms commonly used on aggregators
|
||||
if "moonshot" in bare or "/kimi" in bare or bare.startswith("kimi"):
|
||||
return True
|
||||
return False
|
||||
@@ -1,325 +0,0 @@
|
||||
"""Cross-session rate limit guard for Nous Portal.
|
||||
|
||||
Writes rate limit state to a shared file so all sessions (CLI, gateway,
|
||||
cron, auxiliary) can check whether Nous Portal is currently rate-limited
|
||||
before making requests. Prevents retry amplification when RPH is tapped.
|
||||
|
||||
Each 429 from Nous triggers up to 9 API calls per conversation turn
|
||||
(3 SDK retries x 3 Hermes retries), and every one of those calls counts
|
||||
against RPH. By recording the rate limit state on first 429 and checking
|
||||
it before subsequent attempts, we eliminate the amplification effect.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
import time
|
||||
from typing import Any, Mapping, Optional
|
||||
from utils import atomic_replace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_STATE_SUBDIR = "rate_limits"
|
||||
_STATE_FILENAME = "nous.json"
|
||||
|
||||
|
||||
def _state_path() -> str:
|
||||
"""Return the path to the Nous rate limit state file."""
|
||||
try:
|
||||
from hermes_constants import get_hermes_home
|
||||
base = get_hermes_home()
|
||||
except ImportError:
|
||||
base = os.path.join(os.path.expanduser("~"), ".hermes")
|
||||
return os.path.join(base, _STATE_SUBDIR, _STATE_FILENAME)
|
||||
|
||||
|
||||
def _parse_reset_seconds(headers: Optional[Mapping[str, str]]) -> Optional[float]:
|
||||
"""Extract the best available reset-time estimate from response headers.
|
||||
|
||||
Priority:
|
||||
1. x-ratelimit-reset-requests-1h (hourly RPH window — most useful)
|
||||
2. x-ratelimit-reset-requests (per-minute RPM window)
|
||||
3. retry-after (generic HTTP header)
|
||||
|
||||
Returns seconds-from-now, or None if no usable header found.
|
||||
"""
|
||||
if not headers:
|
||||
return None
|
||||
|
||||
lowered = {k.lower(): v for k, v in headers.items()}
|
||||
|
||||
for key in (
|
||||
"x-ratelimit-reset-requests-1h",
|
||||
"x-ratelimit-reset-requests",
|
||||
"retry-after",
|
||||
):
|
||||
raw = lowered.get(key)
|
||||
if raw is not None:
|
||||
try:
|
||||
val = float(raw)
|
||||
if val > 0:
|
||||
return val
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def record_nous_rate_limit(
|
||||
*,
|
||||
headers: Optional[Mapping[str, str]] = None,
|
||||
error_context: Optional[dict[str, Any]] = None,
|
||||
default_cooldown: float = 300.0,
|
||||
) -> None:
|
||||
"""Record that Nous Portal is rate-limited.
|
||||
|
||||
Parses the reset time from response headers or error context.
|
||||
Falls back to ``default_cooldown`` (5 minutes) if no reset info
|
||||
is available. Writes to a shared file that all sessions can read.
|
||||
|
||||
Args:
|
||||
headers: HTTP response headers from the 429 error.
|
||||
error_context: Structured error context from _extract_api_error_context().
|
||||
default_cooldown: Fallback cooldown in seconds when no header data.
|
||||
"""
|
||||
now = time.time()
|
||||
reset_at = None
|
||||
|
||||
# Try headers first (most accurate)
|
||||
header_seconds = _parse_reset_seconds(headers)
|
||||
if header_seconds is not None:
|
||||
reset_at = now + header_seconds
|
||||
|
||||
# Try error_context reset_at (from body parsing)
|
||||
if reset_at is None and isinstance(error_context, dict):
|
||||
ctx_reset = error_context.get("reset_at")
|
||||
if isinstance(ctx_reset, (int, float)) and ctx_reset > now:
|
||||
reset_at = float(ctx_reset)
|
||||
|
||||
# Default cooldown
|
||||
if reset_at is None:
|
||||
reset_at = now + default_cooldown
|
||||
|
||||
path = _state_path()
|
||||
try:
|
||||
state_dir = os.path.dirname(path)
|
||||
os.makedirs(state_dir, exist_ok=True)
|
||||
|
||||
state = {
|
||||
"reset_at": reset_at,
|
||||
"recorded_at": now,
|
||||
"reset_seconds": reset_at - now,
|
||||
}
|
||||
|
||||
# Atomic write: write to temp file + rename
|
||||
fd, tmp_path = tempfile.mkstemp(dir=state_dir, suffix=".tmp")
|
||||
try:
|
||||
with os.fdopen(fd, "w") as f:
|
||||
json.dump(state, f)
|
||||
atomic_replace(tmp_path, path)
|
||||
except Exception:
|
||||
# Clean up temp file on failure
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
|
||||
logger.info(
|
||||
"Nous rate limit recorded: resets in %.0fs (at %.0f)",
|
||||
reset_at - now, reset_at,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to write Nous rate limit state: %s", exc)
|
||||
|
||||
|
||||
def nous_rate_limit_remaining() -> Optional[float]:
|
||||
"""Check if Nous Portal is currently rate-limited.
|
||||
|
||||
Returns:
|
||||
Seconds remaining until reset, or None if not rate-limited.
|
||||
"""
|
||||
path = _state_path()
|
||||
try:
|
||||
with open(path, encoding="utf-8") as f:
|
||||
state = json.load(f)
|
||||
reset_at = state.get("reset_at", 0)
|
||||
remaining = reset_at - time.time()
|
||||
if remaining > 0:
|
||||
return remaining
|
||||
# Expired — clean up
|
||||
try:
|
||||
os.unlink(path)
|
||||
except OSError:
|
||||
pass
|
||||
return None
|
||||
except (FileNotFoundError, json.JSONDecodeError, KeyError, TypeError):
|
||||
return None
|
||||
|
||||
|
||||
def clear_nous_rate_limit() -> None:
|
||||
"""Clear the rate limit state (e.g., after a successful Nous request)."""
|
||||
try:
|
||||
os.unlink(_state_path())
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
except OSError as exc:
|
||||
logger.debug("Failed to clear Nous rate limit state: %s", exc)
|
||||
|
||||
|
||||
def format_remaining(seconds: float) -> str:
|
||||
"""Format seconds remaining into human-readable duration."""
|
||||
s = max(0, int(seconds))
|
||||
if s < 60:
|
||||
return f"{s}s"
|
||||
if s < 3600:
|
||||
m, sec = divmod(s, 60)
|
||||
return f"{m}m {sec}s" if sec else f"{m}m"
|
||||
h, remainder = divmod(s, 3600)
|
||||
m = remainder // 60
|
||||
return f"{h}h {m}m" if m else f"{h}h"
|
||||
|
||||
|
||||
# Buckets with reset windows shorter than this are treated as transient
|
||||
# (upstream jitter, secondary throttling) rather than a genuine quota
|
||||
# exhaustion worth a cross-session breaker trip.
|
||||
_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
|
||||
|
||||
|
||||
def is_genuine_nous_rate_limit(
|
||||
*,
|
||||
headers: Optional[Mapping[str, str]] = None,
|
||||
last_known_state: Optional[Any] = None,
|
||||
) -> bool:
|
||||
"""Decide whether a 429 from Nous Portal is a real account rate limit.
|
||||
|
||||
Nous Portal multiplexes multiple upstream providers (DeepSeek, Kimi,
|
||||
MiMo, Hermes, ...) behind one endpoint. A 429 can mean either:
|
||||
|
||||
(a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is
|
||||
exhausted — a genuine rate limit that will last until the
|
||||
bucket resets.
|
||||
(b) The upstream provider is out of capacity for a specific model
|
||||
— transient, clears in seconds, and has nothing to do with
|
||||
the caller's quota on Nous.
|
||||
|
||||
Tripping the cross-session breaker on (b) blocks ALL Nous requests
|
||||
(and all models, since Nous is one provider key) for minutes even
|
||||
though the caller's account is healthy and a different model would
|
||||
have worked. That's the bug users hit when DeepSeek V4 Pro 429s
|
||||
trigger a breaker that then blocks Kimi 2.6 and MiMo V2.5 Pro.
|
||||
|
||||
We tell the two apart by looking at:
|
||||
|
||||
1. The 429 response's own ``x-ratelimit-*`` headers. Nous emits
|
||||
the full suite on every response including 429s. An exhausted
|
||||
bucket (``remaining == 0`` with a reset window >= 60s) is
|
||||
proof of (a).
|
||||
2. The last-known-good rate-limit state captured by
|
||||
``_capture_rate_limits()`` on the previous successful
|
||||
response. If any bucket there was already near-exhausted with
|
||||
a substantial reset window, the current 429 is almost
|
||||
certainly (a) continuing from that condition.
|
||||
|
||||
If neither signal fires, we treat the 429 as (b): fail the single
|
||||
request, let the retry loop or model-switch proceed, and do NOT
|
||||
write the cross-session breaker file.
|
||||
|
||||
Returns True when the evidence points at (a).
|
||||
"""
|
||||
# Signal 1: current 429 response headers.
|
||||
state = _parse_buckets_from_headers(headers)
|
||||
if _has_exhausted_bucket(state):
|
||||
return True
|
||||
|
||||
# Signal 2: last-known-good state from a recent successful response.
|
||||
# Accepts either a RateLimitState (dataclass from rate_limit_tracker)
|
||||
# or a dict of bucket snapshots.
|
||||
if last_known_state is not None and _has_exhausted_bucket_in_object(last_known_state):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _parse_buckets_from_headers(
|
||||
headers: Optional[Mapping[str, str]],
|
||||
) -> dict[str, tuple[Optional[int], Optional[float]]]:
|
||||
"""Extract (remaining, reset_seconds) per bucket from x-ratelimit-* headers.
|
||||
|
||||
Returns empty dict when no rate-limit headers are present.
|
||||
"""
|
||||
if not headers:
|
||||
return {}
|
||||
|
||||
lowered = {k.lower(): v for k, v in headers.items()}
|
||||
if not any(k.startswith("x-ratelimit-") for k in lowered):
|
||||
return {}
|
||||
|
||||
def _maybe_int(raw: Optional[str]) -> Optional[int]:
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
return int(float(raw))
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
def _maybe_float(raw: Optional[str]) -> Optional[float]:
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
return float(raw)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
result: dict[str, tuple[Optional[int], Optional[float]]] = {}
|
||||
for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
|
||||
remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
|
||||
reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
|
||||
if remaining is not None or reset is not None:
|
||||
result[tag] = (remaining, reset)
|
||||
return result
|
||||
|
||||
|
||||
def _has_exhausted_bucket(
|
||||
buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
|
||||
) -> bool:
|
||||
"""Return True when any bucket has remaining == 0 AND a meaningful reset window."""
|
||||
for remaining, reset in buckets.values():
|
||||
if remaining is None or remaining > 0:
|
||||
continue
|
||||
if reset is None:
|
||||
continue
|
||||
if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _has_exhausted_bucket_in_object(state: Any) -> bool:
|
||||
"""Check a RateLimitState-like object for an exhausted bucket.
|
||||
|
||||
Accepts the dataclass from ``agent.rate_limit_tracker`` (buckets
|
||||
exposed as attributes ``requests_min``, ``requests_hour``,
|
||||
``tokens_min``, ``tokens_hour``) and falls back gracefully for any
|
||||
object missing those attributes.
|
||||
"""
|
||||
for attr in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"):
|
||||
bucket = getattr(state, attr, None)
|
||||
if bucket is None:
|
||||
continue
|
||||
limit = getattr(bucket, "limit", 0) or 0
|
||||
remaining = getattr(bucket, "remaining", 0) or 0
|
||||
# Prefer the adjusted "remaining_seconds_now" property when present;
|
||||
# fall back to raw reset_seconds.
|
||||
reset = getattr(bucket, "remaining_seconds_now", None)
|
||||
if reset is None:
|
||||
reset = getattr(bucket, "reset_seconds", 0.0) or 0.0
|
||||
if limit <= 0:
|
||||
continue
|
||||
if remaining > 0:
|
||||
continue
|
||||
if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
|
||||
return True
|
||||
return False
|
||||
@@ -1,193 +0,0 @@
|
||||
"""
|
||||
Contextual first-touch onboarding hints.
|
||||
|
||||
Instead of blocking first-run questionnaires, show a one-time hint the *first*
|
||||
time a user hits a behavior fork — message-while-running, first long-running
|
||||
tool, etc. Each hint is shown once per install (tracked in ``config.yaml`` under
|
||||
``onboarding.seen.<flag>``) and then never again.
|
||||
|
||||
Keep this module tiny and dependency-free so both the CLI and gateway can import
|
||||
it without pulling in heavy modules.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Mapping, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Flag names (stable — used as config.yaml keys under onboarding.seen)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
BUSY_INPUT_FLAG = "busy_input_prompt"
|
||||
TOOL_PROGRESS_FLAG = "tool_progress_prompt"
|
||||
OPENCLAW_RESIDUE_FLAG = "openclaw_residue_cleanup"
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Hint content
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def busy_input_hint_gateway(mode: str) -> str:
|
||||
"""Hint shown the first time a user messages while the agent is busy.
|
||||
|
||||
``mode`` is the effective busy_input_mode that was just applied, so the
|
||||
message matches reality ("I just interrupted…" vs "I just queued…").
|
||||
"""
|
||||
if mode == "queue":
|
||||
return (
|
||||
"💡 First-time tip — I queued your message instead of interrupting. "
|
||||
"Send `/busy interrupt` to make new messages stop the current task "
|
||||
"immediately, or `/busy status` to check. This notice won't appear again."
|
||||
)
|
||||
if mode == "steer":
|
||||
return (
|
||||
"💡 First-time tip — I steered your message into the current run; "
|
||||
"it will arrive after the next tool call instead of interrupting. "
|
||||
"Send `/busy interrupt` or `/busy queue` to change this, or "
|
||||
"`/busy status` to check. This notice won't appear again."
|
||||
)
|
||||
return (
|
||||
"💡 First-time tip — I just interrupted my current task to answer you. "
|
||||
"Send `/busy queue` to queue follow-ups for after the current task instead, "
|
||||
"`/busy steer` to inject them mid-run without interrupting, or "
|
||||
"`/busy status` to check. This notice won't appear again."
|
||||
)
|
||||
|
||||
|
||||
def busy_input_hint_cli(mode: str) -> str:
|
||||
"""CLI version of the busy-input hint (plain text, no markdown)."""
|
||||
if mode == "queue":
|
||||
return (
|
||||
"(tip) Your message was queued for the next turn. "
|
||||
"Use /busy interrupt to make Enter stop the current run instead, "
|
||||
"or /busy steer to inject mid-run. This tip only shows once."
|
||||
)
|
||||
if mode == "steer":
|
||||
return (
|
||||
"(tip) Your message was steered into the current run; it arrives "
|
||||
"after the next tool call. Use /busy interrupt or /busy queue to "
|
||||
"change this. This tip only shows once."
|
||||
)
|
||||
return (
|
||||
"(tip) Your message interrupted the current run. "
|
||||
"Use /busy queue to queue messages for the next turn instead, "
|
||||
"or /busy steer to inject mid-run. This tip only shows once."
|
||||
)
|
||||
|
||||
|
||||
def tool_progress_hint_gateway() -> str:
|
||||
return (
|
||||
"💡 First-time tip — that tool took a while and I'm streaming every step. "
|
||||
"If the progress messages feel noisy, send `/verbose` to cycle modes "
|
||||
"(all → new → off). This notice won't appear again."
|
||||
)
|
||||
|
||||
|
||||
def tool_progress_hint_cli() -> str:
|
||||
return (
|
||||
"(tip) That tool ran for a while. Use /verbose to cycle tool-progress "
|
||||
"display modes (all -> new -> off -> verbose). This tip only shows once."
|
||||
)
|
||||
|
||||
|
||||
def openclaw_residue_hint_cli() -> str:
|
||||
"""Banner shown the first time Hermes starts and finds ``~/.openclaw/``.
|
||||
|
||||
Points users at ``hermes claw migrate`` (non-destructive port of config,
|
||||
memory, and skills) first. ``hermes claw cleanup`` is mentioned as the
|
||||
follow-up step for users who have already migrated and want to archive
|
||||
the old directory — with a warning that archiving breaks OpenClaw.
|
||||
"""
|
||||
return (
|
||||
"A legacy OpenClaw directory was detected at ~/.openclaw/.\n"
|
||||
"To port your config, memory, and skills over to Hermes, run "
|
||||
"`hermes claw migrate`.\n"
|
||||
"If you've already migrated and want to archive the old directory, "
|
||||
"run `hermes claw cleanup` (renames it to ~/.openclaw.pre-migration — "
|
||||
"OpenClaw will stop working after this).\n"
|
||||
"This tip only shows once."
|
||||
)
|
||||
|
||||
|
||||
def detect_openclaw_residue(home: Optional[Path] = None) -> bool:
|
||||
"""Return True if an OpenClaw workspace directory is present in ``$HOME``.
|
||||
|
||||
Pure filesystem check — no side effects. ``home`` override exists for tests.
|
||||
"""
|
||||
base = home or Path.home()
|
||||
try:
|
||||
return (base / ".openclaw").is_dir()
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# State read / write
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
|
||||
onboarding = config.get("onboarding") if isinstance(config, Mapping) else None
|
||||
if not isinstance(onboarding, Mapping):
|
||||
return {}
|
||||
seen = onboarding.get("seen")
|
||||
return seen if isinstance(seen, Mapping) else {}
|
||||
|
||||
|
||||
def is_seen(config: Mapping[str, Any], flag: str) -> bool:
|
||||
"""Return True if the user has already been shown this first-touch hint."""
|
||||
return bool(_get_seen_dict(config).get(flag))
|
||||
|
||||
|
||||
def mark_seen(config_path: Path, flag: str) -> bool:
|
||||
"""Persist ``onboarding.seen.<flag> = True`` to ``config_path``.
|
||||
|
||||
Uses the atomic YAML writer so a concurrent process can't observe a
|
||||
partially-written file. Returns True on success, False on any error
|
||||
(including the config file being absent — onboarding is best-effort).
|
||||
"""
|
||||
try:
|
||||
import yaml
|
||||
from utils import atomic_yaml_write
|
||||
except Exception as e: # pragma: no cover — dependency issue
|
||||
logger.debug("onboarding: failed to import yaml/utils: %s", e)
|
||||
return False
|
||||
|
||||
try:
|
||||
cfg: dict = {}
|
||||
if config_path.exists():
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
cfg = yaml.safe_load(f) or {}
|
||||
if not isinstance(cfg.get("onboarding"), dict):
|
||||
cfg["onboarding"] = {}
|
||||
seen = cfg["onboarding"].get("seen")
|
||||
if not isinstance(seen, dict):
|
||||
seen = {}
|
||||
cfg["onboarding"]["seen"] = seen
|
||||
if seen.get(flag) is True:
|
||||
return True # already marked — nothing to do
|
||||
seen[flag] = True
|
||||
atomic_yaml_write(config_path, cfg)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.debug("onboarding: failed to mark flag %s: %s", flag, e)
|
||||
return False
|
||||
|
||||
|
||||
__all__ = [
|
||||
"BUSY_INPUT_FLAG",
|
||||
"TOOL_PROGRESS_FLAG",
|
||||
"OPENCLAW_RESIDUE_FLAG",
|
||||
"busy_input_hint_gateway",
|
||||
"busy_input_hint_cli",
|
||||
"tool_progress_hint_gateway",
|
||||
"tool_progress_hint_cli",
|
||||
"openclaw_residue_hint_cli",
|
||||
"detect_openclaw_residue",
|
||||
"is_seen",
|
||||
"mark_seen",
|
||||
]
|
||||
1046
agent/plugin_llm.py
1046
agent/plugin_llm.py
File diff suppressed because it is too large
Load Diff
@@ -1,64 +0,0 @@
|
||||
"""Centralized Nous Portal request tags.
|
||||
|
||||
Every Hermes request that hits the Nous Portal — main agent loop, auxiliary
|
||||
client (compression / titles / vision / web_extract / session_search / etc.),
|
||||
and any future code path — must carry the same product-attribution tags so
|
||||
Nous can attribute usage to Hermes Agent and bucket it by client release.
|
||||
|
||||
Tag shape (sent in OpenAI-compatible ``extra_body['tags']``):
|
||||
|
||||
[
|
||||
"product=hermes-agent",
|
||||
"client=hermes-client-v<__version__>",
|
||||
]
|
||||
|
||||
The version is sourced live from ``hermes_cli.__version__`` so it auto-aligns
|
||||
to whatever release is installed; the release script
|
||||
(``scripts/release.py``) regex-bumps that single string, and every Portal
|
||||
request picks up the new tag on the next process start.
|
||||
|
||||
Why one helper instead of inlining the literal at each site:
|
||||
* Four call sites (main loop profile, aux client, run_agent compression
|
||||
fallback, web_tools fallback) used to drift apart — see PR #24194 which
|
||||
only got the aux site, leaving the main loop sending a different tag set.
|
||||
* Tests should assert the same tag list everywhere; centralizing makes that
|
||||
assertion a one-liner against this module.
|
||||
|
||||
Do NOT pre-compute these as module-level constants in the consumers. The
|
||||
version can change at runtime (editable installs, hot-reload tooling), and
|
||||
``hermes_cli.__version__`` is the canonical source of truth.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import List
|
||||
|
||||
|
||||
def _hermes_version() -> str:
|
||||
"""Return the current Hermes release version, e.g. ``"0.13.0"``.
|
||||
|
||||
Falls back to ``"unknown"`` if ``hermes_cli`` cannot be imported (should
|
||||
never happen in a real install — guarded for defensive testing).
|
||||
"""
|
||||
try:
|
||||
from hermes_cli import __version__
|
||||
return __version__
|
||||
except Exception:
|
||||
return "unknown"
|
||||
|
||||
|
||||
def hermes_client_tag() -> str:
|
||||
"""Return the ``client=...`` tag for Nous Portal requests.
|
||||
|
||||
Format: ``client=hermes-client-v<MAJOR>.<MINOR>.<PATCH>``.
|
||||
"""
|
||||
return f"client=hermes-client-v{_hermes_version()}"
|
||||
|
||||
|
||||
def nous_portal_tags() -> List[str]:
|
||||
"""Return the canonical list of Nous Portal product tags.
|
||||
|
||||
Always returns a fresh list so callers can mutate it freely
|
||||
(e.g. ``merged_extra.setdefault("tags", []).extend(nous_portal_tags())``).
|
||||
"""
|
||||
return ["product=hermes-agent", hermes_client_tag()]
|
||||
@@ -1,131 +0,0 @@
|
||||
"""Shared SKILL.md preprocessing helpers."""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
|
||||
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
|
||||
# left as-is so the user can debug them.
|
||||
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
|
||||
|
||||
# Matches inline shell snippets like: !`date +%Y-%m-%d`
|
||||
# Non-greedy, single-line only -- no newlines inside the backticks.
|
||||
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
|
||||
|
||||
# Cap inline-shell output so a runaway command can't blow out the context.
|
||||
_INLINE_SHELL_MAX_OUTPUT = 4000
|
||||
|
||||
|
||||
def load_skills_config() -> dict:
|
||||
"""Load the ``skills`` section of config.yaml (best-effort)."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
cfg = load_config() or {}
|
||||
skills_cfg = cfg.get("skills")
|
||||
if isinstance(skills_cfg, dict):
|
||||
return skills_cfg
|
||||
except Exception:
|
||||
logger.debug("Could not read skills config", exc_info=True)
|
||||
return {}
|
||||
|
||||
|
||||
def substitute_template_vars(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
session_id: str | None,
|
||||
) -> str:
|
||||
"""Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
|
||||
|
||||
Only substitutes tokens for which a concrete value is available --
|
||||
unresolved tokens are left in place so the author can spot them.
|
||||
"""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
skill_dir_str = str(skill_dir) if skill_dir else None
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
token = match.group(1)
|
||||
if token == "HERMES_SKILL_DIR" and skill_dir_str:
|
||||
return skill_dir_str
|
||||
if token == "HERMES_SESSION_ID" and session_id:
|
||||
return str(session_id)
|
||||
return match.group(0)
|
||||
|
||||
return _SKILL_TEMPLATE_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
|
||||
"""Execute a single inline-shell snippet and return its stdout (trimmed).
|
||||
|
||||
Failures return a short ``[inline-shell error: ...]`` marker instead of
|
||||
raising, so one bad snippet can't wreck the whole skill message.
|
||||
"""
|
||||
try:
|
||||
completed = subprocess.run(
|
||||
["bash", "-c", command],
|
||||
cwd=str(cwd) if cwd else None,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=max(1, int(timeout)),
|
||||
check=False,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
return f"[inline-shell timeout after {timeout}s: {command}]"
|
||||
except FileNotFoundError:
|
||||
return "[inline-shell error: bash not found]"
|
||||
except Exception as exc:
|
||||
return f"[inline-shell error: {exc}]"
|
||||
|
||||
output = (completed.stdout or "").rstrip("\n")
|
||||
if not output and completed.stderr:
|
||||
output = completed.stderr.rstrip("\n")
|
||||
if len(output) > _INLINE_SHELL_MAX_OUTPUT:
|
||||
output = output[:_INLINE_SHELL_MAX_OUTPUT] + "...[truncated]"
|
||||
return output
|
||||
|
||||
|
||||
def expand_inline_shell(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
timeout: int,
|
||||
) -> str:
|
||||
"""Replace every !`cmd` snippet in ``content`` with its stdout.
|
||||
|
||||
Runs each snippet with the skill directory as CWD so relative paths in
|
||||
the snippet work the way the author expects.
|
||||
"""
|
||||
if "!`" not in content:
|
||||
return content
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
cmd = match.group(1).strip()
|
||||
if not cmd:
|
||||
return ""
|
||||
return run_inline_shell(cmd, skill_dir, timeout)
|
||||
|
||||
return _INLINE_SHELL_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def preprocess_skill_content(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
session_id: str | None = None,
|
||||
skills_cfg: dict | None = None,
|
||||
) -> str:
|
||||
"""Apply configured SKILL.md template and inline-shell preprocessing."""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
cfg = skills_cfg if isinstance(skills_cfg, dict) else load_skills_config()
|
||||
if cfg.get("template_vars", True):
|
||||
content = substitute_template_vars(content, skill_dir, session_id)
|
||||
if cfg.get("inline_shell", False):
|
||||
timeout = int(cfg.get("inline_shell_timeout", 10) or 10)
|
||||
content = expand_inline_shell(content, skill_dir, timeout)
|
||||
return content
|
||||
@@ -1,386 +0,0 @@
|
||||
"""Stateful scrubber for reasoning/thinking blocks in streamed assistant text.
|
||||
|
||||
``run_agent._strip_think_blocks`` is regex-based and correct for a complete
|
||||
string, but when it runs *per-delta* in ``_fire_stream_delta`` it destroys
|
||||
the state that downstream consumers (CLI ``_stream_delta``, gateway
|
||||
``GatewayStreamConsumer._filter_and_accumulate``) rely on.
|
||||
|
||||
Concretely, when MiniMax-M2.7 streams
|
||||
|
||||
delta1 = "<think>"
|
||||
delta2 = "Let me check their config"
|
||||
delta3 = "</think>"
|
||||
|
||||
the per-delta regex erases delta1 entirely (case 2: unterminated-open at
|
||||
boundary matches ``^<think>...``), so the downstream state machine never
|
||||
sees the open tag, treats delta2 as regular content, and leaks reasoning
|
||||
to the user. Consumers that don't run their own state machine (ACP,
|
||||
api_server, TTS) never had any defence at all — they just emitted
|
||||
whatever survived the upstream regex.
|
||||
|
||||
This module centralises the tag-suppression state machine at the
|
||||
upstream layer so every stream_delta_callback sees text that has
|
||||
already had reasoning blocks removed. Partial tags at delta
|
||||
boundaries are held back until the next delta resolves them, and
|
||||
end-of-stream flushing surfaces any held-back prose that turned out
|
||||
not to be a real tag.
|
||||
|
||||
Usage::
|
||||
|
||||
scrubber = StreamingThinkScrubber()
|
||||
for delta in stream:
|
||||
visible = scrubber.feed(delta)
|
||||
if visible:
|
||||
emit(visible)
|
||||
tail = scrubber.flush() # at end of stream
|
||||
if tail:
|
||||
emit(tail)
|
||||
|
||||
The scrubber is re-entrant per agent instance. Call ``reset()`` at
|
||||
the top of each new turn so a hung block from an interrupted prior
|
||||
stream cannot taint the next turn's output.
|
||||
|
||||
Tag variants handled (case-insensitive):
|
||||
``<think>``, ``<thinking>``, ``<reasoning>``, ``<thought>``,
|
||||
``<REASONING_SCRATCHPAD>``.
|
||||
|
||||
Block-boundary rule for opens: an opening tag is only treated as a
|
||||
reasoning-block opener when it appears at the start of the stream,
|
||||
after a newline (optionally followed by whitespace), or when only
|
||||
whitespace has been emitted on the current line. This prevents prose
|
||||
that *mentions* the tag name (e.g. ``"use <think> tags here"``) from
|
||||
being incorrectly suppressed. Closed pairs (``<think>X</think>``) are
|
||||
always suppressed regardless of boundary; a closed pair is an
|
||||
intentional, bounded construct.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
__all__ = ["StreamingThinkScrubber"]
|
||||
|
||||
|
||||
class StreamingThinkScrubber:
|
||||
"""Stateful scrubber for streaming reasoning/thinking blocks.
|
||||
|
||||
State machine:
|
||||
- ``_in_block``: True while inside an opened block, waiting for
|
||||
a close tag. All text inside is discarded.
|
||||
- ``_buf``: held-back partial-tag tail. Emitted / discarded on
|
||||
the next ``feed()`` call or by ``flush()``.
|
||||
- ``_last_emitted_ended_newline``: True iff the most recent
|
||||
emission to the consumer ended with ``\\n``, or nothing has
|
||||
been emitted yet (start-of-stream counts as a boundary). Used
|
||||
to decide whether an open tag at buffer position 0 is at a
|
||||
block boundary.
|
||||
"""
|
||||
|
||||
_OPEN_TAG_NAMES: Tuple[str, ...] = (
|
||||
"think",
|
||||
"thinking",
|
||||
"reasoning",
|
||||
"thought",
|
||||
"REASONING_SCRATCHPAD",
|
||||
)
|
||||
|
||||
# Materialise literal tag strings so the hot path does string
|
||||
# operations, not regex compilation per feed().
|
||||
_OPEN_TAGS: Tuple[str, ...] = tuple(f"<{name}>" for name in _OPEN_TAG_NAMES)
|
||||
_CLOSE_TAGS: Tuple[str, ...] = tuple(f"</{name}>" for name in _OPEN_TAG_NAMES)
|
||||
|
||||
# Pre-compute the longest tag (for partial-tag hold-back bound).
|
||||
_MAX_TAG_LEN: int = max(len(tag) for tag in _OPEN_TAGS + _CLOSE_TAGS)
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._in_block: bool = False
|
||||
self._buf: str = ""
|
||||
self._last_emitted_ended_newline: bool = True
|
||||
|
||||
def reset(self) -> None:
|
||||
"""Reset all state. Call at the top of every new turn."""
|
||||
self._in_block = False
|
||||
self._buf = ""
|
||||
self._last_emitted_ended_newline = True
|
||||
|
||||
def feed(self, text: str) -> str:
|
||||
"""Feed one delta; return the scrubbed visible portion.
|
||||
|
||||
May return an empty string when the entire delta is reasoning
|
||||
content or is being held back pending resolution of a partial
|
||||
tag at the boundary.
|
||||
"""
|
||||
if not text:
|
||||
return ""
|
||||
buf = self._buf + text
|
||||
self._buf = ""
|
||||
out: list[str] = []
|
||||
|
||||
while buf:
|
||||
if self._in_block:
|
||||
# Hunt for the earliest close tag.
|
||||
close_idx, close_len = self._find_first_tag(
|
||||
buf, self._CLOSE_TAGS,
|
||||
)
|
||||
if close_idx == -1:
|
||||
# No close yet — hold back a potential partial
|
||||
# close-tag prefix; discard everything else.
|
||||
held = self._max_partial_suffix(buf, self._CLOSE_TAGS)
|
||||
self._buf = buf[-held:] if held else ""
|
||||
return "".join(out)
|
||||
# Found close: discard block content + tag, continue.
|
||||
buf = buf[close_idx + close_len:]
|
||||
self._in_block = False
|
||||
else:
|
||||
# Priority 1 — closed <tag>X</tag> pair anywhere in
|
||||
# buf. Closed pairs are always an intentional,
|
||||
# bounded construct (even mid-line prose containing
|
||||
# an open/close pair is almost certainly a model
|
||||
# leaking reasoning inline), so no boundary gating.
|
||||
pair = self._find_earliest_closed_pair(buf)
|
||||
# Priority 2 — unterminated open tag at a block
|
||||
# boundary. Boundary-gated so prose that mentions
|
||||
# '<think>' isn't over-stripped.
|
||||
open_idx, open_len = self._find_open_at_boundary(
|
||||
buf, out,
|
||||
)
|
||||
|
||||
# Pick whichever match comes earliest in the buffer.
|
||||
if pair is not None and (
|
||||
open_idx == -1 or pair[0] <= open_idx
|
||||
):
|
||||
start_idx, end_idx = pair
|
||||
preceding = buf[:start_idx]
|
||||
if preceding:
|
||||
preceding = self._strip_orphan_close_tags(preceding)
|
||||
if preceding:
|
||||
out.append(preceding)
|
||||
self._last_emitted_ended_newline = (
|
||||
preceding.endswith("\n")
|
||||
)
|
||||
buf = buf[end_idx:]
|
||||
continue
|
||||
|
||||
if open_idx != -1:
|
||||
# Unterminated open at boundary — emit preceding,
|
||||
# enter block, continue loop with remainder.
|
||||
preceding = buf[:open_idx]
|
||||
if preceding:
|
||||
preceding = self._strip_orphan_close_tags(preceding)
|
||||
if preceding:
|
||||
out.append(preceding)
|
||||
self._last_emitted_ended_newline = (
|
||||
preceding.endswith("\n")
|
||||
)
|
||||
self._in_block = True
|
||||
buf = buf[open_idx + open_len:]
|
||||
continue
|
||||
|
||||
# No resolvable tag structure in buf. Hold back any
|
||||
# partial-tag prefix at the tail so a split tag
|
||||
# across deltas isn't missed, then emit the rest.
|
||||
held = self._max_partial_suffix(buf, self._OPEN_TAGS)
|
||||
held_close = self._max_partial_suffix(
|
||||
buf, self._CLOSE_TAGS,
|
||||
)
|
||||
held = max(held, held_close)
|
||||
if held:
|
||||
emit_text = buf[:-held]
|
||||
self._buf = buf[-held:]
|
||||
else:
|
||||
emit_text = buf
|
||||
self._buf = ""
|
||||
if emit_text:
|
||||
emit_text = self._strip_orphan_close_tags(emit_text)
|
||||
if emit_text:
|
||||
out.append(emit_text)
|
||||
self._last_emitted_ended_newline = (
|
||||
emit_text.endswith("\n")
|
||||
)
|
||||
return "".join(out)
|
||||
|
||||
return "".join(out)
|
||||
|
||||
def flush(self) -> str:
|
||||
"""End-of-stream flush.
|
||||
|
||||
If still inside an unterminated block, held-back content is
|
||||
discarded — leaking partial reasoning is worse than a
|
||||
truncated answer. Otherwise the held-back partial-tag tail is
|
||||
emitted verbatim (it turned out not to be a real tag prefix).
|
||||
"""
|
||||
if self._in_block:
|
||||
self._buf = ""
|
||||
self._in_block = False
|
||||
return ""
|
||||
tail = self._buf
|
||||
self._buf = ""
|
||||
if not tail:
|
||||
return ""
|
||||
tail = self._strip_orphan_close_tags(tail)
|
||||
if tail:
|
||||
self._last_emitted_ended_newline = tail.endswith("\n")
|
||||
return tail
|
||||
|
||||
# ── internal helpers ───────────────────────────────────────────────
|
||||
|
||||
@staticmethod
|
||||
def _find_first_tag(
|
||||
buf: str, tags: Tuple[str, ...],
|
||||
) -> Tuple[int, int]:
|
||||
"""Return (earliest_index, tag_length) over *tags*, or (-1, 0).
|
||||
|
||||
Case-insensitive match.
|
||||
"""
|
||||
buf_lower = buf.lower()
|
||||
best_idx = -1
|
||||
best_len = 0
|
||||
for tag in tags:
|
||||
idx = buf_lower.find(tag.lower())
|
||||
if idx != -1 and (best_idx == -1 or idx < best_idx):
|
||||
best_idx = idx
|
||||
best_len = len(tag)
|
||||
return best_idx, best_len
|
||||
|
||||
def _find_earliest_closed_pair(self, buf: str):
|
||||
"""Return (start_idx, end_idx) of the earliest closed pair, else None.
|
||||
|
||||
A closed pair is ``<tag>...</tag>`` of any variant. Matches are
|
||||
case-insensitive and non-greedy (the closest close tag after
|
||||
an open tag wins), matching the regex ``<tag>.*?</tag>``
|
||||
semantics of ``_strip_think_blocks`` case 1. When two tag
|
||||
variants could both match, the one whose open tag appears
|
||||
earlier wins.
|
||||
"""
|
||||
buf_lower = buf.lower()
|
||||
best: "tuple[int, int] | None" = None
|
||||
for open_tag, close_tag in zip(self._OPEN_TAGS, self._CLOSE_TAGS):
|
||||
open_lower = open_tag.lower()
|
||||
close_lower = close_tag.lower()
|
||||
open_idx = buf_lower.find(open_lower)
|
||||
if open_idx == -1:
|
||||
continue
|
||||
close_idx = buf_lower.find(
|
||||
close_lower, open_idx + len(open_lower),
|
||||
)
|
||||
if close_idx == -1:
|
||||
continue
|
||||
end_idx = close_idx + len(close_lower)
|
||||
if best is None or open_idx < best[0]:
|
||||
best = (open_idx, end_idx)
|
||||
return best
|
||||
|
||||
def _find_open_at_boundary(
|
||||
self, buf: str, already_emitted: list[str],
|
||||
) -> Tuple[int, int]:
|
||||
"""Return the earliest block-boundary open-tag (idx, len).
|
||||
|
||||
Returns (-1, 0) if no boundary-legal opener is present.
|
||||
"""
|
||||
buf_lower = buf.lower()
|
||||
best_idx = -1
|
||||
best_len = 0
|
||||
for tag in self._OPEN_TAGS:
|
||||
tag_lower = tag.lower()
|
||||
search_start = 0
|
||||
while True:
|
||||
idx = buf_lower.find(tag_lower, search_start)
|
||||
if idx == -1:
|
||||
break
|
||||
if self._is_block_boundary(buf, idx, already_emitted):
|
||||
if best_idx == -1 or idx < best_idx:
|
||||
best_idx = idx
|
||||
best_len = len(tag)
|
||||
break # first boundary hit for this tag is enough
|
||||
search_start = idx + 1
|
||||
return best_idx, best_len
|
||||
|
||||
def _is_block_boundary(
|
||||
self, buf: str, idx: int, already_emitted: list[str],
|
||||
) -> bool:
|
||||
"""True iff position *idx* in *buf* is a block boundary.
|
||||
|
||||
A block boundary is:
|
||||
- buf position 0 AND the most recent emission ended with
|
||||
a newline (or nothing has been emitted yet)
|
||||
- any position whose preceding text on the current line
|
||||
(since the last newline in buf) is whitespace-only, AND
|
||||
if there is no newline in the preceding buf portion, the
|
||||
most recent prior emission ended with a newline
|
||||
"""
|
||||
if idx == 0:
|
||||
# Check whether the last already-emitted chunk in THIS
|
||||
# feed() call ended with a newline, otherwise fall back
|
||||
# to the cross-feed flag.
|
||||
if already_emitted:
|
||||
return already_emitted[-1].endswith("\n")
|
||||
return self._last_emitted_ended_newline
|
||||
preceding = buf[:idx]
|
||||
last_nl = preceding.rfind("\n")
|
||||
if last_nl == -1:
|
||||
# No newline in buf before the tag — boundary only if the
|
||||
# prior emission ended with a newline AND everything since
|
||||
# is whitespace.
|
||||
if already_emitted:
|
||||
prior_newline = already_emitted[-1].endswith("\n")
|
||||
else:
|
||||
prior_newline = self._last_emitted_ended_newline
|
||||
return prior_newline and preceding.strip() == ""
|
||||
# Newline present — text between it and the tag must be
|
||||
# whitespace-only.
|
||||
return preceding[last_nl + 1:].strip() == ""
|
||||
|
||||
@classmethod
|
||||
def _max_partial_suffix(
|
||||
cls, buf: str, tags: Tuple[str, ...],
|
||||
) -> int:
|
||||
"""Return the longest buf-suffix that is a prefix of any tag.
|
||||
|
||||
Only prefixes strictly shorter than the tag itself count
|
||||
(full-length suffixes are the tag and are handled as matches,
|
||||
not held-back partials). Case-insensitive.
|
||||
"""
|
||||
if not buf:
|
||||
return 0
|
||||
buf_lower = buf.lower()
|
||||
max_check = min(len(buf_lower), cls._MAX_TAG_LEN - 1)
|
||||
for i in range(max_check, 0, -1):
|
||||
suffix = buf_lower[-i:]
|
||||
for tag in tags:
|
||||
tag_lower = tag.lower()
|
||||
if len(tag_lower) > i and tag_lower.startswith(suffix):
|
||||
return i
|
||||
return 0
|
||||
|
||||
@classmethod
|
||||
def _strip_orphan_close_tags(cls, text: str) -> str:
|
||||
"""Remove any close tags from *text* (orphan-close handling).
|
||||
|
||||
An orphan close tag has no matching open in the current
|
||||
scrubber state; it's always noise, stripped with any trailing
|
||||
whitespace so the surrounding prose flows naturally.
|
||||
"""
|
||||
if "</" not in text:
|
||||
return text
|
||||
text_lower = text.lower()
|
||||
out: list[str] = []
|
||||
i = 0
|
||||
while i < len(text):
|
||||
matched = False
|
||||
if text_lower[i:i + 2] == "</":
|
||||
for tag in cls._CLOSE_TAGS:
|
||||
tag_lower = tag.lower()
|
||||
tag_len = len(tag_lower)
|
||||
if text_lower[i:i + tag_len] == tag_lower:
|
||||
# Skip the tag and any trailing whitespace,
|
||||
# matching _strip_think_blocks case 3.
|
||||
j = i + tag_len
|
||||
while j < len(text) and text[j] in " \t\n\r":
|
||||
j += 1
|
||||
i = j
|
||||
matched = True
|
||||
break
|
||||
if not matched:
|
||||
out.append(text[i])
|
||||
i += 1
|
||||
return "".join(out)
|
||||
@@ -1,458 +0,0 @@
|
||||
"""Pure tool-call loop guardrail primitives.
|
||||
|
||||
The controller in this module is intentionally side-effect free: it tracks
|
||||
per-turn tool-call observations and returns decisions. Runtime code owns whether
|
||||
those decisions become warning guidance, synthetic tool results, or controlled
|
||||
turn halts.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Mapping
|
||||
|
||||
from utils import safe_json_loads
|
||||
from agent.tool_result_classification import file_mutation_result_landed
|
||||
|
||||
|
||||
IDEMPOTENT_TOOL_NAMES = frozenset(
|
||||
{
|
||||
"read_file",
|
||||
"search_files",
|
||||
"web_search",
|
||||
"web_extract",
|
||||
"session_search",
|
||||
"browser_snapshot",
|
||||
"browser_console",
|
||||
"browser_get_images",
|
||||
"mcp_filesystem_read_file",
|
||||
"mcp_filesystem_read_text_file",
|
||||
"mcp_filesystem_read_multiple_files",
|
||||
"mcp_filesystem_list_directory",
|
||||
"mcp_filesystem_list_directory_with_sizes",
|
||||
"mcp_filesystem_directory_tree",
|
||||
"mcp_filesystem_get_file_info",
|
||||
"mcp_filesystem_search_files",
|
||||
}
|
||||
)
|
||||
|
||||
MUTATING_TOOL_NAMES = frozenset(
|
||||
{
|
||||
"terminal",
|
||||
"execute_code",
|
||||
"write_file",
|
||||
"patch",
|
||||
"todo",
|
||||
"memory",
|
||||
"skill_manage",
|
||||
"browser_click",
|
||||
"browser_type",
|
||||
"browser_press",
|
||||
"browser_scroll",
|
||||
"browser_navigate",
|
||||
"send_message",
|
||||
"cronjob",
|
||||
"delegate_task",
|
||||
"process",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ToolCallGuardrailConfig:
|
||||
"""Thresholds for per-turn tool-call loop detection.
|
||||
|
||||
Warnings are enabled by default and never prevent tool execution. Hard stops
|
||||
are explicit opt-in so interactive CLI/TUI sessions get a gentle nudge unless
|
||||
the user enables circuit-breaker behavior in config.yaml.
|
||||
"""
|
||||
|
||||
warnings_enabled: bool = True
|
||||
hard_stop_enabled: bool = False
|
||||
exact_failure_warn_after: int = 2
|
||||
exact_failure_block_after: int = 5
|
||||
same_tool_failure_warn_after: int = 3
|
||||
same_tool_failure_halt_after: int = 8
|
||||
no_progress_warn_after: int = 2
|
||||
no_progress_block_after: int = 5
|
||||
idempotent_tools: frozenset[str] = field(default_factory=lambda: IDEMPOTENT_TOOL_NAMES)
|
||||
mutating_tools: frozenset[str] = field(default_factory=lambda: MUTATING_TOOL_NAMES)
|
||||
|
||||
@classmethod
|
||||
def from_mapping(cls, data: Mapping[str, Any] | None) -> "ToolCallGuardrailConfig":
|
||||
"""Build config from the `tool_loop_guardrails` config.yaml section."""
|
||||
if not isinstance(data, Mapping):
|
||||
return cls()
|
||||
|
||||
warn_after = data.get("warn_after")
|
||||
if not isinstance(warn_after, Mapping):
|
||||
warn_after = {}
|
||||
hard_stop_after = data.get("hard_stop_after")
|
||||
if not isinstance(hard_stop_after, Mapping):
|
||||
hard_stop_after = {}
|
||||
|
||||
defaults = cls()
|
||||
return cls(
|
||||
warnings_enabled=_as_bool(data.get("warnings_enabled"), defaults.warnings_enabled),
|
||||
hard_stop_enabled=_as_bool(data.get("hard_stop_enabled"), defaults.hard_stop_enabled),
|
||||
exact_failure_warn_after=_positive_int(
|
||||
warn_after.get("exact_failure", data.get("exact_failure_warn_after")),
|
||||
defaults.exact_failure_warn_after,
|
||||
),
|
||||
same_tool_failure_warn_after=_positive_int(
|
||||
warn_after.get("same_tool_failure", data.get("same_tool_failure_warn_after")),
|
||||
defaults.same_tool_failure_warn_after,
|
||||
),
|
||||
no_progress_warn_after=_positive_int(
|
||||
warn_after.get("idempotent_no_progress", data.get("no_progress_warn_after")),
|
||||
defaults.no_progress_warn_after,
|
||||
),
|
||||
exact_failure_block_after=_positive_int(
|
||||
hard_stop_after.get("exact_failure", data.get("exact_failure_block_after")),
|
||||
defaults.exact_failure_block_after,
|
||||
),
|
||||
same_tool_failure_halt_after=_positive_int(
|
||||
hard_stop_after.get("same_tool_failure", data.get("same_tool_failure_halt_after")),
|
||||
defaults.same_tool_failure_halt_after,
|
||||
),
|
||||
no_progress_block_after=_positive_int(
|
||||
hard_stop_after.get("idempotent_no_progress", data.get("no_progress_block_after")),
|
||||
defaults.no_progress_block_after,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ToolCallSignature:
|
||||
"""Stable, non-reversible identity for a tool name plus canonical args."""
|
||||
|
||||
tool_name: str
|
||||
args_hash: str
|
||||
|
||||
@classmethod
|
||||
def from_call(cls, tool_name: str, args: Mapping[str, Any] | None) -> "ToolCallSignature":
|
||||
canonical = canonical_tool_args(args or {})
|
||||
return cls(tool_name=tool_name, args_hash=_sha256(canonical))
|
||||
|
||||
def to_metadata(self) -> dict[str, str]:
|
||||
"""Return public metadata without raw argument values."""
|
||||
return {"tool_name": self.tool_name, "args_hash": self.args_hash}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ToolGuardrailDecision:
|
||||
"""Decision returned by the tool-call guardrail controller."""
|
||||
|
||||
action: str = "allow" # allow | warn | block | halt
|
||||
code: str = "allow"
|
||||
message: str = ""
|
||||
tool_name: str = ""
|
||||
count: int = 0
|
||||
signature: ToolCallSignature | None = None
|
||||
|
||||
@property
|
||||
def allows_execution(self) -> bool:
|
||||
return self.action in {"allow", "warn"}
|
||||
|
||||
@property
|
||||
def should_halt(self) -> bool:
|
||||
return self.action in {"block", "halt"}
|
||||
|
||||
def to_metadata(self) -> dict[str, Any]:
|
||||
data: dict[str, Any] = {
|
||||
"action": self.action,
|
||||
"code": self.code,
|
||||
"message": self.message,
|
||||
"tool_name": self.tool_name,
|
||||
"count": self.count,
|
||||
}
|
||||
if self.signature is not None:
|
||||
data["signature"] = self.signature.to_metadata()
|
||||
return data
|
||||
|
||||
|
||||
def canonical_tool_args(args: Mapping[str, Any]) -> str:
|
||||
"""Return sorted compact JSON for parsed tool arguments."""
|
||||
if not isinstance(args, Mapping):
|
||||
raise TypeError(f"tool args must be a mapping, got {type(args).__name__}")
|
||||
return json.dumps(
|
||||
args,
|
||||
ensure_ascii=False,
|
||||
sort_keys=True,
|
||||
separators=(",", ":"),
|
||||
default=str,
|
||||
)
|
||||
|
||||
|
||||
def classify_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
|
||||
"""Safety-fallback classifier used only when callers don't pass ``failed``.
|
||||
|
||||
Mirrors ``agent.display._detect_tool_failure`` exactly so the guardrail
|
||||
never disagrees with the CLI's user-visible ``[error]`` tag. Production
|
||||
callers in ``run_agent.py`` always pass an explicit ``failed=`` derived
|
||||
from ``_detect_tool_failure``; this function exists so standalone callers
|
||||
(tests, tooling) still get consistent behavior.
|
||||
"""
|
||||
if result is None:
|
||||
return False, ""
|
||||
if file_mutation_result_landed(tool_name, result):
|
||||
return False, ""
|
||||
|
||||
if tool_name == "terminal":
|
||||
data = safe_json_loads(result)
|
||||
if isinstance(data, dict):
|
||||
exit_code = data.get("exit_code")
|
||||
if exit_code is not None and exit_code != 0:
|
||||
return True, f" [exit {exit_code}]"
|
||||
return False, ""
|
||||
|
||||
if tool_name == "memory":
|
||||
data = safe_json_loads(result)
|
||||
if isinstance(data, dict):
|
||||
if data.get("success") is False and "exceed the limit" in data.get("error", ""):
|
||||
return True, " [full]"
|
||||
|
||||
lower = result[:500].lower()
|
||||
if '"error"' in lower or '"failed"' in lower or result.startswith("Error"):
|
||||
return True, " [error]"
|
||||
|
||||
return False, ""
|
||||
|
||||
|
||||
class ToolCallGuardrailController:
|
||||
"""Per-turn controller for repeated failed/non-progressing tool calls."""
|
||||
|
||||
def __init__(self, config: ToolCallGuardrailConfig | None = None):
|
||||
self.config = config or ToolCallGuardrailConfig()
|
||||
self.reset_for_turn()
|
||||
|
||||
def reset_for_turn(self) -> None:
|
||||
self._exact_failure_counts: dict[ToolCallSignature, int] = {}
|
||||
self._same_tool_failure_counts: dict[str, int] = {}
|
||||
self._no_progress: dict[ToolCallSignature, tuple[str, int]] = {}
|
||||
self._halt_decision: ToolGuardrailDecision | None = None
|
||||
|
||||
@property
|
||||
def halt_decision(self) -> ToolGuardrailDecision | None:
|
||||
return self._halt_decision
|
||||
|
||||
def before_call(self, tool_name: str, args: Mapping[str, Any] | None) -> ToolGuardrailDecision:
|
||||
signature = ToolCallSignature.from_call(tool_name, _coerce_args(args))
|
||||
if not self.config.hard_stop_enabled:
|
||||
return ToolGuardrailDecision(tool_name=tool_name, signature=signature)
|
||||
|
||||
exact_count = self._exact_failure_counts.get(signature, 0)
|
||||
if exact_count >= self.config.exact_failure_block_after:
|
||||
decision = ToolGuardrailDecision(
|
||||
action="block",
|
||||
code="repeated_exact_failure_block",
|
||||
message=(
|
||||
f"Blocked {tool_name}: the same tool call failed {exact_count} "
|
||||
"times with identical arguments. Stop retrying it unchanged; "
|
||||
"change strategy or explain the blocker."
|
||||
),
|
||||
tool_name=tool_name,
|
||||
count=exact_count,
|
||||
signature=signature,
|
||||
)
|
||||
self._halt_decision = decision
|
||||
return decision
|
||||
|
||||
if self._is_idempotent(tool_name):
|
||||
record = self._no_progress.get(signature)
|
||||
if record is not None:
|
||||
_result_hash, repeat_count = record
|
||||
if repeat_count >= self.config.no_progress_block_after:
|
||||
decision = ToolGuardrailDecision(
|
||||
action="block",
|
||||
code="idempotent_no_progress_block",
|
||||
message=(
|
||||
f"Blocked {tool_name}: this read-only call returned the same "
|
||||
f"result {repeat_count} times. Stop repeating it unchanged; "
|
||||
"use the result already provided or try a different query."
|
||||
),
|
||||
tool_name=tool_name,
|
||||
count=repeat_count,
|
||||
signature=signature,
|
||||
)
|
||||
self._halt_decision = decision
|
||||
return decision
|
||||
|
||||
return ToolGuardrailDecision(tool_name=tool_name, signature=signature)
|
||||
|
||||
def after_call(
|
||||
self,
|
||||
tool_name: str,
|
||||
args: Mapping[str, Any] | None,
|
||||
result: str | None,
|
||||
*,
|
||||
failed: bool | None = None,
|
||||
) -> ToolGuardrailDecision:
|
||||
args = _coerce_args(args)
|
||||
signature = ToolCallSignature.from_call(tool_name, args)
|
||||
if failed is None:
|
||||
failed, _ = classify_tool_failure(tool_name, result)
|
||||
|
||||
if failed:
|
||||
exact_count = self._exact_failure_counts.get(signature, 0) + 1
|
||||
self._exact_failure_counts[signature] = exact_count
|
||||
self._no_progress.pop(signature, None)
|
||||
|
||||
same_count = self._same_tool_failure_counts.get(tool_name, 0) + 1
|
||||
self._same_tool_failure_counts[tool_name] = same_count
|
||||
|
||||
if self.config.hard_stop_enabled and same_count >= self.config.same_tool_failure_halt_after:
|
||||
decision = ToolGuardrailDecision(
|
||||
action="halt",
|
||||
code="same_tool_failure_halt",
|
||||
message=(
|
||||
f"Stopped {tool_name}: it failed {same_count} times this turn. "
|
||||
"Stop retrying the same failing tool path and choose a different approach."
|
||||
),
|
||||
tool_name=tool_name,
|
||||
count=same_count,
|
||||
signature=signature,
|
||||
)
|
||||
self._halt_decision = decision
|
||||
return decision
|
||||
|
||||
if self.config.warnings_enabled and exact_count >= self.config.exact_failure_warn_after:
|
||||
return ToolGuardrailDecision(
|
||||
action="warn",
|
||||
code="repeated_exact_failure_warning",
|
||||
message=(
|
||||
f"{tool_name} has failed {exact_count} times with identical arguments. "
|
||||
"This looks like a loop; inspect the error and change strategy "
|
||||
"instead of retrying it unchanged."
|
||||
),
|
||||
tool_name=tool_name,
|
||||
count=exact_count,
|
||||
signature=signature,
|
||||
)
|
||||
|
||||
if self.config.warnings_enabled and same_count >= self.config.same_tool_failure_warn_after:
|
||||
return ToolGuardrailDecision(
|
||||
action="warn",
|
||||
code="same_tool_failure_warning",
|
||||
message=(
|
||||
f"{tool_name} has failed {same_count} times this turn. "
|
||||
"This looks like a loop; change approach before retrying."
|
||||
),
|
||||
tool_name=tool_name,
|
||||
count=same_count,
|
||||
signature=signature,
|
||||
)
|
||||
|
||||
return ToolGuardrailDecision(tool_name=tool_name, count=exact_count, signature=signature)
|
||||
|
||||
self._exact_failure_counts.pop(signature, None)
|
||||
self._same_tool_failure_counts.pop(tool_name, None)
|
||||
|
||||
if not self._is_idempotent(tool_name):
|
||||
self._no_progress.pop(signature, None)
|
||||
return ToolGuardrailDecision(tool_name=tool_name, signature=signature)
|
||||
|
||||
result_hash = _result_hash(result)
|
||||
previous = self._no_progress.get(signature)
|
||||
repeat_count = 1
|
||||
if previous is not None and previous[0] == result_hash:
|
||||
repeat_count = previous[1] + 1
|
||||
self._no_progress[signature] = (result_hash, repeat_count)
|
||||
|
||||
if self.config.warnings_enabled and repeat_count >= self.config.no_progress_warn_after:
|
||||
return ToolGuardrailDecision(
|
||||
action="warn",
|
||||
code="idempotent_no_progress_warning",
|
||||
message=(
|
||||
f"{tool_name} returned the same result {repeat_count} times. "
|
||||
"Use the result already provided or change the query instead of "
|
||||
"repeating it unchanged."
|
||||
),
|
||||
tool_name=tool_name,
|
||||
count=repeat_count,
|
||||
signature=signature,
|
||||
)
|
||||
|
||||
return ToolGuardrailDecision(tool_name=tool_name, count=repeat_count, signature=signature)
|
||||
|
||||
def _is_idempotent(self, tool_name: str) -> bool:
|
||||
if tool_name in self.config.mutating_tools:
|
||||
return False
|
||||
return tool_name in self.config.idempotent_tools
|
||||
|
||||
|
||||
def toolguard_synthetic_result(decision: ToolGuardrailDecision) -> str:
|
||||
"""Build a synthetic role=tool content string for a blocked tool call."""
|
||||
return json.dumps(
|
||||
{
|
||||
"error": decision.message,
|
||||
"guardrail": decision.to_metadata(),
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
|
||||
|
||||
def append_toolguard_guidance(result: str, decision: ToolGuardrailDecision) -> str:
|
||||
"""Append runtime guidance to the current tool result content."""
|
||||
if decision.action not in {"warn", "halt"} or not decision.message:
|
||||
return result
|
||||
label = "Tool loop hard stop" if decision.action == "halt" else "Tool loop warning"
|
||||
suffix = (
|
||||
f"\n\n[{label}: "
|
||||
f"{decision.code}; count={decision.count}; {decision.message}]"
|
||||
)
|
||||
return (result or "") + suffix
|
||||
|
||||
|
||||
def _coerce_args(args: Mapping[str, Any] | None) -> Mapping[str, Any]:
|
||||
return args if isinstance(args, Mapping) else {}
|
||||
|
||||
|
||||
def _result_hash(result: str | None) -> str:
|
||||
parsed = safe_json_loads(result or "")
|
||||
if parsed is not None:
|
||||
try:
|
||||
canonical = json.dumps(
|
||||
parsed,
|
||||
ensure_ascii=False,
|
||||
sort_keys=True,
|
||||
separators=(",", ":"),
|
||||
default=str,
|
||||
)
|
||||
except TypeError:
|
||||
canonical = str(parsed)
|
||||
else:
|
||||
canonical = result or ""
|
||||
return _sha256(canonical)
|
||||
|
||||
|
||||
def _as_bool(value: Any, default: bool) -> bool:
|
||||
if value is None:
|
||||
return default
|
||||
if isinstance(value, bool):
|
||||
return value
|
||||
if isinstance(value, (int, float)):
|
||||
return bool(value)
|
||||
if isinstance(value, str):
|
||||
lowered = value.strip().lower()
|
||||
if lowered in {"1", "true", "yes", "on", "enabled"}:
|
||||
return True
|
||||
if lowered in {"0", "false", "no", "off", "disabled"}:
|
||||
return False
|
||||
return default
|
||||
|
||||
|
||||
def _positive_int(value: Any, default: int) -> int:
|
||||
if value is None:
|
||||
return default
|
||||
try:
|
||||
parsed = int(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
return parsed if parsed >= 1 else default
|
||||
|
||||
|
||||
def _sha256(value: str) -> str:
|
||||
return hashlib.sha256(value.encode("utf-8")).hexdigest()
|
||||
@@ -1,26 +0,0 @@
|
||||
"""Shared helpers for classifying tool result payloads."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
|
||||
FILE_MUTATING_TOOL_NAMES = frozenset({"write_file", "patch"})
|
||||
|
||||
|
||||
def file_mutation_result_landed(tool_name: str, result: Any) -> bool:
|
||||
"""Return True when a file mutation result proves the write landed."""
|
||||
if tool_name not in FILE_MUTATING_TOOL_NAMES or not isinstance(result, str):
|
||||
return False
|
||||
try:
|
||||
data = json.loads(result.strip())
|
||||
except Exception:
|
||||
return False
|
||||
if not isinstance(data, dict) or data.get("error"):
|
||||
return False
|
||||
if tool_name == "write_file":
|
||||
return "bytes_written" in data
|
||||
if tool_name == "patch":
|
||||
return data.get("success") is True
|
||||
return False
|
||||
@@ -1,614 +0,0 @@
|
||||
"""OpenAI Chat Completions transport.
|
||||
|
||||
Handles the default api_mode ('chat_completions') used by ~16 OpenAI-compatible
|
||||
providers (OpenRouter, Nous, NVIDIA, Qwen, Ollama, DeepSeek, xAI, Kimi, etc.).
|
||||
|
||||
Messages and tools are already in OpenAI format — convert_messages and
|
||||
convert_tools are near-identity. The complexity lives in build_kwargs
|
||||
which has provider-specific conditionals for max_tokens defaults,
|
||||
reasoning configuration, temperature handling, and extra_body assembly.
|
||||
"""
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.lmstudio_reasoning import resolve_lmstudio_effort
|
||||
from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
|
||||
from agent.prompt_builder import DEVELOPER_ROLE_MODELS
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
|
||||
|
||||
def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
|
||||
"""Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig."""
|
||||
if reasoning_config is None or not isinstance(reasoning_config, dict):
|
||||
return None
|
||||
|
||||
normalized_model = (model or "").strip().lower()
|
||||
if normalized_model.startswith("google/"):
|
||||
normalized_model = normalized_model.split("/", 1)[1]
|
||||
|
||||
# ``thinking_config`` is a Gemini-only request parameter. The same
|
||||
# ``gemini`` provider also serves Gemma (and historically PaLM/Bard);
|
||||
# those reject the field with HTTP 400 "Unknown name 'thinking_config':
|
||||
# Cannot find field" — including the polite ``{"includeThoughts": False}``
|
||||
# form. Omit the field entirely on non-Gemini models. (#17426)
|
||||
if not normalized_model.startswith("gemini"):
|
||||
return None
|
||||
|
||||
if reasoning_config.get("enabled") is False:
|
||||
# Gemini can hide thought parts even when internal thinking still
|
||||
# happens; omit thinkingLevel to avoid model-specific validation quirks.
|
||||
return {"includeThoughts": False}
|
||||
|
||||
effort = str(reasoning_config.get("effort", "medium") or "medium").strip().lower()
|
||||
if effort == "none":
|
||||
return {"includeThoughts": False}
|
||||
|
||||
thinking_config: Dict[str, Any] = {"includeThoughts": True}
|
||||
|
||||
# Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes'
|
||||
# coarse effort levels. ``includeThoughts`` alone is enough to surface
|
||||
# thought parts without risking request validation errors.
|
||||
if normalized_model.startswith("gemini-2.5-"):
|
||||
return thinking_config
|
||||
|
||||
if effort not in {"minimal", "low", "medium", "high", "xhigh"}:
|
||||
effort = "medium"
|
||||
|
||||
# Gemini 3 Flash documents low/medium/high thinking levels; Gemini 3 Pro
|
||||
# is stricter (low/high). Clamp Hermes' wider effort set to what each
|
||||
# family accepts so we never forward an undocumented level verbatim.
|
||||
if normalized_model.startswith(("gemini-3", "gemini-3.1")):
|
||||
if "flash" in normalized_model:
|
||||
if effort in {"minimal", "low"}:
|
||||
thinking_config["thinkingLevel"] = "low"
|
||||
elif effort in {"high", "xhigh"}:
|
||||
thinking_config["thinkingLevel"] = "high"
|
||||
else:
|
||||
thinking_config["thinkingLevel"] = "medium"
|
||||
elif "pro" in normalized_model:
|
||||
thinking_config["thinkingLevel"] = (
|
||||
"high" if effort in {"high", "xhigh"} else "low"
|
||||
)
|
||||
|
||||
return thinking_config
|
||||
|
||||
|
||||
def _snake_case_gemini_thinking_config(config: dict | None) -> dict | None:
|
||||
"""Convert Gemini thinking config keys to the OpenAI-compat field names."""
|
||||
if not isinstance(config, dict) or not config:
|
||||
return None
|
||||
|
||||
translated: Dict[str, Any] = {}
|
||||
if isinstance(config.get("includeThoughts"), bool):
|
||||
translated["include_thoughts"] = config["includeThoughts"]
|
||||
if isinstance(config.get("thinkingLevel"), str) and config["thinkingLevel"].strip():
|
||||
translated["thinking_level"] = config["thinkingLevel"].strip().lower()
|
||||
if isinstance(config.get("thinkingBudget"), (int, float)):
|
||||
translated["thinking_budget"] = int(config["thinkingBudget"])
|
||||
return translated or None
|
||||
|
||||
|
||||
def _is_gemini_openai_compat_base_url(base_url: Any) -> bool:
|
||||
normalized = str(base_url or "").strip().rstrip("/").lower()
|
||||
if not normalized:
|
||||
return False
|
||||
if "generativelanguage.googleapis.com" not in normalized:
|
||||
return False
|
||||
return normalized.endswith("/openai")
|
||||
|
||||
|
||||
class ChatCompletionsTransport(ProviderTransport):
|
||||
"""Transport for api_mode='chat_completions'.
|
||||
|
||||
The default path for OpenAI-compatible providers.
|
||||
"""
|
||||
|
||||
@property
|
||||
def api_mode(self) -> str:
|
||||
return "chat_completions"
|
||||
|
||||
def convert_messages(
|
||||
self, messages: list[dict[str, Any]], **kwargs
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Messages are already in OpenAI format — sanitize Codex leaks only.
|
||||
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` /
|
||||
``codex_message_items`` on the message, ``call_id``/``response_item_id``
|
||||
on tool_calls) that strict chat-completions providers reject with 400/422.
|
||||
"""
|
||||
needs_sanitize = False
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
if "codex_reasoning_items" in msg or "codex_message_items" in msg:
|
||||
needs_sanitize = True
|
||||
break
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
if isinstance(tc, dict) and (
|
||||
"call_id" in tc or "response_item_id" in tc
|
||||
):
|
||||
needs_sanitize = True
|
||||
break
|
||||
if needs_sanitize:
|
||||
break
|
||||
|
||||
if not needs_sanitize:
|
||||
return messages
|
||||
|
||||
sanitized = copy.deepcopy(messages)
|
||||
for msg in sanitized:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
msg.pop("codex_reasoning_items", None)
|
||||
msg.pop("codex_message_items", None)
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
if isinstance(tc, dict):
|
||||
tc.pop("call_id", None)
|
||||
tc.pop("response_item_id", None)
|
||||
return sanitized
|
||||
|
||||
def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
"""Tools are already in OpenAI format — identity."""
|
||||
return tools
|
||||
|
||||
def build_kwargs(
|
||||
self,
|
||||
model: str,
|
||||
messages: list[dict[str, Any]],
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
**params,
|
||||
) -> dict[str, Any]:
|
||||
"""Build chat.completions.create() kwargs.
|
||||
|
||||
params (all optional):
|
||||
timeout: float — API call timeout
|
||||
max_tokens: int | None — user-configured max tokens
|
||||
ephemeral_max_output_tokens: int | None — one-shot override
|
||||
max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
|
||||
reasoning_config: dict | None
|
||||
request_overrides: dict | None
|
||||
session_id: str | None
|
||||
model_lower: str — lowercase model name for pattern matching
|
||||
# Provider profile path (all per-provider quirks live in providers/)
|
||||
provider_profile: ProviderProfile | None — when present, delegates to
|
||||
_build_kwargs_from_profile(); all flag params below are bypassed.
|
||||
# Legacy-path flags — only used when provider_profile is None
|
||||
# (i.e. custom / unregistered providers). Known providers all go
|
||||
# through provider_profile.
|
||||
is_openrouter: bool
|
||||
is_nous: bool
|
||||
is_qwen_portal: bool
|
||||
is_github_models: bool
|
||||
is_nvidia_nim: bool
|
||||
is_kimi: bool
|
||||
is_tokenhub: bool
|
||||
is_lmstudio: bool
|
||||
is_custom_provider: bool
|
||||
ollama_num_ctx: int | None
|
||||
# Provider routing
|
||||
provider_preferences: dict | None
|
||||
# Qwen-specific
|
||||
qwen_prepare_fn: callable | None — runs AFTER codex sanitization
|
||||
qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
|
||||
qwen_session_metadata: dict | None
|
||||
# Temperature
|
||||
fixed_temperature: Any — from _fixed_temperature_for_model()
|
||||
omit_temperature: bool
|
||||
# Reasoning
|
||||
supports_reasoning: bool
|
||||
github_reasoning_extra: dict | None
|
||||
lmstudio_reasoning_options: list[str] | None # raw allowed_options from /api/v1/models
|
||||
# Claude on OpenRouter/Nous max output
|
||||
anthropic_max_output: int | None
|
||||
extra_body_additions: dict | None
|
||||
"""
|
||||
# Codex sanitization: drop reasoning_items / call_id / response_item_id
|
||||
sanitized = self.convert_messages(messages)
|
||||
|
||||
# ── Provider profile: single-path when present ──────────────────
|
||||
_profile = params.get("provider_profile")
|
||||
if _profile:
|
||||
return self._build_kwargs_from_profile(
|
||||
_profile, model, sanitized, tools, params
|
||||
)
|
||||
|
||||
# ── Legacy fallback (unregistered / unknown provider) ───────────
|
||||
# Reached only when get_provider_profile() returned None.
|
||||
# Known providers always go through the profile path above.
|
||||
|
||||
# Developer role swap for GPT-5/Codex models
|
||||
model_lower = params.get("model_lower", (model or "").lower())
|
||||
if (
|
||||
sanitized
|
||||
and isinstance(sanitized[0], dict)
|
||||
and sanitized[0].get("role") == "system"
|
||||
and any(p in model_lower for p in DEVELOPER_ROLE_MODELS)
|
||||
):
|
||||
sanitized = list(sanitized)
|
||||
sanitized[0] = {**sanitized[0], "role": "developer"}
|
||||
|
||||
api_kwargs: dict[str, Any] = {
|
||||
"model": model,
|
||||
"messages": sanitized,
|
||||
}
|
||||
|
||||
timeout = params.get("timeout")
|
||||
if timeout is not None:
|
||||
api_kwargs["timeout"] = timeout
|
||||
|
||||
# Tools
|
||||
if tools:
|
||||
# Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting
|
||||
# tool parameters here keeps aggregator routes (Nous, OpenRouter,
|
||||
# etc.) compatible, in addition to direct moonshot.ai endpoints.
|
||||
if is_moonshot_model(model):
|
||||
tools = sanitize_moonshot_tools(tools)
|
||||
api_kwargs["tools"] = tools
|
||||
|
||||
# max_tokens resolution — priority: ephemeral > user > provider default
|
||||
max_tokens_fn = params.get("max_tokens_param_fn")
|
||||
ephemeral = params.get("ephemeral_max_output_tokens")
|
||||
max_tokens = params.get("max_tokens")
|
||||
anthropic_max_out = params.get("anthropic_max_output")
|
||||
is_nvidia_nim = params.get("is_nvidia_nim", False)
|
||||
is_kimi = params.get("is_kimi", False)
|
||||
is_tokenhub = params.get("is_tokenhub", False)
|
||||
reasoning_config = params.get("reasoning_config")
|
||||
|
||||
if ephemeral is not None and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(ephemeral))
|
||||
elif max_tokens is not None and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(max_tokens))
|
||||
elif anthropic_max_out is not None:
|
||||
api_kwargs["max_tokens"] = anthropic_max_out
|
||||
|
||||
# Kimi: top-level reasoning_effort (unless thinking disabled)
|
||||
if is_kimi:
|
||||
_kimi_thinking_off = bool(
|
||||
reasoning_config
|
||||
and isinstance(reasoning_config, dict)
|
||||
and reasoning_config.get("enabled") is False
|
||||
)
|
||||
if not _kimi_thinking_off:
|
||||
_kimi_effort = "medium"
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
_e = (reasoning_config.get("effort") or "").strip().lower()
|
||||
if _e in {"low", "medium", "high"}:
|
||||
_kimi_effort = _e
|
||||
api_kwargs["reasoning_effort"] = _kimi_effort
|
||||
|
||||
# Tencent TokenHub: top-level reasoning_effort (unless thinking disabled)
|
||||
if is_tokenhub:
|
||||
_tokenhub_thinking_off = bool(
|
||||
reasoning_config
|
||||
and isinstance(reasoning_config, dict)
|
||||
and reasoning_config.get("enabled") is False
|
||||
)
|
||||
if not _tokenhub_thinking_off:
|
||||
_tokenhub_effort = "high"
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
_e = (reasoning_config.get("effort") or "").strip().lower()
|
||||
if _e in {"low", "medium", "high"}:
|
||||
_tokenhub_effort = _e
|
||||
api_kwargs["reasoning_effort"] = _tokenhub_effort
|
||||
|
||||
# LM Studio: top-level reasoning_effort. Only emit when the model
|
||||
# declares reasoning support via /api/v1/models capabilities (gated
|
||||
# upstream by params["supports_reasoning"]). resolve_lmstudio_effort
|
||||
# is shared with run_agent's summary path so both stay in sync.
|
||||
if params.get("is_lmstudio", False) and params.get("supports_reasoning", False):
|
||||
_lm_effort = resolve_lmstudio_effort(
|
||||
reasoning_config,
|
||||
params.get("lmstudio_reasoning_options"),
|
||||
)
|
||||
if _lm_effort is not None:
|
||||
api_kwargs["reasoning_effort"] = _lm_effort
|
||||
|
||||
# extra_body assembly
|
||||
extra_body: dict[str, Any] = {}
|
||||
|
||||
is_openrouter = params.get("is_openrouter", False)
|
||||
is_nous = params.get("is_nous", False)
|
||||
is_github_models = params.get("is_github_models", False)
|
||||
provider_name = str(params.get("provider_name") or "").strip().lower()
|
||||
base_url = params.get("base_url")
|
||||
|
||||
provider_prefs = params.get("provider_preferences")
|
||||
if provider_prefs and is_openrouter:
|
||||
extra_body["provider"] = provider_prefs
|
||||
|
||||
# Pareto Code router plugin — model-gated. Same shape as the
|
||||
# profile path in plugins/model-providers/openrouter/__init__.py;
|
||||
# this branch only runs when the OpenRouter profile isn't loaded.
|
||||
if is_openrouter and model == "openrouter/pareto-code":
|
||||
_pareto_score = params.get("openrouter_min_coding_score")
|
||||
if _pareto_score is not None and _pareto_score != "":
|
||||
try:
|
||||
_pareto_score_f = float(_pareto_score)
|
||||
except (TypeError, ValueError):
|
||||
_pareto_score_f = None
|
||||
if _pareto_score_f is not None and 0.0 <= _pareto_score_f <= 1.0:
|
||||
extra_body["plugins"] = [
|
||||
{"id": "pareto-router", "min_coding_score": _pareto_score_f}
|
||||
]
|
||||
|
||||
# Kimi extra_body.thinking
|
||||
if is_kimi:
|
||||
_kimi_thinking_enabled = True
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
if reasoning_config.get("enabled") is False:
|
||||
_kimi_thinking_enabled = False
|
||||
extra_body["thinking"] = {
|
||||
"type": "enabled" if _kimi_thinking_enabled else "disabled",
|
||||
}
|
||||
|
||||
# Reasoning. LM Studio is handled above via top-level reasoning_effort,
|
||||
# so skip emitting extra_body.reasoning for it.
|
||||
if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False):
|
||||
if is_github_models:
|
||||
gh_reasoning = params.get("github_reasoning_extra")
|
||||
if gh_reasoning is not None:
|
||||
extra_body["reasoning"] = gh_reasoning
|
||||
else:
|
||||
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
|
||||
|
||||
if provider_name == "gemini":
|
||||
raw_thinking_config = _build_gemini_thinking_config(model, reasoning_config)
|
||||
if _is_gemini_openai_compat_base_url(base_url):
|
||||
thinking_config = _snake_case_gemini_thinking_config(raw_thinking_config)
|
||||
if thinking_config:
|
||||
openai_compat_extra = extra_body.get("extra_body", {})
|
||||
google_extra = openai_compat_extra.get("google", {})
|
||||
google_extra["thinking_config"] = thinking_config
|
||||
openai_compat_extra["google"] = google_extra
|
||||
extra_body["extra_body"] = openai_compat_extra
|
||||
elif raw_thinking_config:
|
||||
extra_body["thinking_config"] = raw_thinking_config
|
||||
elif provider_name == "google-gemini-cli":
|
||||
thinking_config = _build_gemini_thinking_config(model, reasoning_config)
|
||||
if thinking_config:
|
||||
extra_body["thinking_config"] = thinking_config
|
||||
|
||||
# Merge any pre-built extra_body additions
|
||||
additions = params.get("extra_body_additions")
|
||||
if additions:
|
||||
extra_body.update(additions)
|
||||
|
||||
if extra_body:
|
||||
api_kwargs["extra_body"] = extra_body
|
||||
|
||||
# Request overrides last (service_tier etc.)
|
||||
overrides = params.get("request_overrides")
|
||||
if overrides:
|
||||
api_kwargs.update(overrides)
|
||||
|
||||
return api_kwargs
|
||||
|
||||
def _build_kwargs_from_profile(self, profile, model, sanitized, tools, params):
|
||||
"""Build API kwargs using a ProviderProfile — single path, no legacy flags.
|
||||
|
||||
This method replaces the entire flag-based kwargs assembly when a
|
||||
provider_profile is passed. Every quirk comes from the profile object.
|
||||
"""
|
||||
from providers.base import OMIT_TEMPERATURE
|
||||
|
||||
# Message preprocessing
|
||||
sanitized = profile.prepare_messages(sanitized)
|
||||
|
||||
# Developer role swap — model-name-based, applies to all providers
|
||||
_model_lower = (model or "").lower()
|
||||
if (
|
||||
sanitized
|
||||
and isinstance(sanitized[0], dict)
|
||||
and sanitized[0].get("role") == "system"
|
||||
and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS)
|
||||
):
|
||||
sanitized = list(sanitized)
|
||||
sanitized[0] = {**sanitized[0], "role": "developer"}
|
||||
|
||||
api_kwargs: dict[str, Any] = {
|
||||
"model": model,
|
||||
"messages": sanitized,
|
||||
}
|
||||
|
||||
# Temperature
|
||||
if profile.fixed_temperature is OMIT_TEMPERATURE:
|
||||
pass # Don't include temperature at all
|
||||
elif profile.fixed_temperature is not None:
|
||||
api_kwargs["temperature"] = profile.fixed_temperature
|
||||
else:
|
||||
# Use caller's temperature if provided
|
||||
temp = params.get("temperature")
|
||||
if temp is not None:
|
||||
api_kwargs["temperature"] = temp
|
||||
|
||||
# Timeout
|
||||
timeout = params.get("timeout")
|
||||
if timeout is not None:
|
||||
api_kwargs["timeout"] = timeout
|
||||
|
||||
# Tools — apply Moonshot/Kimi schema sanitization regardless of path
|
||||
if tools:
|
||||
if is_moonshot_model(model):
|
||||
tools = sanitize_moonshot_tools(tools)
|
||||
api_kwargs["tools"] = tools
|
||||
|
||||
# max_tokens resolution — priority: ephemeral > user > profile default
|
||||
max_tokens_fn = params.get("max_tokens_param_fn")
|
||||
ephemeral = params.get("ephemeral_max_output_tokens")
|
||||
user_max = params.get("max_tokens")
|
||||
anthropic_max = params.get("anthropic_max_output")
|
||||
|
||||
if ephemeral is not None and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(ephemeral))
|
||||
elif user_max is not None and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(user_max))
|
||||
elif profile.default_max_tokens and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(profile.default_max_tokens))
|
||||
elif anthropic_max is not None:
|
||||
api_kwargs["max_tokens"] = anthropic_max
|
||||
|
||||
# Provider-specific api_kwargs extras (reasoning_effort, metadata, etc.)
|
||||
reasoning_config = params.get("reasoning_config")
|
||||
extra_body_from_profile, top_level_from_profile = (
|
||||
profile.build_api_kwargs_extras(
|
||||
reasoning_config=reasoning_config,
|
||||
supports_reasoning=params.get("supports_reasoning", False),
|
||||
qwen_session_metadata=params.get("qwen_session_metadata"),
|
||||
model=model,
|
||||
ollama_num_ctx=params.get("ollama_num_ctx"),
|
||||
session_id=params.get("session_id"),
|
||||
)
|
||||
)
|
||||
api_kwargs.update(top_level_from_profile)
|
||||
|
||||
# extra_body assembly
|
||||
extra_body: dict[str, Any] = {}
|
||||
|
||||
# Profile's extra_body (tags, provider prefs, vl_high_resolution, etc.)
|
||||
profile_body = profile.build_extra_body(
|
||||
session_id=params.get("session_id"),
|
||||
provider_preferences=params.get("provider_preferences"),
|
||||
model=model,
|
||||
base_url=params.get("base_url"),
|
||||
reasoning_config=reasoning_config,
|
||||
openrouter_min_coding_score=params.get("openrouter_min_coding_score"),
|
||||
)
|
||||
if profile_body:
|
||||
extra_body.update(profile_body)
|
||||
|
||||
# Profile's reasoning/thinking extra_body entries
|
||||
if extra_body_from_profile:
|
||||
extra_body.update(extra_body_from_profile)
|
||||
|
||||
# Merge any pre-built extra_body additions from the caller
|
||||
additions = params.get("extra_body_additions")
|
||||
if additions:
|
||||
extra_body.update(additions)
|
||||
|
||||
# Request overrides (user config)
|
||||
overrides = params.get("request_overrides")
|
||||
if overrides:
|
||||
for k, v in overrides.items():
|
||||
if k == "extra_body" and isinstance(v, dict):
|
||||
extra_body.update(v)
|
||||
else:
|
||||
api_kwargs[k] = v
|
||||
|
||||
if extra_body:
|
||||
api_kwargs["extra_body"] = extra_body
|
||||
|
||||
return api_kwargs
|
||||
|
||||
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
|
||||
"""Normalize OpenAI ChatCompletion to NormalizedResponse.
|
||||
|
||||
For chat_completions, this is near-identity — the response is already
|
||||
in OpenAI format. extra_content on tool_calls (Gemini thought_signature)
|
||||
is preserved via ToolCall.provider_data. reasoning_details (OpenRouter
|
||||
unified format) and reasoning_content (DeepSeek/Moonshot) are also
|
||||
preserved for downstream replay.
|
||||
"""
|
||||
choice = response.choices[0]
|
||||
msg = choice.message
|
||||
finish_reason = choice.finish_reason or "stop"
|
||||
|
||||
tool_calls = None
|
||||
if msg.tool_calls:
|
||||
tool_calls = []
|
||||
for tc in msg.tool_calls:
|
||||
# Preserve provider-specific extras on the tool call.
|
||||
# Gemini 3 thinking models attach extra_content with
|
||||
# thought_signature — without replay on the next turn the API
|
||||
# rejects the request with 400.
|
||||
tc_provider_data: dict[str, Any] = {}
|
||||
extra = getattr(tc, "extra_content", None)
|
||||
if extra is None and hasattr(tc, "model_extra"):
|
||||
extra = (tc.model_extra or {}).get("extra_content")
|
||||
if extra is not None:
|
||||
if hasattr(extra, "model_dump"):
|
||||
try:
|
||||
extra = extra.model_dump()
|
||||
except Exception:
|
||||
pass
|
||||
tc_provider_data["extra_content"] = extra
|
||||
tool_calls.append(
|
||||
ToolCall(
|
||||
id=tc.id,
|
||||
name=tc.function.name,
|
||||
arguments=tc.function.arguments,
|
||||
provider_data=tc_provider_data or None,
|
||||
)
|
||||
)
|
||||
|
||||
usage = None
|
||||
if hasattr(response, "usage") and response.usage:
|
||||
u = response.usage
|
||||
usage = Usage(
|
||||
prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
|
||||
completion_tokens=getattr(u, "completion_tokens", 0) or 0,
|
||||
total_tokens=getattr(u, "total_tokens", 0) or 0,
|
||||
)
|
||||
|
||||
# Preserve reasoning fields separately. DeepSeek/Moonshot use
|
||||
# ``reasoning_content``; others use ``reasoning``. Downstream code
|
||||
# (_extract_reasoning, thinking-prefill retry) reads both distinctly,
|
||||
# so keep them apart in provider_data rather than merging.
|
||||
reasoning = getattr(msg, "reasoning", None)
|
||||
reasoning_content = getattr(msg, "reasoning_content", None)
|
||||
if reasoning_content is None and hasattr(msg, "model_extra"):
|
||||
model_extra = getattr(msg, "model_extra", None) or {}
|
||||
if isinstance(model_extra, dict) and "reasoning_content" in model_extra:
|
||||
reasoning_content = model_extra["reasoning_content"]
|
||||
|
||||
provider_data: Dict[str, Any] = {}
|
||||
if reasoning_content is not None:
|
||||
provider_data["reasoning_content"] = reasoning_content
|
||||
rd = getattr(msg, "reasoning_details", None)
|
||||
if rd:
|
||||
provider_data["reasoning_details"] = rd
|
||||
|
||||
return NormalizedResponse(
|
||||
content=msg.content,
|
||||
tool_calls=tool_calls,
|
||||
finish_reason=finish_reason,
|
||||
reasoning=reasoning,
|
||||
usage=usage,
|
||||
provider_data=provider_data or None,
|
||||
)
|
||||
|
||||
def validate_response(self, response: Any) -> bool:
|
||||
"""Check that response has valid choices."""
|
||||
if response is None:
|
||||
return False
|
||||
if not hasattr(response, "choices") or response.choices is None:
|
||||
return False
|
||||
if not response.choices:
|
||||
return False
|
||||
return True
|
||||
|
||||
def extract_cache_stats(self, response: Any) -> dict[str, int] | None:
|
||||
"""Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
|
||||
usage = getattr(response, "usage", None)
|
||||
if usage is None:
|
||||
return None
|
||||
details = getattr(usage, "prompt_tokens_details", None)
|
||||
if details is None:
|
||||
return None
|
||||
cached = getattr(details, "cached_tokens", 0) or 0
|
||||
written = getattr(details, "cache_write_tokens", 0) or 0
|
||||
if cached or written:
|
||||
return {"cached_tokens": cached, "creation_tokens": written}
|
||||
return None
|
||||
|
||||
|
||||
# Auto-register on import
|
||||
from agent.transports import register_transport # noqa: E402
|
||||
|
||||
register_transport("chat_completions", ChatCompletionsTransport)
|
||||
@@ -1,368 +0,0 @@
|
||||
"""Codex app-server JSON-RPC client.
|
||||
|
||||
Speaks the protocol documented in codex-rs/app-server/README.md (codex 0.125+).
|
||||
Transport is newline-delimited JSON-RPC 2.0 over stdio: spawn `codex app-server`,
|
||||
do an `initialize` handshake, then drive `thread/start` + `turn/start` and
|
||||
consume streaming `item/*` notifications until `turn/completed`.
|
||||
|
||||
This module is the wire-level speaker only. Higher-level concerns (event
|
||||
projection into Hermes' display, approval bridging, transcript projection into
|
||||
AIAgent.messages, plugin migration) live in sibling modules.
|
||||
|
||||
Status: optional opt-in runtime gated behind `model.openai_runtime ==
|
||||
"codex_app_server"`. Hermes' default tool dispatch is unchanged when this
|
||||
runtime is not selected.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import queue
|
||||
import subprocess
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
# Default minimum codex version we test against. The PR sets this from the
|
||||
# `codex --version` parsed at install time; bumping is a one-line change here.
|
||||
MIN_CODEX_VERSION = (0, 125, 0)
|
||||
|
||||
|
||||
@dataclass
|
||||
class CodexAppServerError(RuntimeError):
|
||||
"""Raised on JSON-RPC errors from the app-server."""
|
||||
|
||||
code: int
|
||||
message: str
|
||||
data: Optional[Any] = None
|
||||
|
||||
def __str__(self) -> str: # pragma: no cover - trivial
|
||||
return f"codex app-server error {self.code}: {self.message}"
|
||||
|
||||
|
||||
@dataclass
|
||||
class _Pending:
|
||||
queue: queue.Queue
|
||||
method: str
|
||||
sent_at: float = field(default_factory=time.time)
|
||||
|
||||
|
||||
class CodexAppServerClient:
|
||||
"""Minimal JSON-RPC 2.0 client for `codex app-server` over stdio.
|
||||
|
||||
Threading model:
|
||||
- Spawning thread (caller) drives request/response pairs synchronously.
|
||||
- One reader thread parses stdout, dispatches replies to the right
|
||||
pending future, and routes notifications + server-initiated requests
|
||||
to bounded queues that the caller drains on their own cadence.
|
||||
- One reader thread captures stderr for diagnostics; codex emits
|
||||
tracing logs there at RUST_LOG-controlled levels.
|
||||
|
||||
Intentionally NOT async. AIAgent.run_conversation() is synchronous and
|
||||
runs on the main thread; layering asyncio just to drive a stdio child
|
||||
creates surprising interrupt semantics. We use blocking queues with
|
||||
timeouts and rely on `turn/interrupt` for cancellation.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
codex_bin: str = "codex",
|
||||
codex_home: Optional[str] = None,
|
||||
extra_args: Optional[list[str]] = None,
|
||||
env: Optional[dict[str, str]] = None,
|
||||
) -> None:
|
||||
self._codex_bin = codex_bin
|
||||
cmd = [codex_bin, "app-server"] + list(extra_args or [])
|
||||
spawn_env = os.environ.copy()
|
||||
if env:
|
||||
spawn_env.update(env)
|
||||
if codex_home:
|
||||
spawn_env["CODEX_HOME"] = codex_home
|
||||
# Codex emits tracing to stderr; default WARN keeps it quiet for users.
|
||||
spawn_env.setdefault("RUST_LOG", "warn")
|
||||
|
||||
self._proc = subprocess.Popen(
|
||||
cmd,
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
bufsize=0,
|
||||
env=spawn_env,
|
||||
)
|
||||
self._next_id = 1
|
||||
self._pending: dict[int, _Pending] = {}
|
||||
self._pending_lock = threading.Lock()
|
||||
self._notifications: queue.Queue = queue.Queue()
|
||||
self._server_requests: queue.Queue = queue.Queue()
|
||||
self._stderr_lines: list[str] = []
|
||||
self._stderr_lock = threading.Lock()
|
||||
self._closed = False
|
||||
self._initialized = False
|
||||
|
||||
self._reader = threading.Thread(target=self._read_stdout, daemon=True)
|
||||
self._reader.start()
|
||||
self._stderr_reader = threading.Thread(target=self._read_stderr, daemon=True)
|
||||
self._stderr_reader.start()
|
||||
|
||||
# ---------- lifecycle ----------
|
||||
|
||||
def initialize(
|
||||
self,
|
||||
client_name: str = "hermes",
|
||||
client_title: str = "Hermes Agent",
|
||||
client_version: str = "0.1",
|
||||
capabilities: Optional[dict] = None,
|
||||
timeout: float = 10.0,
|
||||
) -> dict:
|
||||
"""Send `initialize` + `initialized` handshake. Returns the server's
|
||||
InitializeResponse (userAgent, codexHome, platformFamily, platformOs)."""
|
||||
if self._initialized:
|
||||
raise RuntimeError("already initialized")
|
||||
params = {
|
||||
"clientInfo": {
|
||||
"name": client_name,
|
||||
"title": client_title,
|
||||
"version": client_version,
|
||||
},
|
||||
"capabilities": capabilities or {},
|
||||
}
|
||||
result = self.request("initialize", params, timeout=timeout)
|
||||
self.notify("initialized")
|
||||
self._initialized = True
|
||||
return result
|
||||
|
||||
def close(self, timeout: float = 3.0) -> None:
|
||||
"""Close stdin and wait for the subprocess to exit, escalating to kill."""
|
||||
if self._closed:
|
||||
return
|
||||
self._closed = True
|
||||
try:
|
||||
if self._proc.stdin and not self._proc.stdin.closed:
|
||||
self._proc.stdin.close()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
self._proc.terminate()
|
||||
self._proc.wait(timeout=timeout)
|
||||
except subprocess.TimeoutExpired:
|
||||
try:
|
||||
self._proc.kill()
|
||||
self._proc.wait(timeout=1.0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def __enter__(self) -> "CodexAppServerClient":
|
||||
return self
|
||||
|
||||
def __exit__(self, *exc: Any) -> None:
|
||||
self.close()
|
||||
|
||||
# ---------- send/receive ----------
|
||||
|
||||
def request(
|
||||
self,
|
||||
method: str,
|
||||
params: Optional[dict] = None,
|
||||
timeout: float = 30.0,
|
||||
) -> dict:
|
||||
"""Send a JSON-RPC request and block on the response. Returns `result`,
|
||||
raises CodexAppServerError on `error`."""
|
||||
rid = self._take_id()
|
||||
q: queue.Queue = queue.Queue(maxsize=1)
|
||||
with self._pending_lock:
|
||||
self._pending[rid] = _Pending(queue=q, method=method)
|
||||
self._send({"id": rid, "method": method, "params": params or {}})
|
||||
try:
|
||||
msg = q.get(timeout=timeout)
|
||||
except queue.Empty:
|
||||
with self._pending_lock:
|
||||
self._pending.pop(rid, None)
|
||||
raise TimeoutError(
|
||||
f"codex app-server method {method!r} timed out after {timeout}s"
|
||||
)
|
||||
if "error" in msg:
|
||||
err = msg["error"]
|
||||
raise CodexAppServerError(
|
||||
code=err.get("code", -1),
|
||||
message=err.get("message", ""),
|
||||
data=err.get("data"),
|
||||
)
|
||||
return msg.get("result", {})
|
||||
|
||||
def notify(self, method: str, params: Optional[dict] = None) -> None:
|
||||
"""Send a JSON-RPC notification (no id, no response expected)."""
|
||||
self._send({"method": method, "params": params or {}})
|
||||
|
||||
def respond(self, request_id: Any, result: dict) -> None:
|
||||
"""Reply to a server-initiated request (e.g. approval prompts)."""
|
||||
self._send({"id": request_id, "result": result})
|
||||
|
||||
def respond_error(
|
||||
self, request_id: Any, code: int, message: str, data: Optional[Any] = None
|
||||
) -> None:
|
||||
"""Reply to a server-initiated request with an error."""
|
||||
err: dict[str, Any] = {"code": code, "message": message}
|
||||
if data is not None:
|
||||
err["data"] = data
|
||||
self._send({"id": request_id, "error": err})
|
||||
|
||||
def take_notification(self, timeout: float = 0.0) -> Optional[dict]:
|
||||
"""Pop the next streaming notification, or return None on timeout.
|
||||
|
||||
timeout=0.0 means non-blocking. Use small positive timeouts inside the
|
||||
AIAgent turn loop to interleave reads with interrupt checks."""
|
||||
try:
|
||||
if timeout <= 0:
|
||||
return self._notifications.get_nowait()
|
||||
return self._notifications.get(timeout=timeout)
|
||||
except queue.Empty:
|
||||
return None
|
||||
|
||||
def take_server_request(self, timeout: float = 0.0) -> Optional[dict]:
|
||||
"""Pop the next server-initiated request (e.g. exec/applyPatch approval)."""
|
||||
try:
|
||||
if timeout <= 0:
|
||||
return self._server_requests.get_nowait()
|
||||
return self._server_requests.get(timeout=timeout)
|
||||
except queue.Empty:
|
||||
return None
|
||||
|
||||
# ---------- diagnostics ----------
|
||||
|
||||
def stderr_tail(self, n: int = 20) -> list[str]:
|
||||
"""Return last n lines of codex's stderr (for error reports)."""
|
||||
with self._stderr_lock:
|
||||
return list(self._stderr_lines[-n:])
|
||||
|
||||
def is_alive(self) -> bool:
|
||||
return self._proc.poll() is None
|
||||
|
||||
# ---------- internals ----------
|
||||
|
||||
def _take_id(self) -> int:
|
||||
# JSON-RPC ids only need to be unique per-connection. A simple
|
||||
# monotonically increasing int is the common choice and matches what
|
||||
# codex's own clients use.
|
||||
rid = self._next_id
|
||||
self._next_id += 1
|
||||
return rid
|
||||
|
||||
def _send(self, obj: dict) -> None:
|
||||
if self._closed:
|
||||
raise RuntimeError("codex app-server client is closed")
|
||||
if self._proc.stdin is None:
|
||||
raise RuntimeError("codex app-server stdin not available")
|
||||
try:
|
||||
self._proc.stdin.write((json.dumps(obj) + "\n").encode("utf-8"))
|
||||
self._proc.stdin.flush()
|
||||
except (BrokenPipeError, ValueError) as exc:
|
||||
raise RuntimeError(
|
||||
f"codex app-server stdin closed unexpectedly: {exc}"
|
||||
) from exc
|
||||
|
||||
def _read_stdout(self) -> None:
|
||||
if self._proc.stdout is None:
|
||||
return
|
||||
try:
|
||||
for line in iter(self._proc.stdout.readline, b""):
|
||||
if not line:
|
||||
break
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
try:
|
||||
msg = json.loads(line)
|
||||
except json.JSONDecodeError:
|
||||
# Non-JSON output is unexpected on stdout; tracing belongs
|
||||
# on stderr. Surface it via stderr buffer for diagnostics.
|
||||
with self._stderr_lock:
|
||||
self._stderr_lines.append(
|
||||
f"<non-json on stdout> {line[:200]!r}"
|
||||
)
|
||||
continue
|
||||
self._dispatch(msg)
|
||||
except Exception as exc:
|
||||
with self._stderr_lock:
|
||||
self._stderr_lines.append(f"<stdout reader error> {exc}")
|
||||
|
||||
def _dispatch(self, msg: dict) -> None:
|
||||
# Reply (has id + result/error, no method)
|
||||
if "id" in msg and ("result" in msg or "error" in msg):
|
||||
with self._pending_lock:
|
||||
pending = self._pending.pop(msg["id"], None)
|
||||
if pending is not None:
|
||||
try:
|
||||
pending.queue.put_nowait(msg)
|
||||
except queue.Full: # pragma: no cover - defensive
|
||||
pass
|
||||
return
|
||||
# Server-initiated request (has id + method)
|
||||
if "id" in msg and "method" in msg:
|
||||
self._server_requests.put(msg)
|
||||
return
|
||||
# Notification (no id)
|
||||
if "method" in msg:
|
||||
self._notifications.put(msg)
|
||||
|
||||
def _read_stderr(self) -> None:
|
||||
if self._proc.stderr is None:
|
||||
return
|
||||
try:
|
||||
for line in iter(self._proc.stderr.readline, b""):
|
||||
if not line:
|
||||
break
|
||||
with self._stderr_lock:
|
||||
self._stderr_lines.append(
|
||||
line.decode("utf-8", "replace").rstrip()
|
||||
)
|
||||
# Bound memory: keep last 500 lines.
|
||||
if len(self._stderr_lines) > 500:
|
||||
self._stderr_lines = self._stderr_lines[-500:]
|
||||
except Exception: # pragma: no cover
|
||||
pass
|
||||
|
||||
|
||||
def parse_codex_version(output: str) -> Optional[tuple[int, int, int]]:
|
||||
"""Parse `codex --version` output. Returns (major, minor, patch) or None."""
|
||||
# Output format: "codex-cli 0.130.0" possibly followed by metadata.
|
||||
import re
|
||||
|
||||
match = re.search(r"(\d+)\.(\d+)\.(\d+)", output or "")
|
||||
if not match:
|
||||
return None
|
||||
return (int(match.group(1)), int(match.group(2)), int(match.group(3)))
|
||||
|
||||
|
||||
def check_codex_binary(
|
||||
codex_bin: str = "codex", min_version: tuple[int, int, int] = MIN_CODEX_VERSION
|
||||
) -> tuple[bool, str]:
|
||||
"""Verify codex CLI is installed and meets minimum version.
|
||||
|
||||
Returns (ok, message). Used by setup wizard and runtime startup."""
|
||||
try:
|
||||
proc = subprocess.run(
|
||||
[codex_bin, "--version"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10,
|
||||
)
|
||||
except FileNotFoundError:
|
||||
return False, (
|
||||
f"codex CLI not found at {codex_bin!r}. Install with: "
|
||||
f"npm i -g @openai/codex"
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
return False, "codex --version timed out"
|
||||
if proc.returncode != 0:
|
||||
return False, f"codex --version exited {proc.returncode}: {proc.stderr.strip()}"
|
||||
version = parse_codex_version(proc.stdout)
|
||||
if version is None:
|
||||
return False, f"could not parse codex version from: {proc.stdout!r}"
|
||||
if version < min_version:
|
||||
return False, (
|
||||
f"codex {'.'.join(map(str, version))} is older than required "
|
||||
f"{'.'.join(map(str, min_version))}. Run: npm i -g @openai/codex"
|
||||
)
|
||||
return True, ".".join(map(str, version))
|
||||
@@ -1,810 +0,0 @@
|
||||
"""Session adapter for codex app-server runtime.
|
||||
|
||||
Owns one Codex thread per Hermes session. Drives `turn/start`, consumes
|
||||
streaming notifications via CodexEventProjector, handles server-initiated
|
||||
approval requests (apply_patch, exec command), translates cancellation,
|
||||
and returns a clean turn result that AIAgent.run_conversation() can splice
|
||||
into its `messages` list.
|
||||
|
||||
Lifecycle:
|
||||
session = CodexAppServerSession(cwd="/home/x/proj")
|
||||
session.ensure_started() # spawns + handshake + thread/start
|
||||
result = session.run_turn(user_input="hello") # blocks until turn/completed
|
||||
# result.final_text → assistant text returned to caller
|
||||
# result.projected_messages → list of {role, content, ...} for messages list
|
||||
# result.tool_iterations → how many tool-shaped items completed (skill nudge counter)
|
||||
# result.interrupted → True if Ctrl+C / interrupt_requested fired mid-turn
|
||||
session.close() # tears down subprocess
|
||||
|
||||
Threading model: the adapter is single-threaded from the caller's perspective.
|
||||
The underlying CodexAppServerClient owns its own reader threads but exposes
|
||||
blocking-with-timeout queues that this adapter polls in a loop, so the run_turn
|
||||
call is synchronous and behaves like AIAgent's existing chat_completions loop.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
from agent.redact import redact_sensitive_text
|
||||
from agent.transports.codex_app_server import (
|
||||
CodexAppServerClient,
|
||||
CodexAppServerError,
|
||||
)
|
||||
from agent.transports.codex_event_projector import CodexEventProjector
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# How many tailing stderr lines from the codex subprocess to attach to a
|
||||
# user-facing error when we don't have a more specific classification (OAuth,
|
||||
# wedge watchdog, etc.). Small enough to keep error messages legible, large
|
||||
# enough to surface a config/provider/auth diagnostic.
|
||||
_STDERR_TAIL_LINES = 12
|
||||
|
||||
|
||||
# Permission profile mapping mirrors the docstring in PR proposal:
|
||||
# Hermes' tools.terminal.security_mode → Codex's permissions profile id.
|
||||
# Defaults if config is missing → workspace-write (matches Codex's own default).
|
||||
_HERMES_TO_CODEX_PERMISSION_PROFILE = {
|
||||
"auto": "workspace-write",
|
||||
"approval-required": "read-only-with-approval",
|
||||
"unrestricted": "full-access",
|
||||
# Backstop alias used by some skills/tests.
|
||||
"yolo": "full-access",
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class TurnResult:
|
||||
"""Result of one user→assistant→tool turn through the codex app-server."""
|
||||
|
||||
final_text: str = ""
|
||||
projected_messages: list[dict] = field(default_factory=list)
|
||||
tool_iterations: int = 0
|
||||
interrupted: bool = False
|
||||
error: Optional[str] = None # Set if turn ended in a non-recoverable error
|
||||
turn_id: Optional[str] = None
|
||||
thread_id: Optional[str] = None
|
||||
# Hint to the caller that the underlying codex subprocess is likely
|
||||
# wedged (turn-level timeout fired, post-tool watchdog tripped, or
|
||||
# token-refresh failure killed the child). The caller should retire
|
||||
# the session so the next turn respawns codex from scratch instead
|
||||
# of riding a CPU-spinning or auth-broken process. Mirrors openclaw
|
||||
# beta.8's "retire timed-out app-server clients" fix.
|
||||
should_retire: bool = False
|
||||
|
||||
|
||||
# Markers we accept as terminal even when codex never emits turn/completed.
|
||||
# Some codex versions stream `<turn_aborted>` as raw text in agentMessage
|
||||
# items when an interrupt or upstream error tears the turn down before the
|
||||
# normal completion path fires. Mirrors openclaw beta.8 fix.
|
||||
_TURN_ABORTED_MARKERS = ("<turn_aborted>", "<turn_aborted/>")
|
||||
|
||||
|
||||
# Substrings in codex stderr / JSON-RPC error messages that signal the
|
||||
# subprocess died because its OAuth credentials are no longer valid.
|
||||
# Kept conservative: we only redirect users to `codex login` when we're
|
||||
# reasonably sure that's the actual failure, otherwise we surface the
|
||||
# original error verbatim. Mirrors openclaw beta.8's auth-refresh
|
||||
# classification.
|
||||
_OAUTH_REFRESH_FAILURE_HINTS = (
|
||||
"invalid_grant",
|
||||
"invalid grant",
|
||||
"refresh token",
|
||||
"refresh_token",
|
||||
"token refresh",
|
||||
"token_refresh",
|
||||
"token has expired",
|
||||
"expired_token",
|
||||
"expired token",
|
||||
"not authenticated",
|
||||
"unauthenticated",
|
||||
"unauthorized",
|
||||
"401 unauthorized",
|
||||
"re-authenticate",
|
||||
"reauthenticate",
|
||||
"please log in",
|
||||
"please login",
|
||||
"auth profile",
|
||||
"no auth profile",
|
||||
"oauth",
|
||||
)
|
||||
|
||||
|
||||
def _classify_oauth_failure(*parts: str) -> Optional[str]:
|
||||
"""Return a user-friendly re-auth hint if any of the provided strings
|
||||
look like a codex OAuth/token-refresh failure; otherwise None.
|
||||
|
||||
Used for both `turn/start` JSON-RPC errors and post-mortem stderr
|
||||
inspection when the subprocess exits unexpectedly. Conservative on
|
||||
purpose — we only redirect users to `codex login` when the signal
|
||||
is strong, so unrelated runtime failures still surface verbatim.
|
||||
"""
|
||||
haystack = " ".join(p for p in parts if p).lower()
|
||||
if not haystack:
|
||||
return None
|
||||
for needle in _OAUTH_REFRESH_FAILURE_HINTS:
|
||||
if needle in haystack:
|
||||
return (
|
||||
"Codex authentication failed — your ChatGPT/Codex login "
|
||||
"looks expired or invalid. Run `codex login` to refresh, "
|
||||
"then retry. (Fall back to default runtime with "
|
||||
"`/codex-runtime auto` if the issue persists.)"
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
@dataclass
|
||||
class _ServerRequestRouting:
|
||||
"""Default policies for codex-side approval requests when no interactive
|
||||
callback is wired in. These are only used by tests + cron / non-interactive
|
||||
contexts; the live CLI path passes an approval_callback that defers to
|
||||
tools.approval.prompt_dangerous_approval()."""
|
||||
|
||||
auto_approve_exec: bool = False
|
||||
auto_approve_apply_patch: bool = False
|
||||
|
||||
|
||||
class CodexAppServerSession:
|
||||
"""One Codex thread per Hermes session, lifetime owned by AIAgent.
|
||||
|
||||
Not thread-safe — one caller drives it at a time, matching how AIAgent's
|
||||
run_conversation() loop is structured today. The codex client itself can
|
||||
handle interleaved reads/writes via its own threads, but the adapter's
|
||||
state (projector, thread_id, turn counter) is owned by the caller thread.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
cwd: Optional[str] = None,
|
||||
codex_bin: str = "codex",
|
||||
codex_home: Optional[str] = None,
|
||||
permission_profile: Optional[str] = None,
|
||||
approval_callback: Optional[Callable[..., str]] = None,
|
||||
on_event: Optional[Callable[[dict], None]] = None,
|
||||
request_routing: Optional[_ServerRequestRouting] = None,
|
||||
client_factory: Optional[Callable[..., CodexAppServerClient]] = None,
|
||||
) -> None:
|
||||
self._cwd = cwd or os.getcwd()
|
||||
self._codex_bin = codex_bin
|
||||
self._codex_home = codex_home
|
||||
self._permission_profile = (
|
||||
permission_profile or _HERMES_TO_CODEX_PERMISSION_PROFILE.get(
|
||||
os.environ.get("HERMES_TERMINAL_SECURITY_MODE", "auto"),
|
||||
"workspace-write",
|
||||
)
|
||||
)
|
||||
self._approval_callback = approval_callback
|
||||
self._on_event = on_event # Display hook (kawaii spinner ticks etc.)
|
||||
self._routing = request_routing or _ServerRequestRouting()
|
||||
self._client_factory = client_factory or CodexAppServerClient
|
||||
|
||||
self._client: Optional[CodexAppServerClient] = None
|
||||
self._thread_id: Optional[str] = None
|
||||
self._interrupt_event = threading.Event()
|
||||
# Pending file-change items, keyed by item id. Populated on
|
||||
# item/started for fileChange items; consumed by the approval
|
||||
# bridge when codex sends item/fileChange/requestApproval. The
|
||||
# approval params don't carry the changeset, so we cache here
|
||||
# to surface a real summary in the approval prompt (quirk #4).
|
||||
self._pending_file_changes: dict[str, str] = {}
|
||||
self._closed = False
|
||||
|
||||
# ---------- lifecycle ----------
|
||||
|
||||
def ensure_started(self) -> str:
|
||||
"""Spawn the subprocess, do the initialize handshake, and start a
|
||||
thread. Returns the codex thread id. Idempotent — repeated calls
|
||||
return the same thread id."""
|
||||
if self._thread_id is not None:
|
||||
return self._thread_id
|
||||
if self._client is None:
|
||||
self._client = self._client_factory(
|
||||
codex_bin=self._codex_bin, codex_home=self._codex_home
|
||||
)
|
||||
self._client.initialize(
|
||||
client_name="hermes",
|
||||
client_title="Hermes Agent",
|
||||
client_version=_get_hermes_version(),
|
||||
)
|
||||
# Permission selection is intentionally NOT sent on thread/start.
|
||||
# Two reasons (live-tested against codex 0.130.0):
|
||||
# 1. `thread/start.permissions` is gated behind the experimentalApi
|
||||
# capability on this codex version — we'd have to opt in during
|
||||
# initialize and accept the unstable surface.
|
||||
# 2. Even with experimentalApi declared and the correct shape
|
||||
# (`{"type": "profile", "id": "..."}`, not `{"profileId": ...}`),
|
||||
# codex requires a matching `[permissions]` table in
|
||||
# ~/.codex/config.toml or it fails the request with
|
||||
# 'default_permissions requires a [permissions] table'.
|
||||
# Letting codex pick its default (`:read-only` unless the user has
|
||||
# configured otherwise in their codex config.toml) is the standard
|
||||
# codex CLI workflow and avoids fighting codex's own validation.
|
||||
# Users who want a write-capable profile configure it in their
|
||||
# ~/.codex/config.toml the same way they would for any codex usage.
|
||||
params: dict[str, Any] = {"cwd": self._cwd}
|
||||
result = self._client.request("thread/start", params, timeout=15)
|
||||
# Cross-fill thread.id/sessionId — different codex versions have
|
||||
# serialized this under either key. Mirrors openclaw beta.8's
|
||||
# tolerance fix so future codex drops/renames don't KeyError us
|
||||
# at handshake time.
|
||||
thread_obj = result.get("thread") or {}
|
||||
thread_id = (
|
||||
thread_obj.get("id")
|
||||
or thread_obj.get("sessionId")
|
||||
or result.get("sessionId")
|
||||
or result.get("threadId")
|
||||
)
|
||||
if not thread_id:
|
||||
raise CodexAppServerError(
|
||||
code=-32603,
|
||||
message=(
|
||||
"codex thread/start returned no thread id "
|
||||
f"(payload keys: {sorted(result.keys())})"
|
||||
),
|
||||
)
|
||||
self._thread_id = thread_id
|
||||
logger.info(
|
||||
"codex app-server thread started: id=%s profile=%s cwd=%s",
|
||||
self._thread_id[:8],
|
||||
self._permission_profile,
|
||||
self._cwd,
|
||||
)
|
||||
return self._thread_id
|
||||
|
||||
def close(self) -> None:
|
||||
if self._closed:
|
||||
return
|
||||
self._closed = True
|
||||
if self._client is not None:
|
||||
try:
|
||||
self._client.close()
|
||||
except Exception: # pragma: no cover - best-effort cleanup
|
||||
pass
|
||||
self._client = None
|
||||
self._thread_id = None
|
||||
|
||||
def __enter__(self) -> "CodexAppServerSession":
|
||||
return self
|
||||
|
||||
def __exit__(self, *exc: Any) -> None:
|
||||
self.close()
|
||||
|
||||
# ---------- interrupt ----------
|
||||
|
||||
def request_interrupt(self) -> None:
|
||||
"""Idempotent: signal the active turn loop to issue turn/interrupt
|
||||
and unwind. Called by AIAgent's _interrupt_requested path."""
|
||||
self._interrupt_event.set()
|
||||
|
||||
# ---------- diagnostics ----------
|
||||
|
||||
def _format_error_with_stderr(
|
||||
self,
|
||||
prefix: str,
|
||||
exc: Any = "",
|
||||
*,
|
||||
tail_lines: int = _STDERR_TAIL_LINES,
|
||||
) -> str:
|
||||
"""Build a user-facing error string for codex failures.
|
||||
|
||||
Appends the last few lines of codex's stderr buffer when available,
|
||||
passed through agent.redact with force=True so secrets in provider
|
||||
error responses (auth headers, query-string tokens, sk-* keys) never
|
||||
leak into chat output or trajectories. The codex CLI's own error
|
||||
text ('Internal error', 'turn/start failed: ...') is otherwise
|
||||
opaque and forces users to re-run with verbose flags to diagnose
|
||||
config / provider / auth-bridge problems.
|
||||
|
||||
Use this for the generic / catch-all branches. Specific
|
||||
classifications (OAuth via _classify_oauth_failure, post-tool wedge
|
||||
watchdog) already produce a clean hint and should be used instead.
|
||||
"""
|
||||
exc_str = str(exc) if exc != "" and exc is not None else ""
|
||||
base = f"{prefix}: {exc_str}" if exc_str else prefix
|
||||
if self._client is None:
|
||||
return base
|
||||
try:
|
||||
tail = self._client.stderr_tail(tail_lines)
|
||||
except Exception: # pragma: no cover - diagnostic best-effort
|
||||
return base
|
||||
if not tail:
|
||||
return base
|
||||
joined = "\n".join(line.rstrip() for line in tail if line)
|
||||
if not joined.strip():
|
||||
return base
|
||||
redacted = redact_sensitive_text(joined, force=True)
|
||||
return f"{base}\ncodex stderr (last {len(tail)} lines):\n{redacted}"
|
||||
|
||||
# ---------- per-turn ----------
|
||||
|
||||
def run_turn(
|
||||
self,
|
||||
user_input: str,
|
||||
*,
|
||||
turn_timeout: float = 600.0,
|
||||
notification_poll_timeout: float = 0.25,
|
||||
post_tool_quiet_timeout: float = 90.0,
|
||||
) -> TurnResult:
|
||||
"""Send a user message and block until turn/completed, while
|
||||
forwarding server-initiated approval requests and projecting items
|
||||
into Hermes' messages shape.
|
||||
|
||||
post_tool_quiet_timeout: if codex emits a tool completion and then
|
||||
goes quiet for this many seconds without emitting another item or
|
||||
`turn/completed`, fast-fail and mark the session for retirement.
|
||||
Mirrors openclaw beta.8's post-tool completion watchdog (#81697)
|
||||
so a wedged codex doesn't burn the full turn deadline.
|
||||
"""
|
||||
# Pre-create the result so startup failures (codex subprocess can't
|
||||
# spawn, initialize handshake rejects, thread/start blows up) surface
|
||||
# the same way per-turn failures do — with a TurnResult.error string
|
||||
# the caller can render — instead of bubbling raw codex exceptions
|
||||
# up to AIAgent.run_conversation.
|
||||
result = TurnResult()
|
||||
try:
|
||||
self.ensure_started()
|
||||
except (CodexAppServerError, TimeoutError) as exc:
|
||||
result.error = self._format_error_with_stderr(
|
||||
"codex app-server startup failed", exc
|
||||
)
|
||||
# Subprocess almost certainly unhealthy — retire so the next
|
||||
# turn re-spawns cleanly.
|
||||
result.should_retire = True
|
||||
return result
|
||||
assert self._client is not None and self._thread_id is not None
|
||||
result.thread_id = self._thread_id
|
||||
|
||||
self._interrupt_event.clear()
|
||||
projector = CodexEventProjector()
|
||||
|
||||
# Send turn/start with the user input. Text-only for now (codex
|
||||
# supports rich content but Hermes' text path is the common case).
|
||||
try:
|
||||
ts = self._client.request(
|
||||
"turn/start",
|
||||
{
|
||||
"threadId": self._thread_id,
|
||||
"input": [{"type": "text", "text": user_input}],
|
||||
},
|
||||
timeout=10,
|
||||
)
|
||||
except CodexAppServerError as exc:
|
||||
# Classify auth/refresh failures so the user gets a clear
|
||||
# `codex login` pointer instead of a raw RPC error string.
|
||||
stderr_blob = "\n".join(self._client.stderr_tail(40))
|
||||
hint = _classify_oauth_failure(exc.message, stderr_blob)
|
||||
if hint is not None:
|
||||
result.error = hint
|
||||
# Subprocess is fine on a JSON-RPC level here, but the
|
||||
# token store is broken — retire so the next turn does a
|
||||
# clean handshake (and the user has a chance to re-auth
|
||||
# via `codex login` between turns).
|
||||
result.should_retire = True
|
||||
else:
|
||||
result.error = self._format_error_with_stderr(
|
||||
"turn/start failed", exc
|
||||
)
|
||||
return result
|
||||
except TimeoutError as exc:
|
||||
# turn/start hanging is a strong signal the subprocess is wedged.
|
||||
stderr_blob = "\n".join(self._client.stderr_tail(40))
|
||||
hint = _classify_oauth_failure(stderr_blob)
|
||||
result.error = hint or self._format_error_with_stderr(
|
||||
"turn/start timed out", exc
|
||||
)
|
||||
result.should_retire = True
|
||||
return result
|
||||
|
||||
result.turn_id = (ts.get("turn") or {}).get("id")
|
||||
deadline = time.time() + turn_timeout
|
||||
turn_complete = False
|
||||
# Post-tool watchdog state. last_tool_completion_at is set whenever
|
||||
# a tool-shaped item completes; if no further notification arrives
|
||||
# within post_tool_quiet_timeout and the turn hasn't completed, we
|
||||
# fast-fail and retire the session.
|
||||
last_tool_completion_at: Optional[float] = None
|
||||
|
||||
while time.time() < deadline and not turn_complete:
|
||||
if self._interrupt_event.is_set():
|
||||
self._issue_interrupt(result.turn_id)
|
||||
result.interrupted = True
|
||||
break
|
||||
|
||||
# Detect a dead subprocess between iterations. If codex exited
|
||||
# (e.g. crashed, segfaulted, or its auth refresh thread killed
|
||||
# the process), we won't get any more notifications — bail out
|
||||
# rather than waiting for the full turn deadline.
|
||||
if not self._client.is_alive():
|
||||
stderr_blob = "\n".join(self._client.stderr_tail(60))
|
||||
hint = _classify_oauth_failure(stderr_blob)
|
||||
if hint is not None:
|
||||
result.error = hint
|
||||
else:
|
||||
result.error = self._format_error_with_stderr(
|
||||
"codex app-server subprocess exited unexpectedly",
|
||||
tail_lines=20,
|
||||
)
|
||||
result.should_retire = True
|
||||
break
|
||||
|
||||
# Post-tool watchdog: if a tool completion was the most recent
|
||||
# signal and codex has been silent past the quiet timeout, give
|
||||
# up on this turn instead of waiting for the outer deadline.
|
||||
if (
|
||||
last_tool_completion_at is not None
|
||||
and (time.time() - last_tool_completion_at)
|
||||
> post_tool_quiet_timeout
|
||||
):
|
||||
self._issue_interrupt(result.turn_id)
|
||||
result.interrupted = True
|
||||
result.error = (
|
||||
f"codex went silent for "
|
||||
f"{post_tool_quiet_timeout:.0f}s after a tool result; "
|
||||
f"retiring app-server session."
|
||||
)
|
||||
result.should_retire = True
|
||||
break
|
||||
|
||||
# Drain any server-initiated requests (approvals) before
|
||||
# reading notifications, so the codex side isn't blocked.
|
||||
sreq = self._client.take_server_request(timeout=0)
|
||||
if sreq is not None:
|
||||
# Drain any pending notifications first so per-turn state
|
||||
# (e.g. _pending_file_changes for fileChange approvals) is
|
||||
# up to date when we make the approval decision. Bounded
|
||||
# to avoid starving the server-request response.
|
||||
for _ in range(8):
|
||||
pending = self._client.take_notification(timeout=0)
|
||||
if pending is None:
|
||||
break
|
||||
self._track_pending_file_change(pending)
|
||||
proj = projector.project(pending)
|
||||
if proj.messages:
|
||||
result.projected_messages.extend(proj.messages)
|
||||
if proj.is_tool_iteration:
|
||||
result.tool_iterations += 1
|
||||
last_tool_completion_at = time.time()
|
||||
if proj.final_text is not None:
|
||||
result.final_text = proj.final_text
|
||||
if _has_turn_aborted_marker(proj.final_text):
|
||||
turn_complete = True
|
||||
result.interrupted = True
|
||||
result.error = (
|
||||
result.error
|
||||
or "codex reported turn_aborted"
|
||||
)
|
||||
self._handle_server_request(sreq)
|
||||
# Activity counts as live signal — reset the post-tool
|
||||
# quiet timer so an approval round-trip doesn't trip it.
|
||||
last_tool_completion_at = None
|
||||
continue
|
||||
|
||||
note = self._client.take_notification(
|
||||
timeout=notification_poll_timeout
|
||||
)
|
||||
if note is None:
|
||||
continue
|
||||
|
||||
method = note.get("method", "")
|
||||
if self._on_event is not None:
|
||||
try:
|
||||
self._on_event(note)
|
||||
except Exception: # pragma: no cover - display callback
|
||||
logger.debug("on_event callback raised", exc_info=True)
|
||||
|
||||
# Track in-progress fileChange items so the approval bridge
|
||||
# can surface a real change summary when codex requests
|
||||
# approval (the approval params themselves don't carry the
|
||||
# changeset). Quirk #4 fix.
|
||||
self._track_pending_file_change(note)
|
||||
|
||||
# Project into messages
|
||||
projection = projector.project(note)
|
||||
if projection.messages:
|
||||
result.projected_messages.extend(projection.messages)
|
||||
if projection.is_tool_iteration:
|
||||
result.tool_iterations += 1
|
||||
# Arm/refresh the post-tool quiet watchdog whenever a
|
||||
# tool-shaped item completes.
|
||||
last_tool_completion_at = time.time()
|
||||
else:
|
||||
# Any non-tool projected activity (assistant message,
|
||||
# status update, etc.) means codex is still producing
|
||||
# output — clear the quiet timer so we don't fast-fail.
|
||||
if projection.messages or projection.final_text is not None:
|
||||
last_tool_completion_at = None
|
||||
if projection.final_text is not None:
|
||||
# Codex can emit multiple agentMessage items in one turn
|
||||
# (e.g. partial then final). Take the last one as canonical.
|
||||
result.final_text = projection.final_text
|
||||
# Some codex builds tear a turn down by emitting a
|
||||
# `<turn_aborted>` marker in the agent message text and
|
||||
# never sending turn/completed. Treat the marker itself
|
||||
# as terminal so we don't burn the full deadline.
|
||||
if _has_turn_aborted_marker(projection.final_text):
|
||||
turn_complete = True
|
||||
result.interrupted = True
|
||||
result.error = (
|
||||
result.error or "codex reported turn_aborted"
|
||||
)
|
||||
|
||||
if method == "turn/completed":
|
||||
turn_complete = True
|
||||
turn_status = (
|
||||
(note.get("params") or {}).get("turn") or {}
|
||||
).get("status")
|
||||
if turn_status and turn_status not in ("completed", "interrupted"):
|
||||
err_obj = (
|
||||
(note.get("params") or {}).get("turn") or {}
|
||||
).get("error")
|
||||
if err_obj:
|
||||
err_msg = err_obj.get("message") or str(err_obj)
|
||||
# If the turn failed for an auth/refresh reason,
|
||||
# rewrite the error into a re-auth hint AND mark
|
||||
# the session for retirement.
|
||||
stderr_blob = "\n".join(
|
||||
self._client.stderr_tail(40)
|
||||
)
|
||||
hint = _classify_oauth_failure(err_msg, stderr_blob)
|
||||
if hint is not None:
|
||||
result.error = hint
|
||||
result.should_retire = True
|
||||
else:
|
||||
result.error = self._format_error_with_stderr(
|
||||
f"turn ended status={turn_status}", err_msg
|
||||
)
|
||||
|
||||
if not turn_complete and not result.interrupted:
|
||||
# Hit the deadline. Issue interrupt to stop wasted compute, and
|
||||
# tell the caller to retire the session — a turn that never
|
||||
# finished is a strong sign codex is wedged in a way the next
|
||||
# turn shouldn't inherit.
|
||||
self._issue_interrupt(result.turn_id)
|
||||
result.interrupted = True
|
||||
if not result.error:
|
||||
result.error = self._format_error_with_stderr(
|
||||
f"turn timed out after {turn_timeout}s"
|
||||
)
|
||||
result.should_retire = True
|
||||
|
||||
return result
|
||||
|
||||
# ---------- internals ----------
|
||||
|
||||
def _issue_interrupt(self, turn_id: Optional[str]) -> None:
|
||||
if self._client is None or self._thread_id is None or turn_id is None:
|
||||
return
|
||||
try:
|
||||
self._client.request(
|
||||
"turn/interrupt",
|
||||
{"threadId": self._thread_id, "turnId": turn_id},
|
||||
timeout=5,
|
||||
)
|
||||
except CodexAppServerError as exc:
|
||||
# "no active turn to interrupt" is fine — already done.
|
||||
logger.debug("turn/interrupt non-fatal: %s", exc)
|
||||
except TimeoutError:
|
||||
logger.warning("turn/interrupt timed out")
|
||||
|
||||
def _handle_server_request(self, req: dict) -> None:
|
||||
"""Translate a codex server request (approval) into Hermes' approval
|
||||
flow, then send the response.
|
||||
|
||||
Method names verified live against codex 0.130.0 (Apr 2026):
|
||||
item/commandExecution/requestApproval — exec approvals
|
||||
item/fileChange/requestApproval — apply_patch approvals
|
||||
item/permissions/requestApproval — permissions changes
|
||||
(we decline; user controls
|
||||
permission profile in
|
||||
~/.codex/config.toml).
|
||||
"""
|
||||
if self._client is None:
|
||||
return
|
||||
method = req.get("method", "")
|
||||
rid = req.get("id")
|
||||
params = req.get("params") or {}
|
||||
|
||||
if method == "item/commandExecution/requestApproval":
|
||||
decision = self._decide_exec_approval(params)
|
||||
self._client.respond(rid, {"decision": decision})
|
||||
elif method == "item/fileChange/requestApproval":
|
||||
decision = self._decide_apply_patch_approval(params)
|
||||
self._client.respond(rid, {"decision": decision})
|
||||
elif method == "item/permissions/requestApproval":
|
||||
# Codex sometimes asks to escalate permissions mid-turn. We
|
||||
# always decline — the user already chose their permission
|
||||
# profile in ~/.codex/config.toml and surprise escalations
|
||||
# shouldn't be silently accepted.
|
||||
self._client.respond(rid, {"decision": "decline"})
|
||||
elif method == "mcpServer/elicitation/request":
|
||||
# Codex's MCP layer asks the user for structured input on
|
||||
# behalf of an MCP server (e.g. tool-call confirmation,
|
||||
# OAuth, form data). For our own hermes-tools callback we
|
||||
# auto-accept — the user already approved Hermes' tools
|
||||
# by enabling the runtime, and we never expose anything
|
||||
# codex's built-in shell can't already do. For other MCP
|
||||
# servers we decline so the user explicitly opts in via
|
||||
# codex's own auth flow.
|
||||
server_name = params.get("serverName") or ""
|
||||
if server_name == "hermes-tools":
|
||||
self._client.respond(
|
||||
rid,
|
||||
{"action": "accept", "content": None, "_meta": None},
|
||||
)
|
||||
else:
|
||||
self._client.respond(
|
||||
rid,
|
||||
{"action": "decline", "content": None, "_meta": None},
|
||||
)
|
||||
else:
|
||||
# Unknown server request — codex can extend this surface. Reject
|
||||
# cleanly so codex doesn't hang waiting for us.
|
||||
logger.warning("Unknown codex server request: %s", method)
|
||||
self._client.respond_error(
|
||||
rid, code=-32601, message=f"Unsupported method: {method}"
|
||||
)
|
||||
|
||||
def _decide_exec_approval(self, params: dict) -> str:
|
||||
if self._routing.auto_approve_exec:
|
||||
return "accept"
|
||||
command = params.get("command") or ""
|
||||
# Codex's CommandExecutionRequestApprovalParams has cwd as Optional —
|
||||
# fall back to the session's cwd when codex doesn't include it so the
|
||||
# approval prompt is never empty (quirk #10 fix).
|
||||
cwd = params.get("cwd") or self._cwd or "<unknown>"
|
||||
reason = params.get("reason")
|
||||
description = f"Codex requests exec in {cwd}"
|
||||
if reason:
|
||||
description += f" — {reason}"
|
||||
if self._approval_callback is not None:
|
||||
try:
|
||||
choice = self._approval_callback(
|
||||
command, description, allow_permanent=False
|
||||
)
|
||||
return _approval_choice_to_codex_decision(choice)
|
||||
except Exception:
|
||||
logger.exception("approval_callback raised on exec request")
|
||||
return "decline"
|
||||
return "decline" # fail-closed when no callback wired
|
||||
|
||||
def _decide_apply_patch_approval(self, params: dict) -> str:
|
||||
if self._routing.auto_approve_apply_patch:
|
||||
return "accept"
|
||||
if self._approval_callback is not None:
|
||||
# FileChangeRequestApprovalParams gives us reason + grantRoot.
|
||||
# The actual changeset lives on the corresponding fileChange
|
||||
# item which the projector has already cached for us — look it
|
||||
# up by item_id so the user sees what's actually changing.
|
||||
reason = params.get("reason")
|
||||
grant_root = params.get("grantRoot")
|
||||
item_id = params.get("itemId") or ""
|
||||
change_summary = self._lookup_pending_file_change(item_id)
|
||||
description_parts = []
|
||||
if reason:
|
||||
description_parts.append(reason)
|
||||
if change_summary:
|
||||
description_parts.append(change_summary)
|
||||
if grant_root:
|
||||
description_parts.append(f"grants write to {grant_root}")
|
||||
description = (
|
||||
"; ".join(description_parts)
|
||||
if description_parts
|
||||
else "Codex requests to apply a patch"
|
||||
)
|
||||
command_label = (
|
||||
f"apply_patch: {change_summary}" if change_summary
|
||||
else f"apply_patch: {reason}" if reason
|
||||
else "apply_patch"
|
||||
)
|
||||
try:
|
||||
choice = self._approval_callback(
|
||||
command_label,
|
||||
description,
|
||||
allow_permanent=False,
|
||||
)
|
||||
return _approval_choice_to_codex_decision(choice)
|
||||
except Exception:
|
||||
logger.exception("approval_callback raised on apply_patch")
|
||||
return "decline"
|
||||
return "decline"
|
||||
|
||||
def _track_pending_file_change(self, note: dict) -> None:
|
||||
"""Maintain self._pending_file_changes from item/started + item/completed
|
||||
notifications. Lets the apply_patch approval prompt show what's
|
||||
actually changing — codex's approval params don't carry the data."""
|
||||
method = note.get("method", "")
|
||||
params = note.get("params") or {}
|
||||
item = params.get("item") or {}
|
||||
if item.get("type") != "fileChange":
|
||||
return
|
||||
item_id = item.get("id") or ""
|
||||
if not item_id:
|
||||
return
|
||||
if method == "item/started":
|
||||
changes = item.get("changes") or []
|
||||
if not changes:
|
||||
self._pending_file_changes[item_id] = "1 change pending"
|
||||
return
|
||||
kinds: dict[str, int] = {}
|
||||
paths: list[str] = []
|
||||
for ch in changes:
|
||||
if not isinstance(ch, dict):
|
||||
continue
|
||||
kind = (ch.get("kind") or {}).get("type") or "update"
|
||||
kinds[kind] = kinds.get(kind, 0) + 1
|
||||
p = ch.get("path") or ""
|
||||
if p:
|
||||
paths.append(p)
|
||||
counts = ", ".join(f"{n} {k}" for k, n in sorted(kinds.items()))
|
||||
preview = ", ".join(paths[:3])
|
||||
if len(paths) > 3:
|
||||
preview += f", +{len(paths) - 3} more"
|
||||
self._pending_file_changes[item_id] = (
|
||||
f"{counts}: {preview}" if preview else counts
|
||||
)
|
||||
elif method == "item/completed":
|
||||
self._pending_file_changes.pop(item_id, None)
|
||||
|
||||
def _lookup_pending_file_change(self, item_id: str) -> Optional[str]:
|
||||
"""Look up an in-progress fileChange item by id and summarize its
|
||||
changes for the approval prompt. Returns None when we don't have
|
||||
the item cached (e.g. approval arrived before item/started, or
|
||||
fileChange item content not tracked yet)."""
|
||||
if not item_id:
|
||||
return None
|
||||
cached = self._pending_file_changes.get(item_id)
|
||||
if not cached:
|
||||
return None
|
||||
return cached
|
||||
|
||||
|
||||
def _approval_choice_to_codex_decision(choice: str) -> str:
|
||||
"""Map Hermes approval choices onto codex's CommandExecutionApprovalDecision
|
||||
/ FileChangeApprovalDecision wire values.
|
||||
|
||||
Hermes returns 'once', 'session', 'always', or 'deny'.
|
||||
Codex expects 'accept', 'acceptForSession', 'decline', or 'cancel'
|
||||
(verified against codex-rs/app-server-protocol/src/protocol/v2/item.rs
|
||||
on codex 0.130.0).
|
||||
"""
|
||||
if choice in ("once",):
|
||||
return "accept"
|
||||
if choice in ("session", "always"):
|
||||
return "acceptForSession"
|
||||
return "decline"
|
||||
|
||||
|
||||
def _has_turn_aborted_marker(text: str) -> bool:
|
||||
"""Return True if `text` contains any of the raw markers codex uses
|
||||
to signal a turn was aborted without emitting `turn/completed`.
|
||||
|
||||
Codex emits `<turn_aborted>` (and sometimes `<turn_aborted/>`) as raw
|
||||
text inside agentMessage items when an interrupt or upstream error
|
||||
tears the turn down before the normal completion path fires. Mirrors
|
||||
openclaw beta.8's terminal-marker fix so we don't burn the full turn
|
||||
deadline waiting for a turn/completed that never comes.
|
||||
"""
|
||||
if not text:
|
||||
return False
|
||||
for marker in _TURN_ABORTED_MARKERS:
|
||||
if marker in text:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _get_hermes_version() -> str:
|
||||
"""Best-effort Hermes version string for codex's userAgent line."""
|
||||
try:
|
||||
from importlib.metadata import version
|
||||
|
||||
return version("hermes-agent")
|
||||
except Exception: # pragma: no cover
|
||||
return "0.0.0"
|
||||
@@ -1,312 +0,0 @@
|
||||
"""Projects codex app-server events into Hermes' messages list.
|
||||
|
||||
The translator that lets Hermes' memory/skill review keep working under the
|
||||
Codex runtime: it converts Codex `item/*` notifications into the standard
|
||||
OpenAI-shaped `{role, content, tool_calls, tool_call_id}` entries that
|
||||
`agent/curator.py` already knows how to read.
|
||||
|
||||
Codex emits items with a discriminator field `type`:
|
||||
- userMessage → {role: "user", content}
|
||||
- agentMessage → {role: "assistant", content}
|
||||
- reasoning → stashed in the assistant's "reasoning" field
|
||||
- commandExecution → assistant tool_call(name="exec") + tool result
|
||||
- fileChange → assistant tool_call(name="apply_patch") + tool result
|
||||
- mcpToolCall → assistant tool_call(name=f"mcp.{server}.{tool}") + tool result
|
||||
- dynamicToolCall → assistant tool_call(name=tool) + tool result
|
||||
- plan/hookPrompt/collabAgentToolCall → recorded as opaque assistant notes
|
||||
|
||||
Each item maps to AT MOST one assistant entry + one tool entry, preserving
|
||||
Hermes' message-alternation invariants (system → user → assistant → user/tool
|
||||
→ assistant → ...). Multiple Codex tool calls within one Codex turn produce
|
||||
multiple consecutive (assistant, tool) pairs, which is the same shape Hermes
|
||||
already produces for parallel tool calls.
|
||||
|
||||
Counters tracked alongside projection:
|
||||
- tool_iterations: ticks once per completed tool-shaped item. Used by
|
||||
AIAgent._iters_since_skill (skill nudge gate, default threshold 10).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Optional
|
||||
|
||||
|
||||
def _deterministic_call_id(item_type: str, item_id: str) -> str:
|
||||
"""Stable id for tool_call message correlation.
|
||||
|
||||
Uses the codex item id directly when present (already a uuid); falls back
|
||||
to a content hash so replay produces the same id across sessions and
|
||||
prefix caches stay valid. See AGENTS.md Pitfall #16 (deterministic IDs in
|
||||
tool call history)."""
|
||||
if item_id:
|
||||
return f"codex_{item_type}_{item_id}"
|
||||
digest = hashlib.sha256(f"{item_type}".encode()).hexdigest()[:16]
|
||||
return f"codex_{item_type}_{digest}"
|
||||
|
||||
|
||||
def _format_tool_args(d: dict) -> str:
|
||||
"""Format a dict as JSON the way Hermes' existing tool_calls path does."""
|
||||
return json.dumps(d, ensure_ascii=False, sort_keys=True)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ProjectionResult:
|
||||
"""Output of projecting one Codex item.
|
||||
|
||||
`messages` is a list because some Codex items produce two messages
|
||||
(assistant tool_call + tool result). Empty list = item ignored (e.g. a
|
||||
streaming `outputDelta` that doesn't materialize into messages until the
|
||||
`item/completed` event)."""
|
||||
|
||||
messages: list[dict] = field(default_factory=list)
|
||||
is_tool_iteration: bool = False
|
||||
final_text: Optional[str] = None # Set when an agentMessage completes
|
||||
|
||||
|
||||
class CodexEventProjector:
|
||||
"""Stateful projector consuming Codex notifications in arrival order.
|
||||
|
||||
Owns the in-progress reasoning content (codex emits reasoning as separate
|
||||
items but Hermes stashes it on the next assistant message)."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._pending_reasoning: list[str] = []
|
||||
|
||||
def project(self, notification: dict) -> ProjectionResult:
|
||||
"""Project a single notification. Idempotent for non-completion events;
|
||||
only `item/completed` and `turn/completed` materialize messages."""
|
||||
method = notification.get("method", "")
|
||||
params = notification.get("params", {}) or {}
|
||||
|
||||
# We only materialize messages on `item/completed`. Streaming deltas
|
||||
# (`item/<type>/outputDelta`, `item/<type>/delta`) are display-only and
|
||||
# don't enter the messages list — same way Hermes already only writes
|
||||
# the assistant message after the streaming completion event.
|
||||
if method != "item/completed":
|
||||
return ProjectionResult()
|
||||
|
||||
item = params.get("item") or {}
|
||||
item_type = item.get("type") or ""
|
||||
item_id = item.get("id") or ""
|
||||
|
||||
if item_type == "agentMessage":
|
||||
return self._project_agent_message(item)
|
||||
if item_type == "reasoning":
|
||||
self._pending_reasoning.extend(item.get("summary") or [])
|
||||
self._pending_reasoning.extend(item.get("content") or [])
|
||||
return ProjectionResult()
|
||||
if item_type == "commandExecution":
|
||||
return self._project_command(item, item_id)
|
||||
if item_type == "fileChange":
|
||||
return self._project_file_change(item, item_id)
|
||||
if item_type == "mcpToolCall":
|
||||
return self._project_mcp_tool_call(item, item_id)
|
||||
if item_type == "dynamicToolCall":
|
||||
return self._project_dynamic_tool_call(item, item_id)
|
||||
if item_type == "userMessage":
|
||||
return self._project_user_message(item)
|
||||
|
||||
# Unknown / rare items (plan, hookPrompt, collabAgentToolCall, etc.)
|
||||
# — record as opaque assistant note so memory review can still see
|
||||
# *something* happened, but don't fabricate tool_call structure.
|
||||
return self._project_opaque(item, item_type)
|
||||
|
||||
# ---------- per-type projections ----------
|
||||
|
||||
def _project_agent_message(self, item: dict) -> ProjectionResult:
|
||||
text = item.get("text") or ""
|
||||
msg: dict[str, Any] = {"role": "assistant", "content": text}
|
||||
if self._pending_reasoning:
|
||||
msg["reasoning"] = "\n".join(self._pending_reasoning)
|
||||
self._pending_reasoning = []
|
||||
return ProjectionResult(messages=[msg], final_text=text)
|
||||
|
||||
def _project_user_message(self, item: dict) -> ProjectionResult:
|
||||
# codex's userMessage content is a list of UserInput variants. For
|
||||
# projection purposes we flatten any text fragments and ignore
|
||||
# non-text parts (images, etc.) — Hermes' messages store text only.
|
||||
text_parts: list[str] = []
|
||||
for fragment in item.get("content") or []:
|
||||
if isinstance(fragment, dict):
|
||||
if fragment.get("type") == "text":
|
||||
text_parts.append(fragment.get("text") or "")
|
||||
elif "text" in fragment:
|
||||
text_parts.append(str(fragment["text"]))
|
||||
return ProjectionResult(
|
||||
messages=[{"role": "user", "content": "\n".join(text_parts)}]
|
||||
)
|
||||
|
||||
def _project_command(self, item: dict, item_id: str) -> ProjectionResult:
|
||||
call_id = _deterministic_call_id("exec", item_id)
|
||||
args = {
|
||||
"command": item.get("command") or "",
|
||||
"cwd": item.get("cwd") or "",
|
||||
}
|
||||
assistant_msg = {
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": call_id,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "exec_command",
|
||||
"arguments": _format_tool_args(args),
|
||||
},
|
||||
}
|
||||
],
|
||||
}
|
||||
if self._pending_reasoning:
|
||||
assistant_msg["reasoning"] = "\n".join(self._pending_reasoning)
|
||||
self._pending_reasoning = []
|
||||
output = item.get("aggregatedOutput") or ""
|
||||
exit_code = item.get("exitCode")
|
||||
if exit_code is not None and exit_code != 0:
|
||||
output = f"[exit {exit_code}]\n{output}"
|
||||
tool_msg = {
|
||||
"role": "tool",
|
||||
"tool_call_id": call_id,
|
||||
"content": output,
|
||||
}
|
||||
return ProjectionResult(
|
||||
messages=[assistant_msg, tool_msg], is_tool_iteration=True
|
||||
)
|
||||
|
||||
def _project_file_change(self, item: dict, item_id: str) -> ProjectionResult:
|
||||
call_id = _deterministic_call_id("apply_patch", item_id)
|
||||
# Reduce the codex changes array to a digest the agent loop will
|
||||
# find readable. We record per-file change kinds (Add/Update/Delete)
|
||||
# without inlining full file contents — those can be huge.
|
||||
changes_summary = []
|
||||
for change in item.get("changes") or []:
|
||||
kind = (change.get("kind") or {}).get("type") or "update"
|
||||
path = change.get("path") or ""
|
||||
changes_summary.append({"kind": kind, "path": path})
|
||||
args = {"changes": changes_summary}
|
||||
assistant_msg = {
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": call_id,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "apply_patch",
|
||||
"arguments": _format_tool_args(args),
|
||||
},
|
||||
}
|
||||
],
|
||||
}
|
||||
if self._pending_reasoning:
|
||||
assistant_msg["reasoning"] = "\n".join(self._pending_reasoning)
|
||||
self._pending_reasoning = []
|
||||
status = item.get("status") or "unknown"
|
||||
n = len(changes_summary)
|
||||
tool_msg = {
|
||||
"role": "tool",
|
||||
"tool_call_id": call_id,
|
||||
"content": f"apply_patch status={status}, {n} change(s)",
|
||||
}
|
||||
return ProjectionResult(
|
||||
messages=[assistant_msg, tool_msg], is_tool_iteration=True
|
||||
)
|
||||
|
||||
def _project_mcp_tool_call(self, item: dict, item_id: str) -> ProjectionResult:
|
||||
server = item.get("server") or "mcp"
|
||||
tool = item.get("tool") or "unknown"
|
||||
call_id = _deterministic_call_id(f"mcp_{server}_{tool}", item_id)
|
||||
args = item.get("arguments") or {}
|
||||
if not isinstance(args, dict):
|
||||
args = {"arguments": args}
|
||||
assistant_msg = {
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": call_id,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": f"mcp.{server}.{tool}",
|
||||
"arguments": _format_tool_args(args),
|
||||
},
|
||||
}
|
||||
],
|
||||
}
|
||||
if self._pending_reasoning:
|
||||
assistant_msg["reasoning"] = "\n".join(self._pending_reasoning)
|
||||
self._pending_reasoning = []
|
||||
result = item.get("result")
|
||||
error = item.get("error")
|
||||
if error:
|
||||
content = f"[error] {json.dumps(error, ensure_ascii=False)[:1000]}"
|
||||
elif result is not None:
|
||||
content = json.dumps(result, ensure_ascii=False)[:4000]
|
||||
else:
|
||||
content = ""
|
||||
tool_msg = {
|
||||
"role": "tool",
|
||||
"tool_call_id": call_id,
|
||||
"content": content,
|
||||
}
|
||||
return ProjectionResult(
|
||||
messages=[assistant_msg, tool_msg], is_tool_iteration=True
|
||||
)
|
||||
|
||||
def _project_dynamic_tool_call(
|
||||
self, item: dict, item_id: str
|
||||
) -> ProjectionResult:
|
||||
tool = item.get("tool") or "unknown"
|
||||
call_id = _deterministic_call_id(f"dyn_{tool}", item_id)
|
||||
args = item.get("arguments") or {}
|
||||
if not isinstance(args, dict):
|
||||
args = {"arguments": args}
|
||||
assistant_msg = {
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": call_id,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": tool,
|
||||
"arguments": _format_tool_args(args),
|
||||
},
|
||||
}
|
||||
],
|
||||
}
|
||||
if self._pending_reasoning:
|
||||
assistant_msg["reasoning"] = "\n".join(self._pending_reasoning)
|
||||
self._pending_reasoning = []
|
||||
content_items = item.get("contentItems") or []
|
||||
if isinstance(content_items, list) and content_items:
|
||||
content = json.dumps(content_items, ensure_ascii=False)[:4000]
|
||||
else:
|
||||
success = item.get("success")
|
||||
content = f"success={success}"
|
||||
tool_msg = {
|
||||
"role": "tool",
|
||||
"tool_call_id": call_id,
|
||||
"content": content,
|
||||
}
|
||||
return ProjectionResult(
|
||||
messages=[assistant_msg, tool_msg], is_tool_iteration=True
|
||||
)
|
||||
|
||||
def _project_opaque(self, item: dict, item_type: str) -> ProjectionResult:
|
||||
# Record the existence of the item without inventing tool_calls.
|
||||
# Memory review will see this and may or may not save anything.
|
||||
try:
|
||||
payload = json.dumps(item, ensure_ascii=False)[:1500]
|
||||
except (TypeError, ValueError):
|
||||
payload = repr(item)[:1500]
|
||||
return ProjectionResult(
|
||||
messages=[
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": f"[codex {item_type}] {payload}",
|
||||
}
|
||||
]
|
||||
)
|
||||
@@ -1,225 +0,0 @@
|
||||
"""Hermes-tools-as-MCP server for the codex_app_server runtime.
|
||||
|
||||
When the user runs `openai/*` turns through the codex app-server, codex
|
||||
owns the loop and builds its own tool list. By default, that means
|
||||
Hermes' richer tool surface — web search, browser automation,
|
||||
delegate_task subagents, vision analysis, persistent memory, skills,
|
||||
cross-session search, image generation, TTS — is unreachable.
|
||||
|
||||
This module exposes a curated subset of those Hermes tools to the
|
||||
spawned codex subprocess via stdio MCP. Codex registers it as a normal
|
||||
MCP server (per `~/.codex/config.toml [mcp_servers.hermes-tools]`) and
|
||||
the user gets full Hermes capability inside a Codex turn.
|
||||
|
||||
Scope (what we expose):
|
||||
- web_search, web_extract — Firecrawl, no codex equivalent
|
||||
- browser_navigate / _click / _type / — Camofox/Browserbase automation
|
||||
_snapshot / _screenshot / _scroll / _back / _press / _vision
|
||||
- delegate_task — Hermes subagents
|
||||
- vision_analyze — image inspection by vision model
|
||||
- image_generate — image generation
|
||||
- memory — Hermes' persistent memory store
|
||||
- skill_view, skills_list — Hermes' skill library
|
||||
- session_search — cross-session search
|
||||
- text_to_speech — TTS
|
||||
|
||||
What we DO NOT expose (codex has equivalents):
|
||||
- terminal / shell — codex's own shell tool
|
||||
- read_file / write_file / patch — codex's apply_patch + shell
|
||||
- search_files / process — codex's shell
|
||||
- clarify, todo — codex's own UX
|
||||
|
||||
Run with: python -m agent.transports.hermes_tools_mcp_server
|
||||
Spawned by: CodexAppServerSession.ensure_started() when the runtime is
|
||||
active and config opts in.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from typing import Any, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Tools we expose. Each name MUST match a registered Hermes tool that
|
||||
# `model_tools.handle_function_call()` can dispatch.
|
||||
#
|
||||
# What we deliberately DO NOT expose:
|
||||
# - terminal / shell / read_file / write_file / patch / search_files /
|
||||
# process — codex's built-ins cover these and approval routes through
|
||||
# codex's own UI.
|
||||
# - delegate_task / memory / session_search / todo — these are
|
||||
# `_AGENT_LOOP_TOOLS` in Hermes (model_tools.py:493). They require
|
||||
# the running AIAgent context to dispatch (mid-loop state), so a
|
||||
# stateless MCP callback can't drive them. Hermes' default runtime
|
||||
# keeps these working; the codex_app_server runtime cannot.
|
||||
EXPOSED_TOOLS: tuple[str, ...] = (
|
||||
"web_search",
|
||||
"web_extract",
|
||||
"browser_navigate",
|
||||
"browser_click",
|
||||
"browser_type",
|
||||
"browser_press",
|
||||
"browser_snapshot",
|
||||
"browser_scroll",
|
||||
"browser_back",
|
||||
"browser_get_images",
|
||||
"browser_console",
|
||||
"browser_vision",
|
||||
"vision_analyze",
|
||||
"image_generate",
|
||||
"skill_view",
|
||||
"skills_list",
|
||||
"text_to_speech",
|
||||
# Kanban worker handoff tools — gated on HERMES_KANBAN_TASK env var
|
||||
# (set by the kanban dispatcher when spawning a worker). Without these
|
||||
# in the callback, a worker spawned with openai_runtime=codex_app_server
|
||||
# could do the work but couldn't report completion back to the kernel,
|
||||
# making it hang until timeout. Stateless dispatch — they just read
|
||||
# the env var and write to ~/.hermes/kanban.db.
|
||||
"kanban_complete",
|
||||
"kanban_block",
|
||||
"kanban_comment",
|
||||
"kanban_heartbeat",
|
||||
"kanban_show",
|
||||
"kanban_list",
|
||||
# NOTE: kanban_create / kanban_unblock / kanban_link are orchestrator-
|
||||
# only — the kanban tool gates them on HERMES_KANBAN_TASK being unset.
|
||||
# They're exposed here for orchestrator agents running on the codex
|
||||
# runtime that need to dispatch new tasks.
|
||||
"kanban_create",
|
||||
"kanban_unblock",
|
||||
"kanban_link",
|
||||
)
|
||||
|
||||
|
||||
def _build_server() -> Any:
|
||||
"""Create the FastMCP server with Hermes tools attached. Lazy imports
|
||||
so the module can be imported without the mcp package installed
|
||||
(we degrade to a clear error only when actually run)."""
|
||||
try:
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
except ImportError as exc: # pragma: no cover - install hint
|
||||
raise ImportError(
|
||||
f"hermes-tools MCP server requires the 'mcp' package: {exc}"
|
||||
) from exc
|
||||
|
||||
# Discover Hermes tools so dispatch works.
|
||||
from model_tools import (
|
||||
get_tool_definitions,
|
||||
handle_function_call,
|
||||
)
|
||||
|
||||
mcp = FastMCP(
|
||||
"hermes-tools",
|
||||
instructions=(
|
||||
"Hermes Agent's tool surface, exposed for use inside a Codex "
|
||||
"session. Use these for capabilities Codex's built-in toolset "
|
||||
"doesn't cover: web search/extract, browser automation, "
|
||||
"subagent delegation, vision, image generation, persistent "
|
||||
"memory, skills, and cross-session search."
|
||||
),
|
||||
)
|
||||
|
||||
# Pull authoritative Hermes tool schemas for the ones we expose, so
|
||||
# MCP clients see the same parameter docs Hermes gives the model.
|
||||
all_defs = {
|
||||
td["function"]["name"]: td["function"]
|
||||
for td in (get_tool_definitions(quiet_mode=True) or [])
|
||||
if isinstance(td, dict) and td.get("type") == "function"
|
||||
}
|
||||
|
||||
exposed_count = 0
|
||||
|
||||
for name in EXPOSED_TOOLS:
|
||||
spec = all_defs.get(name)
|
||||
if spec is None:
|
||||
logger.debug(
|
||||
"skipping %s — not registered in this Hermes process", name
|
||||
)
|
||||
continue
|
||||
|
||||
description = spec.get("description") or f"Hermes {name} tool"
|
||||
params_schema = spec.get("parameters") or {"type": "object", "properties": {}}
|
||||
|
||||
# FastMCP wants a Python callable. Build a closure that takes the
|
||||
# arguments dict, dispatches via handle_function_call, and returns
|
||||
# the result string. We use add_tool() for full control over the
|
||||
# input schema (FastMCP's @tool() decorator inspects type hints,
|
||||
# which we can't get from a JSON schema at runtime).
|
||||
def _make_handler(tool_name: str):
|
||||
def _dispatch(**kwargs: Any) -> str:
|
||||
try:
|
||||
return handle_function_call(tool_name, kwargs or {})
|
||||
except Exception as exc:
|
||||
logger.exception("tool %s raised", tool_name)
|
||||
return json.dumps({"error": str(exc), "tool": tool_name})
|
||||
_dispatch.__name__ = tool_name
|
||||
_dispatch.__doc__ = description
|
||||
return _dispatch
|
||||
|
||||
try:
|
||||
mcp.add_tool(
|
||||
_make_handler(name),
|
||||
name=name,
|
||||
description=description,
|
||||
# FastMCP accepts JSON schema directly via the
|
||||
# input_schema parameter on newer versions; older
|
||||
# versions use parameters_schema. Try both for compat.
|
||||
)
|
||||
except TypeError:
|
||||
# Older mcp SDK signature — fall back to decorator-style.
|
||||
handler = _make_handler(name)
|
||||
handler = mcp.tool(name=name, description=description)(handler)
|
||||
|
||||
exposed_count += 1
|
||||
|
||||
logger.info(
|
||||
"hermes-tools MCP server registered %d/%d tools",
|
||||
exposed_count,
|
||||
len(EXPOSED_TOOLS),
|
||||
)
|
||||
return mcp
|
||||
|
||||
|
||||
def main(argv: Optional[list[str]] = None) -> int:
|
||||
"""Entry point for `python -m agent.transports.hermes_tools_mcp_server`."""
|
||||
argv = argv or sys.argv[1:]
|
||||
verbose = "--verbose" in argv or "-v" in argv
|
||||
|
||||
log_level = logging.INFO if verbose else logging.WARNING
|
||||
logging.basicConfig(
|
||||
level=log_level,
|
||||
stream=sys.stderr, # MCP uses stdio for protocol — logs MUST go to stderr
|
||||
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
||||
)
|
||||
|
||||
# Quiet mode: keep Hermes' own banners off stdout (which is the MCP wire).
|
||||
os.environ.setdefault("HERMES_QUIET", "1")
|
||||
os.environ.setdefault("HERMES_REDACT_SECRETS", "true")
|
||||
|
||||
try:
|
||||
server = _build_server()
|
||||
except ImportError as exc:
|
||||
sys.stderr.write(f"hermes-tools MCP server cannot start: {exc}\n")
|
||||
return 2
|
||||
|
||||
# FastMCP runs with stdio transport by default when launched as a
|
||||
# subprocess.
|
||||
try:
|
||||
server.run()
|
||||
except KeyboardInterrupt:
|
||||
return 0
|
||||
except Exception as exc:
|
||||
logger.exception("hermes-tools MCP server crashed")
|
||||
sys.stderr.write(f"hermes-tools MCP server error: {exc}\n")
|
||||
return 1
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -1,162 +0,0 @@
|
||||
"""Shared types for normalized provider responses.
|
||||
|
||||
These dataclasses define the canonical shape that all provider adapters
|
||||
normalize responses to. The shared surface is intentionally minimal —
|
||||
only fields that every downstream consumer reads are top-level.
|
||||
Protocol-specific state goes in ``provider_data`` dicts (response-level
|
||||
and per-tool-call) so that protocol-aware code paths can access it
|
||||
without polluting the shared type.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass
|
||||
class ToolCall:
|
||||
"""A normalized tool call from any provider.
|
||||
|
||||
``id`` is the protocol's canonical identifier — what gets used in
|
||||
``tool_call_id`` / ``tool_use_id`` when constructing tool result
|
||||
messages. May be ``None`` when the provider omits it; the agent
|
||||
fills it via ``_deterministic_call_id()`` before storing in history.
|
||||
|
||||
``provider_data`` carries per-tool-call protocol metadata that only
|
||||
protocol-aware code reads:
|
||||
|
||||
* Codex: ``{"call_id": "call_XXX", "response_item_id": "fc_XXX"}``
|
||||
* Gemini: ``{"extra_content": {"google": {"thought_signature": "..."}}}``
|
||||
* Others: ``None``
|
||||
"""
|
||||
|
||||
id: str | None
|
||||
name: str
|
||||
arguments: str # JSON string
|
||||
provider_data: dict[str, Any] | None = field(default=None, repr=False)
|
||||
|
||||
# ── Backward compatibility ──────────────────────────────────
|
||||
# The agent loop reads tc.function.name / tc.function.arguments
|
||||
# throughout run_agent.py (45+ sites). These properties let
|
||||
# NormalizedResponse pass through without the _nr_to_assistant_message
|
||||
# shim, while keeping ToolCall's canonical fields flat.
|
||||
@property
|
||||
def type(self) -> str:
|
||||
return "function"
|
||||
|
||||
@property
|
||||
def function(self) -> ToolCall:
|
||||
"""Return self so tc.function.name / tc.function.arguments work."""
|
||||
return self
|
||||
|
||||
@property
|
||||
def call_id(self) -> str | None:
|
||||
"""Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
|
||||
return (self.provider_data or {}).get("call_id")
|
||||
|
||||
@property
|
||||
def response_item_id(self) -> str | None:
|
||||
"""Codex response_item_id from provider_data."""
|
||||
return (self.provider_data or {}).get("response_item_id")
|
||||
|
||||
@property
|
||||
def extra_content(self) -> dict[str, Any] | None:
|
||||
"""Gemini extra_content (thought_signature) from provider_data.
|
||||
|
||||
Gemini 3 thinking models attach ``extra_content`` with a
|
||||
``thought_signature`` to each tool call. This signature must be
|
||||
replayed on subsequent API calls — without it the API rejects the
|
||||
request with HTTP 400. The chat_completions transport stores this
|
||||
in ``provider_data["extra_content"]``; this property exposes it so
|
||||
``_build_assistant_message`` can ``getattr(tc, "extra_content")``
|
||||
uniformly.
|
||||
"""
|
||||
return (self.provider_data or {}).get("extra_content")
|
||||
|
||||
|
||||
@dataclass
|
||||
class Usage:
|
||||
"""Token usage from an API response."""
|
||||
|
||||
prompt_tokens: int = 0
|
||||
completion_tokens: int = 0
|
||||
total_tokens: int = 0
|
||||
cached_tokens: int = 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class NormalizedResponse:
|
||||
"""Normalized API response from any provider.
|
||||
|
||||
Shared fields are truly cross-provider — every caller can rely on
|
||||
them without branching on api_mode. Protocol-specific state goes in
|
||||
``provider_data`` so that only protocol-aware code paths read it.
|
||||
|
||||
Response-level ``provider_data`` examples:
|
||||
|
||||
* Anthropic: ``{"reasoning_details": [...]}``
|
||||
* Codex: ``{"codex_reasoning_items": [...], "codex_message_items": [...]}``
|
||||
* Others: ``None``
|
||||
"""
|
||||
|
||||
content: str | None
|
||||
tool_calls: list[ToolCall] | None
|
||||
finish_reason: str # "stop", "tool_calls", "length", "content_filter"
|
||||
reasoning: str | None = None
|
||||
usage: Usage | None = None
|
||||
provider_data: dict[str, Any] | None = field(default=None, repr=False)
|
||||
|
||||
# ── Backward compatibility ──────────────────────────────────
|
||||
# The shim _nr_to_assistant_message() mapped these from provider_data.
|
||||
# These properties let NormalizedResponse pass through directly.
|
||||
@property
|
||||
def reasoning_content(self) -> str | None:
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("reasoning_content")
|
||||
|
||||
@property
|
||||
def reasoning_details(self):
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("reasoning_details")
|
||||
|
||||
@property
|
||||
def codex_reasoning_items(self):
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("codex_reasoning_items")
|
||||
|
||||
@property
|
||||
def codex_message_items(self):
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("codex_message_items")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Factory helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def build_tool_call(
|
||||
id: str | None,
|
||||
name: str,
|
||||
arguments: Any,
|
||||
**provider_fields: Any,
|
||||
) -> ToolCall:
|
||||
"""Build a ``ToolCall``, auto-serialising *arguments* if it's a dict.
|
||||
|
||||
Any extra keyword arguments are collected into ``provider_data``.
|
||||
"""
|
||||
args_str = json.dumps(arguments) if isinstance(arguments, dict) else str(arguments)
|
||||
pd = dict(provider_fields) if provider_fields else None
|
||||
return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
|
||||
|
||||
|
||||
def map_finish_reason(reason: str | None, mapping: dict[str, str]) -> str:
|
||||
"""Translate a provider-specific stop reason to the normalised set.
|
||||
|
||||
Falls back to ``"stop"`` for unknown or ``None`` reasons.
|
||||
"""
|
||||
if reason is None:
|
||||
return "stop"
|
||||
return mapping.get(reason, "stop")
|
||||
@@ -1,299 +0,0 @@
|
||||
"""
|
||||
Video Generation Provider ABC
|
||||
=============================
|
||||
|
||||
Defines the pluggable-backend interface for video generation. Providers register
|
||||
instances via ``PluginContext.register_video_gen_provider()``; the active one
|
||||
(selected via ``video_gen.provider`` in ``config.yaml``) services every
|
||||
``video_generate`` tool call.
|
||||
|
||||
Providers live in ``<repo>/plugins/video_gen/<name>/`` (built-in, auto-loaded
|
||||
as ``kind: backend``) or ``~/.hermes/plugins/video_gen/<name>/`` (user, opt-in
|
||||
via ``plugins.enabled``).
|
||||
|
||||
Mirrors the ``image_gen`` provider design (``agent/image_gen_provider.py``) so
|
||||
the two surfaces stay learnable together.
|
||||
|
||||
Unified surface
|
||||
---------------
|
||||
One tool — ``video_generate`` — covers **text-to-video** and **image-to-video**.
|
||||
The router is the presence of ``image_url``: if it's set, the provider routes
|
||||
to its image-to-video endpoint; if it's omitted, the provider routes to
|
||||
text-to-video. Users pick one **model family** (e.g. Pixverse v6, Veo 3.1,
|
||||
Kling O3 Standard); the provider handles which underlying FAL/xAI endpoint
|
||||
to hit.
|
||||
|
||||
Video edit and video extend are intentionally NOT exposed in this surface —
|
||||
the inconsistency across backends is too large for one unified tool. If
|
||||
those use cases warrant attention later they can ship as separate tools.
|
||||
|
||||
Response shape
|
||||
--------------
|
||||
All providers return a dict built by :func:`success_response` /
|
||||
:func:`error_response`. Keys:
|
||||
|
||||
success bool
|
||||
video str | None URL or absolute file path
|
||||
model str provider-specific model identifier
|
||||
prompt str echoed prompt
|
||||
modality str "text" | "image" (which mode was used)
|
||||
aspect_ratio str provider-native (e.g. "16:9") or ""
|
||||
duration int seconds (0 if not applicable)
|
||||
provider str provider name (for diagnostics)
|
||||
error str only when success=False
|
||||
error_type str only when success=False
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
import base64
|
||||
import datetime
|
||||
import logging
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Common aspect ratios across providers (Veo / Kling / xAI / Pixverse). The
|
||||
# tool schema advertises this set as an enum hint, but providers may accept
|
||||
# a narrower or wider set — they are responsible for clamping.
|
||||
COMMON_ASPECT_RATIOS: Tuple[str, ...] = ("16:9", "9:16", "1:1", "4:3", "3:4", "3:2", "2:3")
|
||||
DEFAULT_ASPECT_RATIO = "16:9"
|
||||
|
||||
COMMON_RESOLUTIONS: Tuple[str, ...] = ("480p", "540p", "720p", "1080p")
|
||||
DEFAULT_RESOLUTION = "720p"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ABC
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class VideoGenProvider(abc.ABC):
|
||||
"""Abstract base class for a video generation backend.
|
||||
|
||||
Subclasses must implement :meth:`generate`. Everything else has sane
|
||||
defaults — override only what your provider needs.
|
||||
"""
|
||||
|
||||
@property
|
||||
@abc.abstractmethod
|
||||
def name(self) -> str:
|
||||
"""Stable short identifier used in ``video_gen.provider`` config.
|
||||
|
||||
Lowercase, no spaces. Examples: ``xai``, ``fal``, ``google``.
|
||||
"""
|
||||
|
||||
@property
|
||||
def display_name(self) -> str:
|
||||
"""Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``."""
|
||||
return self.name.title()
|
||||
|
||||
def is_available(self) -> bool:
|
||||
"""Return True when this provider can service calls.
|
||||
|
||||
Typically checks for a required API key and optional-dependency
|
||||
import. Default: True.
|
||||
"""
|
||||
return True
|
||||
|
||||
def list_models(self) -> List[Dict[str, Any]]:
|
||||
"""Return catalog entries for ``hermes tools`` model picker.
|
||||
|
||||
Each entry represents a **model family** that supports text-to-video
|
||||
and/or image-to-video routing internally::
|
||||
|
||||
{
|
||||
"id": "veo-3.1", # required
|
||||
"display": "Veo 3.1", # optional; defaults to id
|
||||
"speed": "~60s", # optional
|
||||
"strengths": "...", # optional
|
||||
"price": "$0.20/s", # optional
|
||||
"modalities": ["text", "image"], # optional, advisory
|
||||
}
|
||||
|
||||
Default: empty list (provider has no user-selectable models).
|
||||
"""
|
||||
return []
|
||||
|
||||
def get_setup_schema(self) -> Dict[str, Any]:
|
||||
"""Return provider metadata for the ``hermes tools`` picker."""
|
||||
return {
|
||||
"name": self.display_name,
|
||||
"badge": "",
|
||||
"tag": "",
|
||||
"env_vars": [],
|
||||
}
|
||||
|
||||
def default_model(self) -> Optional[str]:
|
||||
"""Return the default model id, or None if not applicable."""
|
||||
models = self.list_models()
|
||||
if models:
|
||||
return models[0].get("id")
|
||||
return None
|
||||
|
||||
def capabilities(self) -> Dict[str, Any]:
|
||||
"""Return what this provider supports.
|
||||
|
||||
Returned dict (all keys optional)::
|
||||
|
||||
{
|
||||
"modalities": ["text", "image"], # which inputs the backend accepts
|
||||
"aspect_ratios": ["16:9", "9:16", ...],
|
||||
"resolutions": ["720p", "1080p"],
|
||||
"max_duration": 15, # seconds
|
||||
"min_duration": 1,
|
||||
"supports_audio": True,
|
||||
"supports_negative_prompt": True,
|
||||
"max_reference_images": 7,
|
||||
}
|
||||
|
||||
Used by the tool layer for soft validation and by ``hermes tools``
|
||||
for the picker. Default: text-only.
|
||||
"""
|
||||
return {
|
||||
"modalities": ["text"],
|
||||
"aspect_ratios": list(COMMON_ASPECT_RATIOS),
|
||||
"resolutions": list(COMMON_RESOLUTIONS),
|
||||
"max_duration": 10,
|
||||
"min_duration": 1,
|
||||
"supports_audio": False,
|
||||
"supports_negative_prompt": False,
|
||||
"max_reference_images": 0,
|
||||
}
|
||||
|
||||
@abc.abstractmethod
|
||||
def generate(
|
||||
self,
|
||||
prompt: str,
|
||||
*,
|
||||
model: Optional[str] = None,
|
||||
image_url: Optional[str] = None,
|
||||
reference_image_urls: Optional[List[str]] = None,
|
||||
duration: Optional[int] = None,
|
||||
aspect_ratio: str = DEFAULT_ASPECT_RATIO,
|
||||
resolution: str = DEFAULT_RESOLUTION,
|
||||
negative_prompt: Optional[str] = None,
|
||||
audio: Optional[bool] = None,
|
||||
seed: Optional[int] = None,
|
||||
**kwargs: Any,
|
||||
) -> Dict[str, Any]:
|
||||
"""Generate a video from a prompt (text-to-video) or animate an image
|
||||
(image-to-video).
|
||||
|
||||
Routing: if ``image_url`` is provided, the provider should route to
|
||||
its image-to-video endpoint; otherwise text-to-video. The plugin
|
||||
is responsible for picking the right underlying endpoint within
|
||||
the user's chosen model family.
|
||||
|
||||
Implementations should return the dict from :func:`success_response`
|
||||
or :func:`error_response`. ``kwargs`` may contain forward-compat
|
||||
parameters future versions of the schema will expose —
|
||||
implementations MUST ignore unknown keys (no TypeError).
|
||||
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _videos_cache_dir() -> Path:
|
||||
"""Return ``$HERMES_HOME/cache/videos/``, creating parents as needed."""
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
path = get_hermes_home() / "cache" / "videos"
|
||||
path.mkdir(parents=True, exist_ok=True)
|
||||
return path
|
||||
|
||||
|
||||
def save_b64_video(
|
||||
b64_data: str,
|
||||
*,
|
||||
prefix: str = "video",
|
||||
extension: str = "mp4",
|
||||
) -> Path:
|
||||
"""Decode base64 video data and write under ``$HERMES_HOME/cache/videos/``.
|
||||
|
||||
Returns the absolute :class:`Path` to the saved file.
|
||||
|
||||
Filename format: ``<prefix>_<YYYYMMDD_HHMMSS>_<short-uuid>.<ext>``.
|
||||
"""
|
||||
raw = base64.b64decode(b64_data)
|
||||
ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
short = uuid.uuid4().hex[:8]
|
||||
path = _videos_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"
|
||||
path.write_bytes(raw)
|
||||
return path
|
||||
|
||||
|
||||
def save_bytes_video(
|
||||
raw: bytes,
|
||||
*,
|
||||
prefix: str = "video",
|
||||
extension: str = "mp4",
|
||||
) -> Path:
|
||||
"""Write raw video bytes (e.g. an HTTP download body) to the cache."""
|
||||
ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
short = uuid.uuid4().hex[:8]
|
||||
path = _videos_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"
|
||||
path.write_bytes(raw)
|
||||
return path
|
||||
|
||||
|
||||
def success_response(
|
||||
*,
|
||||
video: str,
|
||||
model: str,
|
||||
prompt: str,
|
||||
modality: str = "text",
|
||||
aspect_ratio: str = "",
|
||||
duration: int = 0,
|
||||
provider: str,
|
||||
extra: Optional[Dict[str, Any]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build a uniform success response dict.
|
||||
|
||||
``video`` may be an HTTP URL or an absolute filesystem path.
|
||||
``modality`` is ``"text"`` (text-to-video) or ``"image"`` (image-to-video) —
|
||||
indicates which endpoint was actually hit, useful for diagnostics.
|
||||
"""
|
||||
payload: Dict[str, Any] = {
|
||||
"success": True,
|
||||
"video": video,
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"modality": modality,
|
||||
"aspect_ratio": aspect_ratio,
|
||||
"duration": int(duration) if duration else 0,
|
||||
"provider": provider,
|
||||
}
|
||||
if extra:
|
||||
for k, v in extra.items():
|
||||
payload.setdefault(k, v)
|
||||
return payload
|
||||
|
||||
|
||||
def error_response(
|
||||
*,
|
||||
error: str,
|
||||
error_type: str = "provider_error",
|
||||
provider: str = "",
|
||||
model: str = "",
|
||||
prompt: str = "",
|
||||
aspect_ratio: str = "",
|
||||
) -> Dict[str, Any]:
|
||||
"""Build a uniform error response dict."""
|
||||
return {
|
||||
"success": False,
|
||||
"video": None,
|
||||
"error": error,
|
||||
"error_type": error_type,
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"aspect_ratio": aspect_ratio,
|
||||
"provider": provider,
|
||||
}
|
||||
@@ -1,117 +0,0 @@
|
||||
"""
|
||||
Video Generation Provider Registry
|
||||
==================================
|
||||
|
||||
Central map of registered providers. Populated by plugins at import-time via
|
||||
``PluginContext.register_video_gen_provider()``; consumed by the
|
||||
``video_generate`` tool to dispatch each call to the active backend.
|
||||
|
||||
Active selection
|
||||
----------------
|
||||
The active provider is chosen by ``video_gen.provider`` in ``config.yaml``.
|
||||
If unset, :func:`get_active_provider` applies fallback logic:
|
||||
|
||||
1. If exactly one provider is registered, use it.
|
||||
2. Otherwise return ``None`` (the tool surfaces a helpful error pointing
|
||||
the user at ``hermes tools``).
|
||||
|
||||
Mirrors ``agent/image_gen_registry.py`` so the two surfaces behave the
|
||||
same.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from agent.video_gen_provider import VideoGenProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_providers: Dict[str, VideoGenProvider] = {}
|
||||
_lock = threading.Lock()
|
||||
|
||||
|
||||
def register_provider(provider: VideoGenProvider) -> None:
|
||||
"""Register a video generation provider.
|
||||
|
||||
Re-registration (same ``name``) overwrites the previous entry and logs
|
||||
a debug message — this makes hot-reload scenarios (tests, dev loops)
|
||||
behave predictably.
|
||||
"""
|
||||
if not isinstance(provider, VideoGenProvider):
|
||||
raise TypeError(
|
||||
f"register_provider() expects a VideoGenProvider instance, "
|
||||
f"got {type(provider).__name__}"
|
||||
)
|
||||
name = provider.name
|
||||
if not isinstance(name, str) or not name.strip():
|
||||
raise ValueError("Video gen provider .name must be a non-empty string")
|
||||
with _lock:
|
||||
existing = _providers.get(name)
|
||||
_providers[name] = provider
|
||||
if existing is not None:
|
||||
logger.debug("Video gen provider '%s' re-registered (was %r)", name, type(existing).__name__)
|
||||
else:
|
||||
logger.debug("Registered video gen provider '%s' (%s)", name, type(provider).__name__)
|
||||
|
||||
|
||||
def list_providers() -> List[VideoGenProvider]:
|
||||
"""Return all registered providers, sorted by name."""
|
||||
with _lock:
|
||||
items = list(_providers.values())
|
||||
return sorted(items, key=lambda p: p.name)
|
||||
|
||||
|
||||
def get_provider(name: str) -> Optional[VideoGenProvider]:
|
||||
"""Return the provider registered under *name*, or None."""
|
||||
if not isinstance(name, str):
|
||||
return None
|
||||
with _lock:
|
||||
return _providers.get(name.strip())
|
||||
|
||||
|
||||
def get_active_provider() -> Optional[VideoGenProvider]:
|
||||
"""Resolve the currently-active provider.
|
||||
|
||||
Reads ``video_gen.provider`` from config.yaml; falls back per the
|
||||
module docstring.
|
||||
"""
|
||||
configured: Optional[str] = None
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
cfg = load_config()
|
||||
section = cfg.get("video_gen") if isinstance(cfg, dict) else None
|
||||
if isinstance(section, dict):
|
||||
raw = section.get("provider")
|
||||
if isinstance(raw, str) and raw.strip():
|
||||
configured = raw.strip()
|
||||
except Exception as exc:
|
||||
logger.debug("Could not read video_gen.provider from config: %s", exc)
|
||||
|
||||
with _lock:
|
||||
snapshot = dict(_providers)
|
||||
|
||||
if configured:
|
||||
provider = snapshot.get(configured)
|
||||
if provider is not None:
|
||||
return provider
|
||||
logger.debug(
|
||||
"video_gen.provider='%s' configured but not registered; falling back",
|
||||
configured,
|
||||
)
|
||||
|
||||
# Fallback: single-provider case
|
||||
if len(snapshot) == 1:
|
||||
return next(iter(snapshot.values()))
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _reset_for_tests() -> None:
|
||||
"""Clear the registry. **Test-only.**"""
|
||||
with _lock:
|
||||
_providers.clear()
|
||||
@@ -1,221 +0,0 @@
|
||||
"""
|
||||
Web Search Provider ABC
|
||||
=======================
|
||||
|
||||
Defines the pluggable-backend interface for web search and content extraction.
|
||||
Providers register instances via ``PluginContext.register_web_search_provider()``;
|
||||
the active one (selected via ``web.search_backend`` / ``web.extract_backend`` /
|
||||
``web.backend`` in ``config.yaml``) services every ``web_search`` /
|
||||
``web_extract`` tool call.
|
||||
|
||||
Providers live in ``<repo>/plugins/web/<name>/`` (built-in, auto-loaded as
|
||||
``kind: backend``) or ``~/.hermes/plugins/web/<name>/`` (user, opt-in via
|
||||
``plugins.enabled``).
|
||||
|
||||
This ABC is the SINGLE plugin-facing surface for web providers — every
|
||||
provider in the tree (brave-free, ddgs, searxng, exa, parallel, tavily,
|
||||
firecrawl) implements it. The legacy in-tree ``tools.web_providers.base``
|
||||
ABCs were deleted in PR #25182 along with the per-vendor inline helpers
|
||||
in ``tools/web_tools.py``; the response-shape contract documented below
|
||||
is preserved bit-for-bit so the tool wrapper does not have to translate.
|
||||
|
||||
Response shape (preserved from the legacy contract):
|
||||
|
||||
Search results::
|
||||
|
||||
{
|
||||
"success": True,
|
||||
"data": {
|
||||
"web": [
|
||||
{"title": str, "url": str, "description": str, "position": int},
|
||||
...
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
Extract results::
|
||||
|
||||
{
|
||||
"success": True,
|
||||
"data": [
|
||||
{"url": str, "title": str, "content": str,
|
||||
"raw_content": str, "metadata": dict},
|
||||
...
|
||||
]
|
||||
}
|
||||
|
||||
On failure (either capability)::
|
||||
|
||||
{"success": False, "error": str}
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ABC
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class WebSearchProvider(abc.ABC):
|
||||
"""Abstract base class for a web search/extract/crawl backend.
|
||||
|
||||
Subclasses must implement :meth:`is_available` and at least one of
|
||||
:meth:`search` / :meth:`extract` / :meth:`crawl`. The
|
||||
:meth:`supports_search` / :meth:`supports_extract` / :meth:`supports_crawl`
|
||||
capability flags let the registry route each tool call to the right
|
||||
provider, and let multi-capability providers (Firecrawl, Tavily, Exa,
|
||||
…) advertise multiple capabilities from a single class.
|
||||
"""
|
||||
|
||||
@property
|
||||
@abc.abstractmethod
|
||||
def name(self) -> str:
|
||||
"""Stable short identifier used in ``web.search_backend`` /
|
||||
``web.extract_backend`` / ``web.backend`` config keys.
|
||||
|
||||
Lowercase, no spaces; hyphens permitted to preserve existing
|
||||
user-visible names. Examples: ``brave-free``, ``ddgs``,
|
||||
``searxng``, ``firecrawl``.
|
||||
"""
|
||||
|
||||
@property
|
||||
def display_name(self) -> str:
|
||||
"""Human-readable label shown in ``hermes tools``. Defaults to ``name``."""
|
||||
return self.name
|
||||
|
||||
@abc.abstractmethod
|
||||
def is_available(self) -> bool:
|
||||
"""Return True when this provider can service calls.
|
||||
|
||||
Typically a cheap check (env var present, optional Python dep
|
||||
importable, instance URL set). Must NOT make network calls — this
|
||||
runs at tool-registration time and on every ``hermes tools`` paint.
|
||||
"""
|
||||
|
||||
def supports_search(self) -> bool:
|
||||
"""Return True if this provider implements :meth:`search`."""
|
||||
return True
|
||||
|
||||
def supports_extract(self) -> bool:
|
||||
"""Return True if this provider implements :meth:`extract`.
|
||||
|
||||
Both sync and async :meth:`extract` implementations are valid — the
|
||||
dispatcher detects coroutine functions via
|
||||
:func:`inspect.iscoroutinefunction` and awaits as needed. Sync
|
||||
implementations that perform blocking I/O (HTTP, SDK calls) should
|
||||
ideally wrap in :func:`asyncio.to_thread` at the call site; small
|
||||
providers can keep their sync shape and let the dispatcher handle
|
||||
threading.
|
||||
"""
|
||||
return False
|
||||
|
||||
def supports_crawl(self) -> bool:
|
||||
"""Return True if this provider implements :meth:`crawl`.
|
||||
|
||||
Crawl differs from extract in that the agent provides a *seed URL*
|
||||
and the provider walks linked pages on its own — useful for
|
||||
documentation sites where the agent doesn't know all relevant
|
||||
URLs upfront. Tavily is the only built-in backend that natively
|
||||
crawls today; Firecrawl provides a similar capability that we
|
||||
don't currently surface as a tool.
|
||||
|
||||
Providers that don't crawl should leave this as False; the
|
||||
dispatcher in :func:`tools.web_tools.web_crawl_tool` will fall
|
||||
back to its auxiliary-model summarization path.
|
||||
"""
|
||||
return False
|
||||
|
||||
def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
|
||||
"""Execute a web search.
|
||||
|
||||
Override when :meth:`supports_search` returns True. The default
|
||||
raises NotImplementedError; callers should gate on
|
||||
:meth:`supports_search` before calling.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
f"{self.name} does not support search (override supports_search)"
|
||||
)
|
||||
|
||||
def extract(self, urls: List[str], **kwargs: Any) -> Any:
|
||||
"""Extract content from one or more URLs.
|
||||
|
||||
Override when :meth:`supports_extract` returns True. The default
|
||||
raises NotImplementedError; callers should gate on
|
||||
:meth:`supports_extract` before calling.
|
||||
|
||||
Return shape: a list of result dicts matching what the legacy
|
||||
:func:`tools.web_tools.web_extract_tool` post-processing pipeline
|
||||
expects::
|
||||
|
||||
[
|
||||
{
|
||||
"url": str,
|
||||
"title": str,
|
||||
"content": str,
|
||||
"raw_content": str,
|
||||
"metadata": dict, # optional
|
||||
"error": str, # optional, only on per-URL failure
|
||||
},
|
||||
...
|
||||
]
|
||||
|
||||
Implementations MAY be ``async def`` — the dispatcher detects
|
||||
coroutines via :func:`inspect.iscoroutinefunction` and awaits.
|
||||
|
||||
``kwargs`` may carry forward-compat fields (``format``, ``include_raw``,
|
||||
``max_chars``) — implementations should ignore unknown keys.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
f"{self.name} does not support extract (override supports_extract)"
|
||||
)
|
||||
|
||||
def crawl(self, url: str, **kwargs: Any) -> Any:
|
||||
"""Crawl a seed URL and return results.
|
||||
|
||||
Override when :meth:`supports_crawl` returns True. The default
|
||||
raises NotImplementedError; callers should gate on
|
||||
:meth:`supports_crawl` before calling.
|
||||
|
||||
Return shape: ``{"results": [{"url": str, "title": str,
|
||||
"content": str, ...}, ...]}`` matching what
|
||||
:func:`tools.web_tools.web_crawl_tool` post-processing expects.
|
||||
|
||||
Implementations MAY be ``async def``.
|
||||
|
||||
``kwargs`` may carry forward-compat fields (e.g. ``max_depth``,
|
||||
``include_domains``) — implementations should ignore unknown keys.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
f"{self.name} does not support crawl (override supports_crawl)"
|
||||
)
|
||||
|
||||
def get_setup_schema(self) -> Dict[str, Any]:
|
||||
"""Return provider metadata for the ``hermes tools`` picker.
|
||||
|
||||
Used by ``hermes_cli/tools_config.py`` to inject this provider as a
|
||||
row in the Web Search / Web Extract picker. Shape::
|
||||
|
||||
{
|
||||
"name": "Brave Search (Free)",
|
||||
"badge": "free",
|
||||
"tag": "No paid tier needed — uses Brave's free API.",
|
||||
"env_vars": [
|
||||
{"key": "BRAVE_SEARCH_API_KEY",
|
||||
"prompt": "Brave Search API key",
|
||||
"url": "https://brave.com/search/api/"},
|
||||
],
|
||||
}
|
||||
|
||||
Default: minimal entry derived from ``display_name``. Override to
|
||||
expose API key prompts, badges, and instance URL fields.
|
||||
"""
|
||||
return {
|
||||
"name": self.display_name,
|
||||
"badge": "",
|
||||
"tag": "",
|
||||
"env_vars": [],
|
||||
}
|
||||
@@ -1,262 +0,0 @@
|
||||
"""
|
||||
Web Search Provider Registry
|
||||
============================
|
||||
|
||||
Central map of registered web providers. Populated by plugins at import-time
|
||||
via :meth:`PluginContext.register_web_search_provider`; consumed by the
|
||||
``web_search`` and ``web_extract`` tool wrappers in :mod:`tools.web_tools` to
|
||||
dispatch each call to the active backend.
|
||||
|
||||
Active selection
|
||||
----------------
|
||||
The active provider is chosen by configuration with this precedence:
|
||||
|
||||
1. ``web.search_backend`` / ``web.extract_backend`` / ``web.crawl_backend``
|
||||
(per-capability override).
|
||||
2. ``web.backend`` (shared fallback).
|
||||
3. If exactly one capability-eligible provider is registered AND available,
|
||||
use it.
|
||||
4. Legacy preference order — ``firecrawl`` → ``parallel`` → ``tavily`` →
|
||||
``exa`` → ``searxng`` → ``brave-free`` → ``ddgs`` — filtered by
|
||||
availability. Matches the historic ``tools.web_tools._get_backend()``
|
||||
candidate order so installs that never set a config key keep landing
|
||||
on the same provider they did before the plugin migration.
|
||||
5. Otherwise ``None`` — the tool surfaces a helpful error pointing at
|
||||
``hermes tools``.
|
||||
|
||||
The capability filter (``supports_search`` / ``supports_extract`` /
|
||||
``supports_crawl``) is applied at every step so a search-only provider
|
||||
(``brave-free``) configured as ``web.extract_backend`` correctly falls
|
||||
through to an extract-capable backend.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from agent.web_search_provider import WebSearchProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_providers: Dict[str, WebSearchProvider] = {}
|
||||
_lock = threading.Lock()
|
||||
|
||||
|
||||
def register_provider(provider: WebSearchProvider) -> None:
|
||||
"""Register a web search/extract provider.
|
||||
|
||||
Re-registration (same ``name``) overwrites the previous entry and logs
|
||||
a debug message — makes hot-reload scenarios (tests, dev loops) behave
|
||||
predictably.
|
||||
"""
|
||||
if not isinstance(provider, WebSearchProvider):
|
||||
raise TypeError(
|
||||
f"register_provider() expects a WebSearchProvider instance, "
|
||||
f"got {type(provider).__name__}"
|
||||
)
|
||||
name = provider.name
|
||||
if not isinstance(name, str) or not name.strip():
|
||||
raise ValueError("Web provider .name must be a non-empty string")
|
||||
with _lock:
|
||||
existing = _providers.get(name)
|
||||
_providers[name] = provider
|
||||
if existing is not None:
|
||||
logger.debug(
|
||||
"Web provider '%s' re-registered (was %r)",
|
||||
name, type(existing).__name__,
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
"Registered web provider '%s' (%s)",
|
||||
name, type(provider).__name__,
|
||||
)
|
||||
|
||||
|
||||
def list_providers() -> List[WebSearchProvider]:
|
||||
"""Return all registered providers, sorted by name."""
|
||||
with _lock:
|
||||
items = list(_providers.values())
|
||||
return sorted(items, key=lambda p: p.name)
|
||||
|
||||
|
||||
def get_provider(name: str) -> Optional[WebSearchProvider]:
|
||||
"""Return the provider registered under *name*, or None."""
|
||||
if not isinstance(name, str):
|
||||
return None
|
||||
with _lock:
|
||||
return _providers.get(name.strip())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Active-provider resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _read_config_key(*path: str) -> Optional[str]:
|
||||
"""Resolve a dotted config key from ``config.yaml``. Returns None on miss."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
cfg = load_config()
|
||||
cur = cfg
|
||||
for segment in path:
|
||||
if not isinstance(cur, dict):
|
||||
return None
|
||||
cur = cur.get(segment)
|
||||
if isinstance(cur, str) and cur.strip():
|
||||
return cur.strip()
|
||||
except Exception as exc:
|
||||
logger.debug("Could not read config %s: %s", ".".join(path), exc)
|
||||
return None
|
||||
|
||||
|
||||
# Legacy preference order — preserves behaviour for users who set no
|
||||
# ``web.backend`` / ``web.<capability>_backend`` config key at all. Matches
|
||||
# the historic candidate order in :func:`tools.web_tools._get_backend`
|
||||
# (paid providers first so existing paid setups don't get downgraded to
|
||||
# a free tier on upgrade). Filtered by ``is_available()`` at walk time so
|
||||
# we don't surface a provider the user has no credentials for.
|
||||
_LEGACY_PREFERENCE = (
|
||||
"firecrawl",
|
||||
"parallel",
|
||||
"tavily",
|
||||
"exa",
|
||||
"searxng",
|
||||
"brave-free",
|
||||
"ddgs",
|
||||
)
|
||||
|
||||
|
||||
def _resolve(configured: Optional[str], *, capability: str) -> Optional[WebSearchProvider]:
|
||||
"""Resolve the active provider for a capability ("search" | "extract" | "crawl").
|
||||
|
||||
Resolution rules (in order):
|
||||
|
||||
1. **Explicit config wins, ignoring availability.** If
|
||||
``web.{capability}_backend`` or ``web.backend`` names a registered
|
||||
provider that supports *capability*, return it even if its
|
||||
:meth:`is_available` returns False — the dispatcher will surface a
|
||||
precise "X_API_KEY is not set" error to the user instead of silently
|
||||
routing somewhere else. Matches legacy
|
||||
:func:`tools.web_tools._get_backend` behavior for configured names.
|
||||
|
||||
2. **Single-provider shortcut.** When only one registered provider
|
||||
supports *capability* AND ``is_available()`` reports True, return it.
|
||||
|
||||
3. **Legacy preference walk, filtered by availability.** Walk the
|
||||
:data:`_LEGACY_PREFERENCE` order (firecrawl → parallel → tavily →
|
||||
exa → searxng → brave-free → ddgs) looking for a provider whose
|
||||
``supports_<capability>()`` is True AND whose ``is_available()`` is
|
||||
True. Matches the historic ``tools.web_tools._get_backend()``
|
||||
candidate order so users with credentials but no explicit config
|
||||
key keep landing on the same provider as pre-migration. This is
|
||||
the path that fires when no config key is set — pick the
|
||||
highest-priority backend the user actually has credentials for.
|
||||
|
||||
Returns None when no provider is configured AND no available provider
|
||||
matches the legacy preference; the dispatcher then returns a "set up a
|
||||
provider" error to the user.
|
||||
"""
|
||||
with _lock:
|
||||
snapshot = dict(_providers)
|
||||
|
||||
def _capable(p: WebSearchProvider) -> bool:
|
||||
if capability == "search":
|
||||
return bool(p.supports_search())
|
||||
if capability == "extract":
|
||||
return bool(p.supports_extract())
|
||||
if capability == "crawl":
|
||||
return bool(p.supports_crawl())
|
||||
return False
|
||||
|
||||
def _is_available_safe(p: WebSearchProvider) -> bool:
|
||||
"""Wrap ``is_available()`` so a buggy provider doesn't kill resolution."""
|
||||
try:
|
||||
return bool(p.is_available())
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.debug("provider %s.is_available() raised %s", p.name, exc)
|
||||
return False
|
||||
|
||||
# 1. Explicit config wins — return regardless of is_available() so the
|
||||
# user gets a precise downstream error message rather than a silent
|
||||
# backend switch. Matches _get_backend() in web_tools.py.
|
||||
if configured:
|
||||
provider = snapshot.get(configured)
|
||||
if provider is not None and _capable(provider):
|
||||
return provider
|
||||
if provider is None:
|
||||
logger.debug(
|
||||
"web backend '%s' configured but not registered; falling back",
|
||||
configured,
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
"web backend '%s' configured but does not support '%s'; falling back",
|
||||
configured, capability,
|
||||
)
|
||||
|
||||
# 2. + 3. Fallback path — filter by availability so we don't surface
|
||||
# a provider the user has no credentials for. Without this filter,
|
||||
# a registered-but-unconfigured provider could end up "active" on
|
||||
# a fresh install with no API keys at all.
|
||||
eligible = [
|
||||
p for p in snapshot.values()
|
||||
if _capable(p) and _is_available_safe(p)
|
||||
]
|
||||
if len(eligible) == 1:
|
||||
return eligible[0]
|
||||
|
||||
for legacy in _LEGACY_PREFERENCE:
|
||||
provider = snapshot.get(legacy)
|
||||
if (
|
||||
provider is not None
|
||||
and _capable(provider)
|
||||
and _is_available_safe(provider)
|
||||
):
|
||||
return provider
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def get_active_search_provider() -> Optional[WebSearchProvider]:
|
||||
"""Resolve the currently-active web search provider.
|
||||
|
||||
Reads ``web.search_backend`` (preferred) or ``web.backend`` (shared
|
||||
fallback) from config.yaml; falls back per the module docstring.
|
||||
"""
|
||||
explicit = _read_config_key("web", "search_backend") or _read_config_key("web", "backend")
|
||||
return _resolve(explicit, capability="search")
|
||||
|
||||
|
||||
def get_active_extract_provider() -> Optional[WebSearchProvider]:
|
||||
"""Resolve the currently-active web extract provider.
|
||||
|
||||
Reads ``web.extract_backend`` (preferred) or ``web.backend`` (shared
|
||||
fallback) from config.yaml; falls back per the module docstring.
|
||||
"""
|
||||
explicit = _read_config_key("web", "extract_backend") or _read_config_key("web", "backend")
|
||||
return _resolve(explicit, capability="extract")
|
||||
|
||||
|
||||
def get_active_crawl_provider() -> Optional[WebSearchProvider]:
|
||||
"""Resolve the currently-active web crawl provider.
|
||||
|
||||
Reads ``web.crawl_backend`` (preferred) or ``web.backend`` (shared
|
||||
fallback) from config.yaml; falls back per the module docstring.
|
||||
|
||||
Crawl is a niche capability — among built-in providers only Tavily and
|
||||
Firecrawl implement it. Callers should expect ``None`` and fall back to
|
||||
a different strategy (e.g. summarize-via-LLM) when neither is
|
||||
configured.
|
||||
"""
|
||||
explicit = _read_config_key("web", "crawl_backend") or _read_config_key("web", "backend")
|
||||
return _resolve(explicit, capability="crawl")
|
||||
|
||||
|
||||
def _reset_for_tests() -> None:
|
||||
"""Clear the registry. **Test-only.**"""
|
||||
with _lock:
|
||||
_providers.clear()
|
||||
@@ -30,13 +30,14 @@ model:
|
||||
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
|
||||
# "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY)
|
||||
# "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
|
||||
# "lmstudio" - LM Studio local server (optional: LM_API_KEY, defaults to http://127.0.0.1:1234/v1)
|
||||
#
|
||||
# Local servers (LM Studio, Ollama, vLLM, llama.cpp):
|
||||
# "custom" - Any other OpenAI-compatible endpoint. Set base_url below.
|
||||
# Aliases: "ollama", "vllm", "llamacpp" all map to "custom".
|
||||
# LM Studio is first-class and uses provider: "lmstudio".
|
||||
# It works with both no-auth and auth-enabled server modes.
|
||||
# "custom" - Any OpenAI-compatible endpoint. Set base_url below.
|
||||
# Aliases: "lmstudio", "ollama", "vllm", "llamacpp" all map to "custom".
|
||||
# Example for LM Studio:
|
||||
# provider: "lmstudio"
|
||||
# base_url: "http://localhost:1234/v1"
|
||||
# No API key needed — local servers typically ignore auth.
|
||||
#
|
||||
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
|
||||
provider: "auto"
|
||||
@@ -121,18 +122,6 @@ model:
|
||||
# # Data policy: "allow" (default) or "deny" to exclude providers that may store data
|
||||
# # data_collection: "deny"
|
||||
|
||||
# =============================================================================
|
||||
# OpenRouter Response Caching (only applies when using OpenRouter)
|
||||
# =============================================================================
|
||||
# Cache identical API responses at the OpenRouter edge for free instant replays.
|
||||
# When enabled, identical requests (same model, messages, parameters) return
|
||||
# cached responses with zero billing. Separate from Anthropic prompt caching.
|
||||
# See: https://openrouter.ai/docs/guides/features/response-caching
|
||||
#
|
||||
# openrouter:
|
||||
# response_cache: true # Enable response caching (default: true)
|
||||
# response_cache_ttl: 300 # Cache TTL in seconds, 1-86400 (default: 300)
|
||||
|
||||
# =============================================================================
|
||||
# Git Worktree Isolation
|
||||
# =============================================================================
|
||||
@@ -192,23 +181,12 @@ terminal:
|
||||
# lifetime_seconds: 300
|
||||
# docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
# docker_mount_cwd_to_workspace: true # Explicit opt-in: mount your launch cwd into /workspace
|
||||
# # Optional: run the container as your host user's uid:gid so files written
|
||||
# # into bind-mounted dirs are owned by you, not root. Drops SETUID/SETGID
|
||||
# # caps too since no gosu privilege drop is needed. Leave off if your
|
||||
# # chosen docker_image expects to start as root.
|
||||
# docker_run_as_host_user: true
|
||||
# # Optional: explicitly forward selected env vars into Docker.
|
||||
# # These values come from your current shell first, then ~/.hermes/.env.
|
||||
# # Warning: anything forwarded here is visible to commands run in the container.
|
||||
# docker_forward_env:
|
||||
# - "GITHUB_TOKEN"
|
||||
# - "NPM_TOKEN"
|
||||
# # Optional: extra flags passed verbatim to docker run (appended after security defaults).
|
||||
# # Useful for adding capabilities (e.g. apt installs needing SETUID) or custom options.
|
||||
# # Example: add a Linux capability not included by default
|
||||
# # docker_extra_args:
|
||||
# # - "--cap-add"
|
||||
# # - "SETUID"
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# OPTION 4: Singularity/Apptainer container
|
||||
@@ -307,25 +285,6 @@ browser:
|
||||
# after this period of no activity between agent loops (default: 120 = 2 minutes)
|
||||
inactivity_timeout: 120
|
||||
|
||||
# =============================================================================
|
||||
# Tool Loop Guardrails
|
||||
# =============================================================================
|
||||
# Soft warnings are enabled by default. They append guidance to repeated failed
|
||||
# or non-progressing tool results but still let the tool execute. Hard stops are
|
||||
# opt-in circuit breakers for autonomous/cron sessions where stopping a loop is
|
||||
# preferable to spending the full iteration budget.
|
||||
tool_loop_guardrails:
|
||||
warnings_enabled: true
|
||||
hard_stop_enabled: false
|
||||
warn_after:
|
||||
exact_failure: 2
|
||||
same_tool_failure: 3
|
||||
idempotent_no_progress: 2
|
||||
hard_stop_after:
|
||||
exact_failure: 5
|
||||
same_tool_failure: 8
|
||||
idempotent_no_progress: 5
|
||||
|
||||
# =============================================================================
|
||||
# Context Compression (Auto-shrinks long conversations)
|
||||
# =============================================================================
|
||||
@@ -364,31 +323,9 @@ compression:
|
||||
# compression of older turns.
|
||||
protect_last_n: 20
|
||||
|
||||
# Number of non-system messages to protect at the head of the transcript, in
|
||||
# ADDITION to the system prompt (which is always implicitly protected).
|
||||
# Head messages are NEVER summarized — they survive every compression
|
||||
# indefinitely. This gives stable early context for short/medium sessions,
|
||||
# but in long-running sessions that rely on rolling compaction the pinned
|
||||
# opening turns may not match how you want the session framed over time.
|
||||
# Set to 0 to preserve ONLY the system prompt (plus the rolling summary
|
||||
# and recent tail) — the cleanest configuration for long-running sessions.
|
||||
# Default 3 preserves the system prompt plus the first three non-system
|
||||
# head messages, matching the pre-feature behaviour.
|
||||
protect_first_n: 3
|
||||
|
||||
# To pin a specific model/provider for compression summaries, use the
|
||||
# auxiliary section below (auxiliary.compression.provider / model).
|
||||
|
||||
# =============================================================================
|
||||
# Anthropic prompt caching TTL
|
||||
# =============================================================================
|
||||
# When prompt caching is active (Claude via OpenRouter or native Anthropic),
|
||||
# Anthropic supports two TTL tiers for cached prefixes: "5m" (default) and
|
||||
# "1h". Other values are ignored and "5m" is used.
|
||||
#
|
||||
prompt_caching:
|
||||
cache_ttl: "5m" # use "1h" for long sessions with pauses between turns
|
||||
|
||||
# =============================================================================
|
||||
# Auxiliary Models (Advanced — Experimental)
|
||||
# =============================================================================
|
||||
@@ -518,7 +455,6 @@ group_sessions_per_user: true
|
||||
# Stream tokens to messaging platforms in real-time. The bot sends a message
|
||||
# on first token, then progressively edits it as more tokens arrive.
|
||||
# Disabled by default — enable to try the streaming UX on Telegram/Discord/Slack.
|
||||
# For Telegram, partial edits are sent as plain text and only the final edit uses MarkdownV2.
|
||||
streaming:
|
||||
enabled: false
|
||||
# transport: edit # "edit" = progressive editMessageText
|
||||
@@ -571,13 +507,6 @@ agent:
|
||||
# finish, then interrupts anything still running after this timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
# restart_drain_timeout: 60
|
||||
|
||||
# Max app-level retry attempts for API errors (connection drops, provider
|
||||
# timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this
|
||||
# to 1 if you use fallback providers and want fast failover on flaky
|
||||
# primaries (default 3). The OpenAI SDK does its own low-level retries
|
||||
# underneath this wrapper — this is the Hermes-level loop.
|
||||
# api_max_retries: 3
|
||||
|
||||
# Enable verbose logging
|
||||
verbose: false
|
||||
@@ -620,7 +549,7 @@ agent:
|
||||
# - A preset like "hermes-cli" or "hermes-telegram" (curated tool set)
|
||||
# - A list of individual toolsets to compose your own (see list below)
|
||||
#
|
||||
# Supported platform keys: cli, telegram, discord, whatsapp, slack, qqbot, teams, google_chat
|
||||
# Supported platform keys: cli, telegram, discord, whatsapp, slack, qqbot
|
||||
#
|
||||
# Examples:
|
||||
#
|
||||
@@ -650,8 +579,6 @@ agent:
|
||||
# signal: hermes-signal (same as telegram)
|
||||
# homeassistant: hermes-homeassistant (same as telegram)
|
||||
# qqbot: hermes-qqbot (same as telegram)
|
||||
# teams: hermes-teams (same as telegram)
|
||||
# google_chat: hermes-google_chat (same as telegram)
|
||||
#
|
||||
platform_toolsets:
|
||||
cli: [hermes-cli]
|
||||
@@ -662,9 +589,6 @@ platform_toolsets:
|
||||
signal: [hermes-signal]
|
||||
homeassistant: [hermes-homeassistant]
|
||||
qqbot: [hermes-qqbot]
|
||||
yuanbao: [hermes-yuanbao]
|
||||
teams: [hermes-teams]
|
||||
google_chat: [hermes-google_chat]
|
||||
|
||||
# =============================================================================
|
||||
# Gateway Platform Settings
|
||||
@@ -675,10 +599,6 @@ platform_toolsets:
|
||||
# platforms:
|
||||
# telegram:
|
||||
# reply_to_mode: "first" # off | first | all
|
||||
# # guest_mode lets explicit @mentions from non-allowlisted groups through.
|
||||
# # Default false; ordinary messages, replies, and regex wake words stay blocked.
|
||||
# guest_mode: false
|
||||
# # allowed_chats: ["-1001234567890"]
|
||||
# extra:
|
||||
# disable_link_previews: false # Set true to suppress Telegram URL previews in bot messages
|
||||
|
||||
@@ -853,17 +773,9 @@ code_execution:
|
||||
# Supports single tasks and batch mode (default 3 parallel, configurable).
|
||||
delegation:
|
||||
max_iterations: 50 # Max tool-calling turns per child (default: 50)
|
||||
# max_concurrent_children: 3 # Max parallel child agents per batch (default: 3, floor: 1, no ceiling).
|
||||
# WARNING: values above 10 multiply API cost linearly.
|
||||
# max_spawn_depth: 1 # Delegation tree depth cap (range: 1-3, default: 1 = flat).
|
||||
# Raise to 2 to allow workers to spawn their own subagents.
|
||||
# Requires role="orchestrator" on intermediate agents.
|
||||
# max_concurrent_children: 3 # Max parallel child agents (default: 3)
|
||||
# max_spawn_depth: 1 # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
|
||||
# orchestrator_enabled: true # Kill switch for role="orchestrator" children (default: true).
|
||||
# subagent_auto_approve: false # When a subagent hits a dangerous-command approval prompt, auto-deny (default: false)
|
||||
# or auto-approve "once" (true) instead of blocking on stdin.
|
||||
# The parent TUI owns stdin, so blocking would deadlock; non-interactive resolution is required.
|
||||
# Both choices emit a logger.warning audit line. Flip to true only for cron/batch pipelines.
|
||||
# inherit_mcp_toolsets: true # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
|
||||
# model: "google/gemini-3-flash-preview" # Override model for subagents (empty = inherit parent)
|
||||
# provider: "openrouter" # Override provider for subagents (empty = inherit parent)
|
||||
# # Resolves full credentials (base_url, api_key) automatically.
|
||||
@@ -887,9 +799,7 @@ delegation:
|
||||
# Display
|
||||
# =============================================================================
|
||||
display:
|
||||
# Use compact banner mode (hides the ASCII-art banner, shows a single line).
|
||||
# true: Compact single-line banner
|
||||
# false: Full ASCII banner with tool/skill summary (default)
|
||||
# Use compact banner mode
|
||||
compact: false
|
||||
|
||||
# Tool progress display level (CLI and gateway)
|
||||
@@ -900,38 +810,15 @@ display:
|
||||
# Toggle at runtime with /verbose in the CLI
|
||||
tool_progress: all
|
||||
|
||||
# Auto-cleanup of temporary progress bubbles after the final response lands.
|
||||
# On platforms that support message deletion (currently Telegram), this
|
||||
# removes the tool-progress bubble, "⏳ Still working..." notices, and
|
||||
# context-pressure status messages once the final reply has been delivered —
|
||||
# keeping long-running turns visible live, then tidy afterward. Failed runs
|
||||
# leave the bubbles in place as breadcrumbs. Off by default.
|
||||
# Per-platform override: display.platforms.telegram.cleanup_progress
|
||||
# true: Delete tracked progress/status bubbles on successful turn
|
||||
# false: Leave everything in place (default)
|
||||
# Example:
|
||||
# display:
|
||||
# platforms:
|
||||
# telegram:
|
||||
# cleanup_progress: true
|
||||
cleanup_progress: false
|
||||
|
||||
# Gateway-only natural mid-turn assistant updates.
|
||||
# When true, completed assistant status messages are sent as separate chat
|
||||
# messages. This is independent of tool_progress and gateway streaming.
|
||||
# true: Send mid-turn assistant updates as separate messages (default)
|
||||
# false: Only send the final response
|
||||
interim_assistant_messages: true
|
||||
|
||||
# What Enter does when Hermes is already busy (CLI and gateway platforms).
|
||||
# What Enter does when Hermes is already busy in the CLI.
|
||||
# interrupt: Interrupt the current run and redirect Hermes (default)
|
||||
# queue: Queue your message for the next turn
|
||||
# steer: Inject your message mid-run via /steer, arriving at the agent
|
||||
# after the next tool call — no interrupt, no role violation.
|
||||
# Falls back to 'queue' if the agent isn't running yet or if
|
||||
# images are attached (steer only carries text).
|
||||
# Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
|
||||
# Toggle at runtime with /busy <interrupt|queue|steer>.
|
||||
# Ctrl+C always interrupts regardless of this setting.
|
||||
busy_input_mode: interrupt
|
||||
|
||||
# Background process notifications (gateway/messaging only).
|
||||
@@ -947,27 +834,19 @@ display:
|
||||
# Play terminal bell when agent finishes a response.
|
||||
# Useful for long-running tasks — your terminal will ding when the agent is done.
|
||||
# Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
|
||||
# true: Ring the terminal bell on each response
|
||||
# false: Silent (default)
|
||||
bell_on_complete: false
|
||||
|
||||
# Show model reasoning/thinking before each response.
|
||||
# When enabled, a dim box shows the model's thought process above the response.
|
||||
# Toggle at runtime with /reasoning show or /reasoning hide.
|
||||
# true: Show the reasoning box
|
||||
# false: Hide reasoning (default)
|
||||
show_reasoning: false
|
||||
|
||||
# Stream tokens to the terminal as they arrive instead of waiting for the
|
||||
# full response. The response box opens on first token and text appears
|
||||
# line-by-line. Tool calls are still captured silently.
|
||||
# true: Stream tokens as they arrive (default)
|
||||
# false: Wait for the full response before rendering
|
||||
# Stream tokens to the terminal in real-time. Disable to wait for full responses.
|
||||
streaming: true
|
||||
|
||||
# Show [HH:MM] timestamps on user input and assistant response labels.
|
||||
# timestamps: false
|
||||
|
||||
# ───────────────────────────────────────────────────────────────────────────
|
||||
# Skin / Theme
|
||||
# ───────────────────────────────────────────────────────────────────────────
|
||||
@@ -975,15 +854,10 @@ display:
|
||||
# response box label, and branding text. Change at runtime with /skin <name>.
|
||||
#
|
||||
# Built-in skins:
|
||||
# default — Classic Hermes gold/kawaii
|
||||
# ares — Crimson/bronze war-god theme with spinner wings
|
||||
# mono — Clean grayscale monochrome
|
||||
# slate — Cool blue developer-focused
|
||||
# daylight — Bright light-mode theme
|
||||
# warm-lightmode — Warm paper-tone light-mode theme
|
||||
# poseidon — Sea-green/teal Olympian theme
|
||||
# sisyphus — Earthy stone-and-moss theme
|
||||
# charizard — Fiery orange dragon theme
|
||||
# default — Classic Hermes gold/kawaii
|
||||
# ares — Crimson/bronze war-god theme with spinner wings
|
||||
# mono — Clean grayscale monochrome
|
||||
# slate — Cool blue developer-focused
|
||||
#
|
||||
# Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
|
||||
# Schema (all fields optional, missing values inherit from default):
|
||||
@@ -1009,7 +883,7 @@ display:
|
||||
# agent_name: "My Agent" # Banner title and branding
|
||||
# welcome: "Welcome message" # Shown at CLI startup
|
||||
# response_label: " ⚔ Agent " # Response box header label
|
||||
# prompt_symbol: "⚔" # Prompt symbol (bare token; renderers add trailing space)
|
||||
# prompt_symbol: "⚔ ❯ " # Prompt symbol
|
||||
# tool_prefix: "╎" # Tool output line prefix (default: ┊)
|
||||
#
|
||||
skin: default
|
||||
|
||||
@@ -29,7 +29,7 @@ echo "📝 Logging to: $LOG_FILE"
|
||||
# Point to the example dataset in this directory
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
|
||||
python batch_runner.py \
|
||||
python scripts/batch_runner.py \
|
||||
--dataset_file="$SCRIPT_DIR/example_browser_tasks.jsonl" \
|
||||
--batch_size=5 \
|
||||
--run_name="browser_tasks_example" \
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
# Generates tool-calling trajectories for multi-step web research tasks.
|
||||
#
|
||||
# Usage:
|
||||
# python batch_runner.py \
|
||||
# python scripts/batch_runner.py \
|
||||
# --config datagen-config-examples/web_research.yaml \
|
||||
# --run_name web_research_v1
|
||||
|
||||
|
||||
@@ -1,71 +0,0 @@
|
||||
#
|
||||
# docker-compose.yml for Hermes Agent
|
||||
#
|
||||
# Usage:
|
||||
# HERMES_UID=$(id -u) HERMES_GID=$(id -g) docker compose up -d
|
||||
#
|
||||
# Set HERMES_UID / HERMES_GID to the host user that owns ~/.hermes so
|
||||
# files created inside the container stay readable/writable on the host.
|
||||
# The entrypoint remaps the internal `hermes` user to these values via
|
||||
# usermod/groupmod + gosu.
|
||||
#
|
||||
# Security notes:
|
||||
# - The dashboard service binds to 127.0.0.1 by default. It stores API
|
||||
# keys; exposing it on LAN without auth is unsafe. If you want remote
|
||||
# access, use an SSH tunnel or put it behind a reverse proxy that
|
||||
# adds authentication — do NOT pass --insecure --host 0.0.0.0.
|
||||
# - If you override entrypoint, keep /opt/hermes/docker/entrypoint.sh in
|
||||
# the command chain. It drops root to the hermes user before gateway
|
||||
# files such as gateway.lock are created.
|
||||
# - The gateway's API server is off unless you uncomment API_SERVER_KEY
|
||||
# and API_SERVER_HOST. See docs/user-guide/api-server.md before doing
|
||||
# this on an internet-facing host.
|
||||
#
|
||||
services:
|
||||
gateway:
|
||||
build: .
|
||||
image: hermes-agent
|
||||
container_name: hermes
|
||||
restart: unless-stopped
|
||||
network_mode: host
|
||||
volumes:
|
||||
- ~/.hermes:/opt/data
|
||||
environment:
|
||||
- HERMES_UID=${HERMES_UID:-10000}
|
||||
- HERMES_GID=${HERMES_GID:-10000}
|
||||
# To expose the OpenAI-compatible API server beyond localhost,
|
||||
# uncomment BOTH lines (API_SERVER_KEY is mandatory for auth):
|
||||
# - API_SERVER_HOST=0.0.0.0
|
||||
# - API_SERVER_KEY=${API_SERVER_KEY}
|
||||
# Microsoft Teams — uncomment and fill in to enable Teams gateway.
|
||||
# Register your bot at https://dev.botframework.com/ to get these values.
|
||||
# - TEAMS_CLIENT_ID=${TEAMS_CLIENT_ID}
|
||||
# - TEAMS_CLIENT_SECRET=${TEAMS_CLIENT_SECRET}
|
||||
# - TEAMS_TENANT_ID=${TEAMS_TENANT_ID}
|
||||
# - TEAMS_ALLOWED_USERS=${TEAMS_ALLOWED_USERS}
|
||||
# - TEAMS_PORT=${TEAMS_PORT:-3978}
|
||||
# Google Chat — uncomment and fill in to enable the Google Chat gateway.
|
||||
# See website/docs/user-guide/messaging/google_chat.md for the full setup.
|
||||
# The SA JSON path must point to a file mounted into the container —
|
||||
# add a volume entry above (e.g. ``- ~/.hermes/google-chat-sa.json:/secrets/google-chat-sa.json:ro``)
|
||||
# then set GOOGLE_CHAT_SERVICE_ACCOUNT_JSON to that mount path.
|
||||
# - GOOGLE_CHAT_PROJECT_ID=${GOOGLE_CHAT_PROJECT_ID}
|
||||
# - GOOGLE_CHAT_SUBSCRIPTION_NAME=${GOOGLE_CHAT_SUBSCRIPTION_NAME}
|
||||
# - GOOGLE_CHAT_SERVICE_ACCOUNT_JSON=${GOOGLE_CHAT_SERVICE_ACCOUNT_JSON}
|
||||
# - GOOGLE_CHAT_ALLOWED_USERS=${GOOGLE_CHAT_ALLOWED_USERS}
|
||||
command: ["gateway", "run"]
|
||||
|
||||
dashboard:
|
||||
image: hermes-agent
|
||||
container_name: hermes-dashboard
|
||||
restart: unless-stopped
|
||||
network_mode: host
|
||||
depends_on:
|
||||
- gateway
|
||||
volumes:
|
||||
- ~/.hermes:/opt/data
|
||||
environment:
|
||||
- HERMES_UID=${HERMES_UID:-10000}
|
||||
- HERMES_GID=${HERMES_GID:-10000}
|
||||
# Localhost-only. For remote access, tunnel via `ssh -L 9119:localhost:9119`.
|
||||
command: ["dashboard", "--host", "127.0.0.1", "--no-open"]
|
||||
@@ -22,36 +22,14 @@ if [ "$(id -u)" = "0" ]; then
|
||||
groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Fix ownership of the data volume. When HERMES_UID remaps the hermes user,
|
||||
# files created by previous runs (under the old UID) become inaccessible.
|
||||
# Always chown -R when UID was remapped; otherwise only if top-level is wrong.
|
||||
actual_hermes_uid=$(id -u hermes)
|
||||
needs_chown=false
|
||||
if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "10000" ]; then
|
||||
needs_chown=true
|
||||
elif [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
|
||||
needs_chown=true
|
||||
fi
|
||||
if [ "$needs_chown" = true ]; then
|
||||
echo "Fixing ownership of $HERMES_HOME to hermes ($actual_hermes_uid)"
|
||||
if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
|
||||
echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing"
|
||||
# In rootless Podman the container's "root" is mapped to an unprivileged
|
||||
# host UID — chown will fail. That's fine: the volume is already owned
|
||||
# by the mapped user on the host side.
|
||||
chown -R hermes:hermes "$HERMES_HOME" 2>/dev/null || \
|
||||
echo "Warning: chown failed (rootless container?) — continuing anyway"
|
||||
# The .venv must also be re-chowned when UID is remapped, otherwise
|
||||
# lazy_deps.py cannot install platform packages (discord.py, etc.).
|
||||
chown -R hermes:hermes "$INSTALL_DIR/.venv" 2>/dev/null || \
|
||||
echo "Warning: chown .venv failed (rootless container?) — continuing anyway"
|
||||
fi
|
||||
|
||||
# Ensure config.yaml is readable by the hermes runtime user even if it was
|
||||
# edited on the host after initial ownership setup. Must run here (as root)
|
||||
# rather than after the gosu drop, otherwise a non-root caller like
|
||||
# `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
|
||||
if [ -f "$HERMES_HOME/config.yaml" ]; then
|
||||
chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
echo "Dropping root privileges"
|
||||
@@ -85,73 +63,9 @@ if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
|
||||
cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
|
||||
fi
|
||||
|
||||
# auth.json: bootstrap from env on first boot only. Used by orchestrators
|
||||
# (e.g. provisioning a Hermes VPS from an account-management service) that
|
||||
# need to seed the OAuth refresh credential non-interactively, instead of
|
||||
# walking the user through `hermes setup` + the device-flow login dance.
|
||||
# Subsequent token rotations write back to the same file, which lives on a
|
||||
# persistent volume — so this env var is consumed exactly once at first
|
||||
# boot. The `[ ! -f ... ]` guard is critical: without it, a container
|
||||
# restart would clobber a rotated refresh token with the now-stale value
|
||||
# the orchestrator originally seeded.
|
||||
if [ ! -f "$HERMES_HOME/auth.json" ] && [ -n "$HERMES_AUTH_JSON_BOOTSTRAP" ]; then
|
||||
printf '%s' "$HERMES_AUTH_JSON_BOOTSTRAP" > "$HERMES_HOME/auth.json"
|
||||
chmod 600 "$HERMES_HOME/auth.json"
|
||||
fi
|
||||
|
||||
# Sync bundled skills (manifest-based so user edits are preserved)
|
||||
if [ -d "$INSTALL_DIR/skills" ]; then
|
||||
python3 "$INSTALL_DIR/tools/skills_sync.py"
|
||||
hermes-skills-sync
|
||||
fi
|
||||
|
||||
# Optionally start `hermes dashboard` as a side-process.
|
||||
#
|
||||
# Toggled by HERMES_DASHBOARD=1 (also accepts "true"/"yes", case-insensitive).
|
||||
# Host/port/TUI can be overridden via:
|
||||
# HERMES_DASHBOARD_HOST (default 0.0.0.0 — exposed outside the container)
|
||||
# HERMES_DASHBOARD_PORT (default 9119, matches `hermes dashboard` default)
|
||||
# HERMES_DASHBOARD_TUI (already honored by `hermes dashboard` itself)
|
||||
#
|
||||
# The dashboard is a long-lived server. We background it *before* the final
|
||||
# `exec hermes "$@"` so the user's chosen foreground command (chat, gateway,
|
||||
# sleep infinity, …) remains PID-of-interest for the container runtime. When
|
||||
# the container stops the whole process tree is torn down, so no explicit
|
||||
# cleanup is needed.
|
||||
case "${HERMES_DASHBOARD:-}" in
|
||||
1|true|TRUE|True|yes|YES|Yes)
|
||||
dash_host="${HERMES_DASHBOARD_HOST:-0.0.0.0}"
|
||||
dash_port="${HERMES_DASHBOARD_PORT:-9119}"
|
||||
dash_args=(--host "$dash_host" --port "$dash_port" --no-open)
|
||||
# Binding to anything other than localhost requires --insecure — the
|
||||
# dashboard refuses otherwise because it exposes API keys. Inside a
|
||||
# container this is the expected deployment (host reaches it via
|
||||
# published port), so opt in automatically.
|
||||
if [ "$dash_host" != "127.0.0.1" ] && [ "$dash_host" != "localhost" ]; then
|
||||
dash_args+=(--insecure)
|
||||
fi
|
||||
echo "Starting hermes dashboard on ${dash_host}:${dash_port} (background)"
|
||||
# Prefix dashboard output so it's distinguishable from the main
|
||||
# process in `docker logs`. stdbuf keeps the pipe line-buffered.
|
||||
(
|
||||
stdbuf -oL -eL hermes dashboard "${dash_args[@]}" 2>&1 \
|
||||
| sed -u 's/^/[dashboard] /'
|
||||
) &
|
||||
;;
|
||||
esac
|
||||
|
||||
# Final exec: two supported invocation patterns.
|
||||
#
|
||||
# docker run <image> -> exec `hermes` with no args (legacy default)
|
||||
# docker run <image> chat -q "..." -> exec `hermes chat -q "..."` (legacy wrap)
|
||||
# docker run <image> sleep infinity -> exec `sleep infinity` directly
|
||||
# docker run <image> bash -> exec `bash` directly
|
||||
#
|
||||
# If the first positional arg resolves to an executable on PATH, we assume the
|
||||
# caller wants to run it directly (needed by the launcher which runs long-lived
|
||||
# `sleep infinity` sandbox containers — see tools/environments/docker.py).
|
||||
# Otherwise we treat the args as a hermes subcommand and wrap with `hermes`,
|
||||
# preserving the documented `docker run <image> <subcommand>` behavior.
|
||||
if [ $# -gt 0 ] && command -v "$1" >/dev/null 2>&1; then
|
||||
exec "$@"
|
||||
fi
|
||||
exec hermes "$@"
|
||||
|
||||
Binary file not shown.
@@ -1,473 +0,0 @@
|
||||
# Telegram DM User-Managed Multi-Session Topics Implementation Plan
|
||||
|
||||
> **For Hermes:** Use test-driven-development for implementation. Use subagent-driven-development only after this plan is split into small reviewed tasks.
|
||||
|
||||
**Goal:** Add an opt-in Telegram DM multi-session mode where Telegram user-created private-chat topics become independent Hermes session lanes, while the root DM becomes a system lobby.
|
||||
|
||||
**Architecture:** Rely on Telegram's native private-chat topic UI. Users create new topics with the `+` button; Hermes maps each `message_thread_id` to a separate session lane. Hermes does not create topics for normal `/new` flow and does not try to manage topic lifecycle beyond activation/status, root-lobby behavior, and restoring legacy sessions into a user-created topic.
|
||||
|
||||
**Tech Stack:** Hermes gateway, Telegram Bot API 9.4+, python-telegram-bot adapter, SQLite SessionDB / side tables, pytest.
|
||||
|
||||
---
|
||||
|
||||
## 1. Product decisions
|
||||
|
||||
### Accepted
|
||||
|
||||
- PR-quality implementation: migrations, tests, docs, backwards compatibility.
|
||||
- Use SQLite persistence, not JSON sidecars.
|
||||
- Live status suffixes in topic titles are out of MVP.
|
||||
- Topic title sync/editing is out of MVP except future-compatible storage if cheap.
|
||||
- User creates Telegram topics manually through the Telegram bot interface.
|
||||
- `/new` does **not** create Telegram topics.
|
||||
- Root/main DM becomes a system lobby after activation.
|
||||
- Existing Telegram behavior remains unchanged until the feature is activated/enabled.
|
||||
- Migration of old sessions is supported through `/topic` listing and `/topic <session_id>` restore inside a user-created topic.
|
||||
|
||||
### Telegram API assumptions verified from Bot API docs
|
||||
|
||||
- `getMe` returns bot `User` fields:
|
||||
- `has_topics_enabled`: forum/topic mode enabled in private chats.
|
||||
- `allows_users_to_create_topics`: users may create/delete topics in private chats.
|
||||
- `createForumTopic` works for private chats with a user, but MVP does not rely on it for normal flow.
|
||||
- `Message.message_thread_id` identifies a topic in private chats.
|
||||
- `sendMessage` supports `message_thread_id` for private-chat topics.
|
||||
- `pinChatMessage` is allowed in private chats.
|
||||
|
||||
---
|
||||
|
||||
## 2. Target UX
|
||||
|
||||
### 2.1 Activation from root/main DM
|
||||
|
||||
User sends:
|
||||
|
||||
```text
|
||||
/topic
|
||||
```
|
||||
|
||||
Hermes:
|
||||
|
||||
1. calls Telegram `getMe`;
|
||||
2. verifies `has_topics_enabled` and `allows_users_to_create_topics`;
|
||||
3. enables multi-session topic mode for this Telegram DM user/chat;
|
||||
4. sends an onboarding message;
|
||||
5. pins the onboarding message if configured;
|
||||
6. shows old/unlinked sessions that can be restored into topics.
|
||||
|
||||
Suggested onboarding text:
|
||||
|
||||
```text
|
||||
Multi-session mode is enabled.
|
||||
|
||||
Create new Hermes chats with the + button in this bot interface. Each Telegram topic is an independent Hermes session, so you can work on different tasks in parallel.
|
||||
|
||||
This main chat is reserved for system commands, status, and session management.
|
||||
|
||||
To restore an old session:
|
||||
1. Use /topic here to see unlinked sessions.
|
||||
2. Create a new topic with the + button.
|
||||
3. Send /topic <session_id> inside that topic.
|
||||
```
|
||||
|
||||
### 2.2 Root/main DM after activation
|
||||
|
||||
Root DM is a system lobby.
|
||||
|
||||
Allowed/system commands include at least:
|
||||
|
||||
- `/topic`
|
||||
- `/status`
|
||||
- `/sessions` if available
|
||||
- `/usage`
|
||||
- `/help`
|
||||
- `/platforms`
|
||||
|
||||
Normal user prompts in root DM do not enter the agent loop. Reply:
|
||||
|
||||
```text
|
||||
This main chat is reserved for system commands.
|
||||
|
||||
To chat with Hermes, create a new topic using the + button in this bot interface. Each topic works as an independent Hermes session.
|
||||
```
|
||||
|
||||
`/new` in root DM does not create a session/topic. Reply:
|
||||
|
||||
```text
|
||||
To start a new parallel Hermes chat, create a new topic with the + button in this bot interface.
|
||||
|
||||
Each topic is an independent Hermes session. Use /new inside a topic only if you want to replace that topic's current session.
|
||||
```
|
||||
|
||||
### 2.3 First message in a user-created topic
|
||||
|
||||
When a user creates a Telegram topic and sends the first message there:
|
||||
|
||||
1. Hermes receives a Telegram DM message with `message_thread_id`.
|
||||
2. Hermes derives the existing thread-aware `session_key` from `(platform=telegram, chat_type=dm, chat_id, thread_id)`.
|
||||
3. If no binding exists, Hermes creates a fresh Hermes session for this topic lane and persists the binding.
|
||||
4. The message runs through the normal agent loop for that lane.
|
||||
|
||||
### 2.4 `/new` inside a non-main topic
|
||||
|
||||
`/new` remains supported but replaces the session attached to the current topic lane.
|
||||
|
||||
Hermes should warn:
|
||||
|
||||
```text
|
||||
Started a new Hermes session in this topic.
|
||||
|
||||
Tip: for parallel work, create a new topic with the + button instead of using /new here. /new replaces the session attached to the current topic.
|
||||
```
|
||||
|
||||
### 2.5 `/topic` in root/main DM after activation
|
||||
|
||||
Shows:
|
||||
|
||||
- mode enabled/disabled;
|
||||
- last capability check result;
|
||||
- whether intro message is pinned if known;
|
||||
- count of known topic bindings;
|
||||
- list of old/unlinked sessions.
|
||||
|
||||
Example:
|
||||
|
||||
```text
|
||||
Telegram multi-session topics are enabled.
|
||||
|
||||
Create new Hermes chats with the + button in this bot interface.
|
||||
|
||||
Unlinked previous sessions:
|
||||
1. 2026-05-01 Research notes — id: abc123
|
||||
2. 2026-04-30 Deploy debugging — id: def456
|
||||
3. Untitled session — id: ghi789
|
||||
|
||||
To restore one:
|
||||
1. Create a new topic with the + button.
|
||||
2. Open that topic.
|
||||
3. Send /topic <id>
|
||||
```
|
||||
|
||||
### 2.6 `/topic` inside a non-main topic
|
||||
|
||||
Without args, show the current topic binding:
|
||||
|
||||
```text
|
||||
This topic is linked to:
|
||||
Session: Research notes
|
||||
ID: abc123
|
||||
|
||||
Use /new to replace this topic with a fresh session.
|
||||
For parallel work, create another topic with the + button.
|
||||
```
|
||||
|
||||
### 2.7 `/topic <session_id>` inside a non-main topic
|
||||
|
||||
Restore an old/unlinked session into the current user-created topic.
|
||||
|
||||
Behavior:
|
||||
|
||||
1. reject if not in Telegram DM topic;
|
||||
2. verify session belongs to the same Telegram user/chat or is a safe legacy root DM session for this user;
|
||||
3. reject if session is already linked to another active topic in MVP;
|
||||
4. `SessionStore.switch_session(current_topic_session_key, target_session_id)`;
|
||||
5. upsert binding with `managed_mode = restored`;
|
||||
6. send two messages into the topic:
|
||||
- session restored confirmation;
|
||||
- last Hermes assistant message if available.
|
||||
|
||||
Example:
|
||||
|
||||
```text
|
||||
Session restored: Research notes
|
||||
|
||||
Last Hermes message:
|
||||
...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Persistence model
|
||||
|
||||
Use SQLite, but topic-mode schema changes are **explicit opt-in migrations**, not automatic startup reconciliation.
|
||||
|
||||
Important rollback-safety rule:
|
||||
|
||||
- upgrading Hermes and starting the gateway must not create Telegram topic-mode tables or columns;
|
||||
- old/default Telegram behavior must keep working on the existing `state.db`;
|
||||
- the first `/topic` activation path calls an idempotent explicit migration, then enables topic mode for that chat;
|
||||
- if activation fails before the migration is needed, the database remains in the pre-topic-mode shape.
|
||||
|
||||
### 3.1 No eager `sessions` table mutation for MVP
|
||||
|
||||
Do **not** add `chat_id`, `chat_type`, `thread_id`, or `session_key` columns to `sessions` as part of ordinary `SessionDB()` startup. The existing declarative `_reconcile_columns()` mechanism would add them eagerly on every process start, which violates the managed-migration requirement.
|
||||
|
||||
For MVP, keep origin/session-lane data in topic-specific side tables created only by the explicit `/topic` migration. Legacy unlinked sessions can be discovered conservatively from existing data (`source = telegram`, `user_id = current Telegram user`) plus absence from topic bindings.
|
||||
|
||||
If future PRs need richer origin metadata for all gateway sessions, introduce it behind a separate explicit migration/command or a compatibility-reviewed schema bump.
|
||||
|
||||
### 3.2 Explicit `/topic` migration API
|
||||
|
||||
Add an idempotent method such as:
|
||||
|
||||
```python
|
||||
def apply_telegram_topic_migration(self) -> None: ...
|
||||
```
|
||||
|
||||
It creates only topic-mode side tables/indexes and records:
|
||||
|
||||
```text
|
||||
state_meta.telegram_dm_topic_schema_version = 1
|
||||
```
|
||||
|
||||
This method is called from `/topic` activation/status paths before reading or writing topic-mode state. It is not called from generic `SessionDB.__init__`, gateway startup, CLI startup, or auto-maintenance.
|
||||
|
||||
### 3.3 `telegram_dm_topic_mode`
|
||||
|
||||
Stores per-user/chat activation state. Created only by `apply_telegram_topic_migration()`.
|
||||
|
||||
Suggested fields:
|
||||
|
||||
- `chat_id` primary key
|
||||
- `user_id`
|
||||
- `enabled`
|
||||
- `activated_at`
|
||||
- `updated_at`
|
||||
- `has_topics_enabled`
|
||||
- `allows_users_to_create_topics`
|
||||
- `capability_checked_at`
|
||||
- `intro_message_id`
|
||||
- `pinned_message_id`
|
||||
|
||||
### 3.4 `telegram_dm_topic_bindings`
|
||||
|
||||
Stores Telegram topic/thread to Hermes session binding. Created only by `apply_telegram_topic_migration()`.
|
||||
|
||||
Suggested fields:
|
||||
|
||||
- `chat_id`
|
||||
- `thread_id`
|
||||
- `user_id`
|
||||
- `session_key`
|
||||
- `session_id`
|
||||
- `managed_mode`
|
||||
- `auto`
|
||||
- `restored`
|
||||
- `new_replaced`
|
||||
- `linked_at`
|
||||
- `updated_at`
|
||||
|
||||
Recommended constraints:
|
||||
|
||||
- primary key `(chat_id, thread_id)`;
|
||||
- unique index on `session_id` for MVP to prevent one session linked to multiple topics;
|
||||
- index `(user_id, chat_id)` for status/listing.
|
||||
|
||||
### 3.5 Unlinked session semantics
|
||||
|
||||
For MVP, a session is unlinked if:
|
||||
|
||||
- `source = telegram`;
|
||||
- `user_id = current Telegram user`;
|
||||
- no row in `telegram_dm_topic_bindings` has `session_id = session_id`.
|
||||
|
||||
This is intentionally conservative until a future explicit migration adds richer cross-platform origin metadata.
|
||||
|
||||
Never dedupe by title.
|
||||
|
||||
---
|
||||
|
||||
## 4. Config
|
||||
|
||||
Suggested config block:
|
||||
|
||||
```yaml
|
||||
platforms:
|
||||
telegram:
|
||||
extra:
|
||||
multisession_topics:
|
||||
enabled: false
|
||||
mode: user_managed_topics
|
||||
root_chat_behavior: system_lobby
|
||||
pin_intro_message: true
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
||||
- `enabled: false` means existing Telegram behavior is unchanged.
|
||||
- Activation via `/topic` may create per-chat enabled state only if global config permits it.
|
||||
- `root_chat_behavior: system_lobby` is the MVP behavior for activated chats.
|
||||
|
||||
---
|
||||
|
||||
## 5. Command behavior summary
|
||||
|
||||
### `/topic` root/main DM
|
||||
|
||||
- If not activated: capability check, activate, send/pin onboarding, list unlinked sessions.
|
||||
- If activated: show status and unlinked sessions.
|
||||
|
||||
### `/topic` non-main topic
|
||||
|
||||
- Show current binding.
|
||||
|
||||
### `/topic <session_id>` root/main DM
|
||||
|
||||
Reject with instructions:
|
||||
|
||||
```text
|
||||
Create a new topic with the + button, open it, then send /topic <session_id> there to restore this session.
|
||||
```
|
||||
|
||||
### `/topic <session_id>` non-main topic
|
||||
|
||||
Restore that session into this topic if ownership/linking checks pass.
|
||||
|
||||
### `/new` root/main DM when activated
|
||||
|
||||
Reply with instructions to use the `+` button. Do not enter agent loop.
|
||||
|
||||
### `/new` non-main topic
|
||||
|
||||
Create a new session in the current topic lane, persist/update binding, warn that `+` is preferred for parallel work.
|
||||
|
||||
### Normal text root/main DM when activated
|
||||
|
||||
Reply with system-lobby instruction. Do not enter agent loop.
|
||||
|
||||
### Normal text non-main topic
|
||||
|
||||
Normal Hermes agent flow for that topic's session lane.
|
||||
|
||||
---
|
||||
|
||||
## 6. PR breakdown
|
||||
|
||||
### PR 1 — Explicit topic-mode schema migration
|
||||
|
||||
**Goal:** Add rollback-safe SQLite support for Telegram topic mode without mutating `state.db` on ordinary upgrade/startup.
|
||||
|
||||
**Files likely touched:**
|
||||
|
||||
- `hermes_state.py`
|
||||
- tests under `tests/`
|
||||
|
||||
**Tests first:**
|
||||
|
||||
1. opening an old/current DB with `SessionDB()` does not create topic-mode tables or `sessions` origin columns;
|
||||
2. calling `apply_telegram_topic_migration()` creates `telegram_dm_topic_mode` and `telegram_dm_topic_bindings` idempotently;
|
||||
3. migration records `state_meta.telegram_dm_topic_schema_version = 1`.
|
||||
|
||||
### PR 2 — Topic mode activation and binding APIs
|
||||
|
||||
**Goal:** Add SQLite persistence for activation and topic bindings.
|
||||
|
||||
**Tests first:**
|
||||
|
||||
1. enable/check mode row round-trips;
|
||||
2. binding upsert and lookup by `(chat_id, user_id, thread_id)`;
|
||||
3. linked sessions are excluded from unlinked list.
|
||||
|
||||
### PR 3 — `/topic` activation/status command
|
||||
|
||||
**Goal:** Implement root activation/status/listing behavior.
|
||||
|
||||
**Tests first:**
|
||||
|
||||
1. `/topic` in root checks `getMe` capabilities and records activation;
|
||||
2. capability failure returns readable instructions;
|
||||
3. activated root `/topic` lists unlinked sessions.
|
||||
|
||||
### PR 4 — System lobby behavior
|
||||
|
||||
**Goal:** Prevent root chat from entering agent loop after activation.
|
||||
|
||||
**Tests first:**
|
||||
|
||||
1. normal text in activated root returns lobby instruction;
|
||||
2. `/new` in activated root returns `+` button instruction;
|
||||
3. non-activated root behavior is unchanged.
|
||||
|
||||
### PR 5 — Auto-bind user-created topics
|
||||
|
||||
**Goal:** First message in non-main topic creates/uses an independent session lane.
|
||||
|
||||
**Tests first:**
|
||||
|
||||
1. new topic message creates binding with `auto_created`;
|
||||
2. repeated topic message reuses same binding/lane;
|
||||
3. two topics in same DM do not share sessions.
|
||||
|
||||
### PR 6 — Restore legacy sessions into a topic
|
||||
|
||||
**Goal:** Implement `/topic <session_id>` in non-main topics.
|
||||
|
||||
**Tests first:**
|
||||
|
||||
1. root `/topic <id>` rejects with instructions;
|
||||
2. topic `/topic <id>` switches current topic lane to target session;
|
||||
3. restore rejects sessions from other users/chats;
|
||||
4. restore rejects already-linked sessions;
|
||||
5. restore emits confirmation and last Hermes assistant message.
|
||||
|
||||
### PR 7 — `/new` inside topic updates binding
|
||||
|
||||
**Goal:** Keep existing `/new` semantics but persist topic binding replacement.
|
||||
|
||||
**Tests first:**
|
||||
|
||||
1. `/new` in topic creates a new session for same topic lane;
|
||||
2. binding updates to `managed_mode = new_replaced`;
|
||||
3. response includes guidance to use `+` for parallel work.
|
||||
|
||||
### PR 8 — Docs and polish
|
||||
|
||||
**Goal:** Document the feature and Telegram setup.
|
||||
|
||||
**Files likely touched:**
|
||||
|
||||
- `website/docs/user-guide/messaging/telegram.md`
|
||||
- maybe `website/docs/user-guide/sessions.md`
|
||||
|
||||
Docs must explain:
|
||||
|
||||
- BotFather/Telegram settings for topic mode and user-created topics;
|
||||
- `/topic` activation;
|
||||
- root system lobby;
|
||||
- using `+` for new parallel chats;
|
||||
- restoring old sessions with `/topic <id>` inside a topic;
|
||||
- limitations.
|
||||
|
||||
---
|
||||
|
||||
## 7. Testing / quality gates
|
||||
|
||||
Run targeted tests after each TDD cycle, then broader tests before completion.
|
||||
|
||||
Suggested commands after inspection confirms test paths:
|
||||
|
||||
```bash
|
||||
python -m pytest tests/test_hermes_state.py -q
|
||||
python -m pytest tests/gateway/ -q
|
||||
python -m pytest tests/ -o 'addopts=' -q
|
||||
```
|
||||
|
||||
Do not ship without verifying disabled-feature backwards compatibility.
|
||||
|
||||
---
|
||||
|
||||
## 8. Definition of done for MVP
|
||||
|
||||
- `/topic` activates/checks Telegram DM multi-session mode.
|
||||
- Root DM becomes a system lobby after activation.
|
||||
- Onboarding message tells users to create new chats with the Telegram `+` button.
|
||||
- Onboarding message can be pinned in private chat.
|
||||
- User-created topics automatically become independent Hermes session lanes.
|
||||
- `/new` in root gives instructions, not a new agent run.
|
||||
- `/new` in a topic creates a new session in that topic and warns that `+` is preferred for parallel work.
|
||||
- `/topic` in root lists unlinked old sessions.
|
||||
- `/topic <session_id>` inside a topic restores that session and sends confirmation + last Hermes assistant message.
|
||||
- Ownership checks prevent restoring other users' sessions.
|
||||
- Already-linked sessions are not restored into a second topic in MVP.
|
||||
- Existing Telegram behavior is unchanged when the feature is disabled.
|
||||
- Tests and docs are included.
|
||||
@@ -40,7 +40,7 @@ This directory contains the integration layer between **hermes-agent's** tool-ca
|
||||
- `evaluate_log()` for saving eval results to JSON + samples.jsonl
|
||||
|
||||
**HermesAgentBaseEnv** (`hermes_base_env.py`) extends BaseEnv with hermes-agent specifics:
|
||||
- Sets `os.environ["TERMINAL_ENV"]` to configure the terminal backend (local, docker, ssh, singularity, modal, daytona, vercel_sandbox)
|
||||
- Sets `os.environ["TERMINAL_ENV"]` to configure the terminal backend (local, docker, modal, daytona, ssh, singularity)
|
||||
- Resolves hermes-agent toolsets via `_resolve_tools_for_group()` (calls `get_tool_definitions()` which queries `tools/registry.py`)
|
||||
- Implements `collect_trajectory()` which runs the full agent loop and computes rewards
|
||||
- Supports two-phase operation (Phase 1: OpenAI server, Phase 2: VLLM ManagedServer)
|
||||
|
||||
@@ -18,11 +18,14 @@ import logging
|
||||
import os
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Set
|
||||
from typing import Any, Dict, List, Optional, Set, TYPE_CHECKING
|
||||
|
||||
from model_tools import handle_function_call
|
||||
from tools.terminal_tool import get_active_env
|
||||
from tools.tool_result_storage import maybe_persist_tool_result, enforce_turn_budget
|
||||
if TYPE_CHECKING:
|
||||
from hermes_agent.tools.budget_config import BudgetConfig
|
||||
|
||||
from hermes_agent.tools.dispatch import handle_function_call
|
||||
from hermes_agent.tools.terminal import get_active_env
|
||||
from hermes_agent.tools.result_storage import maybe_persist_tool_result, enforce_turn_budget
|
||||
|
||||
# Thread pool for running sync tool calls that internally use asyncio.run()
|
||||
# (e.g., the Modal/Docker/Daytona terminal backends). Running them in a separate
|
||||
@@ -161,7 +164,7 @@ class HermesAgentLoop:
|
||||
thresholds, per-turn aggregate budget, and preview size.
|
||||
If None, uses DEFAULT_BUDGET (current hardcoded values).
|
||||
"""
|
||||
from tools.budget_config import DEFAULT_BUDGET
|
||||
from hermes_agent.tools.budget_config import DEFAULT_BUDGET
|
||||
self.server = server
|
||||
self.tool_schemas = tool_schemas
|
||||
self.valid_tool_names = valid_tool_names
|
||||
@@ -187,7 +190,7 @@ class HermesAgentLoop:
|
||||
tool_errors: List[ToolError] = []
|
||||
|
||||
# Per-loop TodoStore for the todo tool (ephemeral, dies with the loop)
|
||||
from tools.todo_tool import TodoStore, todo_tool as _todo_tool
|
||||
from hermes_agent.tools.todo import TodoStore, todo_tool as _todo_tool
|
||||
_todo_store = TodoStore()
|
||||
|
||||
# Extract user task from first user message for browser_snapshot context
|
||||
@@ -403,7 +406,7 @@ class HermesAgentLoop:
|
||||
# Run tool calls in a thread pool so backends that
|
||||
# use asyncio.run() internally (modal, docker, daytona) get
|
||||
# a clean event loop instead of deadlocking.
|
||||
loop = asyncio.get_running_loop()
|
||||
loop = asyncio.get_event_loop()
|
||||
# Capture current tool_name/args for the lambda
|
||||
_tn, _ta, _tid = tool_name, args, self.task_id
|
||||
tool_result = await loop.run_in_executor(
|
||||
|
||||
@@ -264,7 +264,7 @@ def _parse_hint_result(text: str) -> tuple[int | None, str]:
|
||||
"""Parse the judge's boxed decision and hint text."""
|
||||
boxed = _BOXED_RE.findall(text)
|
||||
score = int(boxed[-1]) if boxed else None
|
||||
if score not in {1, -1}:
|
||||
if score not in (1, -1):
|
||||
score = None
|
||||
hint_matches = _HINT_RE.findall(text)
|
||||
hint = hint_matches[-1].strip() if hint_matches else ""
|
||||
|
||||
@@ -60,7 +60,7 @@ from atroposlib.envs.server_handling.server_manager import APIServerConfig
|
||||
from environments.agent_loop import AgentResult, HermesAgentLoop
|
||||
from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig
|
||||
from environments.tool_context import ToolContext
|
||||
from tools.terminal_tool import (
|
||||
from hermes_agent.tools.terminal import (
|
||||
register_task_env_overrides,
|
||||
clear_task_env_overrides,
|
||||
cleanup_vm,
|
||||
@@ -162,7 +162,7 @@ def _normalize_tar_member_parts(member_name: str) -> list:
|
||||
):
|
||||
raise ValueError(f"Unsafe archive member path: {member_name}")
|
||||
|
||||
parts = [part for part in posix_path.parts if part not in {"", "."}]
|
||||
parts = [part for part in posix_path.parts if part not in ("", ".")]
|
||||
if not parts or any(part == ".." for part in parts):
|
||||
raise ValueError(f"Unsafe archive member path: {member_name}")
|
||||
return parts
|
||||
@@ -365,7 +365,7 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
|
||||
os.makedirs(log_dir, exist_ok=True)
|
||||
run_ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
self._streaming_path = os.path.join(log_dir, f"samples_{run_ts}.jsonl")
|
||||
self._streaming_file = open(self._streaming_path, "w", encoding="utf-8")
|
||||
self._streaming_file = open(self._streaming_path, "w")
|
||||
self._streaming_lock = __import__("threading").Lock()
|
||||
print(f" Streaming results to: {self._streaming_path}")
|
||||
|
||||
@@ -561,7 +561,7 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
|
||||
# --- 5. Verify -- run test suite in the agent's sandbox ---
|
||||
# Skip verification if the agent produced no meaningful output
|
||||
only_system_and_user = all(
|
||||
msg.get("role") in {"system", "user"} for msg in result.messages
|
||||
msg.get("role") in ("system", "user") for msg in result.messages
|
||||
)
|
||||
if result.turns_used == 0 or only_system_and_user:
|
||||
logger.warning(
|
||||
@@ -575,7 +575,7 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
|
||||
# other tasks, tqdm updates, and timeout timers).
|
||||
ctx = ToolContext(task_id)
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
loop = asyncio.get_event_loop()
|
||||
reward = await loop.run_in_executor(
|
||||
None, # default thread pool
|
||||
self._run_tests, eval_item, ctx, task_name,
|
||||
@@ -876,7 +876,7 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
|
||||
# Let cancellations propagate (finally blocks run cleanup_vm)
|
||||
await asyncio.gather(*eval_tasks, return_exceptions=True)
|
||||
# Belt-and-suspenders: clean up any remaining sandboxes
|
||||
from tools.terminal_tool import cleanup_all_environments
|
||||
from hermes_agent.tools.terminal import cleanup_all_environments
|
||||
cleanup_all_environments()
|
||||
print("All sandboxes cleaned up.")
|
||||
return
|
||||
@@ -919,7 +919,7 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
|
||||
eval_metrics[f"eval/pass_rate_{cat_key}"] = cat_pass_rate
|
||||
|
||||
# Store metrics for wandb_log
|
||||
self.eval_metrics = list(eval_metrics.items())
|
||||
self.eval_metrics = [(k, v) for k, v in eval_metrics.items()]
|
||||
|
||||
# ---- Print summary ----
|
||||
print(f"\n{'='*60}")
|
||||
@@ -984,7 +984,7 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv):
|
||||
|
||||
# Kill all remaining sandboxes. Timed-out tasks leave orphaned thread
|
||||
# pool workers still executing commands -- cleanup_all stops them.
|
||||
from tools.terminal_tool import cleanup_all_environments
|
||||
from hermes_agent.tools.terminal import cleanup_all_environments
|
||||
print("\nCleaning up all sandboxes...")
|
||||
cleanup_all_environments()
|
||||
|
||||
|
||||
@@ -422,7 +422,7 @@ class YCBenchEvalEnv(HermesAgentBaseEnv):
|
||||
os.makedirs(log_dir, exist_ok=True)
|
||||
run_ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
self._streaming_path = os.path.join(log_dir, f"samples_{run_ts}.jsonl")
|
||||
self._streaming_file = open(self._streaming_path, "w", encoding="utf-8")
|
||||
self._streaming_file = open(self._streaming_path, "w")
|
||||
self._streaming_lock = threading.Lock()
|
||||
|
||||
print(f"\nYC-Bench eval matrix: {len(self.all_eval_items)} runs")
|
||||
@@ -709,7 +709,7 @@ class YCBenchEvalEnv(HermesAgentBaseEnv):
|
||||
tqdm.write("\n[INTERRUPTED] Stopping evaluation...")
|
||||
pbar.close()
|
||||
try:
|
||||
from tools.terminal_tool import cleanup_all_environments
|
||||
from hermes_agent.tools.terminal import cleanup_all_environments
|
||||
cleanup_all_environments()
|
||||
except Exception:
|
||||
pass
|
||||
@@ -759,7 +759,7 @@ class YCBenchEvalEnv(HermesAgentBaseEnv):
|
||||
eval_metrics[f"eval/survival_rate_{key}"] = ps / pt if pt else 0
|
||||
eval_metrics[f"eval/avg_score_{key}"] = pa
|
||||
|
||||
self.eval_metrics = list(eval_metrics.items())
|
||||
self.eval_metrics = [(k, v) for k, v in eval_metrics.items()]
|
||||
|
||||
# --- Print summary ---
|
||||
print(f"\n{'='*60}")
|
||||
@@ -819,7 +819,7 @@ class YCBenchEvalEnv(HermesAgentBaseEnv):
|
||||
print(f"Results saved to: {self._streaming_path}")
|
||||
|
||||
try:
|
||||
from tools.terminal_tool import cleanup_all_environments
|
||||
from hermes_agent.tools.terminal import cleanup_all_environments
|
||||
cleanup_all_environments()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -62,15 +62,15 @@ from atroposlib.type_definitions import Item
|
||||
|
||||
from environments.agent_loop import AgentResult, HermesAgentLoop
|
||||
from environments.tool_context import ToolContext
|
||||
from tools.budget_config import (
|
||||
from hermes_agent.tools.budget_config import (
|
||||
DEFAULT_RESULT_SIZE_CHARS,
|
||||
DEFAULT_TURN_BUDGET_CHARS,
|
||||
DEFAULT_PREVIEW_SIZE_CHARS,
|
||||
)
|
||||
|
||||
# Import hermes-agent toolset infrastructure
|
||||
from model_tools import get_tool_definitions
|
||||
from toolset_distributions import sample_toolsets_from_distribution
|
||||
from hermes_agent.tools.dispatch import get_tool_definitions
|
||||
from hermes_agent.tools.distributions import sample_toolsets_from_distribution
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -209,7 +209,7 @@ class HermesAgentEnvConfig(BaseEnvConfig):
|
||||
|
||||
def build_budget_config(self):
|
||||
"""Build a BudgetConfig from env config fields."""
|
||||
from tools.budget_config import BudgetConfig
|
||||
from hermes_agent.tools.budget_config import BudgetConfig
|
||||
return BudgetConfig(
|
||||
default_result_size=self.default_result_size_chars,
|
||||
turn_budget=self.turn_budget_chars,
|
||||
@@ -571,7 +571,7 @@ class HermesAgentBaseEnv(BaseEnv):
|
||||
# (e.g., API call failed on turn 1). No point spinning up a Modal sandbox
|
||||
# just to verify files that were never created.
|
||||
only_system_and_user = all(
|
||||
msg.get("role") in {"system", "user"} for msg in result.messages
|
||||
msg.get("role") in ("system", "user") for msg in result.messages
|
||||
)
|
||||
if result.turns_used == 0 or only_system_and_user:
|
||||
logger.warning(
|
||||
|
||||
@@ -31,9 +31,9 @@ from typing import Any, Dict, List, Optional
|
||||
import asyncio
|
||||
import concurrent.futures
|
||||
|
||||
from model_tools import handle_function_call
|
||||
from tools.terminal_tool import cleanup_vm
|
||||
from tools.browser_tool import cleanup_browser
|
||||
from hermes_agent.tools.dispatch import handle_function_call
|
||||
from hermes_agent.tools.terminal import cleanup_vm
|
||||
from hermes_agent.tools.browser.tool import cleanup_browser
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -179,7 +179,7 @@ class ToolContext:
|
||||
|
||||
# Ensure parent directory exists in the sandbox
|
||||
parent = str(_Path(remote_path).parent)
|
||||
if parent not in {".", "/"}:
|
||||
if parent not in (".", "/"):
|
||||
self.terminal(f"mkdir -p {parent}", timeout=10)
|
||||
|
||||
# For small files, single command is fine
|
||||
@@ -446,7 +446,7 @@ class ToolContext:
|
||||
"""
|
||||
# Kill any background processes from this rollout (safety net)
|
||||
try:
|
||||
from tools.process_registry import process_registry
|
||||
from hermes_agent.tools.process_registry import process_registry
|
||||
killed = process_registry.kill_all(task_id=self.task_id)
|
||||
if killed:
|
||||
logger.debug("Process cleanup for task %s: killed %d process(es)", self.task_id, killed)
|
||||
|
||||
@@ -36,7 +36,6 @@
|
||||
|
||||
imports = [
|
||||
./nix/packages.nix
|
||||
./nix/overlays.nix
|
||||
./nix/nixosModules.nix
|
||||
./nix/checks.nix
|
||||
./nix/devShell.nix
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 115 KiB |
@@ -1,260 +0,0 @@
|
||||
"""
|
||||
Platform Adapter Registry
|
||||
|
||||
Allows platform adapters (built-in and plugin) to self-register so the gateway
|
||||
can discover and instantiate them without hardcoded if/elif chains.
|
||||
|
||||
Built-in adapters continue to use the existing if/elif in _create_adapter()
|
||||
for now. Plugin adapters register here via PluginContext.register_platform()
|
||||
and are looked up first -- if nothing is found the gateway falls through to
|
||||
the legacy code path.
|
||||
|
||||
Usage (plugin side):
|
||||
|
||||
from gateway.platform_registry import platform_registry, PlatformEntry
|
||||
|
||||
platform_registry.register(PlatformEntry(
|
||||
name="irc",
|
||||
label="IRC",
|
||||
adapter_factory=lambda cfg: IRCAdapter(cfg),
|
||||
check_fn=check_requirements,
|
||||
validate_config=lambda cfg: bool(cfg.extra.get("server")),
|
||||
required_env=["IRC_SERVER"],
|
||||
install_hint="pip install irc",
|
||||
))
|
||||
|
||||
Usage (gateway side):
|
||||
|
||||
adapter = platform_registry.create_adapter("irc", platform_config)
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Awaitable, Callable, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class PlatformEntry:
|
||||
"""Metadata and factory for a single platform adapter."""
|
||||
|
||||
# Identifier used in config.yaml (e.g. "irc", "viber").
|
||||
name: str
|
||||
|
||||
# Human-readable label (e.g. "IRC", "Viber").
|
||||
label: str
|
||||
|
||||
# Factory callable: receives a PlatformConfig, returns an adapter instance.
|
||||
# Using a factory instead of a bare class lets plugins do custom init
|
||||
# (e.g. passing extra kwargs, wrapping in try/except).
|
||||
adapter_factory: Callable[[Any], Any]
|
||||
|
||||
# Returns True when the platform's dependencies are available.
|
||||
check_fn: Callable[[], bool]
|
||||
|
||||
# Optional: given a PlatformConfig, is it properly configured?
|
||||
# If None, the registry skips config validation and lets the adapter
|
||||
# fail at connect() time with a descriptive error.
|
||||
validate_config: Optional[Callable[[Any], bool]] = None
|
||||
|
||||
# Optional: given a PlatformConfig, is the platform connected/enabled?
|
||||
# Used by ``GatewayConfig.get_connected_platforms()`` and setup UI status.
|
||||
# If None, falls back to ``validate_config`` or ``check_fn``.
|
||||
is_connected: Optional[Callable[[Any], bool]] = None
|
||||
|
||||
# Env vars this platform needs (for ``hermes setup`` display).
|
||||
required_env: list = field(default_factory=list)
|
||||
|
||||
# Hint shown when check_fn returns False.
|
||||
install_hint: str = ""
|
||||
|
||||
# Optional setup function for interactive configuration.
|
||||
# Signature: () -> None (prompts user, saves env vars).
|
||||
# If None, falls back to _setup_standard_platform (needs token_var + vars)
|
||||
# or a generic "set these env vars" display.
|
||||
setup_fn: Optional[Callable[[], None]] = None
|
||||
|
||||
# "builtin" or "plugin"
|
||||
source: str = "plugin"
|
||||
|
||||
# Name of the plugin manifest that registered this entry (empty for
|
||||
# built-ins). Used by ``hermes gateway setup`` to auto-enable the
|
||||
# owning plugin when the user configures its platform.
|
||||
plugin_name: str = ""
|
||||
|
||||
# ── Auth env var names (for _is_user_authorized integration) ──
|
||||
# E.g. "IRC_ALLOWED_USERS" — checked for comma-separated user IDs.
|
||||
allowed_users_env: str = ""
|
||||
# E.g. "IRC_ALLOW_ALL_USERS" — if truthy, all users authorized.
|
||||
allow_all_env: str = ""
|
||||
|
||||
# ── Message limits ──
|
||||
# Max message length for smart-chunking. 0 = no limit.
|
||||
max_message_length: int = 0
|
||||
|
||||
# ── Privacy ──
|
||||
# If True, session descriptions redact PII (phone numbers, etc.)
|
||||
pii_safe: bool = False
|
||||
|
||||
# ── Display ──
|
||||
# Emoji for CLI/gateway display (e.g. "💬")
|
||||
emoji: str = "🔌"
|
||||
|
||||
# Whether this platform should appear in _UPDATE_ALLOWED_PLATFORMS
|
||||
# (allows /update command from this platform).
|
||||
allow_update_command: bool = True
|
||||
|
||||
# ── LLM guidance ──
|
||||
# Platform hint injected into the system prompt (e.g. "You are on IRC.
|
||||
# Do not use markdown."). Empty string = no hint.
|
||||
platform_hint: str = ""
|
||||
|
||||
# ── Env-driven auto-configuration ──
|
||||
# Optional: read env vars, return a dict of ``PlatformConfig.extra`` fields
|
||||
# to seed when the platform is auto-enabled. Called during
|
||||
# ``_apply_env_overrides`` BEFORE the adapter is constructed, so
|
||||
# ``gateway status`` etc. can reflect env-only configuration without
|
||||
# instantiating the adapter. Return ``None`` (or an empty dict) to skip.
|
||||
# Signature: () -> Optional[dict[str, Any]]
|
||||
env_enablement_fn: Optional[Callable[[], Optional[dict]]] = None
|
||||
|
||||
# ── YAML→env config bridge ──
|
||||
# Optional: translate this platform's ``config.yaml`` keys into env vars
|
||||
# and/or seed ``PlatformConfig.extra`` directly. Lets a plugin own its
|
||||
# YAML config translation instead of forcing core ``gateway/config.py``
|
||||
# to know every platform's schema.
|
||||
#
|
||||
# Signature: (yaml_cfg: dict, platform_cfg: dict) -> Optional[dict]
|
||||
# Called from ``load_gateway_config()`` after the generic shared-key loop
|
||||
# and before ``_apply_env_overrides``. Mutating ``os.environ`` is allowed
|
||||
# (use ``not os.getenv(...)`` guards to preserve env > YAML precedence);
|
||||
# any returned dict is merged into ``PlatformConfig.extra``. Exceptions
|
||||
# are caught and logged at debug level.
|
||||
# See website/docs/developer-guide/adding-platform-adapters.md for the
|
||||
# full contract and a worked example.
|
||||
apply_yaml_config_fn: Optional[Callable[[dict, dict], Optional[dict]]] = None
|
||||
|
||||
# Optional: home-channel env var name for cron/notification delivery
|
||||
# (e.g. ``"IRC_HOME_CHANNEL"``). When set, ``cron.scheduler`` treats this
|
||||
# platform as a valid ``deliver=<name>`` target and reads the env var to
|
||||
# resolve the default chat/room ID. Empty = no cron home-channel support.
|
||||
cron_deliver_env_var: str = ""
|
||||
|
||||
# ── Standalone (out-of-process) sending ──
|
||||
# Optional: async coroutine that delivers a message without a live
|
||||
# gateway adapter. Called by ``tools/send_message_tool._send_via_adapter``
|
||||
# when ``cron`` runs in a separate process from the gateway and the
|
||||
# in-process adapter weakref is therefore ``None``.
|
||||
#
|
||||
# Signature:
|
||||
# async (pconfig, chat_id, message, *, thread_id=None,
|
||||
# media_files=None, force_document=False) -> dict
|
||||
#
|
||||
# Returns ``{"success": True, "message_id": ...}`` on success or
|
||||
# ``{"error": str}`` on failure. Plugin authors typically open an
|
||||
# ephemeral connection / acquire a fresh OAuth token, send, and close.
|
||||
# Without this hook, plugin platforms cannot serve as cron ``deliver=``
|
||||
# targets when the gateway is not co-resident with the cron process.
|
||||
standalone_sender_fn: Optional[Callable[..., Awaitable[dict]]] = None
|
||||
|
||||
|
||||
class PlatformRegistry:
|
||||
"""Central registry of platform adapters.
|
||||
|
||||
Thread-safe for reads (dict lookups are atomic under GIL).
|
||||
Writes happen at startup during sequential discovery.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._entries: dict[str, PlatformEntry] = {}
|
||||
|
||||
def register(self, entry: PlatformEntry) -> None:
|
||||
"""Register a platform adapter entry.
|
||||
|
||||
If an entry with the same name exists, it is replaced (last writer
|
||||
wins -- this lets plugins override built-in adapters if desired).
|
||||
"""
|
||||
if entry.name in self._entries:
|
||||
prev = self._entries[entry.name]
|
||||
logger.info(
|
||||
"Platform '%s' re-registered (was %s, now %s)",
|
||||
entry.name,
|
||||
prev.source,
|
||||
entry.source,
|
||||
)
|
||||
self._entries[entry.name] = entry
|
||||
logger.debug("Registered platform adapter: %s (%s)", entry.name, entry.source)
|
||||
|
||||
def unregister(self, name: str) -> bool:
|
||||
"""Remove a platform entry. Returns True if it existed."""
|
||||
return self._entries.pop(name, None) is not None
|
||||
|
||||
def get(self, name: str) -> Optional[PlatformEntry]:
|
||||
"""Look up a platform entry by name."""
|
||||
return self._entries.get(name)
|
||||
|
||||
def all_entries(self) -> list[PlatformEntry]:
|
||||
"""Return all registered platform entries."""
|
||||
return list(self._entries.values())
|
||||
|
||||
def plugin_entries(self) -> list[PlatformEntry]:
|
||||
"""Return only plugin-registered platform entries."""
|
||||
return [e for e in self._entries.values() if e.source == "plugin"]
|
||||
|
||||
def is_registered(self, name: str) -> bool:
|
||||
return name in self._entries
|
||||
|
||||
def create_adapter(self, name: str, config: Any) -> Optional[Any]:
|
||||
"""Create an adapter instance for the given platform name.
|
||||
|
||||
Returns None if:
|
||||
- No entry registered for *name*
|
||||
- check_fn() returns False (missing deps)
|
||||
- validate_config() returns False (misconfigured)
|
||||
- The factory raises an exception
|
||||
"""
|
||||
entry = self._entries.get(name)
|
||||
if entry is None:
|
||||
return None
|
||||
|
||||
if not entry.check_fn():
|
||||
hint = f" ({entry.install_hint})" if entry.install_hint else ""
|
||||
logger.warning(
|
||||
"Platform '%s' requirements not met%s",
|
||||
entry.label,
|
||||
hint,
|
||||
)
|
||||
return None
|
||||
|
||||
if entry.validate_config is not None:
|
||||
try:
|
||||
if not entry.validate_config(config):
|
||||
logger.warning(
|
||||
"Platform '%s' config validation failed",
|
||||
entry.label,
|
||||
)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Platform '%s' config validation error: %s",
|
||||
entry.label,
|
||||
e,
|
||||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
adapter = entry.adapter_factory(config)
|
||||
return adapter
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to create adapter for platform '%s': %s",
|
||||
entry.label,
|
||||
e,
|
||||
exc_info=True,
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
# Module-level singleton
|
||||
platform_registry = PlatformRegistry()
|
||||
@@ -1,45 +0,0 @@
|
||||
"""
|
||||
Platform adapters for messaging integrations.
|
||||
|
||||
Each adapter handles:
|
||||
- Receiving messages from a platform
|
||||
- Sending messages/responses back
|
||||
- Platform-specific authentication
|
||||
- Message formatting and media handling
|
||||
"""
|
||||
|
||||
from .base import BasePlatformAdapter, MessageEvent, SendResult
|
||||
|
||||
# QQAdapter and YuanbaoAdapter were previously imported eagerly here, but
|
||||
# nothing in the codebase consumes ``from gateway.platforms import
|
||||
# QQAdapter`` (every real call site uses the long-form path
|
||||
# ``from gateway.platforms.qqbot import QQAdapter``). The eager imports
|
||||
# pulled in qqbot's chunked-upload + keyboards + onboard machinery and
|
||||
# yuanbao's websocket stack — about 48 ms wall and ~8 MB RSS on every
|
||||
# CLI invocation, even ones that never touch a gateway adapter.
|
||||
#
|
||||
# Use PEP 562 module ``__getattr__`` to keep the public re-export working
|
||||
# while deferring the actual import to first attribute access. This is
|
||||
# 100% backward-compatible for any external code that still imports the
|
||||
# adapters from the package root.
|
||||
__all__ = [
|
||||
"BasePlatformAdapter",
|
||||
"MessageEvent",
|
||||
"SendResult",
|
||||
"QQAdapter",
|
||||
"YuanbaoAdapter",
|
||||
]
|
||||
|
||||
|
||||
def __getattr__(name):
|
||||
if name == "QQAdapter":
|
||||
from .qqbot import QQAdapter # noqa: F401
|
||||
return QQAdapter
|
||||
if name == "YuanbaoAdapter":
|
||||
from .yuanbao import YuanbaoAdapter # noqa: F401
|
||||
return YuanbaoAdapter
|
||||
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||
|
||||
|
||||
def __dir__():
|
||||
return sorted(__all__)
|
||||
@@ -1,84 +0,0 @@
|
||||
"""Shared HTTP client factory for long-lived platform adapters.
|
||||
|
||||
Gateway messaging platforms (QQ Bot, Feishu, WeCom, DingTalk, Signal,
|
||||
BlueBubbles, WeCom-callback) keep a persistent ``httpx.AsyncClient``
|
||||
alive for the adapter's lifetime. That amortises TLS/connection setup
|
||||
across many API calls, but it also means the process's file-descriptor
|
||||
pressure is sensitive to how aggressively the pool recycles idle keep-
|
||||
alive connections.
|
||||
|
||||
httpx's default ``keepalive_expiry`` is 5 seconds. On macOS behind
|
||||
Cloudflare Warp (and other transparent proxies), peer-initiated FIN can
|
||||
sit in ``CLOSE_WAIT`` longer than that before the local socket actually
|
||||
drains — which, multiplied across 7 long-lived adapters plus the LLM
|
||||
client and MCP clients, walks straight into the default 256 fd limit.
|
||||
See #18451.
|
||||
|
||||
``platform_httpx_limits()`` returns a tighter ``httpx.Limits`` the
|
||||
adapter factories use instead of the httpx default. The values chosen:
|
||||
|
||||
* ``max_keepalive_connections=10`` — plenty for any single adapter;
|
||||
platform APIs rarely parallelise beyond this.
|
||||
* ``keepalive_expiry=2.0`` — close idle sockets aggressively so a
|
||||
proxy's lingering CLOSE_WAIT window can't starve the process.
|
||||
|
||||
Override via ``HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY`` /
|
||||
``HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE`` env vars when tuning under load.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
try:
|
||||
import httpx
|
||||
except ImportError: # pragma: no cover — optional dep
|
||||
httpx = None # type: ignore[assignment]
|
||||
|
||||
|
||||
_DEFAULT_KEEPALIVE_EXPIRY_S = 2.0
|
||||
_DEFAULT_MAX_KEEPALIVE = 10
|
||||
|
||||
|
||||
def platform_httpx_limits() -> "httpx.Limits | None":
|
||||
"""Return ``httpx.Limits`` tuned for persistent platform-adapter clients.
|
||||
|
||||
Returns ``None`` when httpx isn't importable, so callers can fall
|
||||
back to httpx's built-in default without a hard dependency on this
|
||||
helper being reachable.
|
||||
"""
|
||||
if httpx is None:
|
||||
return None
|
||||
|
||||
def _env_float(name: str, default: float) -> float:
|
||||
raw = os.environ.get(name, "").strip()
|
||||
if not raw:
|
||||
return default
|
||||
try:
|
||||
val = float(raw)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
return val if val > 0 else default
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
|
||||
raw = os.environ.get(name, "").strip()
|
||||
if not raw:
|
||||
return default
|
||||
try:
|
||||
val = int(raw)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
return val if val > 0 else default
|
||||
|
||||
keepalive_expiry = _env_float(
|
||||
"HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY", _DEFAULT_KEEPALIVE_EXPIRY_S
|
||||
)
|
||||
max_keepalive = _env_int(
|
||||
"HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE", _DEFAULT_MAX_KEEPALIVE
|
||||
)
|
||||
|
||||
return httpx.Limits(
|
||||
max_keepalive_connections=max_keepalive,
|
||||
# Leave max_connections at httpx default (100) — plenty of headroom.
|
||||
keepalive_expiry=keepalive_expiry,
|
||||
)
|
||||
@@ -1,397 +0,0 @@
|
||||
"""Microsoft Graph webhook adapter for change-notification ingress."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hmac
|
||||
import ipaddress
|
||||
import json
|
||||
import logging
|
||||
from collections import deque
|
||||
from hashlib import sha1
|
||||
from typing import Any, Awaitable, Callable, Dict, Optional
|
||||
|
||||
try:
|
||||
from aiohttp import web
|
||||
|
||||
AIOHTTP_AVAILABLE = True
|
||||
except ImportError:
|
||||
AIOHTTP_AVAILABLE = False
|
||||
web = None # type: ignore[assignment]
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
SendResult,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_HOST = "0.0.0.0"
|
||||
DEFAULT_PORT = 8646
|
||||
DEFAULT_WEBHOOK_PATH = "/msgraph/webhook"
|
||||
DEFAULT_MAX_SEEN_RECEIPTS = 5000
|
||||
NotificationScheduler = Callable[[Dict[str, Any], MessageEvent], Awaitable[None] | None]
|
||||
|
||||
|
||||
def check_msgraph_webhook_requirements() -> bool:
|
||||
"""Return whether required webhook dependencies are available."""
|
||||
return AIOHTTP_AVAILABLE
|
||||
|
||||
|
||||
class MSGraphWebhookAdapter(BasePlatformAdapter):
|
||||
"""Receive Microsoft Graph change notifications and surface them internally."""
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.MSGRAPH_WEBHOOK)
|
||||
extra = config.extra or {}
|
||||
self._host: str = str(extra.get("host", DEFAULT_HOST))
|
||||
self._port: int = int(extra.get("port", DEFAULT_PORT))
|
||||
self._webhook_path: str = self._normalize_path(
|
||||
extra.get("webhook_path", DEFAULT_WEBHOOK_PATH)
|
||||
)
|
||||
self._health_path: str = self._normalize_path(extra.get("health_path", "/health"))
|
||||
self._accepted_resources: list[str] = [
|
||||
str(value).strip()
|
||||
for value in (extra.get("accepted_resources") or [])
|
||||
if str(value).strip()
|
||||
]
|
||||
self._client_state: Optional[str] = self._string_or_none(extra.get("client_state"))
|
||||
self._max_seen_receipts = max(
|
||||
1, int(extra.get("max_seen_receipts", DEFAULT_MAX_SEEN_RECEIPTS))
|
||||
)
|
||||
self._allowed_source_networks: list[ipaddress._BaseNetwork] = (
|
||||
self._parse_allowed_source_cidrs(extra.get("allowed_source_cidrs"))
|
||||
)
|
||||
self._runner = None
|
||||
self._notification_scheduler: Optional[NotificationScheduler] = None
|
||||
self._seen_receipts: set[str] = set()
|
||||
self._seen_receipt_order: deque[str] = deque()
|
||||
self._accepted_count = 0
|
||||
self._duplicate_count = 0
|
||||
|
||||
@staticmethod
|
||||
def _string_or_none(value: Any) -> Optional[str]:
|
||||
if value is None:
|
||||
return None
|
||||
text = str(value).strip()
|
||||
return text or None
|
||||
|
||||
@staticmethod
|
||||
def _normalize_path(path: Any) -> str:
|
||||
raw = str(path or "").strip() or "/"
|
||||
return raw if raw.startswith("/") else f"/{raw}"
|
||||
|
||||
@staticmethod
|
||||
def _build_receipt_key(notification: Dict[str, Any]) -> Optional[str]:
|
||||
explicit_id = str(notification.get("id") or "").strip()
|
||||
if explicit_id:
|
||||
return f"id:{explicit_id}"
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _normalize_resource_value(resource: str) -> str:
|
||||
return str(resource or "").strip().strip("/")
|
||||
|
||||
@staticmethod
|
||||
def _parse_allowed_source_cidrs(
|
||||
raw: Any,
|
||||
) -> list[ipaddress._BaseNetwork]:
|
||||
"""Parse an optional list of CIDR ranges allowed to POST to the webhook.
|
||||
|
||||
An empty or missing value means "allow everything" (same behavior as
|
||||
before this field existed). When populated, requests from source IPs
|
||||
outside every listed CIDR are rejected with 403 before the body is
|
||||
parsed. Use this to restrict the endpoint to Microsoft Graph's
|
||||
published webhook source ranges in production deployments.
|
||||
"""
|
||||
if raw is None:
|
||||
return []
|
||||
if isinstance(raw, str):
|
||||
candidates = [chunk.strip() for chunk in raw.split(",")]
|
||||
elif isinstance(raw, (list, tuple, set)):
|
||||
candidates = [str(chunk).strip() for chunk in raw]
|
||||
else:
|
||||
return []
|
||||
|
||||
networks: list[ipaddress._BaseNetwork] = []
|
||||
for chunk in candidates:
|
||||
if not chunk:
|
||||
continue
|
||||
try:
|
||||
networks.append(ipaddress.ip_network(chunk, strict=False))
|
||||
except ValueError:
|
||||
logger.warning(
|
||||
"[msgraph_webhook] Ignoring invalid allowed_source_cidrs entry: %r",
|
||||
chunk,
|
||||
)
|
||||
return networks
|
||||
|
||||
def set_notification_scheduler(self, scheduler: Optional[NotificationScheduler]) -> None:
|
||||
self._notification_scheduler = scheduler
|
||||
|
||||
async def connect(self) -> bool:
|
||||
app = web.Application()
|
||||
app.router.add_get(self._health_path, self._handle_health)
|
||||
app.router.add_get(self._webhook_path, self._handle_validation)
|
||||
app.router.add_post(self._webhook_path, self._handle_notification)
|
||||
|
||||
self._runner = web.AppRunner(app)
|
||||
await self._runner.setup()
|
||||
site = web.TCPSite(self._runner, self._host, self._port)
|
||||
await site.start()
|
||||
self._mark_connected()
|
||||
logger.info(
|
||||
"[msgraph_webhook] Listening on %s:%d%s",
|
||||
self._host,
|
||||
self._port,
|
||||
self._webhook_path,
|
||||
)
|
||||
return True
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
if self._runner is not None:
|
||||
await self._runner.cleanup()
|
||||
self._runner = None
|
||||
self._mark_disconnected()
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_id: str,
|
||||
content: str,
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
logger.info("[msgraph_webhook] Response for %s: %s", chat_id, content[:200])
|
||||
return SendResult(success=True)
|
||||
|
||||
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
|
||||
return {"name": chat_id, "type": "webhook"}
|
||||
|
||||
async def _handle_health(self, request: "web.Request") -> "web.Response":
|
||||
return web.json_response(
|
||||
{
|
||||
"status": "ok",
|
||||
"platform": self.platform.value,
|
||||
"webhook_path": self._webhook_path,
|
||||
"accepted": self._accepted_count,
|
||||
"duplicates": self._duplicate_count,
|
||||
}
|
||||
)
|
||||
|
||||
async def _handle_validation(self, request: "web.Request") -> "web.Response":
|
||||
"""Handle Microsoft Graph subscription validation handshake.
|
||||
|
||||
Graph validates a subscription endpoint by sending a GET with
|
||||
``validationToken`` in the query string; the service must echo the
|
||||
token verbatim as ``text/plain`` within 10 seconds. Anything else
|
||||
(bare GET, GET without the token) is rejected so the endpoint can't
|
||||
be enumerated or mistakenly used for data exfiltration.
|
||||
"""
|
||||
if not self._source_ip_allowed(request):
|
||||
return web.Response(status=403)
|
||||
validation_token = request.query.get("validationToken", "")
|
||||
if not validation_token:
|
||||
return web.Response(status=400)
|
||||
return web.Response(text=validation_token, content_type="text/plain")
|
||||
|
||||
async def _handle_notification(self, request: "web.Request") -> "web.Response":
|
||||
if not self._source_ip_allowed(request):
|
||||
return web.Response(status=403)
|
||||
|
||||
# Graph never sends validationToken on POST, but tolerate it for
|
||||
# defensive clients that replay the handshake in-band.
|
||||
validation_token = request.query.get("validationToken", "")
|
||||
if validation_token:
|
||||
return web.Response(text=validation_token, content_type="text/plain")
|
||||
|
||||
try:
|
||||
body = await request.json()
|
||||
except Exception:
|
||||
return web.Response(status=400)
|
||||
|
||||
notifications = body.get("value")
|
||||
if not isinstance(notifications, list):
|
||||
return web.Response(status=400)
|
||||
|
||||
accepted = 0
|
||||
duplicates = 0
|
||||
auth_rejected = 0
|
||||
other_rejected = 0
|
||||
|
||||
for raw_notification in notifications:
|
||||
if not isinstance(raw_notification, dict):
|
||||
other_rejected += 1
|
||||
continue
|
||||
notification = dict(raw_notification)
|
||||
if not self._resource_accepted(str(notification.get("resource") or "")):
|
||||
other_rejected += 1
|
||||
continue
|
||||
if not self._verify_client_state(notification):
|
||||
# Treat bad clientState as an auth failure: if the whole
|
||||
# batch is forged, we want to signal 403 so the sender
|
||||
# stops retrying. Legitimate Graph retries have valid
|
||||
# clientState and hit the accepted/duplicate paths.
|
||||
auth_rejected += 1
|
||||
continue
|
||||
|
||||
receipt_key = self._build_receipt_key(notification)
|
||||
if receipt_key is not None:
|
||||
if self._has_seen_receipt(receipt_key):
|
||||
duplicates += 1
|
||||
continue
|
||||
self._remember_receipt(receipt_key)
|
||||
|
||||
accepted += 1
|
||||
self._accepted_count += 1
|
||||
event = self._build_message_event(notification, receipt_key)
|
||||
self._schedule_notification(notification, event)
|
||||
|
||||
self._duplicate_count += duplicates
|
||||
# If anything ingested OR deduped, return 202 with empty body so
|
||||
# Graph acks successfully and we don't leak internal counters. If
|
||||
# every item failed auth, return 403 so an attacker POSTing fake
|
||||
# notifications gets a clear reject. Other failures (malformed,
|
||||
# resource-not-accepted) are the sender's configuration problem,
|
||||
# so 400.
|
||||
if accepted or duplicates:
|
||||
return web.Response(status=202)
|
||||
if auth_rejected and not other_rejected:
|
||||
return web.Response(status=403)
|
||||
return web.Response(status=400)
|
||||
|
||||
def _source_ip_allowed(self, request: "web.Request") -> bool:
|
||||
"""Return True if the request's source IP is in the configured allowlist.
|
||||
|
||||
When ``allowed_source_cidrs`` is empty (the default), everything is
|
||||
allowed — preserves behavior for dev tunnels / localhost setups.
|
||||
"""
|
||||
if not self._allowed_source_networks:
|
||||
return True
|
||||
peer = request.remote or ""
|
||||
if not peer:
|
||||
return False
|
||||
try:
|
||||
peer_addr = ipaddress.ip_address(peer)
|
||||
except ValueError:
|
||||
return False
|
||||
return any(peer_addr in network for network in self._allowed_source_networks)
|
||||
|
||||
def _resource_accepted(self, resource: str) -> bool:
|
||||
if not self._accepted_resources:
|
||||
return True
|
||||
normalized_resource = self._normalize_resource_value(resource)
|
||||
for pattern in self._accepted_resources:
|
||||
normalized_pattern = self._normalize_resource_value(pattern)
|
||||
if not normalized_pattern:
|
||||
continue
|
||||
if normalized_pattern.endswith("*"):
|
||||
prefix = normalized_pattern[:-1].rstrip("/")
|
||||
if normalized_resource == prefix or normalized_resource.startswith(f"{prefix}/"):
|
||||
return True
|
||||
continue
|
||||
if (
|
||||
normalized_resource == normalized_pattern
|
||||
or normalized_resource.startswith(f"{normalized_pattern}/")
|
||||
):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _verify_client_state(self, notification: Dict[str, Any]) -> bool:
|
||||
"""Verify the Graph-supplied clientState matches the configured secret.
|
||||
|
||||
Uses ``hmac.compare_digest`` instead of ``==`` so that a mismatch
|
||||
doesn't leak how many leading characters matched via string-compare
|
||||
timing. The configured client_state is a shared secret (documented in
|
||||
the setup guide as "generate with ``openssl rand -hex 32``"), so a
|
||||
timing-safe compare is the right primitive.
|
||||
"""
|
||||
expected = self._client_state
|
||||
if expected is None:
|
||||
return True
|
||||
provided = self._string_or_none(notification.get("clientState"))
|
||||
if provided is None:
|
||||
return False
|
||||
return hmac.compare_digest(provided, expected)
|
||||
|
||||
def _has_seen_receipt(self, receipt_key: str) -> bool:
|
||||
return receipt_key in self._seen_receipts
|
||||
|
||||
def _remember_receipt(self, receipt_key: str) -> None:
|
||||
self._seen_receipts.add(receipt_key)
|
||||
self._seen_receipt_order.append(receipt_key)
|
||||
while len(self._seen_receipt_order) > self._max_seen_receipts:
|
||||
oldest = self._seen_receipt_order.popleft()
|
||||
self._seen_receipts.discard(oldest)
|
||||
|
||||
def _build_message_event(
|
||||
self,
|
||||
notification: Dict[str, Any],
|
||||
receipt_key: Optional[str],
|
||||
) -> MessageEvent:
|
||||
message_id = receipt_key or f"sha1:{sha1(json.dumps(notification, sort_keys=True).encode('utf-8')).hexdigest()}"
|
||||
source = self.build_source(
|
||||
chat_id=f"msgraph:{notification.get('subscriptionId', 'unknown')}",
|
||||
chat_name="msgraph/webhook",
|
||||
chat_type="webhook",
|
||||
user_id="msgraph",
|
||||
user_name="Microsoft Graph",
|
||||
)
|
||||
return MessageEvent(
|
||||
text=self._render_prompt(notification),
|
||||
message_type=MessageType.TEXT,
|
||||
source=source,
|
||||
raw_message=notification,
|
||||
message_id=message_id,
|
||||
internal=True,
|
||||
)
|
||||
|
||||
def _render_prompt(self, notification: Dict[str, Any]) -> str:
|
||||
template = self.config.extra.get("prompt", "")
|
||||
if template:
|
||||
payload = {
|
||||
"notification": notification,
|
||||
"resource": notification.get("resource", ""),
|
||||
"change_type": notification.get("changeType", ""),
|
||||
"subscription_id": notification.get("subscriptionId", ""),
|
||||
}
|
||||
return self._render_template(template, payload)
|
||||
rendered = json.dumps(notification, indent=2, sort_keys=True)[:4000]
|
||||
return f"Microsoft Graph change notification:\n\n```json\n{rendered}\n```"
|
||||
|
||||
def _render_template(self, template: str, payload: Dict[str, Any]) -> str:
|
||||
import re
|
||||
|
||||
def _resolve(match: "re.Match[str]") -> str:
|
||||
key = match.group(1)
|
||||
value: Any = payload
|
||||
for part in key.split("."):
|
||||
if isinstance(value, dict):
|
||||
value = value.get(part, f"{{{key}}}")
|
||||
else:
|
||||
return f"{{{key}}}"
|
||||
if isinstance(value, (dict, list)):
|
||||
return json.dumps(value, sort_keys=True)[:2000]
|
||||
return str(value)
|
||||
|
||||
return re.sub(r"\{([a-zA-Z0-9_.]+)\}", _resolve, template)
|
||||
|
||||
def _schedule_notification(
|
||||
self,
|
||||
notification: Dict[str, Any],
|
||||
event: MessageEvent,
|
||||
) -> None:
|
||||
scheduler = self._notification_scheduler
|
||||
if scheduler is not None:
|
||||
result = scheduler(notification, event)
|
||||
if asyncio.iscoroutine(result):
|
||||
task = asyncio.create_task(result)
|
||||
self._background_tasks.add(task)
|
||||
task.add_done_callback(self._background_tasks.discard)
|
||||
return
|
||||
|
||||
task = asyncio.create_task(self.handle_message(event))
|
||||
self._background_tasks.add(task)
|
||||
task.add_done_callback(self._background_tasks.discard)
|
||||
@@ -1,602 +0,0 @@
|
||||
"""QQ Bot chunked upload flow.
|
||||
|
||||
The QQ v2 API caps inline base64 uploads (``file_data`` / ``url``) at ~10 MB.
|
||||
For files between 10 MB and ~100 MB we have to use the three-step chunked
|
||||
upload flow::
|
||||
|
||||
1. POST /v2/{users|groups}/{id}/upload_prepare
|
||||
→ returns upload_id, block_size, and an array of pre-signed COS part URLs.
|
||||
2. For each part:
|
||||
PUT the part bytes to its pre-signed COS URL,
|
||||
then POST /v2/{users|groups}/{id}/upload_part_finish to acknowledge.
|
||||
3. POST /v2/{users|groups}/{id}/files with {"upload_id": ...}
|
||||
→ returns the ``file_info`` token the caller uses in a RichMedia
|
||||
message.
|
||||
|
||||
Error-code semantics (from the QQ Bot v2 API spec):
|
||||
|
||||
- ``40093001`` — ``upload_part_finish`` retryable. Retry until the server-provided
|
||||
``retry_timeout`` elapses (or a local cap).
|
||||
- ``40093002`` — daily cumulative upload quota exceeded. Not retryable; surface
|
||||
as :class:`UploadDailyLimitExceededError` so the caller can build a
|
||||
user-friendly reply.
|
||||
|
||||
Exceptions:
|
||||
|
||||
- :class:`UploadDailyLimitExceededError` — daily quota hit (non-retryable).
|
||||
- :class:`UploadFileTooLargeError` — file exceeds the platform per-file limit.
|
||||
- :class:`RuntimeError` — generic upload failure (network, part PUT, complete).
|
||||
|
||||
Ported from WideLee's qqbot-agent-sdk v1.2.2 (``media_loader.py::ChunkedUploader``)
|
||||
so the heavy-upload path stays in-tree. Authorship preserved via Co-authored-by.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import functools
|
||||
import hashlib
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Awaitable, Callable, Dict, List, Optional
|
||||
|
||||
from gateway.platforms.qqbot.constants import FILE_UPLOAD_TIMEOUT
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ── Error codes ──────────────────────────────────────────────────────
|
||||
_BIZ_CODE_DAILY_LIMIT = 40093002 # upload_prepare: daily cumulative limit
|
||||
_BIZ_CODE_PART_RETRYABLE = 40093001 # upload_part_finish: transient
|
||||
|
||||
# ── Part upload tuning ───────────────────────────────────────────────
|
||||
_DEFAULT_CONCURRENT_PARTS = 1
|
||||
_MAX_CONCURRENT_PARTS = 10
|
||||
|
||||
_PART_UPLOAD_TIMEOUT = 300.0 # 5 minutes per COS PUT
|
||||
_PART_UPLOAD_MAX_RETRIES = 2
|
||||
_PART_FINISH_RETRY_INTERVAL = 1.0
|
||||
_PART_FINISH_DEFAULT_TIMEOUT = 120.0
|
||||
_PART_FINISH_MAX_TIMEOUT = 600.0
|
||||
|
||||
_COMPLETE_UPLOAD_MAX_RETRIES = 2
|
||||
_COMPLETE_UPLOAD_BASE_DELAY = 2.0
|
||||
|
||||
# First 10,002,432 bytes used for the ``md5_10m`` hash (per QQ API spec).
|
||||
_MD5_10M_SIZE = 10_002_432
|
||||
|
||||
|
||||
# ── Exceptions ───────────────────────────────────────────────────────
|
||||
|
||||
class UploadDailyLimitExceededError(Exception):
|
||||
"""Raised when ``upload_prepare`` returns biz_code 40093002.
|
||||
|
||||
The daily cumulative upload quota for this bot has been reached. Callers
|
||||
should surface :attr:`file_name` + :attr:`file_size_human` so the model
|
||||
can compose a helpful reply.
|
||||
"""
|
||||
|
||||
def __init__(self, file_name: str, file_size: int, message: str = "") -> None:
|
||||
self.file_name = file_name
|
||||
self.file_size = file_size
|
||||
super().__init__(
|
||||
message or f"Daily upload limit exceeded for {file_name!r}"
|
||||
)
|
||||
|
||||
@property
|
||||
def file_size_human(self) -> str:
|
||||
return format_size(self.file_size)
|
||||
|
||||
|
||||
class UploadFileTooLargeError(Exception):
|
||||
"""Raised when a file exceeds the platform per-file size limit."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
file_name: str,
|
||||
file_size: int,
|
||||
limit_bytes: int = 0,
|
||||
message: str = "",
|
||||
) -> None:
|
||||
self.file_name = file_name
|
||||
self.file_size = file_size
|
||||
self.limit_bytes = limit_bytes
|
||||
limit_str = f" ({format_size(limit_bytes)})" if limit_bytes else ""
|
||||
super().__init__(
|
||||
message
|
||||
or (
|
||||
f"File {file_name!r} ({format_size(file_size)}) "
|
||||
f"exceeds platform limit{limit_str}"
|
||||
)
|
||||
)
|
||||
|
||||
@property
|
||||
def file_size_human(self) -> str:
|
||||
return format_size(self.file_size)
|
||||
|
||||
@property
|
||||
def limit_human(self) -> str:
|
||||
return format_size(self.limit_bytes) if self.limit_bytes else "unknown"
|
||||
|
||||
|
||||
# ── Progress tracking ────────────────────────────────────────────────
|
||||
|
||||
@dataclass
|
||||
class _UploadProgress:
|
||||
total_parts: int = 0
|
||||
total_bytes: int = 0
|
||||
completed_parts: int = 0
|
||||
uploaded_bytes: int = 0
|
||||
|
||||
|
||||
# ── Prepare-response shape ───────────────────────────────────────────
|
||||
|
||||
@dataclass
|
||||
class _PreparePart:
|
||||
index: int
|
||||
presigned_url: str
|
||||
block_size: int = 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class _PrepareResult:
|
||||
upload_id: str
|
||||
block_size: int
|
||||
parts: List[_PreparePart]
|
||||
concurrency: int = _DEFAULT_CONCURRENT_PARTS
|
||||
retry_timeout: float = 0.0
|
||||
|
||||
|
||||
def _parse_prepare_response(raw: Dict[str, Any]) -> _PrepareResult:
|
||||
"""Parse the upload_prepare API response into a normalized shape.
|
||||
|
||||
The API may return the response directly or wrapped in ``data``.
|
||||
"""
|
||||
src = raw.get("data") if isinstance(raw.get("data"), dict) else raw
|
||||
upload_id = str(src.get("upload_id", ""))
|
||||
if not upload_id:
|
||||
raise ValueError(
|
||||
f"upload_prepare response missing upload_id: {str(raw)[:200]}"
|
||||
)
|
||||
block_size = int(src.get("block_size", 0))
|
||||
raw_parts = src.get("parts") or src.get("part_list") or []
|
||||
if not isinstance(raw_parts, list) or not raw_parts:
|
||||
raise ValueError(
|
||||
f"upload_prepare response missing parts: {str(raw)[:200]}"
|
||||
)
|
||||
parts: List[_PreparePart] = []
|
||||
for p in raw_parts:
|
||||
if not isinstance(p, dict):
|
||||
continue
|
||||
parts.append(
|
||||
_PreparePart(
|
||||
index=int(p.get("part_index") or p.get("index") or 0),
|
||||
presigned_url=str(
|
||||
p.get("presigned_url") or p.get("url") or ""
|
||||
),
|
||||
block_size=int(p.get("block_size", 0)),
|
||||
)
|
||||
)
|
||||
return _PrepareResult(
|
||||
upload_id=upload_id,
|
||||
block_size=block_size,
|
||||
parts=parts,
|
||||
concurrency=int(src.get("concurrency", _DEFAULT_CONCURRENT_PARTS)) or _DEFAULT_CONCURRENT_PARTS,
|
||||
retry_timeout=float(src.get("retry_timeout", 0.0) or 0.0),
|
||||
)
|
||||
|
||||
|
||||
# ── Chunked upload driver ────────────────────────────────────────────
|
||||
|
||||
ApiRequestFn = Callable[..., Awaitable[Dict[str, Any]]]
|
||||
"""Signature of the adapter's ``_api_request`` callable.
|
||||
|
||||
We pass the bound method in rather than importing the adapter, to avoid
|
||||
circular imports and keep this module testable in isolation.
|
||||
"""
|
||||
|
||||
|
||||
class ChunkedUploader:
|
||||
"""Run the prepare → PUT parts → complete sequence.
|
||||
|
||||
:param api_request: Bound ``_api_request(method, path, body=..., timeout=...)``
|
||||
coroutine from the adapter. Must raise ``RuntimeError`` with the biz_code
|
||||
embedded in the message on API errors.
|
||||
:param http_put: Coroutine ``(url, data, headers, timeout) -> response`` for
|
||||
COS part uploads. Typically wraps ``httpx.AsyncClient.put``.
|
||||
:param log_tag: Log prefix.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_request: ApiRequestFn,
|
||||
http_put: Callable[..., Awaitable[Any]],
|
||||
log_tag: str = "QQBot",
|
||||
) -> None:
|
||||
self._api_request = api_request
|
||||
self._http_put = http_put
|
||||
self._log_tag = log_tag
|
||||
|
||||
async def upload(
|
||||
self,
|
||||
chat_type: str,
|
||||
target_id: str,
|
||||
file_path: str,
|
||||
file_type: int,
|
||||
file_name: str,
|
||||
) -> Dict[str, Any]:
|
||||
"""Run the full chunked upload and return the ``complete_upload`` response.
|
||||
|
||||
:param chat_type: ``'c2c'`` or ``'group'``.
|
||||
:param target_id: User or group openid.
|
||||
:param file_path: Absolute path to a local file.
|
||||
:param file_type: ``MEDIA_TYPE_*`` constant.
|
||||
:param file_name: Original filename (for upload_prepare).
|
||||
:returns: The raw response dict from ``complete_upload`` — contains
|
||||
``file_info`` that the caller uses in a RichMedia message body.
|
||||
:raises UploadDailyLimitExceededError: On biz_code 40093002.
|
||||
:raises UploadFileTooLargeError: When the file exceeds the platform limit.
|
||||
:raises RuntimeError: On other API or I/O failures.
|
||||
"""
|
||||
if chat_type not in {"c2c", "group"}:
|
||||
raise ValueError(
|
||||
f"ChunkedUploader: unsupported chat_type {chat_type!r}"
|
||||
)
|
||||
|
||||
path = Path(file_path)
|
||||
file_size = path.stat().st_size
|
||||
|
||||
logger.info(
|
||||
"[%s] Chunked upload start: file=%s size=%s type=%d",
|
||||
self._log_tag, file_name, format_size(file_size), file_type,
|
||||
)
|
||||
|
||||
# Step 1: compute hashes (blocking I/O → executor).
|
||||
hashes = await asyncio.get_running_loop().run_in_executor(
|
||||
None, _compute_file_hashes, file_path, file_size
|
||||
)
|
||||
|
||||
# Step 2: upload_prepare.
|
||||
prepare = await self._prepare(
|
||||
chat_type, target_id, file_type, file_name, file_size, hashes
|
||||
)
|
||||
max_concurrent = min(prepare.concurrency, _MAX_CONCURRENT_PARTS)
|
||||
retry_timeout = min(
|
||||
prepare.retry_timeout if prepare.retry_timeout > 0 else _PART_FINISH_DEFAULT_TIMEOUT,
|
||||
_PART_FINISH_MAX_TIMEOUT,
|
||||
)
|
||||
logger.info(
|
||||
"[%s] Prepared: upload_id=%s block_size=%s parts=%d concurrency=%d",
|
||||
self._log_tag, prepare.upload_id, format_size(prepare.block_size),
|
||||
len(prepare.parts), max_concurrent,
|
||||
)
|
||||
|
||||
progress = _UploadProgress(
|
||||
total_parts=len(prepare.parts),
|
||||
total_bytes=file_size,
|
||||
)
|
||||
|
||||
# Step 3: PUT each part + notify.
|
||||
tasks: List[Callable[[], Awaitable[None]]] = [
|
||||
functools.partial(
|
||||
self._upload_one_part,
|
||||
chat_type=chat_type,
|
||||
target_id=target_id,
|
||||
file_path=file_path,
|
||||
file_size=file_size,
|
||||
upload_id=prepare.upload_id,
|
||||
rsp_block_size=prepare.block_size,
|
||||
part=part,
|
||||
retry_timeout=retry_timeout,
|
||||
progress=progress,
|
||||
)
|
||||
for part in prepare.parts
|
||||
]
|
||||
await _run_with_concurrency(tasks, max_concurrent)
|
||||
|
||||
logger.info(
|
||||
"[%s] All %d parts uploaded, completing…",
|
||||
self._log_tag, len(prepare.parts),
|
||||
)
|
||||
|
||||
# Step 4: complete_upload (retry on transient errors).
|
||||
return await self._complete(chat_type, target_id, prepare.upload_id)
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
# Step 1 — upload_prepare
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
|
||||
async def _prepare(
|
||||
self,
|
||||
chat_type: str,
|
||||
target_id: str,
|
||||
file_type: int,
|
||||
file_name: str,
|
||||
file_size: int,
|
||||
hashes: Dict[str, str],
|
||||
) -> _PrepareResult:
|
||||
base = "/v2/users" if chat_type == "c2c" else "/v2/groups"
|
||||
path = f"{base}/{target_id}/upload_prepare"
|
||||
body = {
|
||||
"file_type": file_type,
|
||||
"file_name": file_name,
|
||||
"file_size": file_size,
|
||||
"md5": hashes["md5"],
|
||||
"sha1": hashes["sha1"],
|
||||
"md5_10m": hashes["md5_10m"],
|
||||
}
|
||||
try:
|
||||
raw = await self._api_request(
|
||||
"POST", path, body=body, timeout=FILE_UPLOAD_TIMEOUT
|
||||
)
|
||||
except RuntimeError as exc:
|
||||
err_msg = str(exc)
|
||||
if f"{_BIZ_CODE_DAILY_LIMIT}" in err_msg:
|
||||
raise UploadDailyLimitExceededError(
|
||||
file_name, file_size, err_msg
|
||||
) from exc
|
||||
raise
|
||||
return _parse_prepare_response(raw)
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
# Step 2 — PUT one part + part_finish
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
|
||||
async def _upload_one_part(
|
||||
self,
|
||||
chat_type: str,
|
||||
target_id: str,
|
||||
file_path: str,
|
||||
file_size: int,
|
||||
upload_id: str,
|
||||
rsp_block_size: int,
|
||||
part: _PreparePart,
|
||||
retry_timeout: float,
|
||||
progress: _UploadProgress,
|
||||
) -> None:
|
||||
"""PUT one part to COS, then call ``upload_part_finish``."""
|
||||
part_index = part.index
|
||||
# Per-part block_size wins; fall back to the response-level value.
|
||||
actual_block_size = part.block_size if part.block_size > 0 else rsp_block_size
|
||||
offset = (part_index - 1) * rsp_block_size
|
||||
length = min(actual_block_size, file_size - offset)
|
||||
|
||||
# Read this slice of the file (blocking → executor).
|
||||
data = await asyncio.get_running_loop().run_in_executor(
|
||||
None, _read_file_chunk, file_path, offset, length
|
||||
)
|
||||
md5_hex = hashlib.md5(data).hexdigest()
|
||||
|
||||
logger.debug(
|
||||
"[%s] Part %d/%d: uploading %s (offset=%d md5=%s)",
|
||||
self._log_tag, part_index, progress.total_parts,
|
||||
format_size(length), offset, md5_hex,
|
||||
)
|
||||
|
||||
await self._put_to_presigned_url(
|
||||
part.presigned_url, data, part_index, progress.total_parts
|
||||
)
|
||||
await self._part_finish_with_retry(
|
||||
chat_type, target_id, upload_id,
|
||||
part_index, length, md5_hex, retry_timeout,
|
||||
)
|
||||
|
||||
progress.completed_parts += 1
|
||||
progress.uploaded_bytes += length
|
||||
logger.debug(
|
||||
"[%s] Part %d/%d done (%d/%d total)",
|
||||
self._log_tag, part_index, progress.total_parts,
|
||||
progress.completed_parts, progress.total_parts,
|
||||
)
|
||||
|
||||
async def _put_to_presigned_url(
|
||||
self,
|
||||
url: str,
|
||||
data: bytes,
|
||||
part_index: int,
|
||||
total_parts: int,
|
||||
) -> None:
|
||||
"""PUT part data to a pre-signed COS URL with retry."""
|
||||
last_exc: Optional[Exception] = None
|
||||
for attempt in range(_PART_UPLOAD_MAX_RETRIES + 1):
|
||||
try:
|
||||
resp = await asyncio.wait_for(
|
||||
self._http_put(
|
||||
url,
|
||||
data=data,
|
||||
headers={"Content-Length": str(len(data))},
|
||||
),
|
||||
timeout=_PART_UPLOAD_TIMEOUT,
|
||||
)
|
||||
# Caller's http_put is expected to return an httpx-like response.
|
||||
status = getattr(resp, "status_code", 0)
|
||||
if 200 <= status < 300:
|
||||
logger.debug(
|
||||
"[%s] PUT part %d/%d: %d OK",
|
||||
self._log_tag, part_index, total_parts, status,
|
||||
)
|
||||
return
|
||||
body_preview = ""
|
||||
try:
|
||||
body_preview = getattr(resp, "text", "")[:200]
|
||||
except Exception: # pragma: no cover — defensive
|
||||
pass
|
||||
raise RuntimeError(
|
||||
f"COS PUT returned {status}: {body_preview}"
|
||||
)
|
||||
except Exception as exc:
|
||||
last_exc = exc
|
||||
if attempt < _PART_UPLOAD_MAX_RETRIES:
|
||||
delay = 1.0 * (2 ** attempt)
|
||||
logger.warning(
|
||||
"[%s] PUT part %d/%d attempt %d failed, retry in %.1fs: %s",
|
||||
self._log_tag, part_index, total_parts,
|
||||
attempt + 1, delay, exc,
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
raise RuntimeError(
|
||||
f"Part {part_index}/{total_parts} upload failed after "
|
||||
f"{_PART_UPLOAD_MAX_RETRIES + 1} attempts: {last_exc}"
|
||||
)
|
||||
|
||||
async def _part_finish_with_retry(
|
||||
self,
|
||||
chat_type: str,
|
||||
target_id: str,
|
||||
upload_id: str,
|
||||
part_index: int,
|
||||
block_size: int,
|
||||
md5: str,
|
||||
retry_timeout: float,
|
||||
) -> None:
|
||||
"""Call ``upload_part_finish``, retrying on biz_code 40093001."""
|
||||
base = "/v2/users" if chat_type == "c2c" else "/v2/groups"
|
||||
path = f"{base}/{target_id}/upload_part_finish"
|
||||
body = {
|
||||
"upload_id": upload_id,
|
||||
"part_index": part_index,
|
||||
"block_size": block_size,
|
||||
"md5": md5,
|
||||
}
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
start = loop.time()
|
||||
attempt = 0
|
||||
while True:
|
||||
try:
|
||||
await self._api_request(
|
||||
"POST", path, body=body, timeout=FILE_UPLOAD_TIMEOUT
|
||||
)
|
||||
return
|
||||
except RuntimeError as exc:
|
||||
err_msg = str(exc)
|
||||
if f"{_BIZ_CODE_PART_RETRYABLE}" not in err_msg:
|
||||
raise
|
||||
elapsed = loop.time() - start
|
||||
if elapsed >= retry_timeout:
|
||||
raise RuntimeError(
|
||||
f"upload_part_finish persistent retry timed out "
|
||||
f"after {retry_timeout:.0f}s ({attempt} retries): {exc}"
|
||||
) from exc
|
||||
attempt += 1
|
||||
logger.debug(
|
||||
"[%s] part_finish retryable error, attempt %d, "
|
||||
"elapsed=%.1fs: %s",
|
||||
self._log_tag, attempt, elapsed, exc,
|
||||
)
|
||||
await asyncio.sleep(_PART_FINISH_RETRY_INTERVAL)
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
# Step 3 — complete_upload
|
||||
# ──────────────────────────────────────────────────────────────────
|
||||
|
||||
async def _complete(
|
||||
self,
|
||||
chat_type: str,
|
||||
target_id: str,
|
||||
upload_id: str,
|
||||
) -> Dict[str, Any]:
|
||||
"""Call ``complete_upload`` with retry.
|
||||
|
||||
This reuses the ``/files`` endpoint (same as the simple URL-based upload)
|
||||
but signals the chunked-completion path by sending only ``upload_id``.
|
||||
"""
|
||||
base = "/v2/users" if chat_type == "c2c" else "/v2/groups"
|
||||
path = f"{base}/{target_id}/files"
|
||||
body = {"upload_id": upload_id}
|
||||
|
||||
last_exc: Optional[Exception] = None
|
||||
for attempt in range(_COMPLETE_UPLOAD_MAX_RETRIES + 1):
|
||||
try:
|
||||
return await self._api_request(
|
||||
"POST", path, body=body, timeout=FILE_UPLOAD_TIMEOUT
|
||||
)
|
||||
except Exception as exc:
|
||||
last_exc = exc
|
||||
if attempt < _COMPLETE_UPLOAD_MAX_RETRIES:
|
||||
delay = _COMPLETE_UPLOAD_BASE_DELAY * (2 ** attempt)
|
||||
logger.warning(
|
||||
"[%s] complete_upload attempt %d failed, "
|
||||
"retry in %.1fs: %s",
|
||||
self._log_tag, attempt + 1, delay, exc,
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
raise RuntimeError(
|
||||
f"complete_upload failed after "
|
||||
f"{_COMPLETE_UPLOAD_MAX_RETRIES + 1} attempts: {last_exc}"
|
||||
)
|
||||
|
||||
|
||||
# ── Helpers (module-level for testability) ───────────────────────────
|
||||
|
||||
def format_size(size_bytes: int) -> str:
|
||||
"""Return a human-readable file size string (e.g. ``'12.3 MB'``)."""
|
||||
size = float(size_bytes)
|
||||
for unit in ("B", "KB", "MB", "GB"):
|
||||
if size < 1024.0:
|
||||
return f"{size:.1f} {unit}"
|
||||
size /= 1024.0
|
||||
return f"{size:.1f} TB"
|
||||
|
||||
|
||||
def _read_file_chunk(file_path: str, offset: int, length: int) -> bytes:
|
||||
"""Read *length* bytes from *file_path* starting at *offset*.
|
||||
|
||||
:raises IOError: If fewer bytes were read than expected (truncated file).
|
||||
"""
|
||||
with open(file_path, "rb") as fh:
|
||||
fh.seek(offset)
|
||||
data = fh.read(length)
|
||||
if len(data) != length:
|
||||
raise IOError(
|
||||
f"Short read from {file_path}: expected {length} bytes at "
|
||||
f"offset {offset}, got {len(data)} (file may be truncated)"
|
||||
)
|
||||
return data
|
||||
|
||||
|
||||
def _compute_file_hashes(file_path: str, file_size: int) -> Dict[str, str]:
|
||||
"""Compute md5, sha1, and md5_10m in a single pass."""
|
||||
md5 = hashlib.md5()
|
||||
sha1 = hashlib.sha1()
|
||||
md5_10m = hashlib.md5()
|
||||
|
||||
need_10m = file_size > _MD5_10M_SIZE
|
||||
bytes_read = 0
|
||||
|
||||
with open(file_path, "rb") as fh:
|
||||
while True:
|
||||
chunk = fh.read(65536)
|
||||
if not chunk:
|
||||
break
|
||||
md5.update(chunk)
|
||||
sha1.update(chunk)
|
||||
if need_10m:
|
||||
remaining = _MD5_10M_SIZE - bytes_read
|
||||
if remaining > 0:
|
||||
md5_10m.update(chunk[:remaining])
|
||||
bytes_read += len(chunk)
|
||||
|
||||
full_md5 = md5.hexdigest()
|
||||
return {
|
||||
"md5": full_md5,
|
||||
"sha1": sha1.hexdigest(),
|
||||
# For small files the "10m" hash is just the full md5.
|
||||
"md5_10m": md5_10m.hexdigest() if need_10m else full_md5,
|
||||
}
|
||||
|
||||
|
||||
async def _run_with_concurrency(
|
||||
tasks: List[Callable[[], Awaitable[None]]],
|
||||
concurrency: int,
|
||||
) -> None:
|
||||
"""Run a list of thunks with a bounded number in flight at once."""
|
||||
concurrency = max(concurrency, 1)
|
||||
sem = asyncio.Semaphore(concurrency)
|
||||
|
||||
async def _wrap(thunk: Callable[[], Awaitable[None]]) -> None:
|
||||
async with sem:
|
||||
await thunk()
|
||||
|
||||
await asyncio.gather(*(_wrap(t) for t in tasks))
|
||||
@@ -1,473 +0,0 @@
|
||||
"""QQ Bot inline keyboards + approval / update-prompt senders.
|
||||
|
||||
QQ Bot v2 supports attaching inline keyboards to outbound messages. When a
|
||||
user clicks a button, the platform dispatches an ``INTERACTION_CREATE``
|
||||
gateway event containing the button's ``data`` payload. The bot must ACK the
|
||||
interaction promptly via ``PUT /interactions/{id}`` or the user sees an
|
||||
error indicator on the button.
|
||||
|
||||
This module provides:
|
||||
|
||||
- :class:`InlineKeyboard` + button dataclasses — serialized into the
|
||||
``keyboard`` field of the outbound message body.
|
||||
- :func:`build_approval_keyboard` — 3-button ✅ once / ⭐ always / ❌ deny
|
||||
keyboard for tool-approval flows.
|
||||
- :func:`build_update_prompt_keyboard` — Yes/No keyboard for update confirms.
|
||||
- :func:`parse_approval_button_data` / :func:`parse_update_prompt_button_data`
|
||||
— decode the ``button_data`` payload from ``INTERACTION_CREATE``.
|
||||
- :class:`ApprovalRequest` + :class:`ApprovalSender` — high-level helper that
|
||||
builds an approval message with keyboard and posts it to a c2c / group chat.
|
||||
|
||||
``button_data`` formats::
|
||||
|
||||
approve:<session_key>:<decision> # decision = allow-once|allow-always|deny
|
||||
update_prompt:<answer> # answer = y|n
|
||||
|
||||
Ported from WideLee's qqbot-agent-sdk v1.2.2 (``approval.py`` + ``dto.py``
|
||||
keyboard types). Authorship preserved via Co-authored-by.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Awaitable, Callable, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ── button_data prefixes + patterns ──────────────────────────────────
|
||||
|
||||
APPROVAL_BUTTON_PREFIX = "approve:"
|
||||
UPDATE_PROMPT_PREFIX = "update_prompt:"
|
||||
|
||||
# Pattern: approve:<session_key>:<decision>
|
||||
# session_key may itself contain colons (e.g. agent:main:qqbot:c2c:OPENID),
|
||||
# so the session_key group is greedy but trails the decision.
|
||||
_APPROVAL_DATA_RE = re.compile(
|
||||
r"^approve:(.+):(allow-once|allow-always|deny)$"
|
||||
)
|
||||
|
||||
# Pattern: update_prompt:y | update_prompt:n
|
||||
_UPDATE_PROMPT_RE = re.compile(r"^update_prompt:(y|n)$")
|
||||
|
||||
|
||||
# ── Keyboard dataclasses ─────────────────────────────────────────────
|
||||
|
||||
@dataclass
|
||||
class KeyboardButtonPermission:
|
||||
"""Button permission metadata. ``type=2`` means all users can click."""
|
||||
type: int = 2
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {"type": self.type}
|
||||
|
||||
|
||||
@dataclass
|
||||
class KeyboardButtonAction:
|
||||
"""What happens when the button is clicked.
|
||||
|
||||
:param type: ``1`` (Callback — triggers ``INTERACTION_CREATE``) or
|
||||
``2`` (Link — opens a URL).
|
||||
:param data: Payload delivered in ``data.resolved.button_data`` when
|
||||
``type=1``.
|
||||
:param permission: :class:`KeyboardButtonPermission`.
|
||||
:param click_limit: Max clicks per user (``1`` = single-use).
|
||||
"""
|
||||
type: int
|
||||
data: str
|
||||
permission: KeyboardButtonPermission = field(
|
||||
default_factory=KeyboardButtonPermission
|
||||
)
|
||||
click_limit: int = 1
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"type": self.type,
|
||||
"data": self.data,
|
||||
"permission": self.permission.to_dict(),
|
||||
"click_limit": self.click_limit,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class KeyboardButtonRenderData:
|
||||
"""Visual rendering of a button.
|
||||
|
||||
:param label: Pre-click label.
|
||||
:param visited_label: Post-click label (button stays greyed in place).
|
||||
:param style: ``0`` = grey, ``1`` = blue.
|
||||
"""
|
||||
label: str
|
||||
visited_label: str
|
||||
style: int = 1
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"label": self.label,
|
||||
"visited_label": self.visited_label,
|
||||
"style": self.style,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class KeyboardButton:
|
||||
"""One button in a keyboard.
|
||||
|
||||
:param group_id: Buttons sharing a ``group_id`` are mutually exclusive —
|
||||
clicking one greys the rest.
|
||||
"""
|
||||
id: str
|
||||
render_data: KeyboardButtonRenderData
|
||||
action: KeyboardButtonAction
|
||||
group_id: str = "default"
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"id": self.id,
|
||||
"render_data": self.render_data.to_dict(),
|
||||
"action": self.action.to_dict(),
|
||||
"group_id": self.group_id,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class KeyboardRow:
|
||||
buttons: List[KeyboardButton] = field(default_factory=list)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {"buttons": [b.to_dict() for b in self.buttons]}
|
||||
|
||||
|
||||
@dataclass
|
||||
class KeyboardContent:
|
||||
rows: List[KeyboardRow] = field(default_factory=list)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {"rows": [r.to_dict() for r in self.rows]}
|
||||
|
||||
|
||||
@dataclass
|
||||
class InlineKeyboard:
|
||||
"""Top-level keyboard payload — goes into ``MessageToCreate.keyboard``."""
|
||||
content: KeyboardContent = field(default_factory=KeyboardContent)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {"content": self.content.to_dict()}
|
||||
|
||||
|
||||
# ── INTERACTION_CREATE parsing ───────────────────────────────────────
|
||||
|
||||
def parse_approval_button_data(button_data: str) -> Optional[tuple[str, str]]:
|
||||
"""Parse approval ``button_data`` into ``(session_key, decision)``.
|
||||
|
||||
:param button_data: Raw ``data.resolved.button_data`` from
|
||||
``INTERACTION_CREATE``.
|
||||
:returns: ``(session_key, decision)`` or ``None`` if not an approval button.
|
||||
"""
|
||||
m = _APPROVAL_DATA_RE.match(button_data or "")
|
||||
if not m:
|
||||
return None
|
||||
return m.group(1), m.group(2)
|
||||
|
||||
|
||||
def parse_update_prompt_button_data(button_data: str) -> Optional[str]:
|
||||
"""Parse update-prompt ``button_data`` into ``'y'`` or ``'n'``."""
|
||||
m = _UPDATE_PROMPT_RE.match(button_data or "")
|
||||
if not m:
|
||||
return None
|
||||
return m.group(1)
|
||||
|
||||
|
||||
# ── Keyboard builders ────────────────────────────────────────────────
|
||||
|
||||
def _make_callback_button(
|
||||
btn_id: str,
|
||||
label: str,
|
||||
visited_label: str,
|
||||
data: str,
|
||||
style: int,
|
||||
group_id: str,
|
||||
) -> KeyboardButton:
|
||||
return KeyboardButton(
|
||||
id=btn_id,
|
||||
render_data=KeyboardButtonRenderData(
|
||||
label=label,
|
||||
visited_label=visited_label,
|
||||
style=style,
|
||||
),
|
||||
action=KeyboardButtonAction(type=1, data=data),
|
||||
group_id=group_id,
|
||||
)
|
||||
|
||||
|
||||
def build_approval_keyboard(session_key: str) -> InlineKeyboard:
|
||||
"""Build the 3-button approval keyboard.
|
||||
|
||||
Layout: ``[✅ 允许一次] [⭐ 始终允许] [❌ 拒绝]`` — all three share
|
||||
``group_id='approval'`` so clicking one greys out the rest.
|
||||
|
||||
:param session_key: Embedded into ``button_data`` so the decision
|
||||
routes back to the right pending approval.
|
||||
"""
|
||||
return InlineKeyboard(
|
||||
content=KeyboardContent(
|
||||
rows=[
|
||||
KeyboardRow(buttons=[
|
||||
_make_callback_button(
|
||||
btn_id="allow",
|
||||
label="✅ 允许一次",
|
||||
visited_label="已允许",
|
||||
data=f"{APPROVAL_BUTTON_PREFIX}{session_key}:allow-once",
|
||||
style=1,
|
||||
group_id="approval",
|
||||
),
|
||||
_make_callback_button(
|
||||
btn_id="always",
|
||||
label="⭐ 始终允许",
|
||||
visited_label="已始终允许",
|
||||
data=f"{APPROVAL_BUTTON_PREFIX}{session_key}:allow-always",
|
||||
style=1,
|
||||
group_id="approval",
|
||||
),
|
||||
_make_callback_button(
|
||||
btn_id="deny",
|
||||
label="❌ 拒绝",
|
||||
visited_label="已拒绝",
|
||||
data=f"{APPROVAL_BUTTON_PREFIX}{session_key}:deny",
|
||||
style=0,
|
||||
group_id="approval",
|
||||
),
|
||||
]),
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def build_update_prompt_keyboard() -> InlineKeyboard:
|
||||
"""Build a Yes/No keyboard for update confirmation prompts."""
|
||||
return InlineKeyboard(
|
||||
content=KeyboardContent(
|
||||
rows=[
|
||||
KeyboardRow(buttons=[
|
||||
_make_callback_button(
|
||||
btn_id="yes",
|
||||
label="✓ 确认",
|
||||
visited_label="已确认",
|
||||
data=f"{UPDATE_PROMPT_PREFIX}y",
|
||||
style=1,
|
||||
group_id="update_prompt",
|
||||
),
|
||||
_make_callback_button(
|
||||
btn_id="no",
|
||||
label="✗ 取消",
|
||||
visited_label="已取消",
|
||||
data=f"{UPDATE_PROMPT_PREFIX}n",
|
||||
style=0,
|
||||
group_id="update_prompt",
|
||||
),
|
||||
]),
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
# ── ApprovalRequest + text builder ───────────────────────────────────
|
||||
|
||||
@dataclass
|
||||
class ApprovalRequest:
|
||||
"""Structured approval-request display data.
|
||||
|
||||
:param session_key: Routes the decision back to the waiting caller.
|
||||
:param title: Short title at the top.
|
||||
:param description: Optional longer description.
|
||||
:param command_preview: Command text (exec approvals).
|
||||
:param cwd: Working directory (exec approvals).
|
||||
:param tool_name: Tool name (plugin approvals).
|
||||
:param severity: ``'critical' | 'info' | ''``.
|
||||
:param timeout_sec: Seconds until the approval expires.
|
||||
"""
|
||||
session_key: str
|
||||
title: str
|
||||
description: str = ""
|
||||
command_preview: str = ""
|
||||
cwd: str = ""
|
||||
tool_name: str = ""
|
||||
severity: str = ""
|
||||
timeout_sec: int = 120
|
||||
|
||||
|
||||
def build_approval_text(req: ApprovalRequest) -> str:
|
||||
"""Render an :class:`ApprovalRequest` into the message body (markdown)."""
|
||||
if req.command_preview or req.cwd:
|
||||
return _build_exec_text(req)
|
||||
return _build_plugin_text(req)
|
||||
|
||||
|
||||
def _build_exec_text(req: ApprovalRequest) -> str:
|
||||
lines: List[str] = ["🔐 **命令执行审批**", ""]
|
||||
if req.command_preview:
|
||||
preview = req.command_preview[:300]
|
||||
lines.append(f"```\n{preview}\n```")
|
||||
if req.cwd:
|
||||
lines.append(f"📁 目录: {req.cwd}")
|
||||
if req.title and req.title != req.command_preview:
|
||||
lines.append(f"📋 {req.title}")
|
||||
if req.description:
|
||||
lines.append(f"📝 {req.description}")
|
||||
lines.append("")
|
||||
lines.append(f"⏱️ 超时: {req.timeout_sec} 秒")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _build_plugin_text(req: ApprovalRequest) -> str:
|
||||
icon = (
|
||||
"🔴" if req.severity == "critical"
|
||||
else "🔵" if req.severity == "info"
|
||||
else "🟡"
|
||||
)
|
||||
lines: List[str] = [f"{icon} **审批请求**", ""]
|
||||
lines.append(f"📋 {req.title}")
|
||||
if req.description:
|
||||
lines.append(f"📝 {req.description}")
|
||||
if req.tool_name:
|
||||
lines.append(f"🔧 工具: {req.tool_name}")
|
||||
lines.append("")
|
||||
lines.append(f"⏱️ 超时: {req.timeout_sec} 秒")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
# ── ApprovalSender ───────────────────────────────────────────────────
|
||||
|
||||
PostMessageFn = Callable[..., Awaitable[Dict[str, Any]]]
|
||||
"""Signature of an async POST to ``/v2/{users|groups}/{id}/messages``.
|
||||
|
||||
Implementations accept a body dict and return the raw API response.
|
||||
"""
|
||||
|
||||
|
||||
class ApprovalSender:
|
||||
"""Send an approval-request message with an inline keyboard.
|
||||
|
||||
Decoupled from the adapter via callables so it can be unit-tested in
|
||||
isolation. Pass the adapter's ``_send_message_with_keyboard`` helper
|
||||
(or any equivalent) as ``post_message``.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
post_c2c: PostMessageFn,
|
||||
post_group: PostMessageFn,
|
||||
log_tag: str = "QQBot",
|
||||
) -> None:
|
||||
self._post_c2c = post_c2c
|
||||
self._post_group = post_group
|
||||
self._log_tag = log_tag
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_type: str,
|
||||
chat_id: str,
|
||||
req: ApprovalRequest,
|
||||
msg_id: Optional[str] = None,
|
||||
) -> bool:
|
||||
"""Send an approval message to *chat_id*.
|
||||
|
||||
:param chat_type: ``'c2c'`` or ``'group'``.
|
||||
:param chat_id: User openid or group openid.
|
||||
:param req: :class:`ApprovalRequest`.
|
||||
:param msg_id: Reply-to message id (required for passive messages).
|
||||
:returns: ``True`` on success, ``False`` on failure.
|
||||
"""
|
||||
text = build_approval_text(req)
|
||||
keyboard = build_approval_keyboard(req.session_key)
|
||||
|
||||
logger.info(
|
||||
"[%s] Sending approval request to %s:%s (session=%.20s…)",
|
||||
self._log_tag, chat_type, chat_id, req.session_key,
|
||||
)
|
||||
|
||||
try:
|
||||
if chat_type == "c2c":
|
||||
await self._post_c2c(chat_id, text, msg_id, keyboard)
|
||||
elif chat_type == "group":
|
||||
await self._post_group(chat_id, text, msg_id, keyboard)
|
||||
else:
|
||||
logger.warning(
|
||||
"[%s] Approval: unsupported chat_type %r",
|
||||
self._log_tag, chat_type,
|
||||
)
|
||||
return False
|
||||
logger.info(
|
||||
"[%s] Approval message sent to %s:%s",
|
||||
self._log_tag, chat_type, chat_id,
|
||||
)
|
||||
return True
|
||||
except Exception as exc:
|
||||
logger.error(
|
||||
"[%s] Failed to send approval message to %s:%s: %s",
|
||||
self._log_tag, chat_type, chat_id, exc,
|
||||
)
|
||||
return False
|
||||
|
||||
|
||||
# ── INTERACTION_CREATE event shape ───────────────────────────────────
|
||||
|
||||
@dataclass
|
||||
class InteractionEvent:
|
||||
"""Parsed ``INTERACTION_CREATE`` event payload.
|
||||
|
||||
See https://bot.q.qq.com/wiki/develop/api-v2/dev-prepare/interface-framework/event-emit.html
|
||||
"""
|
||||
id: str = ""
|
||||
"""Interaction event id — required for the ``PUT /interactions/{id}`` ACK."""
|
||||
|
||||
type: int = 0
|
||||
"""Event type code (``11`` = message button)."""
|
||||
|
||||
chat_type: int = 0
|
||||
"""``0`` = guild, ``1`` = group, ``2`` = c2c."""
|
||||
|
||||
scene: str = ""
|
||||
"""``'guild'`` | ``'group'`` | ``'c2c'`` — human-readable scene."""
|
||||
|
||||
group_openid: str = ""
|
||||
group_member_openid: str = ""
|
||||
user_openid: str = ""
|
||||
channel_id: str = ""
|
||||
guild_id: str = ""
|
||||
|
||||
button_data: str = ""
|
||||
button_id: str = ""
|
||||
resolver_user_id: str = ""
|
||||
|
||||
@property
|
||||
def operator_openid(self) -> str:
|
||||
"""Best available operator openid (group → member; c2c → user)."""
|
||||
return (
|
||||
self.group_member_openid
|
||||
or self.user_openid
|
||||
or self.resolver_user_id
|
||||
)
|
||||
|
||||
|
||||
def parse_interaction_event(raw: Dict[str, Any]) -> InteractionEvent:
|
||||
"""Parse a raw ``INTERACTION_CREATE`` dispatch payload (``d``)."""
|
||||
data_raw = raw.get("data") or {}
|
||||
resolved = data_raw.get("resolved") or {}
|
||||
scene_code = int(raw.get("chat_type", 0) or 0)
|
||||
scene = {0: "guild", 1: "group", 2: "c2c"}.get(scene_code, "")
|
||||
return InteractionEvent(
|
||||
id=str(raw.get("id", "")),
|
||||
type=int(data_raw.get("type", 0) or 0),
|
||||
chat_type=scene_code,
|
||||
scene=scene,
|
||||
group_openid=str(raw.get("group_openid", "")),
|
||||
group_member_openid=str(raw.get("group_member_openid", "")),
|
||||
user_openid=str(raw.get("user_openid", "")),
|
||||
channel_id=str(raw.get("channel_id", "")),
|
||||
guild_id=str(raw.get("guild_id", "")),
|
||||
button_data=str(resolved.get("button_data", "")),
|
||||
button_id=str(resolved.get("button_id", "")),
|
||||
resolver_user_id=str(resolved.get("user_id", "")),
|
||||
)
|
||||
@@ -1,220 +0,0 @@
|
||||
"""
|
||||
QQBot scan-to-configure (QR code onboard) module.
|
||||
|
||||
Mirrors the Feishu onboarding pattern: synchronous HTTP + a single public
|
||||
entry-point ``qr_register()`` that handles the full flow (create task →
|
||||
display QR code → poll → decrypt credentials).
|
||||
|
||||
Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to
|
||||
generate a QR-code URL and poll for scan completion. On success the caller
|
||||
receives the bot's *app_id*, *client_secret* (decrypted locally), and the
|
||||
scanner's *user_openid* — enough to fully configure the QQBot gateway.
|
||||
|
||||
Reference: https://bot.q.qq.com/wiki/develop/api-v2/
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from enum import IntEnum
|
||||
from typing import Optional, Tuple
|
||||
from urllib.parse import quote
|
||||
|
||||
from .constants import (
|
||||
ONBOARD_API_TIMEOUT,
|
||||
ONBOARD_CREATE_PATH,
|
||||
ONBOARD_POLL_INTERVAL,
|
||||
ONBOARD_POLL_PATH,
|
||||
PORTAL_HOST,
|
||||
QR_URL_TEMPLATE,
|
||||
)
|
||||
from .crypto import decrypt_secret, generate_bind_key
|
||||
from .utils import get_api_headers
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bind status
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class BindStatus(IntEnum):
|
||||
"""Status codes returned by ``_poll_bind_result``."""
|
||||
|
||||
NONE = 0
|
||||
PENDING = 1
|
||||
COMPLETED = 2
|
||||
EXPIRED = 3
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# QR rendering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
try:
|
||||
import qrcode as _qrcode_mod
|
||||
except (ImportError, TypeError):
|
||||
_qrcode_mod = None # type: ignore[assignment]
|
||||
|
||||
|
||||
def _render_qr(url: str) -> bool:
|
||||
"""Try to render a QR code in the terminal. Returns True if successful."""
|
||||
if _qrcode_mod is None:
|
||||
return False
|
||||
try:
|
||||
qr = _qrcode_mod.QRCode(
|
||||
error_correction=_qrcode_mod.constants.ERROR_CORRECT_M,
|
||||
border=2,
|
||||
)
|
||||
qr.add_data(url)
|
||||
qr.make(fit=True)
|
||||
qr.print_ascii(invert=True)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Synchronous HTTP helpers (mirrors Feishu _post_registration pattern)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
|
||||
"""Create a bind task and return *(task_id, aes_key_base64)*.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the API returns a non-zero ``retcode``.
|
||||
"""
|
||||
import httpx
|
||||
|
||||
url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}"
|
||||
key = generate_bind_key()
|
||||
|
||||
with httpx.Client(timeout=timeout, follow_redirects=True) as client:
|
||||
resp = client.post(url, json={"key": key}, headers=get_api_headers())
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
if data.get("retcode") != 0:
|
||||
raise RuntimeError(data.get("msg", "create_bind_task failed"))
|
||||
|
||||
task_id = data.get("data", {}).get("task_id")
|
||||
if not task_id:
|
||||
raise RuntimeError("create_bind_task: missing task_id in response")
|
||||
|
||||
logger.debug("create_bind_task ok: task_id=%s", task_id)
|
||||
return task_id, key
|
||||
|
||||
|
||||
def _poll_bind_result(
|
||||
task_id: str,
|
||||
timeout: float = ONBOARD_API_TIMEOUT,
|
||||
) -> Tuple[BindStatus, str, str, str]:
|
||||
"""Poll the bind result for *task_id*.
|
||||
|
||||
Returns:
|
||||
A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the API returns a non-zero ``retcode``.
|
||||
"""
|
||||
import httpx
|
||||
|
||||
url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}"
|
||||
|
||||
with httpx.Client(timeout=timeout, follow_redirects=True) as client:
|
||||
resp = client.post(url, json={"task_id": task_id}, headers=get_api_headers())
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
if data.get("retcode") != 0:
|
||||
raise RuntimeError(data.get("msg", "poll_bind_result failed"))
|
||||
|
||||
d = data.get("data", {})
|
||||
return (
|
||||
BindStatus(d.get("status", 0)),
|
||||
str(d.get("bot_appid", "")),
|
||||
d.get("bot_encrypt_secret", ""),
|
||||
d.get("user_openid", ""),
|
||||
)
|
||||
|
||||
|
||||
def build_connect_url(task_id: str) -> str:
|
||||
"""Build the QR-code target URL for a given *task_id*."""
|
||||
return QR_URL_TEMPLATE.format(task_id=quote(task_id))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public entry-point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_MAX_REFRESHES = 3
|
||||
|
||||
|
||||
def qr_register(timeout_seconds: int = 600) -> Optional[dict]:
|
||||
"""Run the QQBot scan-to-configure QR registration flow.
|
||||
|
||||
Mirrors ``feishu.qr_register()``: handles create → display → poll →
|
||||
decrypt in one call. Unexpected errors propagate to the caller.
|
||||
|
||||
:returns:
|
||||
``{"app_id": ..., "client_secret": ..., "user_openid": ...}`` on
|
||||
success, or ``None`` on failure / expiry / cancellation.
|
||||
"""
|
||||
deadline = time.monotonic() + timeout_seconds
|
||||
|
||||
for refresh_count in range(_MAX_REFRESHES + 1):
|
||||
# ── Create bind task ──
|
||||
try:
|
||||
task_id, aes_key = _create_bind_task()
|
||||
except Exception as exc:
|
||||
logger.warning("[QQBot onboard] Failed to create bind task: %s", exc)
|
||||
return None
|
||||
|
||||
url = build_connect_url(task_id)
|
||||
|
||||
# ── Display QR code + URL ──
|
||||
print()
|
||||
if _render_qr(url):
|
||||
print(f" Scan the QR code above, or open this URL directly:\n {url}")
|
||||
else:
|
||||
print(f" Open this URL in QQ on your phone:\n {url}")
|
||||
print(" Tip: pip install qrcode to display a scannable QR code here")
|
||||
print()
|
||||
|
||||
# ── Poll loop ──
|
||||
while time.monotonic() < deadline:
|
||||
try:
|
||||
status, app_id, encrypted_secret, user_openid = _poll_bind_result(task_id)
|
||||
except Exception:
|
||||
time.sleep(ONBOARD_POLL_INTERVAL)
|
||||
continue
|
||||
|
||||
if status == BindStatus.COMPLETED:
|
||||
client_secret = decrypt_secret(encrypted_secret, aes_key)
|
||||
print()
|
||||
print(f" QR scan complete! (App ID: {app_id})")
|
||||
if user_openid:
|
||||
print(f" Scanner's OpenID: {user_openid}")
|
||||
return {
|
||||
"app_id": app_id,
|
||||
"client_secret": client_secret,
|
||||
"user_openid": user_openid,
|
||||
}
|
||||
|
||||
if status == BindStatus.EXPIRED:
|
||||
if refresh_count >= _MAX_REFRESHES:
|
||||
logger.warning("[QQBot onboard] QR code expired %d times — giving up", _MAX_REFRESHES)
|
||||
return None
|
||||
print(f"\n QR code expired, refreshing... ({refresh_count + 1}/{_MAX_REFRESHES})")
|
||||
break # next for-loop iteration creates a new task
|
||||
|
||||
time.sleep(ONBOARD_POLL_INTERVAL)
|
||||
else:
|
||||
# deadline reached without completing
|
||||
logger.warning("[QQBot onboard] Poll timed out after %ds", timeout_seconds)
|
||||
return None
|
||||
|
||||
return None
|
||||
@@ -1,369 +0,0 @@
|
||||
"""
|
||||
Signal attachment rate-limit scheduler.
|
||||
|
||||
Process-wide token-bucket simulator that mirrors the per-account
|
||||
attachment rate limit signal-cli/Signal-Server enforce. Producers
|
||||
(``SignalAdapter.send_multiple_images`` and the ``send_message`` tool's
|
||||
Signal path) call ``acquire(n)`` before an attachment send; on a 429
|
||||
they call ``feedback(retry_after, n)`` so the model recalibrates from
|
||||
the server's authoritative hint.
|
||||
|
||||
The scheduler serializes concurrent calls through an ``asyncio.Lock``,
|
||||
giving FIFO fairness across agent sessions sharing one signal-cli
|
||||
daemon.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from typing import Any, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
SIGNAL_MAX_ATTACHMENTS_PER_MSG = 32 # per-message attachment cap (source: Signal-{Android,Desktop} source code)
|
||||
SIGNAL_RATE_LIMIT_BUCKET_CAPACITY = 50 # server-side token-bucket capacity for attachments rate limiting
|
||||
SIGNAL_RATE_LIMIT_DEFAULT_RETRY_AFTER = 4 # fallback token refill interval for signal-cli < v0.14.3
|
||||
SIGNAL_RATE_LIMIT_MAX_ATTEMPTS = 2 # initial attempt + 1 retry
|
||||
SIGNAL_BATCH_PACING_NOTICE_THRESHOLD = 10.0 # if estimated waiting time > 10s, notify the user about the delay
|
||||
SIGNAL_RPC_ERROR_RATELIMIT = -5 # signal-cli (v0.14.3+) JSON-RPC error code for RateLimitException
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Errors
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class SignalRateLimitError(Exception):
|
||||
"""
|
||||
Raised by ``SignalAdapter._rpc`` for rate-limit responses when the
|
||||
caller has opted in via ``raise_on_rate_limit=True``.
|
||||
|
||||
Carries the server-supplied per-token Retry-After (in seconds) on
|
||||
signal-cli ≥ v0.14.3
|
||||
``retry_after`` is None when the version doesn't expose it.
|
||||
"""
|
||||
|
||||
def __init__(self, message: str, retry_after: Optional[float] = None) -> None:
|
||||
super().__init__(message)
|
||||
self.retry_after = retry_after
|
||||
|
||||
|
||||
class SignalSchedulerError(Exception):
|
||||
pass
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Detection helpers — used to fish a 429 out of signal-cli's various error
|
||||
# shapes (typed code, [429] substring, libsignal-net RetryLaterException
|
||||
# leaked through AttachmentInvalidException).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# "Retry after 4 seconds" / "retry after 4 second" — libsignal-net's
|
||||
# RetryLaterException string form, surfaced when 429s hit during
|
||||
# attachment upload (signal-cli wraps these as AttachmentInvalidException
|
||||
# rather than RateLimitException, so the typed path doesn't fire).
|
||||
_RETRY_AFTER_RE = re.compile(r"Retry after (\d+(?:\.\d+)?)\s*second", re.IGNORECASE)
|
||||
|
||||
|
||||
def _extract_retry_after_seconds(err: Any) -> Optional[float]:
|
||||
"""Pull the per-token Retry-After window from a signal-cli rate-limit error.
|
||||
|
||||
Tries two sources, in order:
|
||||
1. ``error.data.response.results[*].retryAfterSeconds`` — the
|
||||
structured field signal-cli ≥ v0.14.3 surfaces for plain
|
||||
RateLimitException.
|
||||
2. ``"Retry after N seconds"`` parsed out of the message — covers
|
||||
libsignal-net's RetryLaterException that gets wrapped as
|
||||
AttachmentInvalidException during attachment upload, where the
|
||||
structured field stays null.
|
||||
|
||||
Returns None when neither yields a value.
|
||||
"""
|
||||
msg = ""
|
||||
if isinstance(err, dict):
|
||||
data = err.get("data") or {}
|
||||
response = data.get("response") or {}
|
||||
results = response.get("results") or []
|
||||
candidates = [
|
||||
r.get("retryAfterSeconds") for r in results
|
||||
if isinstance(r, dict) and r.get("retryAfterSeconds")
|
||||
]
|
||||
if candidates:
|
||||
return float(max(candidates))
|
||||
msg = str(err.get("message", ""))
|
||||
else:
|
||||
msg = str(err)
|
||||
match = _RETRY_AFTER_RE.search(msg)
|
||||
return float(match.group(1)) if match else None
|
||||
|
||||
|
||||
def _is_signal_rate_limit_error(err: Any) -> bool:
|
||||
"""True if a signal-cli RPC error reflects a rate-limit failure.
|
||||
|
||||
Matches three layers:
|
||||
- typed ``RATELIMIT_ERROR`` code (signal-cli ≥ v0.14.3, plain
|
||||
RateLimitException)
|
||||
- legacy ``[429] / RateLimitException`` substrings
|
||||
- libsignal-net's ``RetryLaterException`` / ``Retry after N seconds``
|
||||
surfaced inside ``AttachmentInvalidException`` when the rate
|
||||
limit is hit during attachment upload — signal-cli never re-tags
|
||||
these as RateLimitException, so substring is the only signal.
|
||||
"""
|
||||
if isinstance(err, dict) and err.get("code") == SIGNAL_RPC_ERROR_RATELIMIT:
|
||||
return True
|
||||
|
||||
message = (
|
||||
str(err.get("message", ""))
|
||||
if isinstance(err, dict)
|
||||
else str(err)
|
||||
)
|
||||
msg_lower = message.lower()
|
||||
return (
|
||||
"[429]" in message
|
||||
or "ratelimit" in msg_lower
|
||||
or "retrylaterexception" in msg_lower
|
||||
or "retry after" in msg_lower
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Misc helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _format_wait(seconds: float) -> str:
|
||||
"""Human-friendly wait label for user-facing pacing notices."""
|
||||
s = max(0.0, seconds)
|
||||
if s < 90:
|
||||
return f"{int(round(s))}s"
|
||||
return f"{max(1, int(round(s / 60)))} min"
|
||||
|
||||
|
||||
def _signal_send_timeout(num_attachments: int) -> float:
|
||||
"""HTTP timeout for a Signal ``send`` RPC.
|
||||
|
||||
signal-cli uploads attachments serially during the call, so the
|
||||
server-side time scales with batch size. Default 30s is fine for
|
||||
text-only sends but truncates large attachment batches mid-upload —
|
||||
we then log a phantom failure even though signal-cli completes the
|
||||
send a few seconds later. Scale at 5s/attachment with a 60s floor.
|
||||
"""
|
||||
if num_attachments <= 0:
|
||||
return 30.0
|
||||
return max(60.0, 5.0 * num_attachments)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scheduler
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class SignalAttachmentScheduler:
|
||||
"""Process-wide token-bucket simulator for Signal attachment sends.
|
||||
|
||||
The bucket holds up to ``capacity`` tokens (default 50, matching
|
||||
Signal's server-side rate-limit bucket size). Each attachment consumes one
|
||||
token. Tokens refill at ``refill_rate`` tokens/second, calibrated
|
||||
from the per-token Retry-After hint we get from the server when a
|
||||
429 fires. Until we've observed one, we use the documented default
|
||||
(1 token / 4 seconds).
|
||||
|
||||
Concurrent ``acquire(n)`` calls serialize through an
|
||||
``asyncio.Lock`` — natural FIFO across agent sessions hitting the
|
||||
same daemon.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
capacity: float = float(SIGNAL_RATE_LIMIT_BUCKET_CAPACITY),
|
||||
default_retry_after: float = float(SIGNAL_RATE_LIMIT_DEFAULT_RETRY_AFTER),
|
||||
) -> None:
|
||||
self.capacity = float(capacity)
|
||||
self.tokens = float(capacity)
|
||||
self.refill_rate = 1.0 / float(default_retry_after)
|
||||
self.last_refill = time.monotonic()
|
||||
self._lock = asyncio.Lock()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Internals
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _refill(self) -> None:
|
||||
now = time.monotonic()
|
||||
elapsed = now - self.last_refill
|
||||
if elapsed > 0 and self.tokens < self.capacity:
|
||||
self.tokens = min(self.capacity, self.tokens + elapsed * self.refill_rate)
|
||||
self.last_refill = now
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def estimate_wait(self, n: int) -> float:
|
||||
"""Best-effort estimate of the seconds until ``n`` tokens would
|
||||
be available. Used to decide whether to emit a user-facing
|
||||
pacing notice *before* committing to an ``acquire`` that may
|
||||
block silently. Lock-free; small races vs. concurrent acquires
|
||||
are benign for an informational notice.
|
||||
"""
|
||||
now = time.monotonic()
|
||||
elapsed = now - self.last_refill
|
||||
projected = self.tokens
|
||||
if elapsed > 0 and projected < self.capacity:
|
||||
projected = min(self.capacity, projected + elapsed * self.refill_rate)
|
||||
deficit = n - projected
|
||||
if deficit <= 0:
|
||||
return 0.0
|
||||
return deficit / self.refill_rate
|
||||
|
||||
async def acquire(self, n: int) -> float:
|
||||
"""Block until at least ``n`` tokens are available, return the
|
||||
seconds slept.
|
||||
|
||||
Does **not** deduct tokens — the bucket is a read-only model of
|
||||
server-side capacity. Call ``report_rpc_duration()`` after the
|
||||
RPC to synchronise the model with the server timeline.
|
||||
|
||||
Not perfect in case lots of coroutines try to acquire for big
|
||||
uploads (``report_rpc_duration`` will take a long time to get hit)
|
||||
but this is just a simulation. Signal server is ground truth and
|
||||
will raise rate-limit exceptions triggering requeues.
|
||||
|
||||
The lock is released during ``asyncio.sleep`` so other callers
|
||||
can interleave. A retry loop re-checks after each sleep in
|
||||
case the deadline was pessimistic.
|
||||
"""
|
||||
if n <= 0:
|
||||
return 0.0
|
||||
if n > self.capacity:
|
||||
raise SignalSchedulerError(
|
||||
f"Signal scheduler was called requesting {n} tokens "
|
||||
f"(max is {self.capacity})",
|
||||
)
|
||||
|
||||
total_slept = 0.0
|
||||
first_pass = True
|
||||
while True:
|
||||
async with self._lock:
|
||||
self._refill()
|
||||
if self.tokens >= n:
|
||||
if not first_pass or total_slept > 0:
|
||||
logger.debug(
|
||||
"Signal scheduler: tokens sufficient for %d "
|
||||
"(remaining=%.1f, total_slept=%.1fs)",
|
||||
n, self.tokens, total_slept,
|
||||
)
|
||||
return total_slept
|
||||
deficit = n - self.tokens
|
||||
wait = deficit / self.refill_rate
|
||||
if first_pass:
|
||||
logger.info(
|
||||
"Signal scheduler: pausing %.1fs for %d tokens "
|
||||
"(available=%.1f, deficit=%.1f, refill=%.4f/s ≈ %.1fs/token)",
|
||||
wait, n, self.tokens, deficit,
|
||||
self.refill_rate, 1.0 / self.refill_rate,
|
||||
)
|
||||
first_pass = False
|
||||
await asyncio.sleep(wait)
|
||||
total_slept += wait
|
||||
|
||||
async def report_rpc_duration(self, rpc_duration: float, n_attachments: int) -> None:
|
||||
"""Record an attachment-send RPC that just completed.
|
||||
|
||||
Deducts ``n_attachments`` tokens without crediting refill during
|
||||
the upload window. Signal's server checks the bucket at RPC start
|
||||
and does *not* refill during request processing — refill resumes
|
||||
after the response. Crediting upload-time refill causes cumulative
|
||||
drift that eventually triggers 429s.
|
||||
|
||||
Advances ``last_refill`` so the next ``acquire`` / ``_refill``
|
||||
starts counting from this point.
|
||||
"""
|
||||
if n_attachments <= 0:
|
||||
return
|
||||
|
||||
async with self._lock:
|
||||
now = time.monotonic()
|
||||
token_before = self.tokens
|
||||
self.tokens = max(0.0, token_before - float(n_attachments))
|
||||
self.last_refill = now
|
||||
logger.log(
|
||||
logging.INFO if rpc_duration > 10 and n_attachments > 5 else logging.DEBUG,
|
||||
"Signal scheduler: RPC for %d att took %.1fs — "
|
||||
"tokens %.1f → %.1f (deducted=%d, no upload refill credited, refill=%.4fs⁻¹)",
|
||||
n_attachments, rpc_duration,
|
||||
token_before, self.tokens,
|
||||
n_attachments, self.refill_rate,
|
||||
)
|
||||
|
||||
def feedback(self, retry_after: Optional[float], n_attempted: int) -> None:
|
||||
"""Apply server feedback after a 429.
|
||||
|
||||
``retry_after`` is the per-*token* refill window the server
|
||||
reports (None when signal-cli is older than v0.14.3 and didn't
|
||||
surface it).
|
||||
|
||||
When present we calibrate ``refill_rate`` from it:
|
||||
the server is authoritative.
|
||||
"""
|
||||
if retry_after and retry_after > 0:
|
||||
new_rate = 1.0 / float(retry_after)
|
||||
if new_rate != self.refill_rate:
|
||||
logger.info(
|
||||
"Signal scheduler: calibrating refill_rate to %.4f tokens/sec "
|
||||
"(server retry_after=%.1fs per token)",
|
||||
new_rate, retry_after,
|
||||
)
|
||||
self.refill_rate = new_rate
|
||||
self.tokens = 0.0
|
||||
self.last_refill = time.monotonic()
|
||||
|
||||
def state(self) -> dict:
|
||||
"""Return current scheduler state for diagnostic logging (read-only).
|
||||
|
||||
Does not advance ``last_refill`` — safe to call from logging paths
|
||||
without perturbing the bucket.
|
||||
"""
|
||||
now = time.monotonic()
|
||||
elapsed = now - self.last_refill
|
||||
projected = self.tokens
|
||||
if elapsed > 0 and projected < self.capacity:
|
||||
projected = min(self.capacity, projected + elapsed * self.refill_rate)
|
||||
return {
|
||||
"tokens": round(projected, 1),
|
||||
"capacity": int(self.capacity),
|
||||
"refill_rate": round(self.refill_rate, 4),
|
||||
"refill_seconds_per_token": round(1.0 / self.refill_rate, 1) if self.refill_rate > 0 else float("inf"),
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Process-wide singleton
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_scheduler: Optional[SignalAttachmentScheduler] = None
|
||||
|
||||
|
||||
def get_scheduler() -> SignalAttachmentScheduler:
|
||||
"""Return the process-wide scheduler, creating it on first access."""
|
||||
global _scheduler
|
||||
if _scheduler is None:
|
||||
_scheduler = SignalAttachmentScheduler()
|
||||
logger.info(
|
||||
"Signal scheduler: created (capacity=%d tokens, refill=%.4f/s ≈ %.1fs/token)",
|
||||
int(_scheduler.capacity),
|
||||
_scheduler.refill_rate,
|
||||
1.0 / _scheduler.refill_rate,
|
||||
)
|
||||
return _scheduler
|
||||
|
||||
|
||||
def _reset_scheduler() -> None:
|
||||
"""Drop the cached scheduler so the next ``get_scheduler`` call
|
||||
builds a fresh one. Test-only — never call from production paths."""
|
||||
global _scheduler
|
||||
_scheduler = None
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,645 +0,0 @@
|
||||
"""
|
||||
yuanbao_media.py — 元宝平台媒体处理模块
|
||||
|
||||
提供 COS 上传、文件下载、TIM 媒体消息构建等功能。
|
||||
移植自 TypeScript 版 media.ts(yuanbao-openclaw-plugin),
|
||||
使用 httpx 替代 cos-nodejs-sdk-v5,避免引入额外 SDK 依赖。
|
||||
|
||||
COS 上传流程:
|
||||
1. 调用 genUploadInfo 获取临时凭证(tmpSecretId/tmpSecretKey/sessionToken)
|
||||
2. 用临时凭证通过 HMAC-SHA1 签名构建 Authorization 头
|
||||
3. HTTP PUT 上传到 COS
|
||||
|
||||
TIM 消息体构建:
|
||||
- buildImageMsgBody() → TIMImageElem
|
||||
- buildFileMsgBody() → TIMFileElem
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import logging
|
||||
import os
|
||||
import secrets
|
||||
import struct
|
||||
import time
|
||||
import urllib.parse
|
||||
from typing import Optional, Any
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ============ 常量 ============
|
||||
|
||||
UPLOAD_INFO_PATH = "/api/resource/genUploadInfo"
|
||||
DEFAULT_API_DOMAIN = "yuanbao.tencent.com"
|
||||
DEFAULT_MAX_SIZE_MB = 50
|
||||
|
||||
# COS 加速域名后缀(优先使用全球加速)
|
||||
COS_USE_ACCELERATE = True
|
||||
|
||||
# ============ 类型映射 ============
|
||||
|
||||
# MIME → image_format 数字(TIM 协议字段)
|
||||
_MIME_TO_IMAGE_FORMAT: dict[str, int] = {
|
||||
"image/jpeg": 1,
|
||||
"image/jpg": 1,
|
||||
"image/gif": 2,
|
||||
"image/png": 3,
|
||||
"image/bmp": 4,
|
||||
"image/webp": 255,
|
||||
"image/heic": 255,
|
||||
"image/tiff": 255,
|
||||
}
|
||||
|
||||
# 文件扩展名 → MIME
|
||||
_EXT_TO_MIME: dict[str, str] = {
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".png": "image/png",
|
||||
".gif": "image/gif",
|
||||
".webp": "image/webp",
|
||||
".bmp": "image/bmp",
|
||||
".heic": "image/heic",
|
||||
".tiff": "image/tiff",
|
||||
".ico": "image/x-icon",
|
||||
".pdf": "application/pdf",
|
||||
".doc": "application/msword",
|
||||
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
".xls": "application/vnd.ms-excel",
|
||||
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
".ppt": "application/vnd.ms-powerpoint",
|
||||
".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
".txt": "text/plain",
|
||||
".zip": "application/zip",
|
||||
".tar": "application/x-tar",
|
||||
".gz": "application/gzip",
|
||||
".mp3": "audio/mpeg",
|
||||
".mp4": "video/mp4",
|
||||
".wav": "audio/wav",
|
||||
".ogg": "audio/ogg",
|
||||
".webm": "video/webm",
|
||||
}
|
||||
|
||||
|
||||
# ============ 工具函数 ============
|
||||
|
||||
def guess_mime_type(filename: str) -> str:
|
||||
"""根据文件扩展名猜测 MIME 类型。"""
|
||||
ext = os.path.splitext(filename)[-1].lower()
|
||||
return _EXT_TO_MIME.get(ext, "application/octet-stream")
|
||||
|
||||
|
||||
def is_image(filename: str, mime_type: str = "") -> bool:
|
||||
"""判断是否为图片类型。"""
|
||||
if mime_type.startswith("image/"):
|
||||
return True
|
||||
ext = os.path.splitext(filename)[-1].lower()
|
||||
return ext in {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".heic", ".tiff", ".ico"}
|
||||
|
||||
|
||||
def get_image_format(mime_type: str) -> int:
|
||||
"""获取 TIM 图片格式编号。"""
|
||||
return _MIME_TO_IMAGE_FORMAT.get(mime_type.lower(), 255)
|
||||
|
||||
|
||||
def md5_hex(data: bytes) -> str:
|
||||
"""计算 MD5 十六进制摘要。"""
|
||||
return hashlib.md5(data).hexdigest()
|
||||
|
||||
|
||||
def generate_file_id() -> str:
|
||||
"""生成随机文件 ID(32 位 hex)。"""
|
||||
return secrets.token_hex(16)
|
||||
|
||||
|
||||
|
||||
# ============ 图片尺寸解析(纯 Python,无需 Pillow) ============
|
||||
|
||||
def parse_image_size(data: bytes) -> Optional[dict[str, int]]:
|
||||
"""
|
||||
解析图片宽高(支持 JPEG/PNG/GIF/WebP),无需第三方依赖。
|
||||
返回 {"width": w, "height": h} 或 None(无法识别)。
|
||||
"""
|
||||
return (
|
||||
_parse_png_size(data)
|
||||
or _parse_jpeg_size(data)
|
||||
or _parse_gif_size(data)
|
||||
or _parse_webp_size(data)
|
||||
)
|
||||
|
||||
|
||||
def _parse_png_size(buf: bytes) -> Optional[dict[str, int]]:
|
||||
if len(buf) < 24:
|
||||
return None
|
||||
if buf[:4] != b"\x89PNG":
|
||||
return None
|
||||
w = struct.unpack(">I", buf[16:20])[0]
|
||||
h = struct.unpack(">I", buf[20:24])[0]
|
||||
return {"width": w, "height": h}
|
||||
|
||||
|
||||
def _parse_jpeg_size(buf: bytes) -> Optional[dict[str, int]]:
|
||||
if len(buf) < 4 or buf[0] != 0xFF or buf[1] != 0xD8:
|
||||
return None
|
||||
i = 2
|
||||
while i < len(buf) - 9:
|
||||
if buf[i] != 0xFF:
|
||||
i += 1
|
||||
continue
|
||||
marker = buf[i + 1]
|
||||
if marker in {0xC0, 0xC2}:
|
||||
h = struct.unpack(">H", buf[i + 5: i + 7])[0]
|
||||
w = struct.unpack(">H", buf[i + 7: i + 9])[0]
|
||||
return {"width": w, "height": h}
|
||||
if i + 3 < len(buf):
|
||||
i += 2 + struct.unpack(">H", buf[i + 2: i + 4])[0]
|
||||
else:
|
||||
break
|
||||
return None
|
||||
|
||||
|
||||
def _parse_gif_size(buf: bytes) -> Optional[dict[str, int]]:
|
||||
if len(buf) < 10:
|
||||
return None
|
||||
sig = buf[:6].decode("ascii", errors="replace")
|
||||
if sig not in {"GIF87a", "GIF89a"}:
|
||||
return None
|
||||
w = struct.unpack("<H", buf[6:8])[0]
|
||||
h = struct.unpack("<H", buf[8:10])[0]
|
||||
return {"width": w, "height": h}
|
||||
|
||||
|
||||
def _parse_webp_size(buf: bytes) -> Optional[dict[str, int]]:
|
||||
if len(buf) < 16:
|
||||
return None
|
||||
if buf[:4] != b"RIFF" or buf[8:12] != b"WEBP":
|
||||
return None
|
||||
chunk = buf[12:16].decode("ascii", errors="replace")
|
||||
if chunk == "VP8 ":
|
||||
if len(buf) >= 30 and buf[23] == 0x9D and buf[24] == 0x01 and buf[25] == 0x2A:
|
||||
w = struct.unpack("<H", buf[26:28])[0] & 0x3FFF
|
||||
h = struct.unpack("<H", buf[28:30])[0] & 0x3FFF
|
||||
return {"width": w, "height": h}
|
||||
elif chunk == "VP8L":
|
||||
if len(buf) >= 25 and buf[20] == 0x2F:
|
||||
bits = struct.unpack("<I", buf[21:25])[0]
|
||||
w = (bits & 0x3FFF) + 1
|
||||
h = ((bits >> 14) & 0x3FFF) + 1
|
||||
return {"width": w, "height": h}
|
||||
elif chunk == "VP8X":
|
||||
if len(buf) >= 30:
|
||||
w = (buf[24] | (buf[25] << 8) | (buf[26] << 16)) + 1
|
||||
h = (buf[27] | (buf[28] << 8) | (buf[29] << 16)) + 1
|
||||
return {"width": w, "height": h}
|
||||
return None
|
||||
|
||||
|
||||
# ============ URL 下载 ============
|
||||
|
||||
async def download_url(
|
||||
url: str,
|
||||
max_size_mb: int = DEFAULT_MAX_SIZE_MB,
|
||||
) -> tuple[bytes, str]:
|
||||
"""
|
||||
下载 URL 内容,返回 (bytes, content_type)。
|
||||
|
||||
Args:
|
||||
url: HTTP(S) URL
|
||||
max_size_mb: 最大允许大小(MB),超过则抛出异常
|
||||
|
||||
Returns:
|
||||
(data_bytes, content_type_string)
|
||||
|
||||
Raises:
|
||||
ValueError: 内容超过大小限制
|
||||
httpx.HTTPError: 网络/HTTP 错误
|
||||
"""
|
||||
max_bytes = max_size_mb * 1024 * 1024
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
# 先 HEAD 检查大小
|
||||
try:
|
||||
head = await client.head(url)
|
||||
content_length = int(head.headers.get("content-length", 0) or 0)
|
||||
if content_length > 0 and content_length > max_bytes:
|
||||
raise ValueError(
|
||||
f"文件过大: {content_length / 1024 / 1024:.1f} MB > {max_size_mb} MB"
|
||||
)
|
||||
except httpx.HTTPStatusError:
|
||||
pass # 部分服务器不支持 HEAD,忽略
|
||||
|
||||
# GET 下载(流式读取,防止超限)
|
||||
async with client.stream("GET", url) as resp:
|
||||
resp.raise_for_status()
|
||||
|
||||
content_type = resp.headers.get("content-type", "").split(";")[0].strip()
|
||||
|
||||
chunks: list[bytes] = []
|
||||
downloaded = 0
|
||||
async for chunk in resp.aiter_bytes(65536):
|
||||
downloaded += len(chunk)
|
||||
if downloaded > max_bytes:
|
||||
raise ValueError(
|
||||
f"文件过大: 已超过 {max_size_mb} MB 限制"
|
||||
)
|
||||
chunks.append(chunk)
|
||||
|
||||
data = b"".join(chunks)
|
||||
return data, content_type
|
||||
|
||||
|
||||
# ============ COS 鉴权(HMAC-SHA1) ============
|
||||
|
||||
def _cos_sign(
|
||||
method: str,
|
||||
path: str,
|
||||
params: dict[str, str],
|
||||
headers: dict[str, str],
|
||||
secret_id: str,
|
||||
secret_key: str,
|
||||
start_time: Optional[int] = None,
|
||||
expire_seconds: int = 3600,
|
||||
) -> str:
|
||||
"""
|
||||
构建 COS 请求签名(q-sign-algorithm=sha1 方案)。
|
||||
参考:https://cloud.tencent.com/document/product/436/7778
|
||||
|
||||
Args:
|
||||
method: HTTP 方法(小写,如 "put")
|
||||
path: URL 路径(URL encode 后的小写)
|
||||
params: URL 查询参数 dict(用于签名)
|
||||
headers: 参与签名的请求头 dict(key 需小写)
|
||||
secret_id: 临时 SecretId(tmpSecretId)
|
||||
secret_key: 临时 SecretKey(tmpSecretKey)
|
||||
start_time: 签名起始 Unix 时间戳(默认 now)
|
||||
expire_seconds: 签名有效期(秒,默认 3600)
|
||||
|
||||
Returns:
|
||||
Authorization header 值(完整字符串)
|
||||
"""
|
||||
now = int(time.time())
|
||||
q_sign_time = f"{start_time or now};{(start_time or now) + expire_seconds}"
|
||||
|
||||
# Step 1: SignKey = HMAC-SHA1(SecretKey, q-sign-time)
|
||||
sign_key = hmac.new(
|
||||
secret_key.encode("utf-8"),
|
||||
q_sign_time.encode("utf-8"),
|
||||
hashlib.sha1,
|
||||
).hexdigest()
|
||||
|
||||
# Step 2: HttpString
|
||||
# 参数和头部需按字典序排列,key 小写
|
||||
sorted_params = sorted((k.lower(), urllib.parse.quote(str(v), safe="") ) for k, v in params.items())
|
||||
sorted_headers = sorted((k.lower(), urllib.parse.quote(str(v), safe="") ) for k, v in headers.items())
|
||||
|
||||
url_param_list = ";".join(k for k, _ in sorted_params)
|
||||
url_params = "&".join(f"{k}={v}" for k, v in sorted_params)
|
||||
header_list = ";".join(k for k, _ in sorted_headers)
|
||||
header_str = "&".join(f"{k}={v}" for k, v in sorted_headers)
|
||||
|
||||
http_string = "\n".join([
|
||||
method.lower(),
|
||||
path,
|
||||
url_params,
|
||||
header_str,
|
||||
"",
|
||||
])
|
||||
|
||||
# Step 3: StringToSign = sha1 hash of HttpString
|
||||
sha1_of_http = hashlib.sha1(http_string.encode("utf-8")).hexdigest()
|
||||
string_to_sign = "\n".join([
|
||||
"sha1",
|
||||
q_sign_time,
|
||||
sha1_of_http,
|
||||
"",
|
||||
])
|
||||
|
||||
# Step 4: Signature = HMAC-SHA1(SignKey, StringToSign)
|
||||
signature = hmac.new(
|
||||
sign_key.encode("utf-8"),
|
||||
string_to_sign.encode("utf-8"),
|
||||
hashlib.sha1,
|
||||
).hexdigest()
|
||||
|
||||
return (
|
||||
f"q-sign-algorithm=sha1"
|
||||
f"&q-ak={secret_id}"
|
||||
f"&q-sign-time={q_sign_time}"
|
||||
f"&q-key-time={q_sign_time}"
|
||||
f"&q-header-list={header_list}"
|
||||
f"&q-url-param-list={url_param_list}"
|
||||
f"&q-signature={signature}"
|
||||
)
|
||||
|
||||
|
||||
# ============ 主要公开 API ============
|
||||
|
||||
async def get_cos_credentials(
|
||||
app_key: str,
|
||||
api_domain: str,
|
||||
token: str,
|
||||
filename: str = "file",
|
||||
file_id: Optional[str] = None,
|
||||
bot_id: str = "",
|
||||
route_env: str = "",
|
||||
) -> dict:
|
||||
"""
|
||||
调用 genUploadInfo 接口获取 COS 临时密钥及上传配置。
|
||||
|
||||
Args:
|
||||
app_key: 应用 Key(用于 X-ID 头)
|
||||
api_domain: API 域名(如 https://bot.yuanbao.tencent.com)
|
||||
token: 当前有效的签票 token(X-Token 头)
|
||||
filename: 待上传的文件名(含扩展名)
|
||||
file_id: 客户端生成的唯一文件 ID(不传则自动生成)
|
||||
bot_id: Bot 账号 ID(用于 X-ID 头)
|
||||
|
||||
Returns:
|
||||
COS 上传配置 dict,包含以下字段:
|
||||
bucketName (str) — COS Bucket 名称
|
||||
region (str) — COS 地域
|
||||
location (str) — 上传 Key(对象路径)
|
||||
encryptTmpSecretId (str) — 临时 SecretId
|
||||
encryptTmpSecretKey(str) — 临时 SecretKey
|
||||
encryptToken (str) — SessionToken
|
||||
startTime (int) — 凭证起始时间戳(Unix)
|
||||
expiredTime (int) — 凭证过期时间戳(Unix)
|
||||
resourceUrl (str) — 上传后的公网访问 URL
|
||||
resourceID (str) — 资源 ID(可选)
|
||||
|
||||
Raises:
|
||||
RuntimeError: 接口返回非 0 code 或字段缺失
|
||||
"""
|
||||
if file_id is None:
|
||||
file_id = generate_file_id()
|
||||
|
||||
upload_url = f"{api_domain.rstrip('/')}{UPLOAD_INFO_PATH}"
|
||||
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"X-Token": token,
|
||||
"X-ID": bot_id or app_key,
|
||||
"X-Source": "web",
|
||||
}
|
||||
if route_env:
|
||||
headers["X-Route-Env"] = route_env
|
||||
body = {
|
||||
"fileName": filename,
|
||||
"fileId": file_id,
|
||||
"docFrom": "localDoc",
|
||||
"docOpenId": "",
|
||||
}
|
||||
|
||||
async with httpx.AsyncClient(timeout=15.0) as client:
|
||||
resp = await client.post(upload_url, json=body, headers=headers)
|
||||
resp.raise_for_status()
|
||||
result: dict[str, Any] = resp.json()
|
||||
|
||||
code = result.get("code")
|
||||
if code != 0 and code is not None:
|
||||
raise RuntimeError(
|
||||
f"genUploadInfo 失败: code={code}, msg={result.get('msg', '')}"
|
||||
)
|
||||
|
||||
data = result.get("data") or result
|
||||
required_fields = ["bucketName", "location"]
|
||||
missing = [f for f in required_fields if not data.get(f)]
|
||||
if missing:
|
||||
raise RuntimeError(
|
||||
f"genUploadInfo 返回字段不完整: 缺少字段 {missing}"
|
||||
)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
async def upload_to_cos(
|
||||
file_bytes: bytes,
|
||||
filename: str,
|
||||
content_type: str,
|
||||
credentials: dict,
|
||||
bucket: str,
|
||||
region: str,
|
||||
) -> dict:
|
||||
"""
|
||||
通过 httpx PUT 请求将文件上传到 COS。
|
||||
使用临时凭证(tmpSecretId/tmpSecretKey/sessionToken)构建 HMAC-SHA1 签名。
|
||||
|
||||
Args:
|
||||
file_bytes: 文件二进制内容
|
||||
filename: 文件名(用于辅助计算 MIME、UUID)
|
||||
content_type: MIME 类型(如 "image/jpeg")
|
||||
credentials: get_cos_credentials() 返回的 dict,包含:
|
||||
encryptTmpSecretId → tmpSecretId
|
||||
encryptTmpSecretKey → tmpSecretKey
|
||||
encryptToken → sessionToken
|
||||
location → COS key(对象路径)
|
||||
resourceUrl → 上传后公网 URL
|
||||
startTime → 凭证起始时间(Unix)
|
||||
expiredTime → 凭证过期时间(Unix)
|
||||
bucket: COS Bucket 名称(如 chatbot-1234567890)
|
||||
region: COS 地域(如 ap-guangzhou)
|
||||
|
||||
Returns:
|
||||
上传结果 dict,包含:
|
||||
url (str) — COS 公网访问 URL
|
||||
uuid (str) — 文件内容 MD5
|
||||
size (int) — 文件大小(字节)
|
||||
width (int, optional) — 图片宽度(仅图片)
|
||||
height (int, optional) — 图片高度(仅图片)
|
||||
|
||||
Raises:
|
||||
httpx.HTTPStatusError: COS 返回非 2xx 状态
|
||||
RuntimeError: credentials 字段缺失
|
||||
"""
|
||||
secret_id: str = credentials.get("encryptTmpSecretId", "")
|
||||
secret_key: str = credentials.get("encryptTmpSecretKey", "")
|
||||
session_token: str = credentials.get("encryptToken", "")
|
||||
cos_key: str = credentials.get("location", "")
|
||||
resource_url: str = credentials.get("resourceUrl", "")
|
||||
start_time: Optional[int] = credentials.get("startTime")
|
||||
expired_time: Optional[int] = credentials.get("expiredTime")
|
||||
|
||||
if not secret_id or not secret_key or not cos_key:
|
||||
raise RuntimeError(
|
||||
f"COS credentials 不完整: secretId={bool(secret_id)}, "
|
||||
f"secretKey={bool(secret_key)}, location={bool(cos_key)}"
|
||||
)
|
||||
|
||||
# 构建 COS 上传 URL(优先使用全球加速域名)
|
||||
if COS_USE_ACCELERATE:
|
||||
cos_host = f"{bucket}.cos.accelerate.myqcloud.com"
|
||||
else:
|
||||
cos_host = f"{bucket}.cos.{region}.myqcloud.com"
|
||||
|
||||
# URL encode cos_key(保留 /)
|
||||
encoded_key = urllib.parse.quote(cos_key, safe="/")
|
||||
cos_url = f"https://{cos_host}/{encoded_key.lstrip('/')}"
|
||||
|
||||
# 确定 Content-Type
|
||||
if not content_type or content_type == "application/octet-stream":
|
||||
if is_image(filename):
|
||||
content_type = guess_mime_type(filename)
|
||||
else:
|
||||
content_type = "application/octet-stream"
|
||||
|
||||
# 计算文件 MD5 + size
|
||||
file_uuid = md5_hex(file_bytes)
|
||||
file_size = len(file_bytes)
|
||||
|
||||
# 参与签名的请求头
|
||||
sign_headers = {
|
||||
"host": cos_host,
|
||||
"content-type": content_type,
|
||||
"x-cos-security-token": session_token,
|
||||
}
|
||||
|
||||
# 计算签名有效期
|
||||
now = int(time.time())
|
||||
sign_start = start_time if start_time else now
|
||||
sign_expire = (expired_time - now) if expired_time and expired_time > now else 3600
|
||||
|
||||
authorization = _cos_sign(
|
||||
method="put",
|
||||
path=f"/{encoded_key.lstrip('/')}",
|
||||
params={},
|
||||
headers=sign_headers,
|
||||
secret_id=secret_id,
|
||||
secret_key=secret_key,
|
||||
start_time=sign_start,
|
||||
expire_seconds=sign_expire,
|
||||
)
|
||||
|
||||
put_headers = {
|
||||
"Authorization": authorization,
|
||||
"Content-Type": content_type,
|
||||
"x-cos-security-token": session_token,
|
||||
}
|
||||
|
||||
logger.info(
|
||||
"COS PUT: bucket=%s region=%s key=%s size=%d mime=%s",
|
||||
bucket, region, cos_key, file_size, content_type,
|
||||
)
|
||||
|
||||
async with httpx.AsyncClient(timeout=120.0) as client:
|
||||
resp = await client.put(
|
||||
cos_url,
|
||||
content=file_bytes,
|
||||
headers=put_headers,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
|
||||
# 解析图片尺寸(仅图片类型)
|
||||
result: dict[str, Any] = {
|
||||
"url": resource_url or cos_url,
|
||||
"uuid": file_uuid,
|
||||
"size": file_size,
|
||||
}
|
||||
|
||||
if content_type.startswith("image/"):
|
||||
size_info = parse_image_size(file_bytes)
|
||||
if size_info:
|
||||
result["width"] = size_info["width"]
|
||||
result["height"] = size_info["height"]
|
||||
|
||||
logger.info(
|
||||
"COS 上传成功: url=%s size=%d",
|
||||
result["url"], file_size,
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
# ============ TIM 媒体消息构建 ============
|
||||
|
||||
def build_image_msg_body(
|
||||
url: str,
|
||||
uuid: Optional[str] = None,
|
||||
filename: Optional[str] = None,
|
||||
size: int = 0,
|
||||
width: int = 0,
|
||||
height: int = 0,
|
||||
mime_type: str = "",
|
||||
) -> list[dict]:
|
||||
"""
|
||||
构建腾讯 IM TIMImageElem 消息体。
|
||||
参考:https://cloud.tencent.com/document/product/269/2720
|
||||
|
||||
Args:
|
||||
url: 图片公网访问 URL(COS resourceUrl)
|
||||
uuid: 文件 UUID(MD5 或其他唯一标识)
|
||||
filename: 文件名(uuid 为空时作为备用)
|
||||
size: 文件大小(字节)
|
||||
width: 图片宽度(像素)
|
||||
height: 图片高度(像素)
|
||||
mime_type: MIME 类型(用于确定 image_format)
|
||||
|
||||
Returns:
|
||||
TIMImageElem 消息体列表(适合直接放入 msg_body)
|
||||
"""
|
||||
_uuid = uuid or filename or _basename_from_url(url) or "image"
|
||||
image_format = get_image_format(mime_type) if mime_type else 255
|
||||
|
||||
return [
|
||||
{
|
||||
"msg_type": "TIMImageElem",
|
||||
"msg_content": {
|
||||
"uuid": _uuid,
|
||||
"image_format": image_format,
|
||||
"image_info_array": [
|
||||
{
|
||||
"type": 1, # 1 = 原图
|
||||
"size": size,
|
||||
"width": width,
|
||||
"height": height,
|
||||
"url": url,
|
||||
}
|
||||
],
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
def build_file_msg_body(
|
||||
url: str,
|
||||
filename: str,
|
||||
uuid: Optional[str] = None,
|
||||
size: int = 0,
|
||||
) -> list[dict]:
|
||||
"""
|
||||
构建腾讯 IM TIMFileElem 消息体。
|
||||
参考:https://cloud.tencent.com/document/product/269/2720
|
||||
|
||||
Args:
|
||||
url: 文件公网访问 URL(COS resourceUrl)
|
||||
filename: 文件名(含扩展名)
|
||||
uuid: 文件 UUID(MD5 或其他唯一标识,不传则使用 filename)
|
||||
size: 文件大小(字节)
|
||||
|
||||
Returns:
|
||||
TIMFileElem 消息体列表(适合直接放入 msg_body)
|
||||
"""
|
||||
_uuid = uuid or filename
|
||||
|
||||
return [
|
||||
{
|
||||
"msg_type": "TIMFileElem",
|
||||
"msg_content": {
|
||||
"uuid": _uuid,
|
||||
"file_name": filename,
|
||||
"file_size": size,
|
||||
"url": url,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
# ============ 内部工具 ============
|
||||
|
||||
def _basename_from_url(url: str) -> str:
|
||||
"""从 URL 提取文件名。"""
|
||||
try:
|
||||
parsed = urllib.parse.urlparse(url)
|
||||
return os.path.basename(parsed.path)
|
||||
except Exception:
|
||||
return ""
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,558 +0,0 @@
|
||||
"""
|
||||
Yuanbao sticker (TIMFaceElem) support.
|
||||
|
||||
Ported from yuanbao-openclaw-plugin/src/sticker/.
|
||||
|
||||
TIMFaceElem wire format:
|
||||
{
|
||||
"msg_type": "TIMFaceElem",
|
||||
"msg_content": {
|
||||
"index": 0, # always 0 per Yuanbao convention
|
||||
"data": "<json>", # serialised sticker metadata
|
||||
}
|
||||
}
|
||||
|
||||
The `data` field carries a JSON string with the sticker's metadata so the
|
||||
receiver can look up the correct asset in the emoji pack.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import unicodedata
|
||||
from typing import Optional
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sticker catalogue – ported from builtin-stickers.json
|
||||
# Key : canonical name (Chinese)
|
||||
# Value : {sticker_id, package_id, name, description, width, height, formats}
|
||||
# ---------------------------------------------------------------------------
|
||||
STICKER_MAP: dict[str, dict] = {
|
||||
"六六六": {
|
||||
"sticker_id": "278", "package_id": "1003", "name": "六六六",
|
||||
"description": "666 厉害 牛 棒 绝了 好强 awesome",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"我想开了": {
|
||||
"sticker_id": "262", "package_id": "1003", "name": "我想开了",
|
||||
"description": "想开 佛系 释怀 顿悟 看淡了 无所谓",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"害羞": {
|
||||
"sticker_id": "130", "package_id": "1003", "name": "害羞",
|
||||
"description": "腼腆 不好意思 脸红 娇羞 羞涩 捂脸",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"比心": {
|
||||
"sticker_id": "252", "package_id": "1003", "name": "比心",
|
||||
"description": "笔芯 爱你 爱心手势 love heart 喜欢你",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"委屈": {
|
||||
"sticker_id": "125", "package_id": "1003", "name": "委屈",
|
||||
"description": "难过 想哭 可怜巴巴 瘪嘴 受伤 被欺负",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"亲亲": {
|
||||
"sticker_id": "146", "package_id": "1003", "name": "亲亲",
|
||||
"description": "么么 mua 亲一下 kiss 飞吻 啵",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"酷": {
|
||||
"sticker_id": "131", "package_id": "1003", "name": "酷",
|
||||
"description": "帅 墨镜 cool 高冷 有型 swagger",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"睡": {
|
||||
"sticker_id": "145", "package_id": "1003", "name": "睡",
|
||||
"description": "睡觉 困 zzZ 打盹 躺平 休眠 sleepy",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"发呆": {
|
||||
"sticker_id": "152", "package_id": "1003", "name": "发呆",
|
||||
"description": "懵 愣住 放空 呆滞 出神 脑子空白",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"可怜": {
|
||||
"sticker_id": "157", "package_id": "1003", "name": "可怜",
|
||||
"description": "卖萌 求饶 委屈巴巴 弱小 拜托 眼巴巴",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"摊手": {
|
||||
"sticker_id": "200", "package_id": "1003", "name": "摊手",
|
||||
"description": "无奈 没办法 耸肩 随便 那咋整 whatever",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"头大": {
|
||||
"sticker_id": "213", "package_id": "1003", "name": "头大",
|
||||
"description": "头疼 烦恼 郁闷 难搞 崩溃 一团乱",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"吓": {
|
||||
"sticker_id": "256", "package_id": "1003", "name": "吓",
|
||||
"description": "害怕 惊恐 震惊 吓一跳 恐怖 怂",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"吐血": {
|
||||
"sticker_id": "203", "package_id": "1003", "name": "吐血",
|
||||
"description": "无语 崩溃 被雷 内伤 一口老血 屮",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"哼": {
|
||||
"sticker_id": "185", "package_id": "1003", "name": "哼",
|
||||
"description": "傲娇 生气 不满 撇嘴 不理 赌气",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"嘿嘿": {
|
||||
"sticker_id": "220", "package_id": "1003", "name": "嘿嘿",
|
||||
"description": "坏笑 猥琐笑 偷笑 憨笑 得意 你懂的",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"头秃": {
|
||||
"sticker_id": "218", "package_id": "1003", "name": "头秃",
|
||||
"description": "程序员 加班 焦虑 没头发 秃了 肝爆",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"暗中观察": {
|
||||
"sticker_id": "221", "package_id": "1003", "name": "暗中观察",
|
||||
"description": "窥屏 潜水 偷偷看 角落 围观 屏住呼吸",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"我酸了": {
|
||||
"sticker_id": "224", "package_id": "1003", "name": "我酸了",
|
||||
"description": "嫉妒 柠檬精 羡慕 吃柠檬 眼红 恰柠檬",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"打call": {
|
||||
"sticker_id": "246", "package_id": "1003", "name": "打call",
|
||||
"description": "应援 加油 支持 喝彩 助威 call",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"庆祝": {
|
||||
"sticker_id": "251", "package_id": "1003", "name": "庆祝",
|
||||
"description": "祝贺 开心 耶 party 胜利 干杯",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"奋斗": {
|
||||
"sticker_id": "151", "package_id": "1003", "name": "奋斗",
|
||||
"description": "努力 加油 拼搏 冲 干劲 卷起来",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"惊讶": {
|
||||
"sticker_id": "143", "package_id": "1003", "name": "惊讶",
|
||||
"description": "震惊 哇 不敢相信 OMG 居然 这么离谱",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"疑问": {
|
||||
"sticker_id": "144", "package_id": "1003", "name": "疑问",
|
||||
"description": "问号 不懂 啥 为什么 啥情况 懵逼问",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"仔细分析": {
|
||||
"sticker_id": "248", "package_id": "1003", "name": "仔细分析",
|
||||
"description": "思考 推敲 认真 研究 琢磨 让我想想",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"撅嘴": {
|
||||
"sticker_id": "184", "package_id": "1003", "name": "撅嘴",
|
||||
"description": "嘟嘴 卖萌 不高兴 撒娇 嘴翘",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"泪奔": {
|
||||
"sticker_id": "199", "package_id": "1003", "name": "泪奔",
|
||||
"description": "大哭 伤心 破防 感动哭 泪流满面 呜呜",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"尊嘟假嘟": {
|
||||
"sticker_id": "276", "package_id": "1003", "name": "尊嘟假嘟",
|
||||
"description": "真的假的 真假 可爱问 你骗我 是不是",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"略略略": {
|
||||
"sticker_id": "113", "package_id": "1003", "name": "略略略",
|
||||
"description": "调皮 吐舌 不服 略 气死你 鬼脸",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"困": {
|
||||
"sticker_id": "180", "package_id": "1003", "name": "困",
|
||||
"description": "想睡 倦 打哈欠 睁不开眼 好困啊 sleepy",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"折磨": {
|
||||
"sticker_id": "181", "package_id": "1003", "name": "折磨",
|
||||
"description": "难受 痛苦 煎熬 蚌埠住了 受不了 要命",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"抠鼻": {
|
||||
"sticker_id": "182", "package_id": "1003", "name": "抠鼻",
|
||||
"description": "不屑 无聊 淡定 无所谓 鄙视 挖鼻",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"鼓掌": {
|
||||
"sticker_id": "183", "package_id": "1003", "name": "鼓掌",
|
||||
"description": "拍手 叫好 赞同 666 喝彩 掌声",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"斜眼笑": {
|
||||
"sticker_id": "204", "package_id": "1003", "name": "斜眼笑",
|
||||
"description": "滑稽 坏笑 doge 意味深长 阴阳怪气 嘿嘿嘿",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"辣眼睛": {
|
||||
"sticker_id": "216", "package_id": "1003", "name": "辣眼睛",
|
||||
"description": "看不下去 cringe 毁三观 太丑了 瞎了",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"哦哟": {
|
||||
"sticker_id": "217", "package_id": "1003", "name": "哦哟",
|
||||
"description": "惊讶 起哄 哇哦 有戏 不简单 哟",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"吃瓜": {
|
||||
"sticker_id": "222", "package_id": "1003", "name": "吃瓜",
|
||||
"description": "围观 看戏 八卦 路人 看热闹 板凳",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"狗头": {
|
||||
"sticker_id": "225", "package_id": "1003", "name": "狗头",
|
||||
"description": "doge 保命 开玩笑 滑稽 反讽 懂的都懂",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"敬礼": {
|
||||
"sticker_id": "227", "package_id": "1003", "name": "敬礼",
|
||||
"description": "salute 尊重 收到 遵命 致敬 报告",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"哦": {
|
||||
"sticker_id": "231", "package_id": "1003", "name": "哦",
|
||||
"description": "知道了 明白 敷衍 嗯 这样啊 收到",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"拿到红包": {
|
||||
"sticker_id": "236", "package_id": "1003", "name": "拿到红包",
|
||||
"description": "红包 谢谢老板 发财 开心 抢到了 欧气",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"牛吖": {
|
||||
"sticker_id": "239", "package_id": "1003", "name": "牛吖",
|
||||
"description": "牛 厉害 强 666 佩服 大佬",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"贴贴": {
|
||||
"sticker_id": "272", "package_id": "1003", "name": "贴贴",
|
||||
"description": "抱抱 亲昵 蹭蹭 亲密 靠靠 撒娇贴",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"爱心": {
|
||||
"sticker_id": "138", "package_id": "1003", "name": "爱心",
|
||||
"description": "心 love 喜欢你 红心 示爱 么么哒",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"晚安": {
|
||||
"sticker_id": "170", "package_id": "1003", "name": "晚安",
|
||||
"description": "好梦 睡了 night 早点休息 安啦 moon",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"太阳": {
|
||||
"sticker_id": "176", "package_id": "1003", "name": "太阳",
|
||||
"description": "晴天 早上好 阳光 morning 好天气 日",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"柠檬": {
|
||||
"sticker_id": "266", "package_id": "1003", "name": "柠檬",
|
||||
"description": "酸 嫉妒 柠檬精 羡慕 我酸 恰柠檬",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"大冤种": {
|
||||
"sticker_id": "267", "package_id": "1003", "name": "大冤种",
|
||||
"description": "倒霉 吃亏 自嘲 好心没好报 背锅 工具人",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"吐了": {
|
||||
"sticker_id": "132", "package_id": "1003", "name": "吐了",
|
||||
"description": "恶心 yue 受不了 嫌弃 想吐 生理不适",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"怒": {
|
||||
"sticker_id": "134", "package_id": "1003", "name": "怒",
|
||||
"description": "生气 愤怒 火大 暴躁 气炸 怼",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"玫瑰": {
|
||||
"sticker_id": "165", "package_id": "1003", "name": "玫瑰",
|
||||
"description": "花 示爱 表白 浪漫 送你花 情人节",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"凋谢": {
|
||||
"sticker_id": "119", "package_id": "1003", "name": "凋谢",
|
||||
"description": "花谢 失恋 难过 枯萎 心碎 凉了",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"点赞": {
|
||||
"sticker_id": "159", "package_id": "1003", "name": "点赞",
|
||||
"description": "赞 认同 好棒 good like 大拇指 顶",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"握手": {
|
||||
"sticker_id": "164", "package_id": "1003", "name": "握手",
|
||||
"description": "合作 你好 商务 hello deal 成交 友好",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"抱拳": {
|
||||
"sticker_id": "163", "package_id": "1003", "name": "抱拳",
|
||||
"description": "谢谢 失敬 江湖 承让 拜托 有礼",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"ok": {
|
||||
"sticker_id": "169", "package_id": "1003", "name": "ok",
|
||||
"description": "好的 收到 没问题 okay 行 可以 懂了",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"拳头": {
|
||||
"sticker_id": "174", "package_id": "1003", "name": "拳头",
|
||||
"description": "加油 干 冲 fight 力量 击拳 硬气",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"鞭炮": {
|
||||
"sticker_id": "191", "package_id": "1003", "name": "鞭炮",
|
||||
"description": "过年 喜庆 爆竹 春节 噼里啪啦 红",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
"烟花": {
|
||||
"sticker_id": "258", "package_id": "1003", "name": "烟花",
|
||||
"description": "庆典 漂亮 新年 嘭 绽放 节日快乐",
|
||||
"width": 128, "height": 128, "formats": "png",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def get_sticker_by_name(name: str) -> Optional[dict]:
|
||||
"""
|
||||
按名称查找贴纸,支持模糊匹配。
|
||||
|
||||
匹配优先级:
|
||||
1. 完全相等(name)
|
||||
2. name 包含查询词(前缀/子串)
|
||||
3. description 包含查询词(同义词搜索)
|
||||
4. 通用模糊评分(与 sticker-search 同算法),命中即返回得分最高的一条
|
||||
|
||||
返回 sticker dict,找不到返回 None。
|
||||
"""
|
||||
if not name:
|
||||
return None
|
||||
|
||||
query = name.strip()
|
||||
|
||||
if query in STICKER_MAP:
|
||||
return STICKER_MAP[query]
|
||||
|
||||
for key, sticker in STICKER_MAP.items():
|
||||
if query in key or key in query:
|
||||
return sticker
|
||||
|
||||
for sticker in STICKER_MAP.values():
|
||||
desc = sticker.get("description", "")
|
||||
if query in desc:
|
||||
return sticker
|
||||
|
||||
matches = search_stickers(query, limit=1)
|
||||
return matches[0] if matches else None
|
||||
|
||||
|
||||
def get_random_sticker(category: str = None) -> dict:
|
||||
"""
|
||||
随机返回一个贴纸。
|
||||
|
||||
若指定 category,则在 description 中含有该关键词的贴纸里随机选取;
|
||||
category 为 None 时从全表随机。
|
||||
"""
|
||||
if category:
|
||||
candidates = [
|
||||
s for s in STICKER_MAP.values()
|
||||
if category in s.get("description", "") or category in s.get("name", "")
|
||||
]
|
||||
if candidates:
|
||||
return random.choice(candidates)
|
||||
return random.choice(list(STICKER_MAP.values()))
|
||||
|
||||
|
||||
def get_sticker_by_id(sticker_id: str) -> Optional[dict]:
|
||||
"""按 sticker_id 精确查找贴纸。"""
|
||||
if not sticker_id:
|
||||
return None
|
||||
sid = str(sticker_id).strip()
|
||||
for sticker in STICKER_MAP.values():
|
||||
if sticker.get("sticker_id") == sid:
|
||||
return sticker
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 模糊搜索(对齐 chatbot-web yuanbao-openclaw-plugin/sticker-cache.ts.searchStickers)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_PUNCT_RE = re.compile(r"[\s\u3000\-_·.,,。!!??\"“”'‘’、/\\]+")
|
||||
|
||||
|
||||
def _normalize_text(raw: str) -> str:
|
||||
return unicodedata.normalize("NFKC", str(raw or "")).strip().lower()
|
||||
|
||||
|
||||
def _compact_text(raw: str) -> str:
|
||||
return _PUNCT_RE.sub("", _normalize_text(raw))
|
||||
|
||||
|
||||
def _multiset_char_hit_ratio(needle: str, haystack: str) -> float:
|
||||
if not needle:
|
||||
return 0.0
|
||||
bag: dict[str, int] = {}
|
||||
for ch in haystack:
|
||||
bag[ch] = bag.get(ch, 0) + 1
|
||||
hits = 0
|
||||
for ch in needle:
|
||||
n = bag.get(ch, 0)
|
||||
if n > 0:
|
||||
hits += 1
|
||||
bag[ch] = n - 1
|
||||
return hits / len(needle)
|
||||
|
||||
|
||||
def _bigram_jaccard(a: str, b: str) -> float:
|
||||
if len(a) < 2 or len(b) < 2:
|
||||
return 0.0
|
||||
A = {a[i:i + 2] for i in range(len(a) - 1)}
|
||||
B = {b[i:i + 2] for i in range(len(b) - 1)}
|
||||
inter = len(A & B)
|
||||
union = len(A) + len(B) - inter
|
||||
return inter / union if union else 0.0
|
||||
|
||||
|
||||
def _longest_subsequence_ratio(needle: str, haystack: str) -> float:
|
||||
if not needle:
|
||||
return 0.0
|
||||
j = 0
|
||||
for ch in haystack:
|
||||
if j >= len(needle):
|
||||
break
|
||||
if ch == needle[j]:
|
||||
j += 1
|
||||
return j / len(needle)
|
||||
|
||||
|
||||
def _score_field(haystack: str, query: str) -> float:
|
||||
hay = _normalize_text(haystack)
|
||||
q = _normalize_text(query)
|
||||
if not hay or not q:
|
||||
return 0.0
|
||||
hay_c = _compact_text(haystack)
|
||||
q_c = _compact_text(query)
|
||||
best = 0.0
|
||||
if hay == q:
|
||||
best = max(best, 100.0)
|
||||
if q in hay:
|
||||
best = max(best, 92 + min(6, len(q)))
|
||||
if len(q) >= 2 and hay.startswith(q):
|
||||
best = max(best, 88.0)
|
||||
if q_c and q_c in hay_c:
|
||||
best = max(best, 86.0)
|
||||
best = max(best, _multiset_char_hit_ratio(q_c, hay_c) * 62)
|
||||
best = max(best, _bigram_jaccard(q_c, hay_c) * 58)
|
||||
best = max(best, _longest_subsequence_ratio(q_c, hay_c) * 52)
|
||||
if len(q) == 1 and q in hay:
|
||||
best = max(best, 68.0)
|
||||
return best
|
||||
|
||||
|
||||
def search_stickers(query: str, limit: int = 10) -> list[dict]:
|
||||
"""
|
||||
在内置贴纸表中按模糊匹配排序返回前 N 条结果。
|
||||
|
||||
评分综合 name/description 字段的子串、字符多重集覆盖、bigram Jaccard、子序列比例。
|
||||
name 权重略高于 description(×0.88)。空 query 时按字典顺序返回前 N 条。
|
||||
"""
|
||||
safe_limit = max(1, min(500, int(limit) if limit else 10))
|
||||
if not query or not _normalize_text(query):
|
||||
return list(STICKER_MAP.values())[:safe_limit]
|
||||
|
||||
scored: list[tuple[float, dict]] = []
|
||||
for sticker in STICKER_MAP.values():
|
||||
name_s = _score_field(sticker.get("name", ""), query)
|
||||
desc_s = _score_field(sticker.get("description", ""), query) * 0.88
|
||||
sid = str(sticker.get("sticker_id", "")).strip()
|
||||
q_norm = _normalize_text(query)
|
||||
id_s = 0.0
|
||||
if sid and q_norm:
|
||||
sid_norm = _normalize_text(sid)
|
||||
if sid_norm == q_norm:
|
||||
id_s = 100.0
|
||||
elif q_norm in sid_norm:
|
||||
id_s = 84.0
|
||||
scored.append((max(name_s, desc_s, id_s), sticker))
|
||||
|
||||
scored.sort(key=lambda x: x[0], reverse=True)
|
||||
top = scored[0][0] if scored else 0
|
||||
if top <= 0:
|
||||
return [s for _, s in scored[:safe_limit]]
|
||||
|
||||
if top >= 22:
|
||||
floor = 18.0
|
||||
elif top >= 12:
|
||||
floor = max(10.0, top * 0.5)
|
||||
else:
|
||||
floor = max(6.0, top * 0.35)
|
||||
|
||||
filtered = [pair for pair in scored if pair[0] >= floor]
|
||||
out = filtered if filtered else scored
|
||||
return [s for _, s in out[:safe_limit]]
|
||||
|
||||
|
||||
def build_face_msg_body(
|
||||
face_index: int,
|
||||
face_type: int = 1,
|
||||
data: Optional[str] = None,
|
||||
) -> list:
|
||||
"""
|
||||
构造 TIMFaceElem 消息体。
|
||||
|
||||
Yuanbao 约定:
|
||||
- index 固定传 0(服务端通过 data 字段识别具体表情)
|
||||
- data 为 JSON 字符串,包含 sticker_id / package_id 等字段
|
||||
|
||||
Args:
|
||||
face_index: 保留字段,暂时不影响 wire format(Yuanbao 固定 index=0)。
|
||||
当 face_index > 0 时视为旧版 QQ 表情 ID,直接放入 index。
|
||||
face_type: 保留字段(兼容旧接口,当前未使用)。
|
||||
data: 已序列化的 JSON 字符串;为 None 时仅传 index。
|
||||
|
||||
Returns:
|
||||
符合 Yuanbao TIM 协议的 msg_body list,如::
|
||||
|
||||
[{"msg_type": "TIMFaceElem", "msg_content": {"index": 0, "data": "..."}}]
|
||||
"""
|
||||
msg_content: dict = {"index": face_index}
|
||||
if data is not None:
|
||||
msg_content["data"] = data
|
||||
return [{"msg_type": "TIMFaceElem", "msg_content": msg_content}]
|
||||
|
||||
|
||||
def build_sticker_msg_body(sticker: dict) -> list:
|
||||
"""
|
||||
从 STICKER_MAP 中的 sticker dict 直接构造 TIMFaceElem 消息体。
|
||||
|
||||
这是 send_sticker() 的内部辅助,确保 data 字段与原始 JS 插件一致。
|
||||
"""
|
||||
data_payload = json.dumps(
|
||||
{
|
||||
"sticker_id": sticker["sticker_id"],
|
||||
"package_id": sticker["package_id"],
|
||||
"width": sticker.get("width", 128),
|
||||
"height": sticker.get("height", 128),
|
||||
"formats": sticker.get("formats", "png"),
|
||||
"name": sticker["name"],
|
||||
},
|
||||
ensure_ascii=False,
|
||||
separators=(",", ":"),
|
||||
)
|
||||
return build_face_msg_body(face_index=0, data=data_payload)
|
||||
@@ -1,150 +0,0 @@
|
||||
"""Gateway runtime-metadata footer.
|
||||
|
||||
Renders a compact footer showing runtime state (model, context %, cwd) and
|
||||
appends it to the FINAL message of an agent turn when enabled. Off by default
|
||||
to keep replies minimal.
|
||||
|
||||
Config (``~/.hermes/config.yaml``)::
|
||||
|
||||
display:
|
||||
runtime_footer:
|
||||
enabled: true # off by default
|
||||
fields: [model, context_pct, cwd] # order shown; drop any to hide
|
||||
|
||||
Per-platform overrides live under ``display.platforms.<platform>.runtime_footer``.
|
||||
Users can toggle the global setting with ``/footer on|off`` from both the CLI
|
||||
and any gateway platform.
|
||||
|
||||
The footer is appended to the final response text in ``gateway/run.py`` right
|
||||
before returning the response to the adapter send path — so it only lands on
|
||||
the final message a user sees, not on tool-progress updates or streaming
|
||||
partials. When streaming is on and the final text has already been delivered
|
||||
piecemeal, the footer is sent as a separate trailing message via
|
||||
``send_trailing_footer()``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable, Optional
|
||||
|
||||
_DEFAULT_FIELDS: tuple[str, ...] = ("model", "context_pct", "cwd")
|
||||
_SEP = " · "
|
||||
|
||||
|
||||
def _home_relative_cwd(cwd: str) -> str:
|
||||
"""Return *cwd* with ``$HOME`` collapsed to ``~``. Empty string if unset."""
|
||||
if not cwd:
|
||||
return ""
|
||||
try:
|
||||
home = os.path.expanduser("~")
|
||||
p = os.path.abspath(cwd)
|
||||
if home and (p == home or p.startswith(home + os.sep)):
|
||||
return "~" + p[len(home):]
|
||||
return p
|
||||
except Exception:
|
||||
return cwd
|
||||
|
||||
|
||||
def _model_short(model: Optional[str]) -> str:
|
||||
"""Drop ``vendor/`` prefix for readability (``openai/gpt-5.4`` → ``gpt-5.4``)."""
|
||||
if not model:
|
||||
return ""
|
||||
return model.rsplit("/", 1)[-1]
|
||||
|
||||
|
||||
def resolve_footer_config(
|
||||
user_config: dict[str, Any] | None,
|
||||
platform_key: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Resolve effective runtime-footer config for *platform_key*.
|
||||
|
||||
Merge order (later wins):
|
||||
1. Built-in defaults (enabled=False)
|
||||
2. ``display.runtime_footer``
|
||||
3. ``display.platforms.<platform_key>.runtime_footer``
|
||||
"""
|
||||
resolved = {"enabled": False, "fields": list(_DEFAULT_FIELDS)}
|
||||
cfg = (user_config or {}).get("display") or {}
|
||||
|
||||
global_cfg = cfg.get("runtime_footer")
|
||||
if isinstance(global_cfg, dict):
|
||||
if "enabled" in global_cfg:
|
||||
resolved["enabled"] = bool(global_cfg.get("enabled"))
|
||||
if isinstance(global_cfg.get("fields"), list) and global_cfg["fields"]:
|
||||
resolved["fields"] = [str(f) for f in global_cfg["fields"]]
|
||||
|
||||
if platform_key:
|
||||
platforms = cfg.get("platforms") or {}
|
||||
plat_cfg = platforms.get(platform_key)
|
||||
if isinstance(plat_cfg, dict):
|
||||
plat_footer = plat_cfg.get("runtime_footer")
|
||||
if isinstance(plat_footer, dict):
|
||||
if "enabled" in plat_footer:
|
||||
resolved["enabled"] = bool(plat_footer.get("enabled"))
|
||||
if isinstance(plat_footer.get("fields"), list) and plat_footer["fields"]:
|
||||
resolved["fields"] = [str(f) for f in plat_footer["fields"]]
|
||||
|
||||
return resolved
|
||||
|
||||
|
||||
def format_runtime_footer(
|
||||
*,
|
||||
model: Optional[str],
|
||||
context_tokens: int,
|
||||
context_length: Optional[int],
|
||||
cwd: Optional[str] = None,
|
||||
fields: Iterable[str] = _DEFAULT_FIELDS,
|
||||
) -> str:
|
||||
"""Render the footer line, or return "" if no fields have data.
|
||||
|
||||
Fields are skipped silently when their underlying data is missing — a
|
||||
partially-populated footer is better than a line with ``?%`` or empty slots.
|
||||
"""
|
||||
parts: list[str] = []
|
||||
for field in fields:
|
||||
if field == "model":
|
||||
m = _model_short(model)
|
||||
if m:
|
||||
parts.append(m)
|
||||
elif field == "context_pct":
|
||||
if context_length and context_length > 0 and context_tokens >= 0:
|
||||
pct = max(0, min(100, round((context_tokens / context_length) * 100)))
|
||||
parts.append(f"{pct}%")
|
||||
elif field == "cwd":
|
||||
rel = _home_relative_cwd(cwd or os.environ.get("TERMINAL_CWD", ""))
|
||||
if rel:
|
||||
parts.append(rel)
|
||||
# Unknown field names are silently ignored.
|
||||
|
||||
if not parts:
|
||||
return ""
|
||||
return _SEP.join(parts)
|
||||
|
||||
|
||||
def build_footer_line(
|
||||
*,
|
||||
user_config: dict[str, Any] | None,
|
||||
platform_key: str | None,
|
||||
model: Optional[str],
|
||||
context_tokens: int,
|
||||
context_length: Optional[int],
|
||||
cwd: Optional[str] = None,
|
||||
) -> str:
|
||||
"""Top-level entry point used by gateway/run.py.
|
||||
|
||||
Returns the footer text (empty string when disabled or no data). Callers
|
||||
append this to the final response themselves, preserving a single blank
|
||||
line of separation.
|
||||
"""
|
||||
cfg = resolve_footer_config(user_config, platform_key)
|
||||
if not cfg.get("enabled"):
|
||||
return ""
|
||||
return format_runtime_footer(
|
||||
model=model,
|
||||
context_tokens=context_tokens,
|
||||
context_length=context_length,
|
||||
cwd=cwd,
|
||||
fields=cfg.get("fields") or _DEFAULT_FIELDS,
|
||||
)
|
||||
@@ -1,462 +0,0 @@
|
||||
"""Shutdown forensics — capture context when the gateway receives SIGTERM/SIGINT.
|
||||
|
||||
The gateway's ``shutdown_signal_handler`` runs synchronously inside the
|
||||
asyncio event loop. We can't safely block it for long, but we DO want a
|
||||
durable record of who/what triggered the shutdown so that "the gateway
|
||||
keeps dying" incidents can be diagnosed after the fact.
|
||||
|
||||
This module exposes :func:`snapshot_shutdown_context`, a fast (<10ms),
|
||||
non-blocking probe that returns a structured dict the signal handler can
|
||||
log immediately, plus :func:`spawn_async_diagnostic`, a fire-and-forget
|
||||
``ps`` walk that runs as a detached subprocess so it can't block teardown
|
||||
even if /proc is wedged.
|
||||
|
||||
Anything that needs to wait (e.g. shelling out to ``ps aux``) belongs in
|
||||
the async helper, never in the synchronous probe.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
_SIGNAL_NAME_BY_NUM: Dict[int, str] = {}
|
||||
for _name in ("SIGTERM", "SIGINT", "SIGHUP", "SIGQUIT", "SIGUSR1", "SIGUSR2"):
|
||||
_val = getattr(signal, _name, None)
|
||||
if _val is not None:
|
||||
_SIGNAL_NAME_BY_NUM[int(_val)] = _name
|
||||
|
||||
|
||||
def _signal_name(sig: Any) -> str:
|
||||
"""Return a human-readable signal name (or ``str(sig)`` as fallback)."""
|
||||
if sig is None:
|
||||
return "UNKNOWN"
|
||||
try:
|
||||
sig_int = int(sig)
|
||||
except (TypeError, ValueError):
|
||||
return str(sig)
|
||||
return _SIGNAL_NAME_BY_NUM.get(sig_int, f"signal#{sig_int}")
|
||||
|
||||
|
||||
def _read_proc_field(pid: int, key: str) -> Optional[str]:
|
||||
"""Read a single field from /proc/<pid>/status. Linux only; None elsewhere."""
|
||||
try:
|
||||
with open(f"/proc/{pid}/status", encoding="utf-8") as fh:
|
||||
for line in fh:
|
||||
if line.startswith(key + ":"):
|
||||
return line.split(":", 1)[1].strip()
|
||||
except (FileNotFoundError, PermissionError, OSError):
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def _read_proc_cmdline(pid: int) -> Optional[str]:
|
||||
"""Read /proc/<pid>/cmdline as a printable string. Linux only; None elsewhere."""
|
||||
try:
|
||||
with open(f"/proc/{pid}/cmdline", "rb") as fh:
|
||||
data = fh.read()
|
||||
except (FileNotFoundError, PermissionError, OSError):
|
||||
return None
|
||||
if not data:
|
||||
return None
|
||||
# cmdline uses NUL separators
|
||||
return data.replace(b"\x00", b" ").decode("utf-8", errors="replace").strip()
|
||||
|
||||
|
||||
def _proc_summary(pid: int) -> Dict[str, Any]:
|
||||
"""Compact /proc/<pid> snapshot: pid, ppid, state, uid, cmdline.
|
||||
|
||||
Best-effort. Missing fields are simply omitted rather than raising.
|
||||
"""
|
||||
summary: Dict[str, Any] = {"pid": pid}
|
||||
if pid <= 0:
|
||||
return summary
|
||||
name = _read_proc_field(pid, "Name")
|
||||
if name is not None:
|
||||
summary["name"] = name
|
||||
state = _read_proc_field(pid, "State")
|
||||
if state is not None:
|
||||
summary["state"] = state
|
||||
ppid = _read_proc_field(pid, "PPid")
|
||||
if ppid is not None:
|
||||
try:
|
||||
summary["ppid"] = int(ppid)
|
||||
except ValueError:
|
||||
pass
|
||||
uid = _read_proc_field(pid, "Uid")
|
||||
if uid is not None:
|
||||
# "real effective saved fs"
|
||||
summary["uid"] = uid.split()[0] if uid else uid
|
||||
cmdline = _read_proc_cmdline(pid)
|
||||
if cmdline:
|
||||
# Truncate aggressively — these can be 4KB
|
||||
summary["cmdline"] = cmdline[:300]
|
||||
return summary
|
||||
|
||||
|
||||
def snapshot_shutdown_context(received_signal: Any = None) -> Dict[str, Any]:
|
||||
"""Fast (<10ms) snapshot of who/what is asking us to shut down.
|
||||
|
||||
Captures:
|
||||
|
||||
* The signal number/name (so SIGINT vs SIGTERM is visible)
|
||||
* Our own PID/ppid + parent process info from /proc (Linux)
|
||||
* Whether systemd is our parent (``ppid==1`` or ``INVOCATION_ID`` set)
|
||||
* Whether takeover/planned-stop markers exist (consumed lazily by the caller)
|
||||
* /proc/self limits + load average (1-min)
|
||||
* Wall-clock and monotonic timestamps for cross-correlating later phases
|
||||
|
||||
Pure stdlib, never raises, never blocks on subprocesses.
|
||||
"""
|
||||
now = time.time()
|
||||
monotonic = time.monotonic()
|
||||
pid = os.getpid()
|
||||
ppid = os.getppid()
|
||||
|
||||
ctx: Dict[str, Any] = {
|
||||
"ts": now,
|
||||
"ts_monotonic": monotonic,
|
||||
"signal": _signal_name(received_signal),
|
||||
"signal_num": int(received_signal) if received_signal is not None else None,
|
||||
"pid": pid,
|
||||
"ppid": ppid,
|
||||
"parent": _proc_summary(ppid),
|
||||
"self": _proc_summary(pid),
|
||||
}
|
||||
|
||||
# systemd context. If we were started by a systemd unit, INVOCATION_ID
|
||||
# is set in our env. ppid==1 (init) is also a strong signal that
|
||||
# systemd reaped+forwarded the SIGTERM.
|
||||
invocation_id = os.environ.get("INVOCATION_ID")
|
||||
if invocation_id:
|
||||
ctx["systemd_invocation_id"] = invocation_id
|
||||
journal_stream = os.environ.get("JOURNAL_STREAM")
|
||||
if journal_stream:
|
||||
ctx["systemd_journal_stream"] = journal_stream
|
||||
ctx["under_systemd"] = bool(invocation_id) or ppid == 1
|
||||
|
||||
# Load average — high load points the finger at "something else
|
||||
# crushing the box" rather than "external killer".
|
||||
try:
|
||||
ctx["loadavg_1m"] = os.getloadavg()[0]
|
||||
except (OSError, AttributeError):
|
||||
pass
|
||||
|
||||
# /proc/self/status TracerPid: nonzero means a debugger / strace is
|
||||
# attached. Useful when "phantom SIGKILL" turns out to be a manual
|
||||
# gdb session.
|
||||
try:
|
||||
tracer = _read_proc_field(pid, "TracerPid")
|
||||
if tracer is not None and tracer != "0":
|
||||
ctx["tracer_pid"] = int(tracer) if tracer.isdigit() else tracer
|
||||
ctx["tracer"] = _proc_summary(int(tracer)) if tracer.isdigit() else None
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
|
||||
# Race-detection hint: did somebody recently start a sibling gateway
|
||||
# with --replace? We can't see the new process directly here, but if
|
||||
# there's a takeover marker on disk that DOESN'T name us, that's a
|
||||
# smoking gun for "another --replace instance is killing us".
|
||||
# Filenames mirror gateway.status (._TAKEOVER_MARKER_FILENAME /
|
||||
# _PLANNED_STOP_MARKER_FILENAME); we use string literals here so the
|
||||
# signal-handler path stays import-light.
|
||||
try:
|
||||
hermes_home_str = os.environ.get("HERMES_HOME")
|
||||
if hermes_home_str:
|
||||
takeover_path = Path(hermes_home_str) / ".gateway-takeover.json"
|
||||
if takeover_path.exists():
|
||||
try:
|
||||
raw = takeover_path.read_text(encoding="utf-8")
|
||||
ctx["takeover_marker"] = raw[:300]
|
||||
ctx["takeover_marker_for_self"] = (
|
||||
f'"target_pid": {pid}' in raw
|
||||
or f"'target_pid': {pid}" in raw
|
||||
)
|
||||
except OSError:
|
||||
pass
|
||||
planned_stop_path = Path(hermes_home_str) / ".gateway-planned-stop.json"
|
||||
if planned_stop_path.exists():
|
||||
try:
|
||||
raw = planned_stop_path.read_text(encoding="utf-8")
|
||||
ctx["planned_stop_marker"] = raw[:300]
|
||||
except OSError:
|
||||
pass
|
||||
except Exception: # noqa: BLE001 — never raise from a signal handler
|
||||
pass
|
||||
|
||||
return ctx
|
||||
|
||||
|
||||
def spawn_async_diagnostic(
|
||||
log_path: Path,
|
||||
signal_name: str,
|
||||
*,
|
||||
timeout_seconds: float = 5.0,
|
||||
) -> Optional[int]:
|
||||
"""Fire-and-forget ``ps``-style snapshot written to ``log_path``.
|
||||
|
||||
Runs as a detached subprocess so it can't block the asyncio event loop
|
||||
or compete with platform teardown. The subprocess uses its own
|
||||
``timeout`` so a wedged ``ps`` still self-cleans within
|
||||
``timeout_seconds``.
|
||||
|
||||
Returns the subprocess PID on success, ``None`` on failure. Never
|
||||
raises.
|
||||
|
||||
We deliberately avoid ``subprocess.run(["ps", "aux"])`` from inside the
|
||||
signal handler (the pre-existing pattern): on a busy host with hundreds
|
||||
of processes, ``ps aux`` can take >2s to walk /proc, during which the
|
||||
asyncio loop is frozen and adapter teardown can't begin.
|
||||
"""
|
||||
try:
|
||||
log_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
except OSError:
|
||||
return None
|
||||
|
||||
# Inline shell so we don't have to ship a helper script. bash -c is
|
||||
# available on every POSIX target we support; on Windows we just skip
|
||||
# the snapshot (the platform doesn't ship ps anyway).
|
||||
if sys.platform == "win32":
|
||||
return None
|
||||
|
||||
script = (
|
||||
f"echo '=== shutdown diagnostic @ {signal_name} ==='; "
|
||||
"echo '--- date ---'; date -u +%Y-%m-%dT%H:%M:%SZ; "
|
||||
"echo '--- ps auxf (top 60 by cpu) ---'; "
|
||||
"ps auxf --sort=-pcpu 2>/dev/null | head -60; "
|
||||
"echo '--- pstree of self ---'; "
|
||||
f"pstree -plau {os.getpid()} 2>/dev/null | head -40 || true; "
|
||||
"echo '--- /proc/loadavg ---'; "
|
||||
"cat /proc/loadavg 2>/dev/null || true; "
|
||||
"echo '--- recent dmesg (oom/killed) ---'; "
|
||||
"dmesg -T 2>/dev/null | tail -20 || journalctl --user -n 20 --no-pager 2>/dev/null | tail -20 || true; "
|
||||
"echo '=== end ==='"
|
||||
)
|
||||
|
||||
try:
|
||||
# Open the log file in append mode and let the subprocess inherit.
|
||||
# We use os.O_APPEND so concurrent diagnostics from rapid signals
|
||||
# don't trample each other.
|
||||
fd = os.open(str(log_path), os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o644)
|
||||
except OSError:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Detach from our process group so the subprocess survives even
|
||||
# if systemd kills our cgroup with KillMode=control-group (which
|
||||
# would also reap us anyway, but defense in depth). Without
|
||||
# start_new_session, a SIGKILL on our cgroup takes the diag down
|
||||
# before it can flush.
|
||||
proc = subprocess.Popen(
|
||||
["timeout", f"{timeout_seconds:.0f}", "bash", "-c", script],
|
||||
stdout=fd,
|
||||
stderr=subprocess.STDOUT,
|
||||
stdin=subprocess.DEVNULL,
|
||||
start_new_session=True,
|
||||
close_fds=True,
|
||||
)
|
||||
except (FileNotFoundError, OSError):
|
||||
try:
|
||||
os.close(fd)
|
||||
except OSError:
|
||||
pass
|
||||
return None
|
||||
finally:
|
||||
# Subprocess inherited the fd; we can drop our handle.
|
||||
try:
|
||||
os.close(fd)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
return proc.pid
|
||||
|
||||
|
||||
def format_context_for_log(ctx: Dict[str, Any]) -> str:
|
||||
"""Render a shutdown context dict as a single, scannable log line."""
|
||||
sig = ctx.get("signal", "?")
|
||||
parent = ctx.get("parent") or {}
|
||||
parent_cmd = parent.get("cmdline", "(unknown)")
|
||||
parent_name = parent.get("name") or "?"
|
||||
parent_pid = parent.get("pid") or "?"
|
||||
under_systemd = "yes" if ctx.get("under_systemd") else "no"
|
||||
load = ctx.get("loadavg_1m")
|
||||
load_str = f"{load:.2f}" if isinstance(load, (int, float)) else "?"
|
||||
extras: List[str] = []
|
||||
if ctx.get("takeover_marker") is not None:
|
||||
for_self = ctx.get("takeover_marker_for_self")
|
||||
extras.append(
|
||||
f"takeover_marker_present={'self' if for_self else 'other'}"
|
||||
)
|
||||
if ctx.get("planned_stop_marker") is not None:
|
||||
extras.append("planned_stop_marker_present=yes")
|
||||
if ctx.get("tracer_pid"):
|
||||
extras.append(f"tracer_pid={ctx['tracer_pid']}")
|
||||
extras_str = (" " + " ".join(extras)) if extras else ""
|
||||
# Parent cmdline is the most useful single signal — log it prominently.
|
||||
return (
|
||||
f"signal={sig} "
|
||||
f"under_systemd={under_systemd} "
|
||||
f"parent_pid={parent_pid} "
|
||||
f"parent_name={parent_name} "
|
||||
f"loadavg_1m={load_str}"
|
||||
f"{extras_str} "
|
||||
f"parent_cmdline={parent_cmd!r}"
|
||||
)
|
||||
|
||||
|
||||
def context_as_json(ctx: Dict[str, Any]) -> str:
|
||||
"""JSON-serialise a context dict for structured ingestion. Never raises."""
|
||||
try:
|
||||
return json.dumps(ctx, default=str, sort_keys=True)
|
||||
except (TypeError, ValueError):
|
||||
return "{}"
|
||||
|
||||
|
||||
def check_systemd_timing_alignment(drain_timeout: float) -> Optional[Dict[str, Any]]:
|
||||
"""At startup, sanity-check that systemd's TimeoutStopSec >= drain_timeout.
|
||||
|
||||
When the gateway is run under a stale systemd unit file (e.g. the user
|
||||
upgraded hermes-agent but never re-ran ``hermes setup`` to regenerate
|
||||
the unit), ``TimeoutStopSec`` can be smaller than the configured
|
||||
``restart_drain_timeout``. Result: SIGTERM arrives, the drain starts,
|
||||
and systemd SIGKILLs the cgroup mid-drain — looks like a phantom kill
|
||||
in the journal because the journal only logs ``code=killed status=9``.
|
||||
|
||||
Returns ``None`` when the alignment is fine OR we can't determine it
|
||||
(not running under systemd, ``systemctl`` unavailable, etc.). Returns
|
||||
a dict with ``timeout_stop_sec`` + ``drain_timeout`` + ``mismatch``
|
||||
bool when we have data to report.
|
||||
|
||||
Best-effort. Never raises.
|
||||
"""
|
||||
invocation_id = os.environ.get("INVOCATION_ID")
|
||||
if not invocation_id:
|
||||
return None # Not running under systemd (or at least not directly)
|
||||
|
||||
# Try to identify our unit name and ask systemctl for its config.
|
||||
unit_name: Optional[str] = None
|
||||
try:
|
||||
# /proc/self/cgroup gives us "0::/user.slice/.../hermes-gateway.service"
|
||||
with open("/proc/self/cgroup", encoding="utf-8") as fh:
|
||||
for line in fh:
|
||||
# systemd cgroup line ends with the unit name
|
||||
if ".service" in line:
|
||||
parts = line.strip().split("/")
|
||||
for p in reversed(parts):
|
||||
if p.endswith(".service"):
|
||||
unit_name = p
|
||||
break
|
||||
if unit_name:
|
||||
break
|
||||
except (OSError, FileNotFoundError):
|
||||
pass
|
||||
if not unit_name:
|
||||
return None
|
||||
|
||||
# Query systemctl for TimeoutStopUSec. Use --user OR system depending
|
||||
# on which manager actually owns the unit. Try user first since
|
||||
# that's the common case for hermes.
|
||||
timeout_us: Optional[int] = None
|
||||
for flag in (["--user"], []):
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["systemctl", *flag, "show", unit_name, "--property=TimeoutStopUSec"],
|
||||
capture_output=True, text=True, timeout=2.0,
|
||||
)
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
|
||||
continue
|
||||
if result.returncode != 0:
|
||||
continue
|
||||
# Output: "TimeoutStopUSec=1min 30s" or "TimeoutStopUSec=90000000"
|
||||
for line in result.stdout.splitlines():
|
||||
if line.startswith("TimeoutStopUSec="):
|
||||
value = line.split("=", 1)[1].strip()
|
||||
# Try numeric microseconds first
|
||||
if value.isdigit():
|
||||
timeout_us = int(value)
|
||||
else:
|
||||
timeout_us = _parse_systemd_duration_to_us(value)
|
||||
if timeout_us is not None:
|
||||
break
|
||||
if timeout_us is not None:
|
||||
break
|
||||
|
||||
if timeout_us is None:
|
||||
return None
|
||||
|
||||
timeout_stop_sec = timeout_us / 1_000_000.0
|
||||
# systemd needs headroom for: post-interrupt kill, adapter disconnect,
|
||||
# SessionDB close, file unlinks, etc. 30s matches the unit-template
|
||||
# constant in hermes_cli/gateway.py.
|
||||
headroom = 30.0
|
||||
expected = drain_timeout + headroom
|
||||
return {
|
||||
"unit": unit_name,
|
||||
"timeout_stop_sec": timeout_stop_sec,
|
||||
"drain_timeout": drain_timeout,
|
||||
"expected_min": expected,
|
||||
"mismatch": timeout_stop_sec < expected,
|
||||
}
|
||||
|
||||
|
||||
def _parse_systemd_duration_to_us(raw: str) -> Optional[int]:
|
||||
"""Parse 'TimeoutStopUSec=1min 30s' / '90s' style values to microseconds.
|
||||
|
||||
systemd accepts a wide grammar; we cover the common cases (s, ms, min,
|
||||
h) and return None on anything unexpected. Never raises.
|
||||
"""
|
||||
if not raw:
|
||||
return None
|
||||
units = {
|
||||
"us": 1,
|
||||
"ms": 1_000,
|
||||
"s": 1_000_000,
|
||||
"sec": 1_000_000,
|
||||
"min": 60_000_000,
|
||||
"h": 3_600_000_000,
|
||||
"hr": 3_600_000_000,
|
||||
}
|
||||
total_us = 0
|
||||
token = ""
|
||||
digits = ""
|
||||
for ch in raw + " ":
|
||||
if ch.isdigit() or ch == ".":
|
||||
if token:
|
||||
# End previous unit, start new number
|
||||
multiplier = units.get(token.lower())
|
||||
if multiplier is None or not digits:
|
||||
return None
|
||||
try:
|
||||
total_us += int(float(digits) * multiplier)
|
||||
except ValueError:
|
||||
return None
|
||||
digits = ""
|
||||
token = ""
|
||||
digits += ch
|
||||
elif ch.isalpha():
|
||||
token += ch
|
||||
elif digits and token:
|
||||
multiplier = units.get(token.lower())
|
||||
if multiplier is None:
|
||||
return None
|
||||
try:
|
||||
total_us += int(float(digits) * multiplier)
|
||||
except ValueError:
|
||||
return None
|
||||
digits = ""
|
||||
token = ""
|
||||
elif digits and not token:
|
||||
# Bare number = seconds (rare but valid)
|
||||
try:
|
||||
total_us += int(float(digits) * 1_000_000)
|
||||
except ValueError:
|
||||
return None
|
||||
digits = ""
|
||||
return total_us if total_us > 0 else None
|
||||
@@ -1,229 +0,0 @@
|
||||
"""Per-platform slash command access control.
|
||||
|
||||
This module sits beside the existing per-platform allowlist (``allow_from``)
|
||||
and adds a second axis: of the users who are *allowed to talk to the
|
||||
gateway*, which ones can run *which slash commands*.
|
||||
|
||||
Two lists per platform scope (DM vs group, mirroring ``allow_from`` vs
|
||||
``group_allow_from``):
|
||||
|
||||
- ``allow_admin_from`` — user IDs that get every registered slash
|
||||
command (built-in + plugin-registered).
|
||||
- ``user_allowed_commands`` — slash command names non-admin users may
|
||||
run. Empty / unset → non-admins get no
|
||||
slash commands.
|
||||
|
||||
Backward compatibility:
|
||||
|
||||
If ``allow_admin_from`` is not set for a scope, slash command gating
|
||||
is disabled entirely for that scope. Every allowed user can run every
|
||||
slash command, exactly like before. This means existing installs are
|
||||
unaffected until an operator opts in by listing at least one admin.
|
||||
|
||||
The gate is applied at the slash command dispatch site in
|
||||
``gateway/run.py`` so it covers BOTH built-in and plugin-registered
|
||||
commands via the live registry. Gating slash commands does not affect
|
||||
plain chat — non-admin users can still talk to the agent normally,
|
||||
they just can't trigger commands outside ``user_allowed_commands``.
|
||||
|
||||
Authored as a slimmed-down salvage of PR #4443's permission tiers
|
||||
(co-authored by @ReqX). The full tier system, audit log, usage
|
||||
tracking, rate limiting, and tool filtering from that PR are not
|
||||
included here — only the slash-command access split.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, FrozenSet, Iterable, Optional, Tuple
|
||||
|
||||
|
||||
# Slash commands that MUST stay reachable for any allowed user, even when
|
||||
# slash gating is enabled and the user has no commands listed. Without this
|
||||
# carve-out, a non-admin user has no way to discover what they can or
|
||||
# can't do (``/help``, ``/whoami``) and no way to see what state the agent
|
||||
# is in (``/status``). These mirror the smallest set of read-only commands
|
||||
# we'd hand to a guest. Operators can still narrow this further by writing
|
||||
# their own ``user_allowed_commands`` (this set is only the implicit
|
||||
# fallback floor — anything in ``user_allowed_commands`` overrides it
|
||||
# additively, never restrictively).
|
||||
_ALWAYS_ALLOWED_FOR_USERS: FrozenSet[str] = frozenset({
|
||||
"help",
|
||||
"whoami",
|
||||
})
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SlashAccessPolicy:
|
||||
"""Resolved access policy for a single (platform, scope) pair.
|
||||
|
||||
``scope`` is ``"dm"`` for direct messages and ``"group"`` for groups,
|
||||
channels, threads, and any other multi-user context. The mapping from
|
||||
SessionSource.chat_type → scope happens in ``policy_for_source``.
|
||||
"""
|
||||
|
||||
enabled: bool # gating active for this scope?
|
||||
admin_user_ids: FrozenSet[str]
|
||||
user_allowed_commands: FrozenSet[str]
|
||||
|
||||
def is_admin(self, user_id: Optional[str]) -> bool:
|
||||
if not self.enabled:
|
||||
# Gating disabled → treat every allowed user as admin so
|
||||
# downstream code can keep using ``is_admin`` / ``can_run``
|
||||
# uniformly.
|
||||
return True
|
||||
if not user_id:
|
||||
return False
|
||||
return str(user_id) in self.admin_user_ids
|
||||
|
||||
def can_run(self, user_id: Optional[str], canonical_cmd: str) -> bool:
|
||||
if not self.enabled:
|
||||
return True
|
||||
if self.is_admin(user_id):
|
||||
return True
|
||||
if not canonical_cmd:
|
||||
return False
|
||||
if canonical_cmd in _ALWAYS_ALLOWED_FOR_USERS:
|
||||
return True
|
||||
return canonical_cmd in self.user_allowed_commands
|
||||
|
||||
|
||||
_DM_CHAT_TYPES = frozenset({"dm", "direct", "private", ""})
|
||||
|
||||
|
||||
def _coerce_id_list(raw: Any) -> FrozenSet[str]:
|
||||
"""Normalize a YAML-loaded admin/user list into a frozenset of strings.
|
||||
|
||||
Accepts ``None``, list, tuple, or comma-separated string. Stringifies
|
||||
each entry and strips whitespace; empty entries are dropped.
|
||||
"""
|
||||
if raw is None:
|
||||
return frozenset()
|
||||
if isinstance(raw, (list, tuple, set, frozenset)):
|
||||
items: Iterable[Any] = raw
|
||||
elif isinstance(raw, str):
|
||||
items = (s for s in raw.split(",") if s.strip())
|
||||
else:
|
||||
# single scalar (int user id, etc.)
|
||||
items = (raw,)
|
||||
out: list[str] = []
|
||||
for it in items:
|
||||
s = str(it).strip()
|
||||
if s:
|
||||
out.append(s)
|
||||
return frozenset(out)
|
||||
|
||||
|
||||
def _coerce_command_list(raw: Any) -> FrozenSet[str]:
|
||||
"""Normalize a slash command allowlist.
|
||||
|
||||
Strips leading slashes so YAML can read either ``["help", "status"]``
|
||||
or ``["/help", "/status"]``. Lowercase canonicalization matches how
|
||||
``resolve_command()`` stores names.
|
||||
"""
|
||||
if raw is None:
|
||||
return frozenset()
|
||||
if isinstance(raw, (list, tuple, set, frozenset)):
|
||||
items: Iterable[Any] = raw
|
||||
elif isinstance(raw, str):
|
||||
items = (s for s in raw.split(",") if s.strip())
|
||||
else:
|
||||
items = (raw,)
|
||||
out: list[str] = []
|
||||
for it in items:
|
||||
s = str(it).strip().lstrip("/").lower()
|
||||
if s:
|
||||
out.append(s)
|
||||
return frozenset(out)
|
||||
|
||||
|
||||
def _scope_for_chat_type(chat_type: Optional[str]) -> str:
|
||||
if chat_type and chat_type.lower() in _DM_CHAT_TYPES:
|
||||
return "dm"
|
||||
return "group"
|
||||
|
||||
|
||||
def _platform_extra(platform_config: Any) -> dict:
|
||||
"""Return the ``extra`` dict from a PlatformConfig-like object.
|
||||
|
||||
Defensively handles None and non-PlatformConfig shapes so calling
|
||||
code can stay simple.
|
||||
"""
|
||||
if platform_config is None:
|
||||
return {}
|
||||
extra = getattr(platform_config, "extra", None)
|
||||
if isinstance(extra, dict):
|
||||
return extra
|
||||
if isinstance(platform_config, dict):
|
||||
# Some test harnesses pass dicts directly.
|
||||
return platform_config
|
||||
return {}
|
||||
|
||||
|
||||
def _keys_for_scope(scope: str) -> Tuple[str, str]:
|
||||
"""Return (admin_key, user_cmd_key) names for a scope."""
|
||||
if scope == "group":
|
||||
return ("group_allow_admin_from", "group_user_allowed_commands")
|
||||
return ("allow_admin_from", "user_allowed_commands")
|
||||
|
||||
|
||||
def policy_from_extra(extra: dict, scope: str) -> SlashAccessPolicy:
|
||||
"""Build a policy from a platform's ``extra`` dict for one scope.
|
||||
|
||||
DM scope falls back to group scope keys ONLY for ``user_allowed_commands``
|
||||
when the DM scope didn't specify its own. This keeps the common case
|
||||
(operator wants the same command set DM and group) ergonomic without
|
||||
forcing duplication. Admin lists are NOT cross-scope: an admin in
|
||||
DMs is not implicitly an admin in a group.
|
||||
"""
|
||||
admin_key, cmd_key = _keys_for_scope(scope)
|
||||
admin_ids = _coerce_id_list(extra.get(admin_key))
|
||||
cmds = _coerce_command_list(extra.get(cmd_key))
|
||||
|
||||
if scope == "dm" and not cmds:
|
||||
# DM didn't specify — let group's user_allowed_commands fall through
|
||||
# so operators only need to list it once if it's the same.
|
||||
cmds = _coerce_command_list(extra.get("group_user_allowed_commands"))
|
||||
|
||||
enabled = bool(admin_ids)
|
||||
return SlashAccessPolicy(
|
||||
enabled=enabled,
|
||||
admin_user_ids=admin_ids,
|
||||
user_allowed_commands=cmds,
|
||||
)
|
||||
|
||||
|
||||
def policy_for_source(gateway_config: Any, source: Any) -> SlashAccessPolicy:
|
||||
"""Resolve the access policy for a SessionSource.
|
||||
|
||||
Returns a "disabled" policy (gating off, allow everything) when:
|
||||
- gateway_config is None
|
||||
- the platform has no PlatformConfig
|
||||
- the platform's PlatformConfig has no admin list set for the scope
|
||||
|
||||
Callers should treat the returned policy as authoritative for slash
|
||||
command gating only. It does not gate plain chat messages.
|
||||
"""
|
||||
if gateway_config is None or source is None:
|
||||
return SlashAccessPolicy(
|
||||
enabled=False,
|
||||
admin_user_ids=frozenset(),
|
||||
user_allowed_commands=frozenset(),
|
||||
)
|
||||
platforms = getattr(gateway_config, "platforms", None)
|
||||
platform_config = None
|
||||
if platforms is not None:
|
||||
try:
|
||||
platform_config = platforms.get(source.platform)
|
||||
except Exception:
|
||||
platform_config = None
|
||||
extra = _platform_extra(platform_config)
|
||||
scope = _scope_for_chat_type(getattr(source, "chat_type", None))
|
||||
return policy_from_extra(extra, scope)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SlashAccessPolicy",
|
||||
"policy_from_extra",
|
||||
"policy_for_source",
|
||||
]
|
||||
@@ -1,155 +0,0 @@
|
||||
"""Shared helpers for canonicalising WhatsApp sender identity.
|
||||
|
||||
WhatsApp's bridge can surface the same human under two different JID shapes
|
||||
within a single conversation:
|
||||
|
||||
- LID form: ``999999999999999@lid``
|
||||
- Phone form: ``15551234567@s.whatsapp.net``
|
||||
|
||||
Both the authorisation path (:mod:`gateway.run`) and the session-key path
|
||||
(:mod:`gateway.session`) need to collapse these aliases to a single stable
|
||||
identity. This module is the single source of truth for that resolution so
|
||||
the two paths can never drift apart.
|
||||
|
||||
Public helpers:
|
||||
|
||||
- :func:`normalize_whatsapp_identifier` — strip JID/LID/device/plus syntax
|
||||
down to the bare numeric identifier.
|
||||
- :func:`canonical_whatsapp_identifier` — walk the bridge's
|
||||
``lid-mapping-*.json`` files and return a stable canonical identity
|
||||
across phone/LID variants.
|
||||
- :func:`expand_whatsapp_aliases` — return the full alias set for an
|
||||
identifier. Used by authorisation code that needs to match any known
|
||||
form of a sender against an allow-list.
|
||||
|
||||
Plugins that need per-sender behaviour on WhatsApp (role-based routing,
|
||||
per-contact authorisation, policy gating in a gateway hook) should use
|
||||
``canonical_whatsapp_identifier`` so their bookkeeping lines up with
|
||||
Hermes' own session keys.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from typing import Set
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# WhatsApp JIDs are numeric (or plus-prefixed numeric) with optional
|
||||
# ``@``, ``.`` and ``:`` separators. ``\w`` is pinned to ASCII so
|
||||
# full-width digits / Unicode word chars can't sneak through.
|
||||
_SAFE_IDENTIFIER_RE = re.compile(r"^[A-Za-z0-9@.+\-]+$")
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
def normalize_whatsapp_identifier(value: str) -> str:
|
||||
"""Strip WhatsApp JID/LID syntax down to its stable numeric identifier.
|
||||
|
||||
Accepts any of the identifier shapes the WhatsApp bridge may emit:
|
||||
``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``,
|
||||
``"60123456789@lid"``, or a bare ``"+601****6789"`` / ``"60123456789"``.
|
||||
Returns just the numeric identifier (``"60123456789"``) suitable for
|
||||
equality comparisons.
|
||||
|
||||
Useful for plugins that want to match sender IDs against
|
||||
user-supplied config (phone numbers in ``config.yaml``) without
|
||||
worrying about which variant the bridge happens to deliver.
|
||||
"""
|
||||
return (
|
||||
str(value or "")
|
||||
.strip()
|
||||
.replace("+", "", 1)
|
||||
.split(":", 1)[0]
|
||||
.split("@", 1)[0]
|
||||
)
|
||||
|
||||
|
||||
def expand_whatsapp_aliases(identifier: str) -> Set[str]:
|
||||
"""Resolve WhatsApp phone/LID aliases via bridge session mapping files.
|
||||
|
||||
Returns the set of all identifiers transitively reachable through the
|
||||
bridge's ``$HERMES_HOME/whatsapp/session/lid-mapping-*.json`` files,
|
||||
starting from ``identifier``. The result always includes the
|
||||
normalized input itself, so callers can safely ``in`` check against
|
||||
the return value without a separate fallback branch.
|
||||
|
||||
Returns an empty set if ``identifier`` normalizes to empty.
|
||||
"""
|
||||
normalized = normalize_whatsapp_identifier(identifier)
|
||||
if not normalized:
|
||||
return set()
|
||||
|
||||
session_dir = get_hermes_home() / "whatsapp" / "session"
|
||||
resolved: Set[str] = set()
|
||||
queue = [normalized]
|
||||
|
||||
while queue:
|
||||
current = queue.pop(0)
|
||||
if not current or current in resolved:
|
||||
continue
|
||||
# Defense-in-depth: reject identifiers that could sneak path
|
||||
# separators / traversal segments into the ``lid-mapping-{current}``
|
||||
# filename below. The hardcoded ``lid-mapping-`` prefix already
|
||||
# prevents escape via pathlib's component split (an attacker can't
|
||||
# create ``lid-mapping-..`` as a real directory in session_dir), but
|
||||
# this keeps the identifier space to the characters WhatsApp JIDs
|
||||
# actually use and avoids depending on that filesystem-layout
|
||||
# invariant.
|
||||
if not _SAFE_IDENTIFIER_RE.match(current):
|
||||
continue
|
||||
|
||||
resolved.add(current)
|
||||
for suffix in ("", "_reverse"):
|
||||
mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
|
||||
if not mapping_path.exists():
|
||||
continue
|
||||
try:
|
||||
mapped = normalize_whatsapp_identifier(
|
||||
json.loads(mapping_path.read_text(encoding="utf-8"))
|
||||
)
|
||||
except (OSError, json.JSONDecodeError) as exc:
|
||||
logger.debug("whatsapp_identity: failed to read %s: %s", mapping_path, exc)
|
||||
continue
|
||||
if mapped and mapped not in resolved:
|
||||
queue.append(mapped)
|
||||
|
||||
return resolved
|
||||
|
||||
|
||||
def canonical_whatsapp_identifier(identifier: str) -> str:
|
||||
"""Return a stable WhatsApp sender identity across phone-JID/LID variants.
|
||||
|
||||
WhatsApp may surface the same person under either a phone-format JID
|
||||
(``60123456789@s.whatsapp.net``) or a LID (``1234567890@lid``). This
|
||||
applies to a DM ``chat_id`` *and* to the ``participant_id`` of a
|
||||
member inside a group chat — both represent a user identity, and the
|
||||
bridge may flip between the two for the same human.
|
||||
|
||||
This helper reads the bridge's ``whatsapp/session/lid-mapping-*.json``
|
||||
files, walks the mapping transitively, and picks the shortest
|
||||
(numeric-preferred) alias as the canonical identity.
|
||||
:func:`gateway.session.build_session_key` uses this for both WhatsApp
|
||||
DM chat_ids and WhatsApp group participant_ids, so callers get the
|
||||
same session-key identity Hermes itself uses.
|
||||
|
||||
Plugins that need per-sender behaviour (role-based routing,
|
||||
authorisation, per-contact policy) should use this so their
|
||||
bookkeeping lines up with Hermes' session bookkeeping even when
|
||||
the bridge reshuffles aliases.
|
||||
|
||||
Returns an empty string if ``identifier`` normalizes to empty. If no
|
||||
mapping files exist yet (fresh bridge install), returns the
|
||||
normalized input unchanged.
|
||||
"""
|
||||
normalized = normalize_whatsapp_identifier(identifier)
|
||||
if not normalized:
|
||||
return ""
|
||||
|
||||
# expand_whatsapp_aliases always includes `normalized` itself in the
|
||||
# returned set, so the min() below degrades gracefully to `normalized`
|
||||
# when no lid-mapping files are present.
|
||||
aliases = expand_whatsapp_aliases(normalized)
|
||||
return min(aliases, key=lambda candidate: (len(candidate), candidate))
|
||||
2
hermes
2
hermes
@@ -7,5 +7,5 @@ subcommands such as `gateway`, `cron`, and `doctor`.
|
||||
"""
|
||||
|
||||
if __name__ == "__main__":
|
||||
from hermes_cli.main import main
|
||||
from hermes_agent.cli.main import main
|
||||
main()
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user