Compare commits

..

1 Commits

Author SHA1 Message Date
alt-glitch e5492dc002 chore(docs): remove stale documentation files
Remove outdated docs that no longer reflect the current architecture:
ACP setup guide, Honcho integration spec, OpenClaw migration notes,
pricing architecture design, ink-gateway TUI migration plan,
example skin config, and container CLI review fixes.
2026-04-20 01:34:30 +05:30
1363 changed files with 20393 additions and 223223 deletions
-3
View File
@@ -14,6 +14,3 @@ node_modules
.env
*.md
# Runtime data (bind-mounted at /opt/data; must not leak into build context)
data/
-8
View File
@@ -1,8 +0,0 @@
name: 'Setup Nix'
description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
runs:
using: composite
steps:
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
-3
View File
@@ -53,9 +53,6 @@ jobs:
- name: Extract skill metadata for dashboard
run: python3 website/scripts/extract-skills.py
- name: Regenerate per-skill docs pages + catalogs
run: python3 website/scripts/generate-skill-docs.py
- name: Build skills index (if not already present)
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+2 -15
View File
@@ -3,13 +3,8 @@ name: Docker Build and Publish
on:
push:
branches: [main]
paths:
- '**/*.py'
- 'pyproject.toml'
- 'uv.lock'
- 'Dockerfile'
- 'docker/**'
- '.github/workflows/docker-publish.yml'
pull_request:
branches: [main]
release:
types: [published]
@@ -54,14 +49,6 @@ jobs:
- name: Test image starts
run: |
# The image runs as the hermes user (UID 10000). GitHub Actions
# creates /tmp/hermes-test root-owned by default, which hermes
# can't write to — chown it to match the in-container UID before
# bind-mounting. Real users doing `docker run -v ~/.hermes:...`
# with their own UID hit the same issue and have their own
# remediations (HERMES_UID env var, or chown locally).
mkdir -p /tmp/hermes-test
sudo chown -R 10000:10000 /tmp/hermes-test
docker run --rm \
-v /tmp/hermes-test:/opt/data \
--entrypoint /opt/hermes/docker/entrypoint.sh \
-3
View File
@@ -36,9 +36,6 @@ jobs:
- name: Extract skill metadata for dashboard
run: python3 website/scripts/extract-skills.py
- name: Regenerate per-skill docs pages + catalogs
run: python3 website/scripts/generate-skill-docs.py
- name: Lint docs diagrams
run: npm run lint:diagrams
working-directory: website
-68
View File
@@ -1,68 +0,0 @@
name: Nix Lockfile Check
on:
pull_request:
workflow_dispatch:
permissions:
contents: read
pull-requests: write
concurrency:
group: nix-lockfile-check-${{ github.ref }}
cancel-in-progress: true
jobs:
check:
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: ./.github/actions/nix-setup
- name: Resolve head SHA
id: sha
shell: bash
run: |
FULL="${{ github.event.pull_request.head.sha || github.sha }}"
echo "full=$FULL" >> "$GITHUB_OUTPUT"
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
- name: Check lockfile hashes
id: check
continue-on-error: true
env:
LINK_SHA: ${{ steps.sha.outputs.full }}
run: nix run .#fix-lockfiles -- --check
- name: Post sticky PR comment (stale)
if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
message: |
### ⚠️ npm lockfile hash out of date
Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
${{ steps.check.outputs.report }}
#### Apply the fix
- [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
- Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
- name: Clear sticky PR comment (resolved)
if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
delete: true
- name: Fail if stale
if: steps.check.outputs.stale == 'true'
run: exit 1
-149
View File
@@ -1,149 +0,0 @@
name: Nix Lockfile Fix
on:
workflow_dispatch:
inputs:
pr_number:
description: 'PR number to fix (leave empty to run on the selected branch)'
required: false
type: string
issue_comment:
types: [edited]
permissions:
contents: write
pull-requests: write
concurrency:
group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
cancel-in-progress: false
jobs:
fix:
# Run on manual dispatch OR when a task-list checkbox in the sticky
# lockfile-check comment flips from `[ ]` to `[x]`.
if: |
github.event_name == 'workflow_dispatch' ||
(github.event_name == 'issue_comment'
&& github.event.issue.pull_request != null
&& contains(github.event.comment.body, '[x] **Apply lockfile fix**')
&& !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
runs-on: ubuntu-latest
timeout-minutes: 25
steps:
- name: Authorize & resolve PR
id: resolve
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
with:
script: |
// 1. Verify the actor has write access — applies to both checkbox
// clicks and manual dispatch.
const { data: perm } =
await github.rest.repos.getCollaboratorPermissionLevel({
owner: context.repo.owner,
repo: context.repo.repo,
username: context.actor,
});
if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
core.setFailed(
`${context.actor} lacks write access (has: ${perm.permission})`
);
return;
}
// 2. Resolve which ref to check out.
let prNumber = '';
if (context.eventName === 'issue_comment') {
prNumber = String(context.payload.issue.number);
} else if (context.eventName === 'workflow_dispatch') {
prNumber = context.payload.inputs.pr_number || '';
}
if (!prNumber) {
core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
core.setOutput('repo', context.repo.repo);
core.setOutput('owner', context.repo.owner);
core.setOutput('pr', '');
return;
}
const { data: pr } = await github.rest.pulls.get({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: Number(prNumber),
});
core.setOutput('ref', pr.head.ref);
core.setOutput('repo', pr.head.repo.name);
core.setOutput('owner', pr.head.repo.owner.login);
core.setOutput('pr', String(pr.number));
# Wipe the sticky lockfile-check comment to a "running" state as soon
# as the job is authorized, so the user sees their click was picked up
# before the ~minute of nix build work.
- name: Mark sticky as running
if: steps.resolve.outputs.pr != ''
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
number: ${{ steps.resolve.outputs.pr }}
message: |
### 🔄 Applying lockfile fix…
Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
ref: ${{ steps.resolve.outputs.ref }}
token: ${{ secrets.GITHUB_TOKEN }}
fetch-depth: 0
- uses: ./.github/actions/nix-setup
- name: Apply lockfile hashes
id: apply
run: nix run .#fix-lockfiles -- --apply
- name: Commit & push
if: steps.apply.outputs.changed == 'true'
shell: bash
run: |
set -euo pipefail
git config user.name 'github-actions[bot]'
git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
git add nix/tui.nix nix/web.nix
git commit -m "fix(nix): refresh npm lockfile hashes"
git push
- name: Update sticky (applied)
if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
number: ${{ steps.resolve.outputs.pr }}
message: |
### ✅ Lockfile fix applied
Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
- name: Update sticky (already current)
if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
number: ${{ steps.resolve.outputs.pr }}
message: |
### ✅ Lockfile hashes already current
Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
- name: Update sticky (failed)
if: failure() && steps.resolve.outputs.pr != ''
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
number: ${{ steps.resolve.outputs.pr }}
message: |
### ❌ Lockfile fix failed
See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
+12 -2
View File
@@ -4,6 +4,15 @@ on:
push:
branches: [main]
pull_request:
paths:
- 'flake.nix'
- 'flake.lock'
- 'nix/**'
- 'pyproject.toml'
- 'uv.lock'
- 'hermes_cli/**'
- 'run_agent.py'
- 'acp_adapter/**'
permissions:
contents: read
@@ -20,8 +29,9 @@ jobs:
runs-on: ${{ matrix.os }}
timeout-minutes: 30
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: ./.github/actions/nix-setup
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
- name: Check flake
if: runner.os == 'Linux'
run: nix flake check --print-build-logs
+146 -37
View File
@@ -3,31 +3,14 @@ name: Supply Chain Audit
on:
pull_request:
types: [opened, synchronize, reopened]
paths:
- '**/*.py'
- '**/*.pth'
- '**/setup.py'
- '**/setup.cfg'
- '**/sitecustomize.py'
- '**/usercustomize.py'
- '**/__init__.pth'
permissions:
pull-requests: write
contents: read
# Narrow, high-signal scanner. Only fires on critical indicators of supply
# chain attacks (e.g. the litellm-style payloads). Low-signal heuristics
# (plain base64, plain exec/eval, dependency/Dockerfile/workflow edits,
# Actions version unpinning, outbound POST/PUT) were intentionally
# removed — they fired on nearly every PR and trained reviewers to ignore
# the scanner. Keep this file's checks ruthlessly narrow: if you find
# yourself adding WARNING-tier patterns here again, make a separate
# advisory-only workflow instead.
jobs:
scan:
name: Scan PR for critical supply chain risks
name: Scan PR for supply chain risks
runs-on: ubuntu-latest
steps:
- name: Checkout
@@ -35,7 +18,7 @@ jobs:
with:
fetch-depth: 0
- name: Scan diff for critical patterns
- name: Scan diff for suspicious patterns
id: scan
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -45,19 +28,19 @@ jobs:
BASE="${{ github.event.pull_request.base.sha }}"
HEAD="${{ github.event.pull_request.head.sha }}"
# Added lines only, excluding lockfiles.
# Get the full diff (added lines only)
DIFF=$(git diff "$BASE".."$HEAD" -- . ':!uv.lock' ':!*.lock' ':!package-lock.json' ':!yarn.lock' || true)
FINDINGS=""
CRITICAL=false
# --- .pth files (auto-execute on Python startup) ---
# The exact mechanism used in the litellm supply chain attack:
# https://github.com/BerriAI/litellm/issues/24512
PTH_FILES=$(git diff --name-only "$BASE".."$HEAD" | grep '\.pth$' || true)
if [ -n "$PTH_FILES" ]; then
CRITICAL=true
FINDINGS="${FINDINGS}
### 🚨 CRITICAL: .pth file added or modified
Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required.
Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required. This is the exact mechanism used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512).
**Files:**
\`\`\`
@@ -66,12 +49,13 @@ jobs:
"
fi
# --- base64 decode + exec/eval on the same line (the litellm attack pattern) ---
# --- base64 + exec/eval combo (the litellm attack pattern) ---
B64_EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|decodebytes|urlsafe_b64decode)' | grep -iE 'exec\(|eval\(' | head -10 || true)
if [ -n "$B64_EXEC_HITS" ]; then
CRITICAL=true
FINDINGS="${FINDINGS}
### 🚨 CRITICAL: base64 decode + exec/eval combo
Base64-decoded strings passed directly to exec/eval — the signature of hidden credential-stealing payloads.
This is the exact pattern used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512) — base64-decoded strings passed to exec/eval to hide credential-stealing payloads.
**Matches:**
\`\`\`
@@ -80,12 +64,41 @@ jobs:
"
fi
# --- subprocess with encoded/obfuscated command argument ---
PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|\\x[0-9a-f]{2}|chr\(' | head -10 || true)
# --- base64 decode/encode (alone — legitimate uses exist) ---
B64_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|b64encode|decodebytes|encodebytes|urlsafe_b64decode)|atob\(|btoa\(|Buffer\.from\(.*base64' | head -20 || true)
if [ -n "$B64_HITS" ]; then
FINDINGS="${FINDINGS}
### ⚠️ WARNING: base64 encoding/decoding detected
Base64 has legitimate uses (images, JWT, etc.) but is also commonly used to obfuscate malicious payloads. Verify the usage is appropriate.
**Matches (first 20):**
\`\`\`
${B64_HITS}
\`\`\`
"
fi
# --- exec/eval with string arguments ---
EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E '(exec|eval)\s*\(' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert\|# ' | head -20 || true)
if [ -n "$EXEC_HITS" ]; then
FINDINGS="${FINDINGS}
### ⚠️ WARNING: exec() or eval() usage
Dynamic code execution can hide malicious behavior, especially when combined with base64 or network fetches.
**Matches (first 20):**
\`\`\`
${EXEC_HITS}
\`\`\`
"
fi
# --- subprocess with encoded/obfuscated commands ---
PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|decode|encode|\\x|chr\(' | head -10 || true)
if [ -n "$PROC_HITS" ]; then
CRITICAL=true
FINDINGS="${FINDINGS}
### 🚨 CRITICAL: subprocess with encoded/obfuscated command
Subprocess calls whose command strings are base64- or hex-encoded are a strong indicator of payload execution.
Subprocess calls with encoded arguments are a strong indicator of payload execution.
**Matches:**
\`\`\`
@@ -94,12 +107,25 @@ jobs:
"
fi
# --- Install-hook files (setup.py/sitecustomize/usercustomize/__init__.pth) ---
# These execute during pip install or interpreter startup.
SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(^|/)(setup\.py|setup\.cfg|sitecustomize\.py|usercustomize\.py|__init__\.pth)$' || true)
# --- Network calls to non-standard domains ---
EXFIL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'requests\.(post|put)\(|httpx\.(post|put)\(|urllib\.request\.urlopen' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert' | head -10 || true)
if [ -n "$EXFIL_HITS" ]; then
FINDINGS="${FINDINGS}
### ⚠️ WARNING: Outbound network calls (POST/PUT)
Outbound POST/PUT requests in new code could be data exfiltration. Verify the destination URLs are legitimate.
**Matches (first 10):**
\`\`\`
${EXFIL_HITS}
\`\`\`
"
fi
# --- setup.py / setup.cfg install hooks ---
SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(setup\.py|setup\.cfg|__init__\.pth|sitecustomize\.py|usercustomize\.py)$' || true)
if [ -n "$SETUP_HITS" ]; then
FINDINGS="${FINDINGS}
### 🚨 CRITICAL: Install-hook file added or modified
### ⚠️ WARNING: Install hook files modified
These files can execute code during package installation or interpreter startup.
**Files:**
@@ -109,31 +135,114 @@ jobs:
"
fi
# --- Compile/marshal/pickle (code object injection) ---
MARSHAL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'marshal\.loads|pickle\.loads|compile\(' | grep -v '^\+\s*#' | grep -v 'test_\|re\.compile\|ast\.compile' | head -10 || true)
if [ -n "$MARSHAL_HITS" ]; then
FINDINGS="${FINDINGS}
### ⚠️ WARNING: marshal/pickle/compile usage
These can deserialize or construct executable code objects.
**Matches:**
\`\`\`
${MARSHAL_HITS}
\`\`\`
"
fi
# --- CI/CD workflow files modified ---
WORKFLOW_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '\.github/workflows/.*\.ya?ml$' || true)
if [ -n "$WORKFLOW_HITS" ]; then
FINDINGS="${FINDINGS}
### ⚠️ WARNING: CI/CD workflow files modified
Changes to workflow files can alter build pipelines, inject steps, or modify permissions. Verify no unauthorized actions or secrets access were added.
**Files:**
\`\`\`
${WORKFLOW_HITS}
\`\`\`
"
fi
# --- Dockerfile / container build files modified ---
DOCKER_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -iE '(Dockerfile|\.dockerignore|docker-compose)' || true)
if [ -n "$DOCKER_HITS" ]; then
FINDINGS="${FINDINGS}
### ⚠️ WARNING: Container build files modified
Changes to Dockerfiles or compose files can alter base images, add build steps, or expose ports. Verify base image pins and build commands.
**Files:**
\`\`\`
${DOCKER_HITS}
\`\`\`
"
fi
# --- Dependency manifest files modified ---
DEP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(pyproject\.toml|requirements.*\.txt|package\.json|Gemfile|go\.mod|Cargo\.toml)$' || true)
if [ -n "$DEP_HITS" ]; then
FINDINGS="${FINDINGS}
### ⚠️ WARNING: Dependency manifest files modified
Changes to dependency files can introduce new packages or change version pins. Verify all dependency changes are intentional and from trusted sources.
**Files:**
\`\`\`
${DEP_HITS}
\`\`\`
"
fi
# --- GitHub Actions version unpinning (mutable tags instead of SHAs) ---
ACTIONS_UNPIN=$(echo "$DIFF" | grep -n '^\+' | grep 'uses:' | grep -v '#' | grep -E '@v[0-9]' | head -10 || true)
if [ -n "$ACTIONS_UNPIN" ]; then
FINDINGS="${FINDINGS}
### ⚠️ WARNING: GitHub Actions with mutable version tags
Actions should be pinned to full commit SHAs (not \`@v4\`, \`@v5\`). Mutable tags can be retargeted silently if a maintainer account is compromised.
**Matches:**
\`\`\`
${ACTIONS_UNPIN}
\`\`\`
"
fi
# --- Output results ---
if [ -n "$FINDINGS" ]; then
echo "found=true" >> "$GITHUB_OUTPUT"
if [ "$CRITICAL" = true ]; then
echo "critical=true" >> "$GITHUB_OUTPUT"
else
echo "critical=false" >> "$GITHUB_OUTPUT"
fi
# Write findings to a file (multiline env vars are fragile)
echo "$FINDINGS" > /tmp/findings.md
else
echo "found=false" >> "$GITHUB_OUTPUT"
echo "critical=false" >> "$GITHUB_OUTPUT"
fi
- name: Post critical finding comment
- name: Post warning comment
if: steps.scan.outputs.found == 'true'
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
BODY="## 🚨 CRITICAL Supply Chain Risk Detected
SEVERITY="⚠️ Supply Chain Risk Detected"
if [ "${{ steps.scan.outputs.critical }}" = "true" ]; then
SEVERITY="🚨 CRITICAL Supply Chain Risk Detected"
fi
This PR contains a pattern that has been used in real supply chain attacks. A maintainer must review the flagged code carefully before merging.
BODY="## ${SEVERITY}
This PR contains patterns commonly associated with supply chain attacks. This does **not** mean the PR is malicious — but these patterns require careful human review before merging.
$(cat /tmp/findings.md)
---
*Scanner only fires on high-signal indicators: .pth files, base64+exec/eval combos, subprocess with encoded commands, or install-hook files. Low-signal warnings were removed intentionally — if you're seeing this comment, the finding is worth inspecting.*"
*Automated scan triggered by [supply-chain-audit](/.github/workflows/supply-chain-audit.yml). If this is a false positive, a maintainer can approve after manual review.*"
gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs — GITHUB_TOKEN is read-only)"
- name: Fail on critical findings
if: steps.scan.outputs.found == 'true'
if: steps.scan.outputs.critical == 'true'
run: |
echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment for details."
exit 1
+1 -7
View File
@@ -3,14 +3,8 @@ name: Tests
on:
push:
branches: [main]
paths-ignore:
- '**/*.md'
- 'docs/**'
pull_request:
branches: [main]
paths-ignore:
- '**/*.md'
- 'docs/**'
permissions:
contents: read
@@ -23,7 +17,7 @@ concurrency:
jobs:
test:
runs-on: ubuntu-latest
timeout-minutes: 20
timeout-minutes: 10
steps:
- name: Checkout code
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
-7
View File
@@ -1,4 +1,3 @@
.DS_Store
/venv/
/_pycache/
*.pyc*
@@ -55,11 +54,6 @@ environments/benchmarks/evals/
# Web UI build output
hermes_cli/web_dist/
# Web UI assets — synced from @nous-research/ui at build time via
# `npm run sync-assets` (see web/package.json).
web/public/fonts/
web/public/ds-assets/
# Release script temp files
.release_notes.md
mini-swe-agent/
@@ -69,4 +63,3 @@ mini-swe-agent/
.nix-stamps/
result
website/static/api/skills-index.json
models-dev-upstream/
+77 -273
View File
@@ -5,61 +5,78 @@ Instructions for AI coding assistants and developers working on the hermes-agent
## Development Environment
```bash
# Prefer .venv; fall back to venv if that's what your checkout has.
source .venv/bin/activate # or: source venv/bin/activate
source venv/bin/activate # ALWAYS activate before running Python
```
`scripts/run_tests.sh` probes `.venv` first, then `venv`, then
`$HOME/.hermes/hermes-agent/venv` (for worktrees that share a venv with the
main checkout).
## Project Structure
File counts shift constantly — don't treat the tree below as exhaustive.
The canonical source is the filesystem. The notes call out the load-bearing
entry points you'll actually edit.
```
hermes-agent/
├── run_agent.py # AIAgent class — core conversation loop (~12k LOC)
├── run_agent.py # AIAgent class — core conversation loop
├── model_tools.py # Tool orchestration, discover_builtin_tools(), handle_function_call()
├── toolsets.py # Toolset definitions, _HERMES_CORE_TOOLS list
├── cli.py # HermesCLI class — interactive CLI orchestrator (~11k LOC)
├── cli.py # HermesCLI class — interactive CLI orchestrator
├── hermes_state.py # SessionDB — SQLite session store (FTS5 search)
├── hermes_constants.py # get_hermes_home(), display_hermes_home() — profile-aware paths
├── hermes_logging.py # setup_logging() — agent.log / errors.log / gateway.log (profile-aware)
├── batch_runner.py # Parallel batch processing
├── agent/ # Agent internals (provider adapters, memory, caching, compression, etc.)
├── hermes_cli/ # CLI subcommands, setup wizard, plugins loader, skin engine
├── tools/ # Tool implementations — auto-discovered via tools/registry.py
├── agent/ # Agent internals
│ ├── prompt_builder.py # System prompt assembly
│ ├── context_compressor.py # Auto context compression
│ ├── prompt_caching.py # Anthropic prompt caching
│ ├── auxiliary_client.py # Auxiliary LLM client (vision, summarization)
│ ├── model_metadata.py # Model context lengths, token estimation
│ ├── models_dev.py # models.dev registry integration (provider-aware context)
│ ├── display.py # KawaiiSpinner, tool preview formatting
│ ├── skill_commands.py # Skill slash commands (shared CLI/gateway)
│ └── trajectory.py # Trajectory saving helpers
├── hermes_cli/ # CLI subcommands and setup
│ ├── main.py # Entry point — all `hermes` subcommands
│ ├── config.py # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
│ ├── commands.py # Slash command definitions + SlashCommandCompleter
│ ├── callbacks.py # Terminal callbacks (clarify, sudo, approval)
│ ├── setup.py # Interactive setup wizard
│ ├── skin_engine.py # Skin/theme engine — CLI visual customization
│ ├── skills_config.py # `hermes skills` — enable/disable skills per platform
│ ├── tools_config.py # `hermes tools` — enable/disable tools per platform
│ ├── skills_hub.py # `/skills` slash command (search, browse, install)
│ ├── models.py # Model catalog, provider model lists
│ ├── model_switch.py # Shared /model switch pipeline (CLI + gateway)
│ └── auth.py # Provider credential resolution
├── tools/ # Tool implementations (one file per tool)
│ ├── registry.py # Central tool registry (schemas, handlers, dispatch)
│ ├── approval.py # Dangerous command detection
│ ├── terminal_tool.py # Terminal orchestration
│ ├── process_registry.py # Background process management
│ ├── file_tools.py # File read/write/search/patch
│ ├── web_tools.py # Web search/extract (Parallel + Firecrawl)
│ ├── browser_tool.py # Browserbase browser automation
│ ├── code_execution_tool.py # execute_code sandbox
│ ├── delegate_tool.py # Subagent delegation
│ ├── mcp_tool.py # MCP client (~1050 lines)
│ └── environments/ # Terminal backends (local, docker, ssh, modal, daytona, singularity)
├── gateway/ # Messaging gateway — run.py + session.py + platforms/
│ ├── platforms/ # Adapter per platform (telegram, discord, slack, whatsapp,
│ # homeassistant, signal, matrix, mattermost, email, sms,
│ # dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
│ │ # webhook, api_server, ...). See ADDING_A_PLATFORM.md.
│ └── builtin_hooks/ # Always-registered gateway hooks (boot-md, ...)
├── plugins/ # Plugin system (see "Plugins" section below)
│ ├── memory/ # Memory-provider plugins (honcho, mem0, supermemory, ...)
│ ├── context_engine/ # Context-engine plugins
│ └── <others>/ # Dashboard, image-gen, disk-cleanup, examples, ...
├── optional-skills/ # Heavier/niche skills shipped but NOT active by default
├── skills/ # Built-in skills bundled with the repo
├── gateway/ # Messaging platform gateway
│ ├── run.py # Main loop, slash commands, message dispatch
├── session.py # SessionStore — conversation persistence
└── platforms/ # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
├── ui-tui/ # Ink (React) terminal UI — `hermes --tui`
── src/ # entry.tsx, app.tsx, gatewayClient.ts + app/components/hooks/lib
── src/entry.tsx # TTY gate + render()
│ ├── src/app.tsx # Main state machine and UI
│ ├── src/gatewayClient.ts # Child process + JSON-RPC bridge
│ ├── src/app/ # Decomposed app logic (event handler, slash handler, stores, hooks)
│ ├── src/components/ # Ink components (branding, markdown, prompts, pickers, etc.)
│ ├── src/hooks/ # useCompletion, useInputHistory, useQueue, useVirtualHistory
│ └── src/lib/ # Pure helpers (history, osc52, text, rpc, messages)
├── tui_gateway/ # Python JSON-RPC backend for the TUI
│ ├── entry.py # stdio entrypoint
│ ├── server.py # RPC handlers and session logic
│ ├── render.py # Optional rich/ANSI bridge
│ └── slash_worker.py # Persistent HermesCLI subprocess for slash commands
├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration)
├── cron/ # Scheduler jobs.py, scheduler.py
├── cron/ # Scheduler (jobs.py, scheduler.py)
├── environments/ # RL training environments (Atropos)
├── scripts/ # run_tests.sh, release.py, auxiliary scripts
── website/ # Docusaurus docs site
└── tests/ # Pytest suite (~15k tests across ~700 files as of Apr 2026)
├── tests/ # Pytest suite (~3000 tests)
── batch_runner.py # Parallel batch processing
```
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
**Logs:** `~/.hermes/logs/``agent.log` (INFO+), `errors.log` (WARNING+),
`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
Browse with `hermes logs [--follow] [--level ...] [--session ...]`.
**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)
## File Dependency Chain
@@ -77,30 +94,20 @@ run_agent.py, cli.py, batch_runner.py, environments/
## AIAgent Class (run_agent.py)
The real `AIAgent.__init__` takes ~60 parameters (credentials, routing, callbacks,
session context, budget, credential pool, etc.). The signature below is the
minimum subset you'll usually touch — read `run_agent.py` for the full list.
```python
class AIAgent:
def __init__(self,
base_url: str = None,
api_key: str = None,
provider: str = None,
api_mode: str = None, # "chat_completions" | "codex_responses" | ...
model: str = "", # empty → resolved from config/provider later
max_iterations: int = 90, # tool-calling iterations (shared with subagents)
model: str = "anthropic/claude-opus-4.6",
max_iterations: int = 90,
enabled_toolsets: list = None,
disabled_toolsets: list = None,
quiet_mode: bool = False,
save_trajectories: bool = False,
platform: str = None, # "cli", "telegram", etc.
platform: str = None, # "cli", "telegram", etc.
session_id: str = None,
skip_context_files: bool = False,
skip_memory: bool = False,
credential_pool=None,
# ... plus callbacks, thread/user/chat IDs, iteration_budget, fallback_model,
# checkpoints config, prefill_messages, service_tier, reasoning_config, etc.
# ... plus provider, api_mode, callbacks, routing params
): ...
def chat(self, message: str) -> str:
@@ -113,13 +120,10 @@ class AIAgent:
### Agent Loop
The core loop is inside `run_conversation()` — entirely synchronous, with
interrupt checks, budget tracking, and a one-turn grace call:
The core loop is inside `run_conversation()` — entirely synchronous:
```python
while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) \
or self._budget_grace_call:
if self._interrupt_requested: break
while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas)
if response.tool_calls:
for tool_call in response.tool_calls:
@@ -130,8 +134,7 @@ while (api_call_count < self.max_iterations and self.iteration_budget.remaining
return response.content
```
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`.
Reasoning content is stored in `assistant_msg["reasoning"]`.
Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`.
---
@@ -240,19 +243,6 @@ npm run fmt # prettier
npm test # vitest
```
### TUI in the Dashboard (`hermes dashboard` → `/chat`)
The dashboard embeds the real `hermes --tui`**not** a rewrite. See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
- Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
**Do not re-implement the primary chat experience in React.** The main transcript, composer/input flow (including slash-command behavior), and PTY-backed terminal belong to the embedded `hermes --tui` — anything new you add to Ink shows up in the dashboard automatically. If you find yourself rebuilding the transcript or composer for the dashboard, stop and extend Ink instead.
**Structured React UI around the TUI is allowed when it is not a second chat surface.** Sidebar widgets, inspectors, summaries, status panels, and similar supporting views (e.g. `ChatSidebar`, `ModelPickerDialog`, `ToolCall`) are fine when they complement the embedded TUI rather than replacing the transcript / composer / terminal. Keep their state independent of the PTY child's session and surface their failures non-destructively so the terminal pane keeps working unimpaired.
---
## Adding New Tools
@@ -290,7 +280,7 @@ The registry handles schema collection, dispatch, availability checking, and err
**State files**: If a tool stores persistent state (caches, logs, checkpoints), use `get_hermes_home()` for the base directory — never `Path.home() / ".hermes"`. This ensures each profile gets its own state.
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `tools/todo_tool.py` for the pattern.
**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern.
---
@@ -298,13 +288,9 @@ The registry handles schema collection, dispatch, availability checking, and err
### config.yaml options:
1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
2. Bump `_config_version` (check the current value at the top of `DEFAULT_CONFIG`)
ONLY if you need to actively migrate/transform existing user config
(renaming keys, changing structure). Adding a new key to an existing
section is handled automatically by the deep-merge and does NOT require
a version bump.
2. Bump `_config_version` (currently 5) to trigger migration for existing users
### .env variables (SECRETS ONLY — API keys, tokens, passwords):
### .env variables:
1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
```python
"NEW_API_KEY": {
@@ -316,29 +302,13 @@ The registry handles schema collection, dispatch, availability checking, and err
},
```
Non-secret settings (timeouts, thresholds, feature flags, paths, display
preferences) belong in `config.yaml`, not `.env`. If internal code needs an
env var mirror for backward compatibility, bridge it from `config.yaml` to
the env var in code (see `gateway_timeout`, `terminal.cwd``TERMINAL_CWD`).
### Config loaders (three paths — know which one you're in):
### Config loaders (two separate systems):
| Loader | Used by | Location |
|--------|---------|----------|
| `load_cli_config()` | CLI mode | `cli.py` — merges CLI-specific defaults + user YAML |
| `load_config()` | `hermes tools`, `hermes setup`, most CLI subcommands | `hermes_cli/config.py` — merges `DEFAULT_CONFIG` + user YAML |
| Direct YAML load | Gateway runtime | `gateway/run.py` + `gateway/config.py` — reads user YAML raw |
If you add a new key and the CLI sees it but the gateway doesn't (or vice
versa), you're on the wrong loader. Check `DEFAULT_CONFIG` coverage.
### Working directory:
- **CLI** — uses the process's current directory (`os.getcwd()`).
- **Messaging** — uses `terminal.cwd` from `config.yaml`. The gateway bridges this
to the `TERMINAL_CWD` env var for child tools. **`MESSAGING_CWD` has been
removed** — the config loader prints a deprecation warning if it's set in
`.env`. Same for `TERMINAL_CWD` in `.env`; the canonical setting is
`terminal.cwd` in `config.yaml`.
| `load_cli_config()` | CLI mode | `cli.py` |
| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` |
| Direct YAML load | Gateway | `gateway/run.py` |
---
@@ -431,95 +401,7 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml.
---
## Plugins
Hermes has two plugin surfaces. Both live under `plugins/` in the repo so
repo-shipped plugins can be discovered alongside user-installed ones in
`~/.hermes/plugins/` and pip-installed entry points.
### General plugins (`hermes_cli/plugins.py` + `plugins/<name>/`)
`PluginManager` discovers plugins from `~/.hermes/plugins/`, `./.hermes/plugins/`,
and pip entry points. Each plugin exposes a `register(ctx)` function that
can:
- Register Python-callback lifecycle hooks:
`pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`,
`on_session_start`, `on_session_end`
- Register new tools via `ctx.register_tool(...)`
- Register CLI subcommands via `ctx.register_cli_command(...)` — the
plugin's argparse tree is wired into `hermes` at startup so
`hermes <pluginname> <subcmd>` works with no change to `main.py`
Hooks are invoked from `model_tools.py` (pre/post tool) and `run_agent.py`
(lifecycle). **Discovery timing pitfall:** `discover_plugins()` only runs
as a side effect of importing `model_tools.py`. Code paths that read plugin
state without importing `model_tools.py` first must call `discover_plugins()`
explicitly (it's idempotent).
### Memory-provider plugins (`plugins/memory/<name>/`)
Separate discovery system for pluggable memory backends. Current built-in
providers include **honcho, mem0, supermemory, byterover, hindsight,
holographic, openviking, retaindb**.
Each provider implements the `MemoryProvider` ABC (see `agent/memory_provider.py`)
and is orchestrated by `agent/memory_manager.py`. Lifecycle hooks include
`sync_turn(turn_messages)`, `prefetch(query)`, `shutdown()`, and optional
`post_setup(hermes_home, config)` for setup-wizard integration.
**CLI commands via `plugins/memory/<name>/cli.py`:** if a memory plugin
defines `register_cli(subparser)`, `discover_plugin_cli_commands()` finds
it at argparse setup time and wires it into `hermes <plugin>`. The
framework only exposes CLI commands for the **currently active** memory
provider (read from `memory.provider` in config.yaml), so disabled
providers don't clutter `hermes --help`.
**Rule (Teknium, May 2026):** plugins MUST NOT modify core files
(`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.).
If a plugin needs a capability the framework doesn't expose, expand the
generic plugin surface (new hook, new ctx method) — never hardcode
plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
honcho argparse from `main.py` for exactly this reason.
### Dashboard / context-engine / image-gen plugin directories
`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
etc. follow the same pattern (ABC + orchestrator + per-plugin directory).
Context engines plug into `agent/context_engine.py`; image-gen providers
into `agent/image_gen_provider.py`.
---
## Skills
Two parallel surfaces:
- **`skills/`** — built-in skills shipped and loadable by default.
Organized by category directories (e.g. `skills/github/`, `skills/mlops/`).
- **`optional-skills/`** — heavier or niche skills shipped with the repo but
NOT active by default. Installed explicitly via
`hermes skills install official/<category>/<skill>`. Adapter lives in
`tools/skills_hub.py` (`OptionalSkillSource`). Categories include
`autonomous-ai-agents`, `blockchain`, `communication`, `creative`,
`devops`, `email`, `health`, `mcp`, `migration`, `mlops`, `productivity`,
`research`, `security`, `web-development`.
When reviewing skill PRs, check which directory they target — heavy-dep or
niche skills belong in `optional-skills/`.
### SKILL.md frontmatter
Standard fields: `name`, `description`, `version`, `platforms`
(OS-gating list: `[macos]`, `[linux, macos]`, ...),
`metadata.hermes.tags`, `metadata.hermes.category`,
`metadata.hermes.config` (config.yaml settings the skill needs — stored
under `skills.config.<key>`, prompted during setup, injected at load time).
---
## Important Policies
### Prompt Caching Must Not Break
Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:**
@@ -529,10 +411,9 @@ Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT i
Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression.
Slash commands that mutate system-prompt state (skills, tools, memory, etc.)
must be **cache-aware**: default to deferred invalidation (change takes
effect next session), with an opt-in `--now` flag for immediate
invalidation. See `/skills install --now` for the canonical pattern.
### Working Directory Behavior
- **CLI**: Uses current directory (`.``os.getcwd()`)
- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory)
### Background Process Notifications (Gateway)
@@ -554,7 +435,7 @@ Hermes supports **profiles** — multiple fully isolated instances, each with it
`HERMES_HOME` directory (config, API keys, memory, sessions, skills, gateway, etc.).
The core mechanism: `_apply_profile_override()` in `hermes_cli/main.py` sets
`HERMES_HOME` before any module imports. All `get_hermes_home()` references
`HERMES_HOME` before any module imports. All 119+ references to `get_hermes_home()`
automatically scope to the active profile.
### Rules for profile-safe code
@@ -611,12 +492,8 @@ Use `get_hermes_home()` from `hermes_constants` for code paths. Use `display_her
for user-facing print/log messages. Hardcoding `~/.hermes` breaks profiles — each profile
has its own `HERMES_HOME` directory. This was the source of 5 bugs fixed in PR #3575.
### DO NOT introduce new `simple_term_menu` usage
Existing call sites in `hermes_cli/main.py` remain for legacy fallback only;
the preferred UI is curses (stdlib) because `simple_term_menu` has
ghost-duplication rendering bugs in tmux/iTerm2 with arrow keys. New
interactive menus must use `hermes_cli/curses_ui.py` — see
`hermes_cli/tools_config.py` for the canonical pattern.
### DO NOT use `simple_term_menu` for interactive menus
Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern.
### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`.
@@ -627,30 +504,6 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p
### DO NOT hardcode cross-tool references in schema descriptions
Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.
### The gateway has TWO message guards — both must bypass approval/control commands
When an agent is running, messages pass through two sequential guards:
(1) **base adapter** (`gateway/platforms/base.py`) queues messages in
`_pending_messages` when `session_key in self._active_sessions`, and
(2) **gateway runner** (`gateway/run.py`) intercepts `/stop`, `/new`,
`/queue`, `/status`, `/approve`, `/deny` before they reach
`running_agent.interrupt()`. Any new command that must reach the runner
while the agent is blocked (e.g. approval prompts) MUST bypass BOTH
guards and be dispatched inline, not via `_process_message_background()`
(which races session lifecycle).
### Squash merges from stale branches silently revert recent fixes
Before squash-merging a PR, ensure the branch is up to date with `main`
(`git fetch origin main && git reset --hard origin/main` in the worktree,
then re-apply the PR's commits). A stale branch's version of an unrelated
file will silently overwrite recent fixes on main when squashed. Verify
with `git diff HEAD~1..HEAD` after merging — unexpected deletions are a
red flag.
### Don't wire in dead code without E2E validation
Unused code that was never shipped was dead for a reason. Before wiring an
unused module into a live code path, E2E test the real resolution chain
with actual imports (not mocks) against a temp `HERMES_HOME`.
### Tests must not write to `~/.hermes/`
The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.
@@ -706,59 +559,10 @@ If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
pytest directly), at minimum activate the venv and pass `-n 4`:
```bash
source .venv/bin/activate # or: source venv/bin/activate
source venv/bin/activate
python -m pytest tests/ -q -n 4
```
Worker count above 4 will surface test-ordering flakes that CI never sees.
Always run the full suite before pushing changes.
### Don't write change-detector tests
A test is a **change-detector** if it fails whenever data that is **expected
to change** gets updated — model catalogs, config version numbers,
enumeration counts, hardcoded lists of provider models. These tests add no
behavioral coverage; they just guarantee that routine source updates break
CI and cost engineering time to "fix."
**Do not write:**
```python
# catalog snapshot — breaks every model release
assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"]
assert "MiniMax-M2.7" in models
# config version literal — breaks every schema bump
assert DEFAULT_CONFIG["_config_version"] == 21
# enumeration count — breaks every time a skill/provider is added
assert len(_PROVIDER_MODELS["huggingface"]) == 8
```
**Do write:**
```python
# behavior: does the catalog plumbing work at all?
assert "gemini" in _PROVIDER_MODELS
assert len(_PROVIDER_MODELS["gemini"]) >= 1
# behavior: does migration bump the user's version to current latest?
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
# invariant: no plan-only model leaks into the legacy list
assert not (set(moonshot_models) & coding_plan_only_models)
# invariant: every model in the catalog has a context-length entry
for m in _PROVIDER_MODELS["huggingface"]:
assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER
```
The rule: if the test reads like a snapshot of current data, delete it. If
it reads like a contract about how two pieces of data must relate, keep it.
When a PR adds a new provider/model and you want a test, make the test
assert the relationship (e.g. "catalog entries all have context lengths"),
not the specific names.
Reviewers should reject new change-detector tests; authors should convert
them into invariants before re-requesting review.
+6 -6
View File
@@ -9,7 +9,7 @@ Thank you for contributing to Hermes Agent! This guide covers everything you nee
We value contributions in this order:
1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
2. **Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. We want Hermes to work everywhere.
2. **Cross-platform compatibility** Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere.
3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
4. **Performance and robustness** — retry logic, error handling, graceful degradation.
5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
@@ -55,10 +55,10 @@ If your skill is specialized, community-contributed, or niche, it's better suite
| Requirement | Notes |
|-------------|-------|
| **Git** | With `--recurse-submodules` support, and the `git-lfs` extension installed |
| **Git** | With `--recurse-submodules` support |
| **Python 3.11+** | uv will install it if missing |
| **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
| **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |
| **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge |
### Clone and install
@@ -88,7 +88,7 @@ cp cli-config.yaml.example ~/.hermes/config.yaml
touch ~/.hermes/.env
# Add at minimum an LLM provider key:
echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
```
### Run
@@ -515,7 +515,7 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl
## Cross-Platform Compatibility
Hermes runs on Linux, macOS, and WSL2 on Windows. When writing code that touches the OS:
Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS:
### Critical rules
@@ -597,7 +597,7 @@ refactor/description # Code restructuring
1. **Run tests**: `pytest tests/ -v`
2. **Test manually**: Run `hermes` and exercise the code path you changed
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2
3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS
4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.
### PR description
+8 -19
View File
@@ -10,11 +10,9 @@ ENV PYTHONUNBUFFERED=1
ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
# Install system dependencies in one layer, clear APT cache
# tini reaps orphaned zombie processes (MCP stdio subprocesses, git, bun, etc.)
# that would otherwise accumulate when hermes runs as PID 1. See #15012.
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git && \
rm -rf /var/lib/apt/lists/*
# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -29,39 +27,30 @@ WORKDIR /opt/hermes
# Copy only package manifests first so npm install + Playwright are cached
# unless the lockfiles themselves change.
COPY package.json package-lock.json ./
COPY scripts/whatsapp-bridge/package.json scripts/whatsapp-bridge/package-lock.json scripts/whatsapp-bridge/
COPY web/package.json web/package-lock.json web/
COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
COPY ui-tui/packages/hermes-ink/package.json ui-tui/packages/hermes-ink/package-lock.json ui-tui/packages/hermes-ink/
RUN npm install --prefer-offline --no-audit && \
npx playwright install --with-deps chromium --only-shell && \
(cd scripts/whatsapp-bridge && npm install --prefer-offline --no-audit) && \
(cd web && npm install --prefer-offline --no-audit) && \
(cd ui-tui && npm install --prefer-offline --no-audit) && \
npm cache clean --force
# ---------- Source code ----------
# .dockerignore excludes node_modules, so the installs above survive.
COPY --chown=hermes:hermes . .
# Build browser dashboard and terminal UI assets.
RUN cd web && npm run build && \
cd ../ui-tui && npm run build
# ---------- Permissions ----------
# Make install dir world-readable so any HERMES_UID can read it at runtime.
# The venv needs to be traversable too.
USER root
RUN chmod -R a+rX /opt/hermes
# Start as root so the entrypoint can usermod/groupmod + gosu.
# If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000).
# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
RUN cd web && npm run build
# ---------- Python virtualenv ----------
RUN chown hermes:hermes /opt/hermes
USER hermes
RUN uv venv && \
uv pip install --no-cache-dir -e ".[all]"
# ---------- Runtime ----------
ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
ENV HERMES_HOME=/opt/data
ENV PATH="/opt/data/.local/bin:${PATH}"
VOLUME [ "/opt/data" ]
ENTRYPOINT [ "/usr/bin/tini", "-g", "--", "/opt/hermes/docker/entrypoint.sh" ]
ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
+8 -3
View File
@@ -76,7 +76,7 @@ Hermes has two entry points: start the terminal UI with `hermes`, or run the gat
| Set a personality | `/personality [name]` | `/personality [name]` |
| Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` |
| Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` |
| Browse skills | `/skills` or `/<skill-name>` | `/<skill-name>` |
| Browse skills | `/skills` or `/<skill-name>` | `/skills` or `/<skill-name>` |
| Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message |
| Platform-specific status | `/platforms` | `/status`, `/sethome` |
@@ -157,10 +157,14 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
uv venv venv --python 3.11
source venv/bin/activate
uv pip install -e ".[all,dev]"
scripts/run_tests.sh
python -m pytest tests/ -q
```
> **RL Training (optional):** The RL/Atropos integration (`environments/`) ships via the `atroposlib` and `tinker` dependencies pulled in by `.[all,dev]` — no submodule setup required.
> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
> ```bash
> git submodule update --init tinker-atropos
> uv pip install -e "./tinker-atropos"
> ```
---
@@ -169,6 +173,7 @@ scripts/run_tests.sh
- 💬 [Discord](https://discord.gg/NousResearch)
- 📚 [Skills Hub](https://agentskills.io)
- 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
- 💡 [Discussions](https://github.com/NousResearch/hermes-agent/discussions)
- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.
---
-453
View File
@@ -1,453 +0,0 @@
# Hermes Agent v0.11.0 (v2026.4.23)
**Release Date:** April 23, 2026
**Since v0.9.0:** 1,556 commits · 761 merged PRs · 1,314 files changed · 224,174 insertions · 29 community contributors (290 including co-authors)
> The Interface release — a full React/Ink rewrite of the interactive CLI, a pluggable transport architecture underneath every provider, native AWS Bedrock support, five new inference paths, a 17th messaging platform (QQBot), a dramatically expanded plugin surface, and GPT-5.5 via Codex OAuth.
This release also folds in all the highlights deferred from v0.10.0 (which shipped only the Nous Tool Gateway) — so it covers roughly two weeks of work across the whole stack.
---
## ✨ Highlights
- **New Ink-based TUI** — `hermes --tui` is now a full React/Ink rewrite of the interactive CLI, with a Python JSON-RPC backend (`tui_gateway`). Sticky composer, live streaming with OSC-52 clipboard support, stable picker keys, status bar with per-turn stopwatch and git branch, `/clear` confirm, light-theme preset, and a subagent spawn observability overlay. ~310 commits to `ui-tui/` + `tui_gateway/`. (@OutThisLife + Teknium)
- **Transport ABC + Native AWS Bedrock** — Format conversion and HTTP transport were extracted from `run_agent.py` into a pluggable `agent/transports/` layer. `AnthropicTransport`, `ChatCompletionsTransport`, `ResponsesApiTransport`, and `BedrockTransport` each own their own format conversion and API shape. Native AWS Bedrock support via the Converse API ships on top of the new abstraction. ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549), [#13347](https://github.com/NousResearch/hermes-agent/pull/13347), [#13366](https://github.com/NousResearch/hermes-agent/pull/13366), [#13430](https://github.com/NousResearch/hermes-agent/pull/13430), [#13805](https://github.com/NousResearch/hermes-agent/pull/13805), [#13814](https://github.com/NousResearch/hermes-agent/pull/13814) — @kshitijk4poor + Teknium)
- **Five new inference paths** — Native NVIDIA NIM ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774)), Arcee AI ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276)), Step Plan ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893)), Google Gemini CLI OAuth ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270)), and Vercel ai-gateway with pricing + dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223) — @jerilynzheng). Plus Gemini routed through the native AI Studio API for better performance ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674)).
- **GPT-5.5 over Codex OAuth** — OpenAI's new GPT-5.5 reasoning model is now available through your ChatGPT Codex OAuth, with live model discovery wired into the model picker so new OpenAI releases show up without catalog updates. ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
- **QQBot — 17th supported platform** — Native QQBot adapter via QQ Official API v2, with QR scan-to-configure setup wizard, streaming cursor, emoji reactions, and DM/group policy gating that matches WeCom/Weixin parity. ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
- **Plugin surface expanded** — Plugins can now register slash commands (`register_command`), dispatch tools directly (`dispatch_tool`), block tool execution from hooks (`pre_tool_call` can veto), rewrite tool results (`transform_tool_result`), transform terminal output (`transform_terminal_output`), ship image_gen backends, and add custom dashboard tabs. The bundled disk-cleanup plugin is opt-in by default as a reference implementation. ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377), [#10626](https://github.com/NousResearch/hermes-agent/pull/10626), [#10763](https://github.com/NousResearch/hermes-agent/pull/10763), [#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#12929](https://github.com/NousResearch/hermes-agent/pull/12929), [#12944](https://github.com/NousResearch/hermes-agent/pull/12944), [#12972](https://github.com/NousResearch/hermes-agent/pull/12972), [#13799](https://github.com/NousResearch/hermes-agent/pull/13799), [#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
- **`/steer` — mid-run agent nudges** — `/steer <prompt>` injects a note that the running agent sees after its next tool call, without interrupting the turn or breaking prompt cache. For when you want to course-correct an agent in-flight. ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
- **Shell hooks** — Wire any shell script as a Hermes lifecycle hook (pre_tool_call, post_tool_call, on_session_start, etc.) without writing a Python plugin. ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
- **Webhook direct-delivery mode** — Webhook subscriptions can now forward payloads straight to a platform chat without going through the agent — zero-LLM push notifications for alerting, uptime checks, and event streams. ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
- **Smarter delegation** — Subagents now have an explicit `orchestrator` role that can spawn their own workers, with configurable `max_spawn_depth` (default flat). Concurrent sibling subagents share filesystem state through a file-coordination layer so they don't clobber each other's edits. ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691), [#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
- **Auxiliary models — configurable UI + main-model-first** — `hermes model` has a dedicated "Configure auxiliary models" screen for per-task overrides (compression, vision, session_search, title_generation). `auto` routing now defaults to the main model for side tasks across all users (previously aggregator users were silently routed to a cheap provider-side default). ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891), [#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
- **Dashboard plugin system + live theme switching** — The web dashboard is now extensible. Third-party plugins can add custom tabs, widgets, and views without forking. Paired with a live-switching theme system — themes now control colors, fonts, layout, and density — so users can hot-swap the dashboard look without a reload. Same theming discipline the CLI has, now on the web. ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#10687](https://github.com/NousResearch/hermes-agent/pull/10687), [#14725](https://github.com/NousResearch/hermes-agent/pull/14725))
- **Dashboard polish** — i18n (English + Chinese), react-router sidebar layout, mobile-responsive, Vercel deployment, real per-session API call tracking, and one-click update + gateway restart buttons. ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), [#9370](https://github.com/NousResearch/hermes-agent/pull/9370), [#9453](https://github.com/NousResearch/hermes-agent/pull/9453), [#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#13526](https://github.com/NousResearch/hermes-agent/pull/13526), [#14004](https://github.com/NousResearch/hermes-agent/pull/14004) — @austinpickett + @DeployFaith + Teknium)
---
## 🏗️ Core Agent & Architecture
### Transport Layer (NEW)
- **Transport ABC** abstracts format conversion and HTTP transport from `run_agent.py` into `agent/transports/` ([#13347](https://github.com/NousResearch/hermes-agent/pull/13347))
- **AnthropicTransport** — Anthropic Messages API path ([#13366](https://github.com/NousResearch/hermes-agent/pull/13366), @kshitijk4poor)
- **ChatCompletionsTransport** — default path for OpenAI-compatible providers ([#13805](https://github.com/NousResearch/hermes-agent/pull/13805))
- **ResponsesApiTransport** — OpenAI Responses API + Codex build_kwargs wiring ([#13430](https://github.com/NousResearch/hermes-agent/pull/13430), @kshitijk4poor)
- **BedrockTransport** — AWS Bedrock Converse API transport ([#13814](https://github.com/NousResearch/hermes-agent/pull/13814))
### Provider & Model Support
- **Native AWS Bedrock provider** via Converse API ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549))
- **NVIDIA NIM native provider** (salvage of #11703) ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774))
- **Arcee AI direct provider** ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276))
- **Step Plan provider** (salvage #6005) ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893), @kshitijk4poor)
- **Google Gemini CLI OAuth** inference provider ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270))
- **Vercel ai-gateway** with pricing, attribution, and dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223), @jerilynzheng)
- **GPT-5.5 over Codex OAuth** with live model discovery in the picker ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
- **Gemini routed through native AI Studio API** ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674))
- **xAI Grok upgraded to Responses API** ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
- **Ollama improvements** — Cloud provider support, GLM continuation, `think=false` control, surrogate sanitization, `/v1` hint ([#10782](https://github.com/NousResearch/hermes-agent/pull/10782))
- **Kimi K2.6** across OpenRouter, Nous Portal, native Kimi, and HuggingFace ([#13148](https://github.com/NousResearch/hermes-agent/pull/13148), [#13152](https://github.com/NousResearch/hermes-agent/pull/13152), [#13169](https://github.com/NousResearch/hermes-agent/pull/13169))
- **Kimi K2.5** promoted to first position in all model suggestion lists ([#11745](https://github.com/NousResearch/hermes-agent/pull/11745), @kshitijk4poor)
- **Xiaomi MiMo v2.5-pro + v2.5** on OpenRouter, Nous Portal, and native ([#14184](https://github.com/NousResearch/hermes-agent/pull/14184), [#14635](https://github.com/NousResearch/hermes-agent/pull/14635), @kshitijk4poor)
- **GLM-5V-Turbo** for coding plan ([#9907](https://github.com/NousResearch/hermes-agent/pull/9907))
- **Claude Opus 4.7** in Nous Portal catalog ([#11398](https://github.com/NousResearch/hermes-agent/pull/11398))
- **OpenRouter elephant-alpha** in curated lists ([#9378](https://github.com/NousResearch/hermes-agent/pull/9378))
- **OpenCode-Go** — Kimi K2.6 and Qwen3.5/3.6 Plus in curated catalog ([#13429](https://github.com/NousResearch/hermes-agent/pull/13429))
- **minimax/minimax-m2.5:free** in OpenRouter catalog ([#13836](https://github.com/NousResearch/hermes-agent/pull/13836))
- **`/model` merges models.dev entries** for lesser-loved providers ([#14221](https://github.com/NousResearch/hermes-agent/pull/14221))
- **Per-provider + per-model `request_timeout_seconds`** config ([#12652](https://github.com/NousResearch/hermes-agent/pull/12652))
- **Configurable API retry count** via `agent.api_max_retries` ([#14730](https://github.com/NousResearch/hermes-agent/pull/14730))
- **ctx_size context length key** for Lemonade server (salvage #8536) ([#14215](https://github.com/NousResearch/hermes-agent/pull/14215))
- **Custom provider display name prompt** ([#9420](https://github.com/NousResearch/hermes-agent/pull/9420))
- **Recommendation badges** on tool provider selection ([#9929](https://github.com/NousResearch/hermes-agent/pull/9929))
- Fix: correct GPT-5 family context lengths in fallback defaults ([#9309](https://github.com/NousResearch/hermes-agent/pull/9309))
- Fix: clamp `minimal` reasoning effort to `low` on Responses API ([#9429](https://github.com/NousResearch/hermes-agent/pull/9429))
- Fix: strip reasoning item IDs from Responses API input when `store=False` ([#10217](https://github.com/NousResearch/hermes-agent/pull/10217))
- Fix: OpenViking correct account default + commit session on `/new` and compress ([#10463](https://github.com/NousResearch/hermes-agent/pull/10463))
- Fix: Kimi `/coding` thinking block survival + empty reasoning_content + block ordering (multiple PRs)
- Fix: don't send Anthropic thinking to api.kimi.com/coding ([#13826](https://github.com/NousResearch/hermes-agent/pull/13826))
- Fix: send `max_tokens`, `reasoning_effort`, and `thinking` for Kimi/Moonshot
- Fix: stream reasoning content through OpenAI-compatible providers that emit it
### Agent Loop & Conversation
- **`/steer <prompt>`** — mid-run agent nudges after next tool call ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
- **Orchestrator role + configurable spawn depth** for `delegate_task` (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
- **Cross-agent file state coordination** for concurrent subagents ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
- **Compressor smart collapse, dedup, anti-thrashing**, template upgrade, hardening ([#10088](https://github.com/NousResearch/hermes-agent/pull/10088))
- **Compression summaries respect the conversation's language** ([#12556](https://github.com/NousResearch/hermes-agent/pull/12556))
- **Compression model falls back to main model** on permanent 503/404 ([#10093](https://github.com/NousResearch/hermes-agent/pull/10093))
- **Auto-continue interrupted agent work** after gateway restart ([#9934](https://github.com/NousResearch/hermes-agent/pull/9934))
- **Activity heartbeats** prevent false gateway inactivity timeouts ([#10501](https://github.com/NousResearch/hermes-agent/pull/10501))
- **Auxiliary models UI** — dedicated screen for per-task overrides ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891))
- **Auxiliary auto routing defaults to main model** for all users ([#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
- **PLATFORM_HINTS for Matrix, Mattermost, Feishu** ([#14428](https://github.com/NousResearch/hermes-agent/pull/14428), @alt-glitch)
- Fix: reset retry counters after compression; stop poisoning conversation history ([#10055](https://github.com/NousResearch/hermes-agent/pull/10055))
- Fix: break compression-exhaustion infinite loop and auto-reset session ([#10063](https://github.com/NousResearch/hermes-agent/pull/10063))
- Fix: stale agent timeout, uv venv detection, empty response after tools ([#10065](https://github.com/NousResearch/hermes-agent/pull/10065))
- Fix: prevent premature loop exit when weak models return empty after substantive tool calls ([#10472](https://github.com/NousResearch/hermes-agent/pull/10472))
- Fix: preserve pre-start terminal interrupts ([#10504](https://github.com/NousResearch/hermes-agent/pull/10504))
- Fix: improve interrupt responsiveness during concurrent tool execution ([#10935](https://github.com/NousResearch/hermes-agent/pull/10935))
- Fix: word-wrap spinner, interruptable agent join, and delegate_task interrupt ([#10940](https://github.com/NousResearch/hermes-agent/pull/10940))
- Fix: `/stop` no longer resets the session ([#9224](https://github.com/NousResearch/hermes-agent/pull/9224))
- Fix: honor interrupts during MCP tool waits ([#9382](https://github.com/NousResearch/hermes-agent/pull/9382), @helix4u)
- Fix: break stuck session resume loops after repeated restarts ([#9941](https://github.com/NousResearch/hermes-agent/pull/9941))
- Fix: empty response nudge crash + placeholder leak to cron targets ([#11021](https://github.com/NousResearch/hermes-agent/pull/11021))
- Fix: streaming cursor sanitization to prevent message truncation (multiple PRs)
- Fix: resolve `context_length` for plugin context engines ([#9238](https://github.com/NousResearch/hermes-agent/pull/9238))
### Session & Memory
- **Auto-prune old sessions + VACUUM state.db** at startup ([#13861](https://github.com/NousResearch/hermes-agent/pull/13861))
- **Honcho overhaul** — context injection, 5-tool surface, cost safety, session isolation ([#10619](https://github.com/NousResearch/hermes-agent/pull/10619))
- **Hindsight richer session-scoped retain metadata** (salvage of #6290) ([#13987](https://github.com/NousResearch/hermes-agent/pull/13987))
- Fix: deduplicate memory provider tools to prevent 400 on strict providers ([#10511](https://github.com/NousResearch/hermes-agent/pull/10511))
- Fix: discover user-installed memory providers from `$HERMES_HOME/plugins/` ([#10529](https://github.com/NousResearch/hermes-agent/pull/10529))
- Fix: add `on_memory_write` bridge to sequential tool execution path ([#10507](https://github.com/NousResearch/hermes-agent/pull/10507))
- Fix: preserve `session_id` across `previous_response_id` chains in `/v1/responses` ([#10059](https://github.com/NousResearch/hermes-agent/pull/10059))
---
## 🖥️ New Ink-based TUI
A full React/Ink rewrite of the interactive CLI — invoked via `hermes --tui` or `HERMES_TUI=1`. Shipped across ~310 commits to `ui-tui/` and `tui_gateway/`.
### TUI Foundations
- New TUI based on Ink + Python JSON-RPC backend
- Prettier + ESLint + vitest tooling for `ui-tui/`
- Entry split between `src/entry.tsx` (TTY gate) and `src/app.tsx` (state machine)
- Persistent `_SlashWorker` subprocess for slash command dispatch
### UX & Features
- **Stable picker keys, /clear confirm, light-theme preset** ([#12312](https://github.com/NousResearch/hermes-agent/pull/12312), @OutThisLife)
- **Git branch in status bar** cwd label ([#12305](https://github.com/NousResearch/hermes-agent/pull/12305), @OutThisLife)
- **Per-turn elapsed stopwatch in FaceTicker + done-in sys line** ([#13105](https://github.com/NousResearch/hermes-agent/pull/13105), @OutThisLife)
- **Subagent spawn observability overlay** ([#14045](https://github.com/NousResearch/hermes-agent/pull/14045), @OutThisLife)
- **Per-prompt elapsed stopwatch in status bar** ([#12948](https://github.com/NousResearch/hermes-agent/pull/12948))
- Sticky composer that freezes during scroll
- OSC-52 clipboard support for copy across SSH sessions
- Virtualized history rendering for performance
- Slash command autocomplete via `complete.slash` RPC
- Path autocomplete via `complete.path` RPC
- Dozens of resize/ghosting/sticky-prompt fixes landed through the week
### Structural Refactors
- Decomposed `app.tsx` into `app/event-handler`, `app/slash-handler`, `app/stores`, `app/hooks` ([#14640](https://github.com/NousResearch/hermes-agent/pull/14640) and surrounding)
- Component split: `branding.tsx`, `markdown.tsx`, `prompts.tsx`, `sessionPicker.tsx`, `messageLine.tsx`, `thinking.tsx`, `maskedPrompt.tsx`
- Hook split: `useCompletion`, `useInputHistory`, `useQueue`, `useVirtualHistory`
---
## 📱 Messaging Platforms (Gateway)
### New Platforms
- **QQBot (17th platform)** — QQ Official API v2 adapter with QR setup, streaming, package split ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
### Telegram
- **Dedicated `TELEGRAM_PROXY` env var + config.yaml proxy support** (closes #9414, #6530, #9074, #7786) ([#10681](https://github.com/NousResearch/hermes-agent/pull/10681))
- **`ignored_threads` config** for Telegram groups ([#9530](https://github.com/NousResearch/hermes-agent/pull/9530))
- **Config option to disable link previews** (closes #8728) ([#10610](https://github.com/NousResearch/hermes-agent/pull/10610))
- **Auto-wrap markdown tables** in code blocks ([#11794](https://github.com/NousResearch/hermes-agent/pull/11794))
- Fix: prevent duplicate replies when stream task is cancelled ([#9319](https://github.com/NousResearch/hermes-agent/pull/9319))
- Fix: prevent streaming cursor (▉) from appearing as standalone messages ([#9538](https://github.com/NousResearch/hermes-agent/pull/9538))
- Fix: retry transient tool sends + cold-boot budget ([#10947](https://github.com/NousResearch/hermes-agent/pull/10947))
- Fix: Markdown special char escaping in `send_exec_approval`
- Fix: parentheses in URLs during MarkdownV2 link conversion
- Fix: Unicode dash normalization in model switch (closes iOS smart-punctuation issue)
- Many platform hint / streaming / session-key fixes
### Discord
- **Forum channel support** (salvage of #10145 + media + polish) ([#11920](https://github.com/NousResearch/hermes-agent/pull/11920))
- **`DISCORD_ALLOWED_ROLES`** for role-based access control ([#11608](https://github.com/NousResearch/hermes-agent/pull/11608))
- **Config option to disable slash commands** (salvage #13130) ([#14315](https://github.com/NousResearch/hermes-agent/pull/14315))
- **Native `send_animation`** for inline GIF playback ([#10283](https://github.com/NousResearch/hermes-agent/pull/10283))
- **`send_message` Discord media attachments** ([#10246](https://github.com/NousResearch/hermes-agent/pull/10246))
- **`/skill` command group** with category subcommands ([#9909](https://github.com/NousResearch/hermes-agent/pull/9909))
- **Extract reply text from message references** ([#9781](https://github.com/NousResearch/hermes-agent/pull/9781))
### Feishu
- **Intelligent reply on document comments** with 3-tier access control ([#11898](https://github.com/NousResearch/hermes-agent/pull/11898))
- **Show processing state via reactions** on user messages ([#12927](https://github.com/NousResearch/hermes-agent/pull/12927))
- **Preserve @mention context for agent consumption** (salvage #13874) ([#14167](https://github.com/NousResearch/hermes-agent/pull/14167))
### DingTalk
- **`require_mention` + `allowed_users` gating** (parity with Slack/Telegram/Discord) ([#11564](https://github.com/NousResearch/hermes-agent/pull/11564))
- **QR-code device-flow authorization** for setup wizard ([#11574](https://github.com/NousResearch/hermes-agent/pull/11574))
- **AI Cards streaming, emoji reactions, and media handling** (salvage of #10985) ([#11910](https://github.com/NousResearch/hermes-agent/pull/11910))
### WhatsApp
- **`send_voice`** — native audio message delivery ([#13002](https://github.com/NousResearch/hermes-agent/pull/13002))
- **`dm_policy` and `group_policy`** parity with WeCom/Weixin/QQ adapters ([#13151](https://github.com/NousResearch/hermes-agent/pull/13151))
### WeCom / Weixin
- **WeCom QR-scan bot creation + interactive setup wizard** (salvage #13923) ([#13961](https://github.com/NousResearch/hermes-agent/pull/13961))
### Signal
- **Media delivery support** via `send_message` ([#13178](https://github.com/NousResearch/hermes-agent/pull/13178))
### Slack
- **Per-thread sessions for DMs by default** ([#10987](https://github.com/NousResearch/hermes-agent/pull/10987))
### BlueBubbles (iMessage)
- Group chat session separation, webhook registration & auth fixes ([#9806](https://github.com/NousResearch/hermes-agent/pull/9806))
### Gateway Core
- **Gateway proxy mode** — forward messages to a remote API server ([#9787](https://github.com/NousResearch/hermes-agent/pull/9787))
- **Per-channel ephemeral prompts** (Discord, Telegram, Slack, Mattermost) ([#10564](https://github.com/NousResearch/hermes-agent/pull/10564))
- **Surface plugin slash commands** natively on all platforms + decision-capable command hook ([#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
- **Support document/archive extensions in MEDIA: tag extraction** (salvage #8255) ([#14307](https://github.com/NousResearch/hermes-agent/pull/14307))
- **Recognize `.pdf` in MEDIA: tag extraction** ([#13683](https://github.com/NousResearch/hermes-agent/pull/13683))
- **`--all` flag for `gateway start` and `restart`** ([#10043](https://github.com/NousResearch/hermes-agent/pull/10043))
- **Notify active sessions on gateway shutdown** + update health check ([#9850](https://github.com/NousResearch/hermes-agent/pull/9850))
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
- Fix: suppress duplicate replies on interrupt and streaming flood control ([#10235](https://github.com/NousResearch/hermes-agent/pull/10235))
- Fix: close temporary agents after one-off tasks ([#11028](https://github.com/NousResearch/hermes-agent/pull/11028), @kshitijk4poor)
- Fix: busy-session ack when user messages during active agent run ([#10068](https://github.com/NousResearch/hermes-agent/pull/10068))
- Fix: route watch-pattern notifications to the originating session ([#10460](https://github.com/NousResearch/hermes-agent/pull/10460))
- Fix: preserve notify context in executor threads ([#10921](https://github.com/NousResearch/hermes-agent/pull/10921), @kshitijk4poor)
- Fix: avoid duplicate replies after interrupted long tasks ([#11018](https://github.com/NousResearch/hermes-agent/pull/11018))
- Fix: unlink stale PID + lock files on cleanup
- Fix: force-unlink stale PID file after `--replace` takeover
---
## 🔧 Tool System
### Plugin Surface (major expansion)
- **`register_command()`** — plugins can now add slash commands ([#10626](https://github.com/NousResearch/hermes-agent/pull/10626))
- **`dispatch_tool()`** — plugins can invoke tools from their code ([#10763](https://github.com/NousResearch/hermes-agent/pull/10763))
- **`pre_tool_call` blocking** — plugins can veto tool execution ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377))
- **`transform_tool_result`** — plugins rewrite tool results generically ([#12972](https://github.com/NousResearch/hermes-agent/pull/12972))
- **`transform_terminal_output`** — plugins rewrite terminal tool output ([#12929](https://github.com/NousResearch/hermes-agent/pull/12929))
- **Namespaced skill registration** for plugin skill bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
- **Opt-in-by-default + bundled disk-cleanup plugin** (salvage #12212) ([#12944](https://github.com/NousResearch/hermes-agent/pull/12944))
- **Pluggable `image_gen` backends + OpenAI provider** ([#13799](https://github.com/NousResearch/hermes-agent/pull/13799))
- **`openai-codex` image_gen plugin** (gpt-image-2 via Codex OAuth) ([#14317](https://github.com/NousResearch/hermes-agent/pull/14317))
- **Shell hooks** — wire shell scripts as hook callbacks ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
### Browser
- **`browser_cdp` raw DevTools Protocol passthrough** ([#12369](https://github.com/NousResearch/hermes-agent/pull/12369))
- Camofox hardening + connection stability across the window
### Execute Code
- **Project/strict execution modes** (default: project) ([#11971](https://github.com/NousResearch/hermes-agent/pull/11971))
### Image Generation
- **Multi-model FAL support** with picker in `hermes tools` ([#11265](https://github.com/NousResearch/hermes-agent/pull/11265))
- **Recraft V3 → V4 Pro, Nano Banana → Pro upgrades** ([#11406](https://github.com/NousResearch/hermes-agent/pull/11406))
- **GPT Image 2** in FAL catalog ([#13677](https://github.com/NousResearch/hermes-agent/pull/13677))
- **xAI image generation provider** (grok-imagine-image) ([#14765](https://github.com/NousResearch/hermes-agent/pull/14765))
### TTS / STT / Voice
- **Google Gemini TTS provider** ([#11229](https://github.com/NousResearch/hermes-agent/pull/11229))
- **xAI Grok STT provider** ([#14473](https://github.com/NousResearch/hermes-agent/pull/14473))
- **xAI TTS** (shipped with Responses API upgrade) ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
- **KittenTTS local provider** (salvage of #2109) ([#13395](https://github.com/NousResearch/hermes-agent/pull/13395))
- **CLI record beep toggle** ([#13247](https://github.com/NousResearch/hermes-agent/pull/13247), @helix4u)
### Webhook / Cron
- **Webhook direct-delivery mode** — zero-LLM push notifications ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
- **Cron `wakeAgent` gate** — scripts can skip the agent entirely ([#12373](https://github.com/NousResearch/hermes-agent/pull/12373))
- **Cron per-job `enabled_toolsets`** — cap token overhead + cost per job ([#14767](https://github.com/NousResearch/hermes-agent/pull/14767))
### Delegate
- **Orchestrator role** + configurable spawn depth (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
- **Cross-agent file state coordination** ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
### File / Patch
- **`patch` — "did you mean?" feedback** when patch fails to match ([#13435](https://github.com/NousResearch/hermes-agent/pull/13435))
### API Server
- **Stream `/v1/responses` SSE tool events** (salvage #9779) ([#10049](https://github.com/NousResearch/hermes-agent/pull/10049))
- **Inline image inputs** on `/v1/chat/completions` and `/v1/responses` ([#12969](https://github.com/NousResearch/hermes-agent/pull/12969))
### Docker / Podman
- **Entry-level Podman support** — `find_docker()` + rootless entrypoint ([#10066](https://github.com/NousResearch/hermes-agent/pull/10066))
- **Add docker-cli to Docker image** (salvage #10096) ([#14232](https://github.com/NousResearch/hermes-agent/pull/14232))
- **File-sync back to host on teardown** (salvage of #8189 + hardening) ([#11291](https://github.com/NousResearch/hermes-agent/pull/11291))
### MCP
- 12 MCP improvements across the window (status, timeout handling, tool-call forwarding, etc.)
---
## 🧩 Skills Ecosystem
### Skill System
- **Namespaced skill registration** for plugin bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
- **`hermes skills reset`** to un-stick bundled skills ([#11468](https://github.com/NousResearch/hermes-agent/pull/11468))
- **Skills guard opt-in** — `config.skills.guard_agent_created` (default off) ([#14557](https://github.com/NousResearch/hermes-agent/pull/14557))
- **Bundled skill scripts runnable out of the box** ([#13384](https://github.com/NousResearch/hermes-agent/pull/13384))
- **`xitter` replaced with `xurl`** — the official X API CLI ([#12303](https://github.com/NousResearch/hermes-agent/pull/12303))
- **MiniMax-AI/cli as default skill tap** (salvage #7501) ([#14493](https://github.com/NousResearch/hermes-agent/pull/14493))
- **Fuzzy `@` file completions + mtime sorting** ([#9467](https://github.com/NousResearch/hermes-agent/pull/9467))
### New Skills
- **concept-diagrams** (salvage of #11045, @v1k22) ([#11363](https://github.com/NousResearch/hermes-agent/pull/11363))
- **architecture-diagram** (Cocoon AI port) ([#9906](https://github.com/NousResearch/hermes-agent/pull/9906))
- **pixel-art** with hardware palettes and video animation ([#12663](https://github.com/NousResearch/hermes-agent/pull/12663), [#12725](https://github.com/NousResearch/hermes-agent/pull/12725))
- **baoyu-comic** ([#13257](https://github.com/NousResearch/hermes-agent/pull/13257), @JimLiu)
- **baoyu-infographic** — 21 layouts × 21 styles (salvage #9901) ([#12254](https://github.com/NousResearch/hermes-agent/pull/12254))
- **page-agent** — embed Alibaba's in-page GUI agent in your webapp ([#13976](https://github.com/NousResearch/hermes-agent/pull/13976))
- **fitness-nutrition** optional skill + optional env var support ([#9355](https://github.com/NousResearch/hermes-agent/pull/9355))
- **drug-discovery** — ChEMBL, PubChem, OpenFDA, ADMET ([#9443](https://github.com/NousResearch/hermes-agent/pull/9443))
- **touchdesigner-mcp** (salvage of #10081) ([#12298](https://github.com/NousResearch/hermes-agent/pull/12298))
- **adversarial-ux-test** optional skill (salvage of #2494, @omnissiah-comelse) ([#13425](https://github.com/NousResearch/hermes-agent/pull/13425))
- **maps** — added `guest_house`, `camp_site`, and dual-key bakery lookup ([#13398](https://github.com/NousResearch/hermes-agent/pull/13398))
- **llm-wiki** — port provenance markers, source hashing, and quality signals ([#13700](https://github.com/NousResearch/hermes-agent/pull/13700))
---
## 📊 Web Dashboard
- **i18n (English + Chinese) language switcher** ([#9453](https://github.com/NousResearch/hermes-agent/pull/9453))
- **Live-switching theme system** ([#10687](https://github.com/NousResearch/hermes-agent/pull/10687))
- **Dashboard plugin system** — extend the web UI with custom tabs ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951))
- **react-router, sidebar layout, sticky header, dropdown component** ([#9370](https://github.com/NousResearch/hermes-agent/pull/9370), @austinpickett)
- **Responsive for mobile** ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), @DeployFaith)
- **Vercel deployment** ([#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#11061](https://github.com/NousResearch/hermes-agent/pull/11061), @austinpickett)
- **Context window config support** ([#9357](https://github.com/NousResearch/hermes-agent/pull/9357))
- **HTTP health probe for cross-container gateway detection** ([#9894](https://github.com/NousResearch/hermes-agent/pull/9894))
- **Update + restart gateway buttons** ([#13526](https://github.com/NousResearch/hermes-agent/pull/13526), @austinpickett)
- **Real API call count per session** (salvages #10140) ([#14004](https://github.com/NousResearch/hermes-agent/pull/14004))
---
## 🖱️ CLI & User Experience
- **Dynamic shell completion for bash, zsh, and fish** ([#9785](https://github.com/NousResearch/hermes-agent/pull/9785))
- **Light-mode skins + skin-aware completion menus** ([#9461](https://github.com/NousResearch/hermes-agent/pull/9461))
- **Numbered keyboard shortcuts** on approval and clarify prompts ([#13416](https://github.com/NousResearch/hermes-agent/pull/13416))
- **Markdown stripping, compact multiline previews, external editor** ([#12934](https://github.com/NousResearch/hermes-agent/pull/12934))
- **`--ignore-user-config` and `--ignore-rules` flags** (port codex#18646) ([#14277](https://github.com/NousResearch/hermes-agent/pull/14277))
- **Account limits section in `/usage`** ([#13428](https://github.com/NousResearch/hermes-agent/pull/13428))
- **Doctor: Command Installation check** for `hermes` bin symlink ([#10112](https://github.com/NousResearch/hermes-agent/pull/10112))
- **ESC cancels secret/sudo prompts**, clearer skip messaging ([#9902](https://github.com/NousResearch/hermes-agent/pull/9902))
- Fix: agent-facing text uses `display_hermes_home()` instead of hardcoded `~/.hermes` ([#10285](https://github.com/NousResearch/hermes-agent/pull/10285))
- Fix: enforce `config.yaml` as sole CWD source + deprecate `.env` CWD vars + add `hermes memory reset` ([#11029](https://github.com/NousResearch/hermes-agent/pull/11029))
---
## 🔒 Security & Reliability
- **Global toggle to allow private/internal URL resolution** ([#14166](https://github.com/NousResearch/hermes-agent/pull/14166))
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
- **Telegram callback authorization** on update prompts ([#10536](https://github.com/NousResearch/hermes-agent/pull/10536))
- **SECURITY.md** added ([#10532](https://github.com/NousResearch/hermes-agent/pull/10532), @I3eg1nner)
- **Warn about legacy hermes.service units** during `hermes update` ([#11918](https://github.com/NousResearch/hermes-agent/pull/11918))
- **Complete ASCII-locale UnicodeEncodeError recovery** for `api_messages`/`reasoning_content` (closes #6843) ([#10537](https://github.com/NousResearch/hermes-agent/pull/10537))
- **Prevent stale `os.environ` leak** after `clear_session_vars` ([#10527](https://github.com/NousResearch/hermes-agent/pull/10527))
- **Prevent agent hang when backgrounding processes** via terminal tool ([#10584](https://github.com/NousResearch/hermes-agent/pull/10584))
- Many smaller session-resume, interrupt, streaming, and memory-race fixes throughout the window
---
## 🐛 Notable Bug Fixes
The `fix:` category in this window covers 482 PRs. Highlights:
- Streaming cursor artifacts filtered from Matrix, Telegram, WhatsApp, Discord (multiple PRs)
- `<think>` and `<thought>` blocks filtered from gateway stream consumers ([#9408](https://github.com/NousResearch/hermes-agent/pull/9408))
- Gateway display.streaming root-config override regression ([#9799](https://github.com/NousResearch/hermes-agent/pull/9799))
- Context `session_search` coerces limit to int (prevents TypeError) ([#10522](https://github.com/NousResearch/hermes-agent/pull/10522))
- Memory tool stays available when `fcntl` is unavailable (Windows) ([#9783](https://github.com/NousResearch/hermes-agent/pull/9783))
- Trajectory compressor credentials load from `HERMES_HOME/.env` ([#9632](https://github.com/NousResearch/hermes-agent/pull/9632), @Dusk1e)
- `@_context_completions` no longer crashes on `@` mention ([#9683](https://github.com/NousResearch/hermes-agent/pull/9683), @kshitijk4poor)
- Group session `user_id` no longer treated as `thread_id` in shutdown notifications ([#10546](https://github.com/NousResearch/hermes-agent/pull/10546))
- Telegram `platform_hint` — markdown is supported (closes #8261) ([#10612](https://github.com/NousResearch/hermes-agent/pull/10612))
- Doctor checks for Kimi China credentials fixed
- Streaming: don't suppress final response when commentary message is sent ([#10540](https://github.com/NousResearch/hermes-agent/pull/10540))
- Rapid Telegram follow-ups no longer get cut off
---
## 🧪 Testing & CI
- **Contributor attribution CI check** on PRs ([#9376](https://github.com/NousResearch/hermes-agent/pull/9376))
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
- Test count stabilized post-Transport refactor; CI matrix held green through the transport rollout
---
## 📚 Documentation
- Atropos + wandb links in user guide
- ACP / VS Code / Zed / JetBrains integration docs refresh
- Webhook subscription docs updated for direct-delivery mode
- Plugin author guide expanded for new hooks (`register_command`, `dispatch_tool`, `transform_tool_result`)
- Transport layer developer guide added
- Website removed Discussions link from README
---
## 👥 Contributors
### Core
- **@teknium1** (Teknium)
### Top Community Contributors (by merged PR count)
- **@kshitijk4poor** — 49 PRs · Transport refactor (AnthropicTransport, ResponsesApiTransport), Step Plan provider, Xiaomi MiMo v2.5 support, numerous gateway fixes, promoted Kimi K2.5, @ mention crash fix
- **@OutThisLife** (Brooklyn) — 31 PRs · TUI polish, git branch in status bar, per-turn stopwatch, stable picker keys, `/clear` confirm, light-theme preset, subagent spawn observability overlay
- **@helix4u** — 11 PRs · Voice CLI record beep, MCP tool interrupt handling, assorted stability fixes
- **@austinpickett** — 8 PRs · Dashboard react-router + sidebar + sticky header + dropdown, Vercel deployment, update + restart buttons
- **@alt-glitch** — 8 PRs · PLATFORM_HINTS for Matrix/Mattermost/Feishu, Matrix fixes
- **@ethernet8023** — 3 PRs
- **@benbarclay** — 3 PRs
- **@Aslaaen** — 2 PRs
### Also contributing
@jerilynzheng (ai-gateway pricing), @JimLiu (baoyu-comic skill), @Dusk1e (trajectory compressor credentials), @DeployFaith (mobile-responsive dashboard), @LeonSGP43, @v1k22 (concept-diagrams), @omnissiah-comelse (adversarial-ux-test), @coekfung (Telegram MarkdownV2 expandable blockquotes), @liftaris (TUI provider resolution), @arihantsethia (skill analytics dashboard), @topcheer + @xing8star (QQBot foundation), @kovyrin, @I3eg1nner (SECURITY.md), @PeterBerthelsen, @lengxii, @priveperfumes, @sjz-ks, @cuyua9, @Disaster-Terminator, @leozeli, @LehaoLin, @trevthefoolish, @loongfay, @MrNiceRicee, @WideLee, @bluefishs, @malaiwah, @bobashopcashier, @dsocolobsky, @iamagenius00, @IAvecilla, @aniruddhaadak80, @Es1la, @asheriif, @walli, @jquesnelle (original Tool Gateway work).
### All Contributors (alphabetical)
@0xyg3n, @10ishq, @A-afflatus, @Abnertheforeman, @admin28980, @adybag14-cyber, @akhater, @alexzhu0,
@AllardQuek, @alt-glitch, @aniruddhaadak80, @anna-oake, @anniesurla, @anthhub, @areu01or00, @arihantsethia,
@arthurbr11, @asheriif, @Aslaaen, @Asunfly, @austinpickett, @AviArora02-commits, @AxDSan, @azhengbot, @Bartok9,
@benbarclay, @bennytimz, @bernylinville, @bingo906, @binhnt92, @bkadish, @bluefishs, @bobashopcashier,
@brantzh6, @BrennerSpear, @brianclemens, @briandevans, @brooklynnicholson, @bugkill3r, @buray, @burtenshaw,
@cdanis, @cgarwood82, @ChimingLiu, @chongweiliu, @christopherwoodall, @coekfung, @cola-runner, @corazzione,
@counterposition, @cresslank, @cuyua9, @cypres0099, @danieldoderlein, @davetist, @davidvv, @DeployFaith,
@Dev-Mriganka, @devorun, @dieutx, @Disaster-Terminator, @dodo-reach, @draix, @DrStrangerUJN, @dsocolobsky,
@Dusk1e, @dyxushuai, @elkimek, @elmatadorgh, @emozilla, @entropidelic, @Erosika, @erosika, @Es1la, @etcircle,
@etherman-os, @ethernet8023, @fancydirty, @farion1231, @fatinghenji, @Fatty911, @fengtianyu88, @Feranmi10,
@flobo3, @francip, @fuleinist, @g-guthrie, @GenKoKo, @gianfrancopiana, @gnanam1990, @GuyCui, @haileymarshall,
@haimu0x, @handsdiff, @hansnow, @hedgeho9X, @helix4u, @hengm3467, @HenkDz, @heykb, @hharry11, @HiddenPuppy,
@honghua, @houko, @houziershi, @hsy5571616, @huangke19, @hxp-plus, @Hypn0sis, @I3eg1nner, @iacker,
@iamagenius00, @IAvecilla, @iborazzi, @Ifkellx, @ifrederico, @imink, @isaachuangGMICLOUD, @ismell0992-afk,
@j0sephz, @Jaaneek, @jackjin1997, @JackTheGit, @jaffarkeikei, @jerilynzheng, @JiaDe-Wu, @Jiawen-lee, @JimLiu,
@jinzheng8115, @jneeee, @jplew, @jquesnelle, @Julientalbot, @Junass1, @jvcl, @kagura-agent, @keifergu,
@kevinskysunny, @keyuyuan, @konsisumer, @kovyrin, @kshitijk4poor, @leeyang1990, @LehaoLin, @lengxii,
@LeonSGP43, @leozeli, @li0near, @liftaris, @Lind3ey, @Linux2010, @liujinkun2025, @LLQWQ, @Llugaes, @lmoncany,
@longsizhuo, @lrawnsley, @Lubrsy706, @lumenradley, @luyao618, @lvnilesh, @LVT382009, @m0n5t3r, @Magaav,
@MagicRay1217, @malaiwah, @manuelschipper, @Marvae, @MassiveMassimo, @mavrickdeveloper, @maxchernin, @memosr,
@meng93, @mengjian-github, @MestreY0d4-Uninter, @Mibayy, @MikeFac, @mikewaters, @milkoor, @minorgod,
@MrNiceRicee, @ms-alan, @mvanhorn, @n-WN, @N0nb0at, @Nan93, @NIDNASSER-Abdelmajid, @nish3451, @niyoh120,
@nocoo, @nosleepcassette, @NousResearch, @ogzerber, @omnissiah-comelse, @Only-Code-A, @opriz, @OwenYWT, @pedh,
@pefontana, @PeterBerthelsen, @phpoh, @pinion05, @plgonzalezrx8, @pradeep7127, @priveperfumes,
@projectadmin-dev, @PStarH, @rnijhara, @Roy-oss1, @roytian1217, @RucchiZ, @Ruzzgar, @RyanLee-Dev, @Salt-555,
@Sanjays2402, @sgaofen, @sharziki, @shenuu, @shin4, @SHL0MS, @shushuzn, @sicnuyudidi, @simon-gtcl,
@simon-marcus, @sirEven, @Sisyphus, @sjz-ks, @snreynolds, @Societus, @Somme4096, @sontianye, @sprmn24,
@StefanIsMe, @stephenschoettler, @Swift42, @taeng0204, @taeuk178, @tannerfokkens-maker, @TaroballzChen,
@ten-ltw, @teyrebaz33, @Tianworld, @topcheer, @Tranquil-Flow, @trevthefoolish, @TroyMitchell911, @UNLINEARITY,
@v1k22, @vivganes, @vominh1919, @vrinek, @VTRiot, @WadydX, @walli, @wenhao7, @WhiteWorld, @WideLee, @wujhsu,
@WuTianyi123, @Wysie, @xandersbell, @xiaoqiang243, @xiayh0107, @xinpengdr, @Xowiek, @ycbai, @yeyitech, @ygd58,
@youngDoo, @yudaiyan, @Yukipukii1, @yule975, @yyq4193, @yzx9, @ZaynJarvis, @zhang9w0v5, @zhanggttry,
@zhangxicen, @zhongyueming1121, @zhouxiaoya12, @zons-zhaozhy
Also: @maelrx, @Marco Rutsch, @MaxsolcuCrypto, @Mind-Dragon, @Paul Bergeron, @say8hi, @whitehatjr1001.
---
**Full Changelog**: [v2026.4.13...v2026.4.23](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.23)
-632
View File
@@ -1,632 +0,0 @@
"""OpenAI-compatible shim that forwards Hermes requests to `copilot --acp`.
This adapter lets Hermes treat the GitHub Copilot ACP server as a chat-style
backend. Each request starts a short-lived ACP session, sends the formatted
conversation as a single prompt, collects text chunks, and converts the result
back into the minimal shape Hermes expects from an OpenAI client.
"""
from __future__ import annotations
import json
import os
import queue
import re
import shlex
import subprocess
import threading
import time
from collections import deque
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from agent.file_safety import get_read_block_error, is_write_denied
from agent.redact import redact_sensitive_text
ACP_MARKER_BASE_URL = "acp://copilot"
_DEFAULT_TIMEOUT_SECONDS = 900.0
_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
_TOOL_CALL_JSON_RE = re.compile(
r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}",
re.DOTALL,
)
def _resolve_command() -> str:
return (
os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
or os.getenv("COPILOT_CLI_PATH", "").strip()
or "copilot"
)
def _resolve_args() -> list[str]:
raw = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
if not raw:
return ["--acp", "--stdio"]
return shlex.split(raw)
def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
return {
"jsonrpc": "2.0",
"id": message_id,
"error": {
"code": code,
"message": message,
},
}
def _permission_denied(message_id: Any) -> dict[str, Any]:
return {
"jsonrpc": "2.0",
"id": message_id,
"result": {
"outcome": {
"outcome": "cancelled",
}
},
}
def _format_messages_as_prompt(
messages: list[dict[str, Any]],
model: str | None = None,
tools: list[dict[str, Any]] | None = None,
tool_choice: Any = None,
) -> str:
sections: list[str] = [
"You are being used as the active ACP agent backend for Hermes.",
"Use ACP capabilities to complete tasks.",
"IMPORTANT: If you take an action with a tool, you MUST output tool calls using <tool_call>{...}</tool_call> blocks with JSON exactly in OpenAI function-call shape.",
"If no tool is needed, answer normally.",
]
if model:
sections.append(f"Hermes requested model hint: {model}")
if isinstance(tools, list) and tools:
tool_specs: list[dict[str, Any]] = []
for t in tools:
if not isinstance(t, dict):
continue
fn = t.get("function") or {}
if not isinstance(fn, dict):
continue
name = fn.get("name")
if not isinstance(name, str) or not name.strip():
continue
tool_specs.append(
{
"name": name.strip(),
"description": fn.get("description", ""),
"parameters": fn.get("parameters", {}),
}
)
if tool_specs:
sections.append(
"Available tools (OpenAI function schema). "
"When using a tool, emit ONLY <tool_call>{...}</tool_call> with one JSON object "
"containing id/type/function{name,arguments}. arguments must be a JSON string.\n"
+ json.dumps(tool_specs, ensure_ascii=False)
)
if tool_choice is not None:
sections.append(
f"Tool choice hint: {json.dumps(tool_choice, ensure_ascii=False)}"
)
transcript: list[str] = []
for message in messages:
if not isinstance(message, dict):
continue
role = str(message.get("role") or "unknown").strip().lower()
if role == "tool":
role = "tool"
elif role not in {"system", "user", "assistant"}:
role = "context"
content = message.get("content")
rendered = _render_message_content(content)
if not rendered:
continue
label = {
"system": "System",
"user": "User",
"assistant": "Assistant",
"tool": "Tool",
"context": "Context",
}.get(role, role.title())
transcript.append(f"{label}:\n{rendered}")
if transcript:
sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript))
sections.append("Continue the conversation from the latest user request.")
return "\n\n".join(
section.strip() for section in sections if section and section.strip()
)
def _render_message_content(content: Any) -> str:
if content is None:
return ""
if isinstance(content, str):
return content.strip()
if isinstance(content, dict):
if "text" in content:
return str(content.get("text") or "").strip()
if "content" in content and isinstance(content.get("content"), str):
return str(content.get("content") or "").strip()
return json.dumps(content, ensure_ascii=True)
if isinstance(content, list):
parts: list[str] = []
for item in content:
if isinstance(item, str):
parts.append(item)
elif isinstance(item, dict):
text = item.get("text")
if isinstance(text, str) and text.strip():
parts.append(text.strip())
return "\n".join(parts).strip()
return str(content).strip()
def _extract_tool_calls_from_text(text: str) -> tuple[list[SimpleNamespace], str]:
if not isinstance(text, str) or not text.strip():
return [], ""
extracted: list[SimpleNamespace] = []
consumed_spans: list[tuple[int, int]] = []
def _try_add_tool_call(raw_json: str) -> None:
try:
obj = json.loads(raw_json)
except Exception:
return
if not isinstance(obj, dict):
return
fn = obj.get("function")
if not isinstance(fn, dict):
return
fn_name = fn.get("name")
if not isinstance(fn_name, str) or not fn_name.strip():
return
fn_args = fn.get("arguments", "{}")
if not isinstance(fn_args, str):
fn_args = json.dumps(fn_args, ensure_ascii=False)
call_id = obj.get("id")
if not isinstance(call_id, str) or not call_id.strip():
call_id = f"acp_call_{len(extracted) + 1}"
extracted.append(
SimpleNamespace(
id=call_id,
call_id=call_id,
response_item_id=None,
type="function",
function=SimpleNamespace(name=fn_name.strip(), arguments=fn_args),
)
)
for m in _TOOL_CALL_BLOCK_RE.finditer(text):
raw = m.group(1)
_try_add_tool_call(raw)
consumed_spans.append((m.start(), m.end()))
# Only try bare-JSON fallback when no XML blocks were found.
if not extracted:
for m in _TOOL_CALL_JSON_RE.finditer(text):
raw = m.group(0)
_try_add_tool_call(raw)
consumed_spans.append((m.start(), m.end()))
if not consumed_spans:
return extracted, text.strip()
consumed_spans.sort()
merged: list[tuple[int, int]] = []
for start, end in consumed_spans:
if not merged or start > merged[-1][1]:
merged.append((start, end))
else:
merged[-1] = (merged[-1][0], max(merged[-1][1], end))
parts: list[str] = []
cursor = 0
for start, end in merged:
if cursor < start:
parts.append(text[cursor:start])
cursor = max(cursor, end)
if cursor < len(text):
parts.append(text[cursor:])
cleaned = "\n".join(p.strip() for p in parts if p and p.strip()).strip()
return extracted, cleaned
def _ensure_path_within_cwd(path_text: str, cwd: str) -> Path:
candidate = Path(path_text)
if not candidate.is_absolute():
raise PermissionError("ACP file-system paths must be absolute.")
resolved = candidate.resolve()
root = Path(cwd).resolve()
try:
resolved.relative_to(root)
except ValueError as exc:
raise PermissionError(
f"Path '{resolved}' is outside the session cwd '{root}'."
) from exc
return resolved
class _ACPChatCompletions:
def __init__(self, client: CopilotACPClient):
self._client = client
def create(self, **kwargs: Any) -> Any:
return self._client._create_chat_completion(**kwargs)
class _ACPChatNamespace:
def __init__(self, client: CopilotACPClient):
self.completions = _ACPChatCompletions(client)
class CopilotACPClient:
"""Minimal OpenAI-client-compatible facade for Copilot ACP."""
def __init__(
self,
*,
api_key: str | None = None,
base_url: str | None = None,
default_headers: dict[str, str] | None = None,
acp_command: str | None = None,
acp_args: list[str] | None = None,
acp_cwd: str | None = None,
command: str | None = None,
args: list[str] | None = None,
**_: Any,
):
self.api_key = api_key or "copilot-acp"
self.base_url = base_url or ACP_MARKER_BASE_URL
self._default_headers = dict(default_headers or {})
self._acp_command = acp_command or command or _resolve_command()
self._acp_args = list(acp_args or args or _resolve_args())
self._acp_cwd = str(Path(acp_cwd or os.getcwd()).resolve())
self.chat = _ACPChatNamespace(self)
self.is_closed = False
self._active_process: subprocess.Popen[str] | None = None
self._active_process_lock = threading.Lock()
def close(self) -> None:
proc: subprocess.Popen[str] | None
with self._active_process_lock:
proc = self._active_process
self._active_process = None
self.is_closed = True
if proc is None:
return
try:
proc.terminate()
proc.wait(timeout=2)
except Exception:
try:
proc.kill()
except Exception:
pass
def _create_chat_completion(
self,
*,
model: str | None = None,
messages: list[dict[str, Any]] | None = None,
timeout: float | None = None,
tools: list[dict[str, Any]] | None = None,
tool_choice: Any = None,
**_: Any,
) -> Any:
prompt_text = _format_messages_as_prompt(
messages or [],
model=model,
tools=tools,
tool_choice=tool_choice,
)
# Normalise timeout: run_agent.py may pass an httpx.Timeout object
# (used natively by the OpenAI SDK) rather than a plain float.
if timeout is None:
_effective_timeout = _DEFAULT_TIMEOUT_SECONDS
elif isinstance(timeout, (int, float)):
_effective_timeout = float(timeout)
else:
# httpx.Timeout or similar — pick the largest component so the
# subprocess has enough wall-clock time for the full response.
_candidates = [
getattr(timeout, attr, None)
for attr in ("read", "write", "connect", "pool", "timeout")
]
_numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
_effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
response_text, reasoning_text = self._run_prompt(
prompt_text,
timeout_seconds=_effective_timeout,
)
tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)
usage = SimpleNamespace(
prompt_tokens=0,
completion_tokens=0,
total_tokens=0,
prompt_tokens_details=SimpleNamespace(cached_tokens=0),
)
assistant_message = SimpleNamespace(
content=cleaned_text,
tool_calls=tool_calls,
reasoning=reasoning_text or None,
reasoning_content=reasoning_text or None,
reasoning_details=None,
)
finish_reason = "tool_calls" if tool_calls else "stop"
choice = SimpleNamespace(message=assistant_message, finish_reason=finish_reason)
return SimpleNamespace(
choices=[choice],
usage=usage,
model=model or "copilot-acp",
)
def _run_prompt(
self, prompt_text: str, *, timeout_seconds: float
) -> tuple[str, str]:
try:
proc = subprocess.Popen(
[self._acp_command] + self._acp_args,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
bufsize=1,
cwd=self._acp_cwd,
)
except FileNotFoundError as exc:
raise RuntimeError(
f"Could not start Copilot ACP command '{self._acp_command}'. "
"Install GitHub Copilot CLI or set HERMES_COPILOT_ACP_COMMAND/COPILOT_CLI_PATH."
) from exc
if proc.stdin is None or proc.stdout is None:
proc.kill()
raise RuntimeError("Copilot ACP process did not expose stdin/stdout pipes.")
self.is_closed = False
with self._active_process_lock:
self._active_process = proc
inbox: queue.Queue[dict[str, Any]] = queue.Queue()
stderr_tail: deque[str] = deque(maxlen=40)
def _stdout_reader() -> None:
if proc.stdout is None:
return
for line in proc.stdout:
try:
inbox.put(json.loads(line))
except Exception:
inbox.put({"raw": line.rstrip("\n")})
def _stderr_reader() -> None:
if proc.stderr is None:
return
for line in proc.stderr:
stderr_tail.append(line.rstrip("\n"))
out_thread = threading.Thread(target=_stdout_reader, daemon=True)
err_thread = threading.Thread(target=_stderr_reader, daemon=True)
out_thread.start()
err_thread.start()
next_id = 0
def _request(
method: str,
params: dict[str, Any],
*,
text_parts: list[str] | None = None,
reasoning_parts: list[str] | None = None,
) -> Any:
nonlocal next_id
next_id += 1
request_id = next_id
payload = {
"jsonrpc": "2.0",
"id": request_id,
"method": method,
"params": params,
}
assert proc.stdin is not None # always set: Popen(stdin=PIPE)
proc.stdin.write(json.dumps(payload) + "\n")
proc.stdin.flush()
deadline = time.time() + timeout_seconds
while time.time() < deadline:
if proc.poll() is not None:
break
try:
msg = inbox.get(timeout=0.1)
except queue.Empty:
continue
if self._handle_server_message(
msg,
process=proc,
cwd=self._acp_cwd,
text_parts=text_parts,
reasoning_parts=reasoning_parts,
):
continue
if msg.get("id") != request_id:
continue
if "error" in msg:
err = msg.get("error") or {}
raise RuntimeError(
f"Copilot ACP {method} failed: {err.get('message') or err}"
)
return msg.get("result")
stderr_text = "\n".join(stderr_tail).strip()
if proc.poll() is not None and stderr_text:
raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
raise TimeoutError(
f"Timed out waiting for Copilot ACP response to {method}."
)
try:
_request(
"initialize",
{
"protocolVersion": 1,
"clientCapabilities": {
"fs": {
"readTextFile": True,
"writeTextFile": True,
}
},
"clientInfo": {
"name": "hermes-agent",
"title": "Hermes Agent",
"version": "0.0.0",
},
},
)
session = (
_request(
"session/new",
{
"cwd": self._acp_cwd,
"mcpServers": [],
},
)
or {}
)
session_id = str(session.get("sessionId") or "").strip()
if not session_id:
raise RuntimeError("Copilot ACP did not return a sessionId.")
text_parts: list[str] = []
reasoning_parts: list[str] = []
_request(
"session/prompt",
{
"sessionId": session_id,
"prompt": [
{
"type": "text",
"text": prompt_text,
}
],
},
text_parts=text_parts,
reasoning_parts=reasoning_parts,
)
return "".join(text_parts), "".join(reasoning_parts)
finally:
self.close()
def _handle_server_message(
self,
msg: dict[str, Any],
*,
process: subprocess.Popen[str],
cwd: str,
text_parts: list[str] | None,
reasoning_parts: list[str] | None,
) -> bool:
method = msg.get("method")
if not isinstance(method, str):
return False
if method == "session/update":
params = msg.get("params") or {}
update = params.get("update") or {}
kind = str(update.get("sessionUpdate") or "").strip()
content = update.get("content") or {}
chunk_text = ""
if isinstance(content, dict):
chunk_text = str(content.get("text") or "")
if kind == "agent_message_chunk" and chunk_text and text_parts is not None:
text_parts.append(chunk_text)
elif (
kind == "agent_thought_chunk"
and chunk_text
and reasoning_parts is not None
):
reasoning_parts.append(chunk_text)
return True
if process.stdin is None:
return True
message_id = msg.get("id")
params = msg.get("params") or {}
if method == "session/request_permission":
response = _permission_denied(message_id)
elif method == "fs/read_text_file":
try:
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
block_error = get_read_block_error(str(path))
if block_error:
raise PermissionError(block_error)
content = path.read_text() if path.exists() else ""
line = params.get("line")
limit = params.get("limit")
if isinstance(line, int) and line > 1:
lines = content.splitlines(keepends=True)
start = line - 1
end = (
start + limit if isinstance(limit, int) and limit > 0 else None
)
content = "".join(lines[start:end])
if content:
content = redact_sensitive_text(content)
response = {
"jsonrpc": "2.0",
"id": message_id,
"result": {
"content": content,
},
}
except Exception as exc:
response = _jsonrpc_error(message_id, -32602, str(exc))
elif method == "fs/write_text_file":
try:
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
if is_write_denied(str(path)):
raise PermissionError(
f"Write denied: '{path}' is a protected system/credential file."
)
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(str(params.get("content") or ""))
response = {
"jsonrpc": "2.0",
"id": message_id,
"result": None,
}
except Exception as exc:
response = _jsonrpc_error(message_id, -32602, str(exc))
else:
response = _jsonrpc_error(
message_id,
-32601,
f"ACP client method '{method}' is not supported by Hermes yet.",
)
process.stdin.write(json.dumps(response) + "\n")
process.stdin.flush()
return True
-41
View File
@@ -20,46 +20,6 @@ from pathlib import Path
from hermes_constants import get_hermes_home
# Methods clients send as periodic liveness probes. They are not part of the
# ACP schema, so the acp router correctly returns JSON-RPC -32601 to the
# caller — but the supervisor task that dispatches the request then surfaces
# the raised RequestError via ``logging.exception("Background task failed")``,
# which dumps a traceback to stderr every probe interval. Clients like
# acp-bridge already treat the -32601 response as "agent alive", so the
# traceback is pure noise. We keep the protocol response intact and only
# silence the stderr noise for this specific benign case.
_BENIGN_PROBE_METHODS = frozenset({"ping", "health", "healthcheck"})
class _BenignProbeMethodFilter(logging.Filter):
"""Suppress acp 'Background task failed' tracebacks caused by unknown
liveness-probe methods (e.g. ``ping``) while leaving every other
background-task error — including method_not_found for any non-probe
method — visible in stderr.
"""
def filter(self, record: logging.LogRecord) -> bool:
if record.getMessage() != "Background task failed":
return True
exc_info = record.exc_info
if not exc_info:
return True
exc = exc_info[1]
# Imported lazily so this module stays importable when the optional
# ``agent-client-protocol`` dependency is not installed.
try:
from acp.exceptions import RequestError
except ImportError:
return True
if not isinstance(exc, RequestError):
return True
if getattr(exc, "code", None) != -32601:
return True
data = getattr(exc, "data", None)
method = data.get("method") if isinstance(data, dict) else None
return method not in _BENIGN_PROBE_METHODS
def _setup_logging() -> None:
"""Route all logging to stderr so stdout stays clean for ACP stdio."""
handler = logging.StreamHandler(sys.stderr)
@@ -69,7 +29,6 @@ def _setup_logging() -> None:
datefmt="%Y-%m-%d %H:%M:%S",
)
)
handler.addFilter(_BenignProbeMethodFilter())
root = logging.getLogger()
root.handlers.clear()
root.addHandler(handler)
-3
View File
@@ -63,9 +63,6 @@ def make_approval_callback(
logger.warning("Permission request timed out or failed: %s", exc)
return "deny"
if response is None:
return "deny"
outcome = response.outcome
if isinstance(outcome, AllowedOutcome):
option_id = outcome.option_id
+17 -83
View File
@@ -4,7 +4,6 @@ from __future__ import annotations
import asyncio
import logging
import os
from collections import defaultdict, deque
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Deque, Optional
@@ -52,7 +51,7 @@ try:
except ImportError:
from acp.schema import AuthMethod as AuthMethodAgent # type: ignore[attr-defined]
from acp_adapter.auth import detect_provider
from acp_adapter.auth import detect_provider, has_provider
from acp_adapter.events import (
make_message_cb,
make_step_cb,
@@ -60,7 +59,7 @@ from acp_adapter.events import (
make_tool_progress_cb,
)
from acp_adapter.permissions import make_approval_callback
from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets
from acp_adapter.session import SessionManager, SessionState
logger = logging.getLogger(__name__)
@@ -72,11 +71,6 @@ except Exception:
# Thread pool for running AIAgent (synchronous) in parallel.
_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent")
# Server-side page size for list_sessions. The ACP ListSessionsRequest schema
# does not expose a client-side limit, so this is a fixed cap that clients
# paginate against using `cursor` / `next_cursor`.
_LIST_SESSIONS_PAGE_SIZE = 50
def _extract_text(
prompt: list[
@@ -287,11 +281,7 @@ class HermesACPAgent(acp.Agent):
try:
from model_tools import get_tool_definitions
enabled_toolsets = _expand_acp_enabled_toolsets(
getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"],
mcp_server_names=[server.name for server in mcp_servers],
)
state.agent.enabled_toolsets = enabled_toolsets
enabled_toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
disabled_toolsets = getattr(state.agent, "disabled_toolsets", None)
state.agent.tools = get_tool_definitions(
enabled_toolsets=enabled_toolsets,
@@ -361,18 +351,9 @@ class HermesACPAgent(acp.Agent):
)
async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None:
# Only accept authenticate() calls whose method_id matches the
# provider we advertised in initialize(). Without this check,
# authenticate() would acknowledge any method_id as long as the
# server has provider credentials configured — harmless under
# Hermes' threat model (ACP is stdio-only, local-trust), but poor
# API hygiene and confusing if ACP ever grows multi-method auth.
provider = detect_provider()
if not provider:
return None
if not isinstance(method_id, str) or method_id.strip().lower() != provider:
return None
return AuthenticateResponse()
if has_provider():
return AuthenticateResponse()
return None
# ---- Session management -------------------------------------------------
@@ -456,28 +437,7 @@ class HermesACPAgent(acp.Agent):
cwd: str | None = None,
**kwargs: Any,
) -> ListSessionsResponse:
"""List ACP sessions with optional ``cwd`` filtering and cursor pagination.
``cwd`` is passed through to ``SessionManager.list_sessions`` which already
normalizes and filters by working directory. ``cursor`` is a ``session_id``
previously returned as ``next_cursor``; results resume after that entry.
Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more
results remain, ``next_cursor`` is set to the last returned ``session_id``.
"""
infos = self.session_manager.list_sessions(cwd=cwd)
if cursor:
for idx, s in enumerate(infos):
if s["session_id"] == cursor:
infos = infos[idx + 1:]
break
else:
# Unknown cursor -> empty page (do not fall back to full list).
infos = []
has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE
infos = infos[:_LIST_SESSIONS_PAGE_SIZE]
sessions = []
for s in infos:
updated_at = s.get("updated_at")
@@ -491,9 +451,7 @@ class HermesACPAgent(acp.Agent):
updated_at=updated_at,
)
)
next_cursor = sessions[-1].session_id if has_more and sessions else None
return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor)
return ListSessionsResponse(sessions=sessions)
# ---- Prompt (core) ------------------------------------------------------
@@ -559,32 +517,15 @@ class HermesACPAgent(acp.Agent):
agent.step_callback = step_cb
agent.message_callback = message_cb
# Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
# Set it INSIDE _run_agent so the TLS write happens in the executor
# thread — setting it here would write to the event-loop thread's TLS,
# not the executor's. Also set HERMES_INTERACTIVE so approval.py
# takes the CLI-interactive path (which calls the registered
# callback via prompt_dangerous_approval) instead of the
# non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff).
# ACP's conn.request_permission maps cleanly to the interactive
# callback shape — not the gateway-queue HERMES_EXEC_ASK path,
# which requires a notify_cb registered in _gateway_notify_cbs.
previous_approval_cb = None
previous_interactive = None
if approval_cb:
try:
from tools import terminal_tool as _terminal_tool
previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None)
_terminal_tool.set_approval_callback(approval_cb)
except Exception:
logger.debug("Could not set ACP approval callback", exc_info=True)
def _run_agent() -> dict:
nonlocal previous_approval_cb, previous_interactive
if approval_cb:
try:
from tools import terminal_tool as _terminal_tool
previous_approval_cb = _terminal_tool._get_approval_callback()
_terminal_tool.set_approval_callback(approval_cb)
except Exception:
logger.debug("Could not set ACP approval callback", exc_info=True)
# Signal to tools.approval that we have an interactive callback
# and the non-interactive auto-approve path must not fire.
previous_interactive = os.environ.get("HERMES_INTERACTIVE")
os.environ["HERMES_INTERACTIVE"] = "1"
try:
result = agent.run_conversation(
user_message=user_text,
@@ -596,11 +537,6 @@ class HermesACPAgent(acp.Agent):
logger.exception("Agent error in session %s", session_id)
return {"final_response": f"Error: {e}", "messages": state.history}
finally:
# Restore HERMES_INTERACTIVE.
if previous_interactive is None:
os.environ.pop("HERMES_INTERACTIVE", None)
else:
os.environ["HERMES_INTERACTIVE"] = previous_interactive
if approval_cb:
try:
from tools import terminal_tool as _terminal_tool
@@ -677,8 +613,8 @@ class HermesACPAgent(acp.Agent):
await self._conn.session_update(
session_id=session_id,
update=AvailableCommandsUpdate(
session_update="available_commands_update",
available_commands=self._available_commands(),
sessionUpdate="available_commands_update",
availableCommands=self._available_commands(),
),
)
except Exception:
@@ -758,9 +694,7 @@ class HermesACPAgent(acp.Agent):
def _cmd_tools(self, args: str, state: SessionState) -> str:
try:
from model_tools import get_tool_definitions
toolsets = _expand_acp_enabled_toolsets(
getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
)
toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True)
if not tools:
return "No tools available."
+1 -28
View File
@@ -106,24 +106,6 @@ def _register_task_cwd(task_id: str, cwd: str) -> None:
logger.debug("Failed to register ACP task cwd override", exc_info=True)
def _expand_acp_enabled_toolsets(
toolsets: List[str] | None = None,
mcp_server_names: List[str] | None = None,
) -> List[str]:
"""Return ACP toolsets plus explicit MCP server toolsets for this session."""
expanded: List[str] = []
for name in list(toolsets or ["hermes-acp"]):
if name and name not in expanded:
expanded.append(name)
for server_name in list(mcp_server_names or []):
toolset_name = f"mcp-{server_name}"
if server_name and toolset_name not in expanded:
expanded.append(toolset_name)
return expanded
def _clear_task_cwd(task_id: str) -> None:
"""Remove task-specific cwd overrides for an ACP session."""
if not task_id:
@@ -555,18 +537,9 @@ class SessionManager:
elif isinstance(model_cfg, str) and model_cfg.strip():
default_model = model_cfg.strip()
configured_mcp_servers = [
name
for name, cfg in (config.get("mcp_servers") or {}).items()
if not isinstance(cfg, dict) or cfg.get("enabled", True) is not False
]
kwargs = {
"platform": "acp",
"enabled_toolsets": _expand_acp_enabled_toolsets(
["hermes-acp"],
mcp_server_names=configured_mcp_servers,
),
"enabled_toolsets": ["hermes-acp"],
"quiet_mode": True,
"session_id": session_id,
"model": model or default_model,
-326
View File
@@ -1,326 +0,0 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Optional
import httpx
from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token
from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials
from hermes_cli.runtime_provider import resolve_runtime_provider
def _utc_now() -> datetime:
return datetime.now(timezone.utc)
@dataclass(frozen=True)
class AccountUsageWindow:
label: str
used_percent: Optional[float] = None
reset_at: Optional[datetime] = None
detail: Optional[str] = None
@dataclass(frozen=True)
class AccountUsageSnapshot:
provider: str
source: str
fetched_at: datetime
title: str = "Account limits"
plan: Optional[str] = None
windows: tuple[AccountUsageWindow, ...] = ()
details: tuple[str, ...] = ()
unavailable_reason: Optional[str] = None
@property
def available(self) -> bool:
return bool(self.windows or self.details) and not self.unavailable_reason
def _title_case_slug(value: Optional[str]) -> Optional[str]:
cleaned = str(value or "").strip()
if not cleaned:
return None
return cleaned.replace("_", " ").replace("-", " ").title()
def _parse_dt(value: Any) -> Optional[datetime]:
if value in (None, ""):
return None
if isinstance(value, (int, float)):
return datetime.fromtimestamp(float(value), tz=timezone.utc)
if isinstance(value, str):
text = value.strip()
if not text:
return None
if text.endswith("Z"):
text = text[:-1] + "+00:00"
try:
dt = datetime.fromisoformat(text)
return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
except ValueError:
return None
return None
def _format_reset(dt: Optional[datetime]) -> str:
if not dt:
return "unknown"
local_dt = dt.astimezone()
delta = dt - _utc_now()
total_seconds = int(delta.total_seconds())
if total_seconds <= 0:
return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
hours, rem = divmod(total_seconds, 3600)
minutes = rem // 60
if hours >= 24:
days, hours = divmod(hours, 24)
rel = f"in {days}d {hours}h"
elif hours > 0:
rel = f"in {hours}h {minutes}m"
else:
rel = f"in {minutes}m"
return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]:
if not snapshot:
return []
header = f"📈 {'**' if markdown else ''}{snapshot.title}{'**' if markdown else ''}"
lines = [header]
if snapshot.plan:
lines.append(f"Provider: {snapshot.provider} ({snapshot.plan})")
else:
lines.append(f"Provider: {snapshot.provider}")
for window in snapshot.windows:
if window.used_percent is None:
base = f"{window.label}: unavailable"
else:
remaining = max(0, round(100 - float(window.used_percent)))
used = max(0, round(float(window.used_percent)))
base = f"{window.label}: {remaining}% remaining ({used}% used)"
if window.reset_at:
base += f" • resets {_format_reset(window.reset_at)}"
elif window.detail:
base += f"{window.detail}"
lines.append(base)
for detail in snapshot.details:
lines.append(detail)
if snapshot.unavailable_reason:
lines.append(f"Unavailable: {snapshot.unavailable_reason}")
return lines
def _resolve_codex_usage_url(base_url: str) -> str:
normalized = (base_url or "").strip().rstrip("/")
if not normalized:
normalized = "https://chatgpt.com/backend-api/codex"
if normalized.endswith("/codex"):
normalized = normalized[: -len("/codex")]
if "/backend-api" in normalized:
return normalized + "/wham/usage"
return normalized + "/api/codex/usage"
def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]:
creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
token_data = _read_codex_tokens()
tokens = token_data.get("tokens") or {}
account_id = str(tokens.get("account_id", "") or "").strip() or None
headers = {
"Authorization": f"Bearer {creds['api_key']}",
"Accept": "application/json",
"User-Agent": "codex-cli",
}
if account_id:
headers["ChatGPT-Account-Id"] = account_id
with httpx.Client(timeout=15.0) as client:
response = client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers)
response.raise_for_status()
payload = response.json() or {}
rate_limit = payload.get("rate_limit") or {}
windows: list[AccountUsageWindow] = []
for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")):
window = rate_limit.get(key) or {}
used = window.get("used_percent")
if used is None:
continue
windows.append(
AccountUsageWindow(
label=label,
used_percent=float(used),
reset_at=_parse_dt(window.get("reset_at")),
)
)
details: list[str] = []
credits = payload.get("credits") or {}
if credits.get("has_credits"):
balance = credits.get("balance")
if isinstance(balance, (int, float)):
details.append(f"Credits balance: ${float(balance):.2f}")
elif credits.get("unlimited"):
details.append("Credits balance: unlimited")
return AccountUsageSnapshot(
provider="openai-codex",
source="usage_api",
fetched_at=_utc_now(),
plan=_title_case_slug(payload.get("plan_type")),
windows=tuple(windows),
details=tuple(details),
)
def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]:
token = (resolve_anthropic_token() or "").strip()
if not token:
return None
if not _is_oauth_token(token):
return AccountUsageSnapshot(
provider="anthropic",
source="oauth_usage_api",
fetched_at=_utc_now(),
unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.",
)
headers = {
"Authorization": f"Bearer {token}",
"Accept": "application/json",
"Content-Type": "application/json",
"anthropic-beta": "oauth-2025-04-20",
"User-Agent": "claude-code/2.1.0",
}
with httpx.Client(timeout=15.0) as client:
response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers)
response.raise_for_status()
payload = response.json() or {}
windows: list[AccountUsageWindow] = []
mapping = (
("five_hour", "Current session"),
("seven_day", "Current week"),
("seven_day_opus", "Opus week"),
("seven_day_sonnet", "Sonnet week"),
)
for key, label in mapping:
window = payload.get(key) or {}
util = window.get("utilization")
if util is None:
continue
used = float(util) * 100 if float(util) <= 1 else float(util)
windows.append(
AccountUsageWindow(
label=label,
used_percent=used,
reset_at=_parse_dt(window.get("resets_at")),
)
)
details: list[str] = []
extra = payload.get("extra_usage") or {}
if extra.get("is_enabled"):
used_credits = extra.get("used_credits")
monthly_limit = extra.get("monthly_limit")
currency = extra.get("currency") or "USD"
if isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float)):
details.append(
f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}"
)
return AccountUsageSnapshot(
provider="anthropic",
source="oauth_usage_api",
fetched_at=_utc_now(),
windows=tuple(windows),
details=tuple(details),
)
def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]:
runtime = resolve_runtime_provider(
requested="openrouter",
explicit_base_url=base_url,
explicit_api_key=api_key,
)
token = str(runtime.get("api_key", "") or "").strip()
if not token:
return None
normalized = str(runtime.get("base_url", "") or "").rstrip("/")
credits_url = f"{normalized}/credits"
key_url = f"{normalized}/key"
headers = {
"Authorization": f"Bearer {token}",
"Accept": "application/json",
}
with httpx.Client(timeout=10.0) as client:
credits_resp = client.get(credits_url, headers=headers)
credits_resp.raise_for_status()
credits = (credits_resp.json() or {}).get("data") or {}
try:
key_resp = client.get(key_url, headers=headers)
key_resp.raise_for_status()
key_data = (key_resp.json() or {}).get("data") or {}
except Exception:
key_data = {}
total_credits = float(credits.get("total_credits") or 0.0)
total_usage = float(credits.get("total_usage") or 0.0)
details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"]
windows: list[AccountUsageWindow] = []
limit = key_data.get("limit")
limit_remaining = key_data.get("limit_remaining")
limit_reset = str(key_data.get("limit_reset") or "").strip()
usage = key_data.get("usage")
if (
isinstance(limit, (int, float))
and float(limit) > 0
and isinstance(limit_remaining, (int, float))
and 0 <= float(limit_remaining) <= float(limit)
):
limit_value = float(limit)
remaining_value = float(limit_remaining)
used_percent = ((limit_value - remaining_value) / limit_value) * 100
detail_parts = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"]
if limit_reset:
detail_parts.append(f"resets {limit_reset}")
windows.append(
AccountUsageWindow(
label="API key quota",
used_percent=used_percent,
detail="".join(detail_parts),
)
)
if isinstance(usage, (int, float)):
usage_parts = [f"API key usage: ${float(usage):.2f} total"]
for value, label in (
(key_data.get("usage_daily"), "today"),
(key_data.get("usage_weekly"), "this week"),
(key_data.get("usage_monthly"), "this month"),
):
if isinstance(value, (int, float)) and float(value) > 0:
usage_parts.append(f"${float(value):.2f} {label}")
details.append("".join(usage_parts))
return AccountUsageSnapshot(
provider="openrouter",
source="credits_api",
fetched_at=_utc_now(),
windows=tuple(windows),
details=tuple(details),
)
def fetch_account_usage(
provider: Optional[str],
*,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
) -> Optional[AccountUsageSnapshot]:
normalized = str(provider or "").strip().lower()
if normalized in {"", "auto", "custom"}:
return None
try:
if normalized == "openai-codex":
return _fetch_codex_account_usage()
if normalized == "anthropic":
return _fetch_anthropic_account_usage()
if normalized == "openrouter":
return _fetch_openrouter_account_usage(base_url, api_key)
except Exception:
return None
return None
+82 -277
View File
@@ -14,13 +14,11 @@ import copy
import json
import logging
import os
import platform
import subprocess
from pathlib import Path
from hermes_constants import get_hermes_home
from types import SimpleNamespace
from typing import Any, Dict, List, Optional, Tuple
from utils import normalize_proxy_env_vars
try:
import anthropic as _anthropic_sdk
@@ -118,63 +116,6 @@ def _get_anthropic_max_output(model: str) -> int:
return best_val
def _resolve_positive_anthropic_max_tokens(value) -> Optional[int]:
"""Return ``value`` floored to a positive int, or ``None`` if it is not a
finite positive number. Ported from openclaw/openclaw#66664.
Anthropic's Messages API rejects ``max_tokens`` values that are 0,
negative, non-integer, or non-finite with HTTP 400. Python's ``or``
idiom (``max_tokens or fallback``) correctly catches ``0`` but lets
negative ints and fractional floats (``-1``, ``0.5``) through to the
API, producing a user-visible failure instead of a local error.
"""
# Booleans are a subclass of int — exclude explicitly so ``True`` doesn't
# silently become 1 and ``False`` doesn't become 0.
if isinstance(value, bool):
return None
if not isinstance(value, (int, float)):
return None
try:
import math
if not math.isfinite(value):
return None
except Exception:
return None
floored = int(value) # truncates toward zero for floats
return floored if floored > 0 else None
def _resolve_anthropic_messages_max_tokens(
requested,
model: str,
context_length: Optional[int] = None,
) -> int:
"""Resolve the ``max_tokens`` budget for an Anthropic Messages call.
Prefers ``requested`` when it is a positive finite number; otherwise
falls back to the model's output ceiling. Raises ``ValueError`` if no
positive budget can be resolved (should not happen with current model
table defaults, but guards against a future regression where
``_get_anthropic_max_output`` could return ``0``).
Separately, callers apply a context-window clamp — this resolver does
not, to keep the positive-value contract independent of endpoint
specifics.
Ported from openclaw/openclaw#66664 (resolveAnthropicMessagesMaxTokens).
"""
resolved = _resolve_positive_anthropic_max_tokens(requested)
if resolved is not None:
return resolved
fallback = _get_anthropic_max_output(model)
if fallback > 0:
return fallback
raise ValueError(
f"Anthropic Messages adapter requires a positive max_tokens value for "
f"model {model!r}; got {requested!r} and no model default resolved."
)
def _supports_adaptive_thinking(model: str) -> bool:
"""Return True for Claude 4.6+ models that support adaptive thinking."""
return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
@@ -279,9 +220,8 @@ def _is_oauth_token(key: str) -> bool:
Positively identifies Anthropic OAuth tokens by their key format:
- ``sk-ant-`` prefix (but NOT ``sk-ant-api``) → setup tokens, managed keys
- ``eyJ`` prefix → JWTs from the Anthropic OAuth flow
- ``cc-`` prefix → Claude Code OAuth access tokens (from CLAUDE_CODE_OAUTH_TOKEN)
Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match any pattern
Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match either pattern
and correctly return False.
"""
if not key:
@@ -295,9 +235,6 @@ def _is_oauth_token(key: str) -> bool:
# JWTs from Anthropic OAuth flow
if key.startswith("eyJ"):
return True
# Claude Code OAuth access tokens (opaque, from CLAUDE_CODE_OAUTH_TOKEN)
if key.startswith("cc-"):
return True
return False
@@ -328,14 +265,6 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
return True # Any other endpoint is a third-party proxy
def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
"""Return True for Kimi's /coding endpoint that requires claude-code UA."""
normalized = _normalize_base_url_text(base_url)
if not normalized:
return False
return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
def _requires_bearer_auth(base_url: str | None) -> bool:
"""Return True for Anthropic-compatible providers that require Bearer auth.
@@ -363,15 +292,9 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
return _COMMON_BETAS
def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
def build_anthropic_client(api_key: str, base_url: str = None):
"""Create an Anthropic client, auto-detecting setup-tokens vs API keys.
If *timeout* is provided it overrides the default 900s read timeout. The
connect timeout stays at 10s. Callers pass this from the per-provider /
per-model ``request_timeout_seconds`` config so Anthropic-native and
Anthropic-compatible providers respect the same knob as OpenAI-wire
providers.
Returns an anthropic.Anthropic instance.
"""
if _anthropic_sdk is None:
@@ -379,41 +302,19 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
"The 'anthropic' package is required for the Anthropic provider. "
"Install it with: pip install 'anthropic>=0.39.0'"
)
normalize_proxy_env_vars()
from httpx import Timeout
normalized_base_url = _normalize_base_url_text(base_url)
_read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
kwargs = {
"timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
"timeout": Timeout(timeout=900.0, connect=10.0),
}
if normalized_base_url:
# Azure Anthropic endpoints require an ``api-version`` query parameter.
# Pass it via default_query so the SDK appends it to every request URL
# without corrupting the base_url (appending it directly produces
# malformed paths like /anthropic?api-version=.../v1/messages).
_is_azure_endpoint = "azure.com" in normalized_base_url.lower()
if _is_azure_endpoint and "api-version" not in normalized_base_url:
kwargs["base_url"] = normalized_base_url.rstrip("/")
kwargs["default_query"] = {"api-version": "2025-04-15"}
else:
kwargs["base_url"] = normalized_base_url
kwargs["base_url"] = normalized_base_url
common_betas = _common_betas_for_base_url(normalized_base_url)
if _is_kimi_coding_endpoint(base_url):
# Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
# to be recognized as a valid Coding Agent. Without it, returns 403.
# Check this BEFORE _requires_bearer_auth since both match api.kimi.com/coding.
kwargs["api_key"] = api_key
kwargs["default_headers"] = {
"User-Agent": "claude-code/0.1.0",
**( {"anthropic-beta": ",".join(common_betas)} if common_betas else {} )
}
elif _requires_bearer_auth(normalized_base_url):
if _requires_bearer_auth(normalized_base_url):
# Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
# Authorization: Bearer *** for regular API keys. Route those endpoints
# Authorization: Bearer even for regular API keys. Route those endpoints
# through auth_token so the SDK sends Bearer auth instead of x-api-key.
# Check this before OAuth token shape detection because MiniMax secrets do
# not use Anthropic's sk-ant-api prefix and would otherwise be misread as
@@ -476,72 +377,8 @@ def build_anthropic_bedrock_client(region: str):
)
def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
"""Read Claude Code OAuth credentials from the macOS Keychain.
Claude Code >=2.1.114 stores credentials in the macOS Keychain under the
service name "Claude Code-credentials" rather than (or in addition to)
the JSON file at ~/.claude/.credentials.json.
The password field contains a JSON string with the same claudeAiOauth
structure as the JSON file.
Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
"""
import platform
import subprocess
if platform.system() != "Darwin":
return None
try:
# Read the "Claude Code-credentials" generic password entry
result = subprocess.run(
["security", "find-generic-password",
"-s", "Claude Code-credentials",
"-w"],
capture_output=True,
text=True,
timeout=5,
)
except (OSError, subprocess.TimeoutExpired):
logger.debug("Keychain: security command not available or timed out")
return None
if result.returncode != 0:
logger.debug("Keychain: no entry found for 'Claude Code-credentials'")
return None
raw = result.stdout.strip()
if not raw:
return None
try:
data = json.loads(raw)
except json.JSONDecodeError:
logger.debug("Keychain: credentials payload is not valid JSON")
return None
oauth_data = data.get("claudeAiOauth")
if oauth_data and isinstance(oauth_data, dict):
access_token = oauth_data.get("accessToken", "")
if access_token:
return {
"accessToken": access_token,
"refreshToken": oauth_data.get("refreshToken", ""),
"expiresAt": oauth_data.get("expiresAt", 0),
"source": "macos_keychain",
}
return None
def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
"""Read refreshable Claude Code OAuth credentials.
Checks two sources in order:
1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry
2. ~/.claude/.credentials.json file
"""Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.
This intentionally excludes ~/.claude.json primaryApiKey. Opencode's
subscription flow is OAuth/setup-token based with refreshable credentials,
@@ -550,12 +387,6 @@ def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
"""
# Try macOS Keychain first (covers Claude Code >=2.1.114)
kc_creds = _read_claude_code_credentials_from_keychain()
if kc_creds:
return kc_creds
# Fall back to JSON file
cred_path = Path.home() / ".claude" / ".credentials.json"
if cred_path.exists():
try:
@@ -726,9 +557,7 @@ def _write_claude_code_credentials(
existing["claudeAiOauth"] = oauth_data
cred_path.parent.mkdir(parents=True, exist_ok=True)
_tmp_cred = cred_path.with_suffix(".tmp")
_tmp_cred.write_text(json.dumps(existing, indent=2), encoding="utf-8")
_tmp_cred.replace(cred_path)
cred_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
# Restrict permissions (credentials file)
cred_path.chmod(0o600)
except (OSError, IOError) as e:
@@ -995,26 +824,6 @@ def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
# ---------------------------------------------------------------------------
def _is_bedrock_model_id(model: str) -> bool:
"""Detect AWS Bedrock model IDs that use dots as namespace separators.
Bedrock model IDs come in two forms:
- Bare: ``anthropic.claude-opus-4-7``
- Regional (inference profiles): ``us.anthropic.claude-sonnet-4-5-v1:0``
In both cases the dots separate namespace components, not version
numbers, and must be preserved verbatim for the Bedrock API.
"""
lower = model.lower()
# Regional inference-profile prefixes
if any(lower.startswith(p) for p in ("global.", "us.", "eu.", "ap.", "jp.")):
return True
# Bare Bedrock model IDs: provider.model-family
if lower.startswith("anthropic."):
return True
return False
def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
"""Normalize a model name for the Anthropic API.
@@ -1022,19 +831,11 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
- Converts dots to hyphens in version numbers (OpenRouter uses dots,
Anthropic uses hyphens: claude-opus-4.6 → claude-opus-4-6), unless
preserve_dots is True (e.g. for Alibaba/DashScope: qwen3.5-plus).
- Preserves Bedrock model IDs (``anthropic.claude-opus-4-7``) and
regional inference profiles (``us.anthropic.claude-*``) whose dots
are namespace separators, not version separators.
"""
lower = model.lower()
if lower.startswith("anthropic/"):
model = model[len("anthropic/"):]
if not preserve_dots:
# Bedrock model IDs use dots as namespace separators
# (e.g. "anthropic.claude-opus-4-7", "us.anthropic.claude-*").
# These must not be converted to hyphens. See issue #12295.
if _is_bedrock_model_id(model):
return model
# OpenRouter uses dots for version separators (claude-opus-4.6),
# Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
model = model.replace(".", "-")
@@ -1254,31 +1055,6 @@ def convert_messages_to_anthropic(
"name": fn.get("name", ""),
"input": parsed_args,
})
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
# tool-call messages to carry reasoning_content when thinking is
# enabled server-side. Preserve it as a thinking block so Kimi
# can validate the message history. See hermes-agent#13848.
#
# Accept empty string "" — _copy_reasoning_content_for_api()
# injects "" as a tier-3 fallback for Kimi tool-call messages
# that had no reasoning. Kimi requires the field to exist, even
# if empty.
#
# Prepend (not append): Anthropic protocol requires thinking
# blocks before text and tool_use blocks.
#
# Guard: only add when reasoning_details didn't already contribute
# thinking blocks. On native Anthropic, reasoning_details produces
# signed thinking blocks — adding another unsigned one from
# reasoning_content would create a duplicate (same text) that gets
# downgraded to a spurious text block on the last assistant message.
reasoning_content = m.get("reasoning_content")
_already_has_thinking = any(
isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")
for b in blocks
)
if isinstance(reasoning_content, str) and not _already_has_thinking:
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
# Anthropic rejects empty assistant content
effective = blocks or content
if not effective or effective == "":
@@ -1434,7 +1210,6 @@ def convert_messages_to_anthropic(
# cache markers can interfere with signature validation.
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
_is_third_party = _is_third_party_anthropic_endpoint(base_url)
_is_kimi = _is_kimi_coding_endpoint(base_url)
last_assistant_idx = None
for i in range(len(result) - 1, -1, -1):
@@ -1446,25 +1221,7 @@ def convert_messages_to_anthropic(
if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
continue
if _is_kimi:
# Kimi's /coding endpoint enables thinking server-side and
# requires unsigned thinking blocks on replayed assistant
# tool-call messages. Strip signed Anthropic blocks (Kimi
# can't validate signatures) but preserve the unsigned ones
# we synthesised from reasoning_content above.
new_content = []
for b in m["content"]:
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
new_content.append(b)
continue
if b.get("signature") or b.get("data"):
# Anthropic-signed block — Kimi can't validate, strip
continue
# Unsigned thinking (synthesised from reasoning_content) —
# keep it: Kimi needs it for message-history validation.
new_content.append(b)
m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
elif _is_third_party or idx != last_assistant_idx:
if _is_third_party or idx != last_assistant_idx:
# Third-party endpoint: strip ALL thinking blocks from every
# assistant message — signatures are Anthropic-proprietary.
# Direct Anthropic: strip from non-latest assistant messages only.
@@ -1562,12 +1319,7 @@ def build_anthropic_kwargs(
model = normalize_model_name(model, preserve_dots=preserve_dots)
# effective_max_tokens = output cap for this call (≠ total context window)
# Use the resolver helper so non-positive values (negative ints,
# fractional floats, NaN, non-numeric) fail locally with a clear error
# rather than 400-ing at the Anthropic API. See openclaw/openclaw#66664.
effective_max_tokens = _resolve_anthropic_messages_max_tokens(
max_tokens, model, context_length=context_length
)
effective_max_tokens = max_tokens or _get_anthropic_max_output(model)
# Clamp output cap to fit inside the total context window.
# Only matters for small custom endpoints where context_length < native
@@ -1646,25 +1398,11 @@ def build_anthropic_kwargs(
# MiniMax Anthropic-compat endpoints support thinking (manual mode only,
# not adaptive). Haiku does NOT support extended thinking — skip entirely.
#
# Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
# its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
# validates the message history and requires every prior assistant
# tool-call message to carry OpenAI-style ``reasoning_content``. The
# Anthropic path never populates that field, and
# ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
# on third-party endpoints — so the request fails with HTTP 400
# "thinking is enabled but reasoning_content is missing in assistant
# tool call message at index N". Kimi's reasoning is driven server-side
# on the /coding route, so skip Anthropic's thinking parameter entirely
# for that host. (Kimi on chat_completions enables thinking via
# extra_body in the ChatCompletionsTransport — see #13503.)
#
# On 4.7+ the `thinking.display` field defaults to "omitted", which
# silently hides reasoning text that Hermes surfaces in its CLI. We
# request "summarized" so the reasoning blocks stay populated — matching
# 4.6 behavior and preserving the activity-feed UX during long tool runs.
_is_kimi_coding = _is_kimi_coding_endpoint(base_url)
if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
if reasoning_config and isinstance(reasoning_config, dict):
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
effort = str(reasoning_config.get("effort", "medium")).lower()
budget = THINKING_BUDGET.get(effort, 8000)
@@ -1689,9 +1427,9 @@ def build_anthropic_kwargs(
# ── Strip sampling params on 4.7+ ─────────────────────────────────
# Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
# Callers (auxiliary_client, etc.) may set these for older models;
# drop them here as a safety net so upstream 4.6 → 4.7 migrations
# don't require coordinated edits everywhere.
# Callers (auxiliary_client, flush_memories, etc.) may set these for
# older models; drop them here as a safety net so upstream 4.6 → 4.7
# migrations don't require coordinated edits everywhere.
if _forbids_sampling_params(model):
for _sampling_key in ("temperature", "top_p", "top_k"):
kwargs.pop(_sampling_key, None)
@@ -1713,3 +1451,70 @@ def build_anthropic_kwargs(
return kwargs
def normalize_anthropic_response(
response,
strip_tool_prefix: bool = False,
) -> Tuple[SimpleNamespace, str]:
"""Normalize Anthropic response to match the shape expected by AIAgent.
Returns (assistant_message, finish_reason) where assistant_message has
.content, .tool_calls, and .reasoning attributes.
When *strip_tool_prefix* is True, removes the ``mcp_`` prefix that was
added to tool names for OAuth Claude Code compatibility.
"""
text_parts = []
reasoning_parts = []
reasoning_details = []
tool_calls = []
for block in response.content:
if block.type == "text":
text_parts.append(block.text)
elif block.type == "thinking":
reasoning_parts.append(block.thinking)
block_dict = _to_plain_data(block)
if isinstance(block_dict, dict):
reasoning_details.append(block_dict)
elif block.type == "tool_use":
name = block.name
if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
name = name[len(_MCP_TOOL_PREFIX):]
tool_calls.append(
SimpleNamespace(
id=block.id,
type="function",
function=SimpleNamespace(
name=name,
arguments=json.dumps(block.input),
),
)
)
# Map Anthropic stop_reason to OpenAI finish_reason.
# Newer stop reasons added in Claude 4.5+ / 4.7:
# - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
# - model_context_window_exceeded: hit context limit (not max_tokens)
# Both need distinct handling upstream — a refusal should surface to the
# user with a clear message, and a context-window overflow should trigger
# compression/truncation rather than be treated as normal end-of-turn.
stop_reason_map = {
"end_turn": "stop",
"tool_use": "tool_calls",
"max_tokens": "length",
"stop_sequence": "stop",
"refusal": "content_filter",
"model_context_window_exceeded": "length",
}
finish_reason = stop_reason_map.get(response.stop_reason, "stop")
return (
SimpleNamespace(
content="\n".join(text_parts) if text_parts else None,
tool_calls=tool_calls or None,
reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
reasoning_content=None,
reasoning_details=reasoning_details or None,
),
finish_reason,
)
+194 -936
View File
File diff suppressed because it is too large Load Diff
+2 -130
View File
@@ -87,114 +87,6 @@ def reset_client_cache():
_bedrock_control_client_cache.clear()
def invalidate_runtime_client(region: str) -> bool:
"""Evict the cached ``bedrock-runtime`` client for a single region.
Per-region counterpart to :func:`reset_client_cache`. Used by the converse
call wrappers to discard clients whose underlying HTTP connection has
gone stale, so the next call allocates a fresh client (with a fresh
connection pool) instead of reusing a dead socket.
Returns True if a cached entry was evicted, False if the region was not
cached.
"""
existed = region in _bedrock_runtime_client_cache
_bedrock_runtime_client_cache.pop(region, None)
return existed
# ---------------------------------------------------------------------------
# Stale-connection detection
# ---------------------------------------------------------------------------
#
# boto3 caches its HTTPS connection pool inside the client object. When a
# pooled connection is killed out from under us (NAT timeout, VPN flap,
# server-side TCP RST, proxy idle cull, etc.), the next use surfaces as
# one of a handful of low-level exceptions — most commonly
# ``botocore.exceptions.ConnectionClosedError`` or
# ``urllib3.exceptions.ProtocolError``. urllib3 also trips an internal
# ``assert`` in a couple of paths (connection pool state checks, chunked
# response readers) which bubbles up as a bare ``AssertionError`` with an
# empty ``str(exc)``.
#
# In all of these cases the client is the problem, not the request: retrying
# with the same cached client reproduces the failure until the process
# restarts. The fix is to evict the region's cached client so the next
# attempt builds a new one.
_STALE_LIB_MODULE_PREFIXES = (
"urllib3.",
"botocore.",
"boto3.",
)
def _traceback_frames_modules(exc: BaseException):
"""Yield ``__name__``-style module strings for each frame in exc's traceback."""
tb = getattr(exc, "__traceback__", None)
while tb is not None:
frame = tb.tb_frame
module = frame.f_globals.get("__name__", "")
yield module or ""
tb = tb.tb_next
def is_stale_connection_error(exc: BaseException) -> bool:
"""Return True if ``exc`` indicates a dead/stale Bedrock HTTP connection.
Matches:
* ``botocore.exceptions.ConnectionError`` and subclasses
(``ConnectionClosedError``, ``EndpointConnectionError``,
``ReadTimeoutError``, ``ConnectTimeoutError``).
* ``urllib3.exceptions.ProtocolError`` / ``NewConnectionError`` /
``ConnectionError`` (best-effort import — urllib3 is a transitive
dependency of botocore so it is always available in practice).
* Bare ``AssertionError`` raised from a frame inside urllib3, botocore,
or boto3. These are internal-invariant failures (typically triggered
by corrupted connection-pool state after a dropped socket) and are
recoverable by swapping the client.
Non-library ``AssertionError``s (from application code or tests) are
intentionally not matched — only library-internal asserts signal stale
connection state.
"""
# botocore: the canonical signal — HTTPClientError is the umbrella for
# ConnectionClosedError, ReadTimeoutError, EndpointConnectionError,
# ConnectTimeoutError, and ProxyConnectionError. ConnectionError covers
# the same family via a different branch of the hierarchy.
try:
from botocore.exceptions import (
ConnectionError as BotoConnectionError,
HTTPClientError,
)
botocore_errors: tuple = (BotoConnectionError, HTTPClientError)
except ImportError: # pragma: no cover — botocore always present with boto3
botocore_errors = ()
if botocore_errors and isinstance(exc, botocore_errors):
return True
# urllib3: low-level transport failures
try:
from urllib3.exceptions import (
ProtocolError,
NewConnectionError,
ConnectionError as Urllib3ConnectionError,
)
urllib3_errors = (ProtocolError, NewConnectionError, Urllib3ConnectionError)
except ImportError: # pragma: no cover
urllib3_errors = ()
if urllib3_errors and isinstance(exc, urllib3_errors):
return True
# Library-internal AssertionError (urllib3 / botocore / boto3)
if isinstance(exc, AssertionError):
for module in _traceback_frames_modules(exc):
if any(module.startswith(prefix) for prefix in _STALE_LIB_MODULE_PREFIXES):
return True
return False
# ---------------------------------------------------------------------------
# AWS credential detection
# ---------------------------------------------------------------------------
@@ -895,17 +787,7 @@ def call_converse(
guardrail_config=guardrail_config,
)
try:
response = client.converse(**kwargs)
except Exception as exc:
if is_stale_connection_error(exc):
logger.warning(
"bedrock: stale-connection error on converse(region=%s, model=%s): "
"%s — evicting cached client so the next call reconnects.",
region, model, type(exc).__name__,
)
invalidate_runtime_client(region)
raise
response = client.converse(**kwargs)
return normalize_converse_response(response)
@@ -937,17 +819,7 @@ def call_converse_stream(
guardrail_config=guardrail_config,
)
try:
response = client.converse_stream(**kwargs)
except Exception as exc:
if is_stale_connection_error(exc):
logger.warning(
"bedrock: stale-connection error on converse_stream(region=%s, "
"model=%s): %s — evicting cached client so the next call reconnects.",
region, model, type(exc).__name__,
)
invalidate_runtime_client(region)
raise
response = client.converse_stream(**kwargs)
return normalize_converse_stream_events(response)
-999
View File
@@ -1,999 +0,0 @@
"""Codex Responses API adapter.
Pure format-conversion and normalization logic for the OpenAI Responses API
(used by OpenAI Codex, xAI, GitHub Models, and other Responses-compatible endpoints).
Extracted from run_agent.py to isolate Responses API-specific logic from the
core agent loop. All functions are stateless — they operate on the data passed
in and return transformed results.
"""
from __future__ import annotations
import hashlib
import json
import logging
import re
import uuid
from types import SimpleNamespace
from typing import Any, Dict, List, Optional
from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
logger = logging.getLogger(__name__)
# Matches Codex/Harmony tool-call serialization that occasionally leaks into
# assistant-message content when the model fails to emit a structured
# ``function_call`` item. Accepts the common forms:
#
# to=functions.exec_command
# assistant to=functions.exec_command
# <|channel|>commentary to=functions.exec_command
#
# ``to=functions.<name>`` is the stable marker — the optional ``assistant`` or
# Harmony channel prefix varies by degeneration mode. Case-insensitive to
# cover lowercase/uppercase ``assistant`` variants.
_TOOL_CALL_LEAK_PATTERN = re.compile(
r"(?:^|[\s>|])to=functions\.[A-Za-z_][\w.]*",
re.IGNORECASE,
)
# ---------------------------------------------------------------------------
# Multimodal content helpers
# ---------------------------------------------------------------------------
def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> List[Dict[str, Any]]:
"""Convert chat-style multimodal content to Responses API input parts.
Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
Output: ``[{"type":"input_text"|"output_text"|"input_image", ...}]`` (Responses format)
The ``role`` parameter controls the text content type:
- ``"user"`` (default) → ``"input_text"``
- ``"assistant"`` → ``"output_text"``
The Responses API rejects ``input_text`` inside assistant messages and
``output_text`` inside user messages, so callers MUST pass the correct
role for the message being converted.
Returns an empty list when ``content`` is not a list or contains no
recognized parts — callers fall back to the string path.
"""
text_type = "output_text" if role == "assistant" else "input_text"
if not isinstance(content, list):
return []
converted: List[Dict[str, Any]] = []
for part in content:
if isinstance(part, str):
if part:
converted.append({"type": text_type, "text": part})
continue
if not isinstance(part, dict):
continue
ptype = str(part.get("type") or "").strip().lower()
if ptype in {"text", "input_text", "output_text"}:
text = part.get("text")
if isinstance(text, str) and text:
converted.append({"type": text_type, "text": text})
continue
if ptype in {"image_url", "input_image"}:
image_ref = part.get("image_url")
detail = part.get("detail")
if isinstance(image_ref, dict):
url = image_ref.get("url")
detail = image_ref.get("detail", detail)
else:
url = image_ref
if not isinstance(url, str) or not url:
continue
image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
if isinstance(detail, str) and detail.strip():
image_part["detail"] = detail.strip()
converted.append(image_part)
return converted
def _summarize_user_message_for_log(content: Any) -> str:
"""Return a short text summary of a user message for logging/trajectory.
Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}``
parts from the API server. Logging, spinner previews, and trajectory
files all want a plain string — this helper extracts the first chunk of
text and notes any attached images. Returns an empty string for empty
lists and ``str(content)`` for unexpected scalar types.
"""
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
text_bits: List[str] = []
image_count = 0
for part in content:
if isinstance(part, str):
if part:
text_bits.append(part)
continue
if not isinstance(part, dict):
continue
ptype = str(part.get("type") or "").strip().lower()
if ptype in {"text", "input_text", "output_text"}:
text = part.get("text")
if isinstance(text, str) and text:
text_bits.append(text)
elif ptype in {"image_url", "input_image"}:
image_count += 1
summary = " ".join(text_bits).strip()
if image_count:
note = f"[{image_count} image{'s' if image_count != 1 else ''}]"
summary = f"{note} {summary}" if summary else note
return summary
try:
return str(content)
except Exception:
return ""
# ---------------------------------------------------------------------------
# ID helpers
# ---------------------------------------------------------------------------
def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str:
"""Generate a deterministic call_id from tool call content.
Used as a fallback when the API doesn't provide a call_id.
Deterministic IDs prevent cache invalidation — random UUIDs would
make every API call's prefix unique, breaking OpenAI's prompt cache.
"""
seed = f"{fn_name}:{arguments}:{index}"
digest = hashlib.sha256(seed.encode("utf-8", errors="replace")).hexdigest()[:12]
return f"call_{digest}"
def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]:
"""Split a stored tool id into (call_id, response_item_id)."""
if not isinstance(raw_id, str):
return None, None
value = raw_id.strip()
if not value:
return None, None
if "|" in value:
call_id, response_item_id = value.split("|", 1)
call_id = call_id.strip() or None
response_item_id = response_item_id.strip() or None
return call_id, response_item_id
if value.startswith("fc_"):
return None, value
return value, None
def _derive_responses_function_call_id(
call_id: str,
response_item_id: Optional[str] = None,
) -> str:
"""Build a valid Responses `function_call.id` (must start with `fc_`)."""
if isinstance(response_item_id, str):
candidate = response_item_id.strip()
if candidate.startswith("fc_"):
return candidate
source = (call_id or "").strip()
if source.startswith("fc_"):
return source
if source.startswith("call_") and len(source) > len("call_"):
return f"fc_{source[len('call_'):]}"
sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source)
if sanitized.startswith("fc_"):
return sanitized
if sanitized.startswith("call_") and len(sanitized) > len("call_"):
return f"fc_{sanitized[len('call_'):]}"
if sanitized:
return f"fc_{sanitized[:48]}"
seed = source or str(response_item_id or "") or uuid.uuid4().hex
digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24]
return f"fc_{digest}"
# ---------------------------------------------------------------------------
# Schema conversion
# ---------------------------------------------------------------------------
def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
"""Convert chat-completions tool schemas to Responses function-tool schemas."""
if not tools:
return None
converted: List[Dict[str, Any]] = []
for item in tools:
fn = item.get("function", {}) if isinstance(item, dict) else {}
name = fn.get("name")
if not isinstance(name, str) or not name.strip():
continue
converted.append({
"type": "function",
"name": name,
"description": fn.get("description", ""),
"strict": False,
"parameters": fn.get("parameters", {"type": "object", "properties": {}}),
})
return converted or None
# ---------------------------------------------------------------------------
# Message format conversion
# ---------------------------------------------------------------------------
_RESPONSE_MESSAGE_STATUSES = {"completed", "incomplete", "in_progress"}
def _normalize_responses_message_status(value: Any, *, default: str = "completed") -> str:
"""Normalize a Responses assistant message status for replay.
The API accepts completed/incomplete/in_progress on replayed assistant
output messages. Preserve those exactly (modulo case/hyphen spelling) so
incomplete Codex continuation turns don't get falsely marked completed.
"""
if isinstance(value, str):
status = value.strip().lower().replace("-", "_").replace(" ", "_")
if status in _RESPONSE_MESSAGE_STATUSES:
return status
return default
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Convert internal chat-style messages to Responses input items."""
items: List[Dict[str, Any]] = []
seen_item_ids: set = set()
for msg in messages:
if not isinstance(msg, dict):
continue
role = msg.get("role")
if role == "system":
continue
if role in {"user", "assistant"}:
content = msg.get("content", "")
if isinstance(content, list):
content_parts = _chat_content_to_responses_parts(content, role=role)
text_type = "output_text" if role == "assistant" else "input_text"
content_text = "".join(
p.get("text", "") for p in content_parts if p.get("type") == text_type
)
else:
content_parts = []
content_text = str(content) if content is not None else ""
if role == "assistant":
# Replay encrypted reasoning items from previous turns
# so the API can maintain coherent reasoning chains.
codex_reasoning = msg.get("codex_reasoning_items")
has_codex_reasoning = False
if isinstance(codex_reasoning, list):
for ri in codex_reasoning:
if isinstance(ri, dict) and ri.get("encrypted_content"):
item_id = ri.get("id")
if item_id and item_id in seen_item_ids:
continue
# Strip the "id" field — with store=False the
# Responses API cannot look up items by ID and
# returns 404. The encrypted_content blob is
# self-contained for reasoning chain continuity.
replay_item = {k: v for k, v in ri.items() if k != "id"}
items.append(replay_item)
if item_id:
seen_item_ids.add(item_id)
has_codex_reasoning = True
# Replay exact assistant message items (with id/phase) from
# previous turns so the API can maintain prefix-cache hits.
# OpenAI docs: "preserve and resend phase on all assistant
# messages — dropping it can degrade performance."
codex_message_items = msg.get("codex_message_items")
replayed_message_items = 0
if isinstance(codex_message_items, list):
for raw_item in codex_message_items:
if not isinstance(raw_item, dict):
continue
if raw_item.get("type") != "message" or raw_item.get("role") != "assistant":
continue
raw_content_parts = raw_item.get("content")
if not isinstance(raw_content_parts, list):
continue
normalized_content_parts = []
for part in raw_content_parts:
if not isinstance(part, dict):
continue
part_type = str(part.get("type") or "").strip()
if part_type not in {"output_text", "text"}:
continue
text = part.get("text", "")
if text is None:
text = ""
if not isinstance(text, str):
text = str(text)
normalized_content_parts.append({"type": "output_text", "text": text})
if not normalized_content_parts:
continue
replay_item = {
"type": "message",
"role": "assistant",
"status": _normalize_responses_message_status(raw_item.get("status")),
"content": normalized_content_parts,
}
item_id = raw_item.get("id")
if isinstance(item_id, str) and item_id.strip():
replay_item["id"] = item_id.strip()
phase = raw_item.get("phase")
if isinstance(phase, str) and phase.strip():
replay_item["phase"] = phase.strip()
items.append(replay_item)
replayed_message_items += 1
if replayed_message_items > 0:
pass
elif content_parts:
items.append({"role": "assistant", "content": content_parts})
elif content_text.strip():
items.append({"role": "assistant", "content": content_text})
elif has_codex_reasoning:
# The Responses API requires a following item after each
# reasoning item (otherwise: missing_following_item error).
# When the assistant produced only reasoning with no visible
# content, emit an empty assistant message as the required
# following item.
items.append({"role": "assistant", "content": ""})
tool_calls = msg.get("tool_calls")
if isinstance(tool_calls, list):
for tc in tool_calls:
if not isinstance(tc, dict):
continue
fn = tc.get("function", {})
fn_name = fn.get("name")
if not isinstance(fn_name, str) or not fn_name.strip():
continue
embedded_call_id, embedded_response_item_id = _split_responses_tool_id(
tc.get("id")
)
call_id = tc.get("call_id")
if not isinstance(call_id, str) or not call_id.strip():
call_id = embedded_call_id
if not isinstance(call_id, str) or not call_id.strip():
if (
isinstance(embedded_response_item_id, str)
and embedded_response_item_id.startswith("fc_")
and len(embedded_response_item_id) > len("fc_")
):
call_id = f"call_{embedded_response_item_id[len('fc_'):]}"
else:
_raw_args = str(fn.get("arguments", "{}"))
call_id = _deterministic_call_id(fn_name, _raw_args, len(items))
call_id = call_id.strip()
arguments = fn.get("arguments", "{}")
if isinstance(arguments, dict):
arguments = json.dumps(arguments, ensure_ascii=False)
elif not isinstance(arguments, str):
arguments = str(arguments)
arguments = arguments.strip() or "{}"
items.append({
"type": "function_call",
"call_id": call_id,
"name": fn_name,
"arguments": arguments,
})
continue
# Non-assistant (user) role: emit multimodal parts when present,
# otherwise fall back to the text payload.
if content_parts:
items.append({"role": role, "content": content_parts})
else:
items.append({"role": role, "content": content_text})
continue
if role == "tool":
raw_tool_call_id = msg.get("tool_call_id")
call_id, _ = _split_responses_tool_id(raw_tool_call_id)
if not isinstance(call_id, str) or not call_id.strip():
if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip():
call_id = raw_tool_call_id.strip()
if not isinstance(call_id, str) or not call_id.strip():
continue
items.append({
"type": "function_call_output",
"call_id": call_id,
"output": str(msg.get("content", "") or ""),
})
return items
# ---------------------------------------------------------------------------
# Input preflight / validation
# ---------------------------------------------------------------------------
def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
if not isinstance(raw_items, list):
raise ValueError("Codex Responses input must be a list of input items.")
normalized: List[Dict[str, Any]] = []
seen_ids: set = set()
for idx, item in enumerate(raw_items):
if not isinstance(item, dict):
raise ValueError(f"Codex Responses input[{idx}] must be an object.")
item_type = item.get("type")
if item_type == "function_call":
call_id = item.get("call_id")
name = item.get("name")
if not isinstance(call_id, str) or not call_id.strip():
raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.")
if not isinstance(name, str) or not name.strip():
raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.")
arguments = item.get("arguments", "{}")
if isinstance(arguments, dict):
arguments = json.dumps(arguments, ensure_ascii=False)
elif not isinstance(arguments, str):
arguments = str(arguments)
arguments = arguments.strip() or "{}"
normalized.append(
{
"type": "function_call",
"call_id": call_id.strip(),
"name": name.strip(),
"arguments": arguments,
}
)
continue
if item_type == "function_call_output":
call_id = item.get("call_id")
if not isinstance(call_id, str) or not call_id.strip():
raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.")
output = item.get("output", "")
if output is None:
output = ""
if not isinstance(output, str):
output = str(output)
normalized.append(
{
"type": "function_call_output",
"call_id": call_id.strip(),
"output": output,
}
)
continue
if item_type == "reasoning":
encrypted = item.get("encrypted_content")
if isinstance(encrypted, str) and encrypted:
item_id = item.get("id")
if isinstance(item_id, str) and item_id:
if item_id in seen_ids:
continue
seen_ids.add(item_id)
reasoning_item = {"type": "reasoning", "encrypted_content": encrypted}
# Do NOT include the "id" in the outgoing item — with
# store=False (our default) the API tries to resolve the
# id server-side and returns 404. The id is still used
# above for local deduplication via seen_ids.
summary = item.get("summary")
if isinstance(summary, list):
reasoning_item["summary"] = summary
else:
reasoning_item["summary"] = []
normalized.append(reasoning_item)
continue
if item_type == "message":
role = item.get("role")
if role != "assistant":
raise ValueError(f"Codex Responses input[{idx}] message items must have role='assistant'.")
content = item.get("content")
if not isinstance(content, list):
raise ValueError(f"Codex Responses input[{idx}] message item must have content list.")
normalized_content = []
for part_idx, part in enumerate(content):
if not isinstance(part, dict):
raise ValueError(
f"Codex Responses input[{idx}] message content[{part_idx}] must be an object."
)
part_type = part.get("type")
if part_type not in {"output_text", "text"}:
raise ValueError(
f"Codex Responses input[{idx}] message content[{part_idx}] has unsupported type {part_type!r}."
)
text = part.get("text", "")
if text is None:
text = ""
if not isinstance(text, str):
text = str(text)
normalized_content.append({"type": "output_text", "text": text})
if not normalized_content:
raise ValueError(f"Codex Responses input[{idx}] message item must contain at least one text part.")
normalized_item: Dict[str, Any] = {
"type": "message",
"role": "assistant",
"status": _normalize_responses_message_status(item.get("status")),
"content": normalized_content,
}
item_id = item.get("id")
if isinstance(item_id, str) and item_id.strip():
normalized_item["id"] = item_id.strip()
phase = item.get("phase")
if isinstance(phase, str) and phase.strip():
normalized_item["phase"] = phase.strip()
normalized.append(normalized_item)
continue
role = item.get("role")
if role in {"user", "assistant"}:
content = item.get("content", "")
if content is None:
content = ""
if isinstance(content, list):
# Multimodal content from ``_chat_messages_to_responses_input``
# is already in Responses format (``input_text`` / ``output_text``
# / ``input_image``). Validate each part and pass through.
# Use the correct text type for the role — ``output_text`` for
# assistant messages, ``input_text`` for user messages.
text_type = "output_text" if role == "assistant" else "input_text"
validated: List[Dict[str, Any]] = []
for part_idx, part in enumerate(content):
if isinstance(part, str):
if part:
validated.append({"type": text_type, "text": part})
continue
if not isinstance(part, dict):
raise ValueError(
f"Codex Responses input[{idx}].content[{part_idx}] must be an object or string."
)
ptype = str(part.get("type") or "").strip().lower()
if ptype in {"input_text", "text", "output_text"}:
text = part.get("text", "")
if not isinstance(text, str):
text = str(text or "")
validated.append({"type": text_type, "text": text})
elif ptype in {"input_image", "image_url"}:
image_ref = part.get("image_url", "")
detail = part.get("detail")
if isinstance(image_ref, dict):
url = image_ref.get("url", "")
detail = image_ref.get("detail", detail)
else:
url = image_ref
if not isinstance(url, str):
url = str(url or "")
image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
if isinstance(detail, str) and detail.strip():
image_part["detail"] = detail.strip()
validated.append(image_part)
else:
raise ValueError(
f"Codex Responses input[{idx}].content[{part_idx}] has unsupported type {part.get('type')!r}."
)
normalized.append({"role": role, "content": validated})
continue
if not isinstance(content, str):
content = str(content)
normalized.append({"role": role, "content": content})
continue
raise ValueError(
f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})."
)
return normalized
def _preflight_codex_api_kwargs(
api_kwargs: Any,
*,
allow_stream: bool = False,
) -> Dict[str, Any]:
if not isinstance(api_kwargs, dict):
raise ValueError("Codex Responses request must be a dict.")
required = {"model", "instructions", "input"}
missing = [key for key in required if key not in api_kwargs]
if missing:
raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.")
model = api_kwargs.get("model")
if not isinstance(model, str) or not model.strip():
raise ValueError("Codex Responses request 'model' must be a non-empty string.")
model = model.strip()
instructions = api_kwargs.get("instructions")
if instructions is None:
instructions = ""
if not isinstance(instructions, str):
instructions = str(instructions)
instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY
normalized_input = _preflight_codex_input_items(api_kwargs.get("input"))
tools = api_kwargs.get("tools")
normalized_tools = None
if tools is not None:
if not isinstance(tools, list):
raise ValueError("Codex Responses request 'tools' must be a list when provided.")
normalized_tools = []
for idx, tool in enumerate(tools):
if not isinstance(tool, dict):
raise ValueError(f"Codex Responses tools[{idx}] must be an object.")
if tool.get("type") != "function":
raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.")
name = tool.get("name")
parameters = tool.get("parameters")
if not isinstance(name, str) or not name.strip():
raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.")
if not isinstance(parameters, dict):
raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.")
description = tool.get("description", "")
if description is None:
description = ""
if not isinstance(description, str):
description = str(description)
strict = tool.get("strict", False)
if not isinstance(strict, bool):
strict = bool(strict)
normalized_tools.append(
{
"type": "function",
"name": name.strip(),
"description": description,
"strict": strict,
"parameters": parameters,
}
)
store = api_kwargs.get("store", False)
if store is not False:
raise ValueError("Codex Responses contract requires 'store' to be false.")
allowed_keys = {
"model", "instructions", "input", "tools", "store",
"reasoning", "include", "max_output_tokens", "temperature",
"tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
"extra_headers",
}
normalized: Dict[str, Any] = {
"model": model,
"instructions": instructions,
"input": normalized_input,
"store": False,
}
if normalized_tools is not None:
normalized["tools"] = normalized_tools
# Pass through reasoning config
reasoning = api_kwargs.get("reasoning")
if isinstance(reasoning, dict):
normalized["reasoning"] = reasoning
include = api_kwargs.get("include")
if isinstance(include, list):
normalized["include"] = include
service_tier = api_kwargs.get("service_tier")
if isinstance(service_tier, str) and service_tier.strip():
normalized["service_tier"] = service_tier.strip()
# Pass through max_output_tokens and temperature
max_output_tokens = api_kwargs.get("max_output_tokens")
if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0:
normalized["max_output_tokens"] = int(max_output_tokens)
temperature = api_kwargs.get("temperature")
if isinstance(temperature, (int, float)):
normalized["temperature"] = float(temperature)
# Pass through tool_choice, parallel_tool_calls, prompt_cache_key
for passthrough_key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"):
val = api_kwargs.get(passthrough_key)
if val is not None:
normalized[passthrough_key] = val
extra_headers = api_kwargs.get("extra_headers")
if extra_headers is not None:
if not isinstance(extra_headers, dict):
raise ValueError("Codex Responses request 'extra_headers' must be an object.")
normalized_headers: Dict[str, str] = {}
for key, value in extra_headers.items():
if not isinstance(key, str) or not key.strip():
raise ValueError("Codex Responses request 'extra_headers' keys must be non-empty strings.")
if value is None:
continue
normalized_headers[key.strip()] = str(value)
if normalized_headers:
normalized["extra_headers"] = normalized_headers
if allow_stream:
stream = api_kwargs.get("stream")
if stream is not None and stream is not True:
raise ValueError("Codex Responses 'stream' must be true when set.")
if stream is True:
normalized["stream"] = True
allowed_keys.add("stream")
elif "stream" in api_kwargs:
raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.")
unexpected = sorted(key for key in api_kwargs if key not in allowed_keys)
if unexpected:
raise ValueError(
f"Codex Responses request has unsupported field(s): {', '.join(unexpected)}."
)
return normalized
# ---------------------------------------------------------------------------
# Response extraction helpers
# ---------------------------------------------------------------------------
def _extract_responses_message_text(item: Any) -> str:
"""Extract assistant text from a Responses message output item."""
content = getattr(item, "content", None)
if not isinstance(content, list):
return ""
chunks: List[str] = []
for part in content:
ptype = getattr(part, "type", None)
if ptype not in {"output_text", "text"}:
continue
text = getattr(part, "text", None)
if isinstance(text, str) and text:
chunks.append(text)
return "".join(chunks).strip()
def _extract_responses_reasoning_text(item: Any) -> str:
"""Extract a compact reasoning text from a Responses reasoning item."""
summary = getattr(item, "summary", None)
if isinstance(summary, list):
chunks: List[str] = []
for part in summary:
text = getattr(part, "text", None)
if isinstance(text, str) and text:
chunks.append(text)
if chunks:
return "\n".join(chunks).strip()
text = getattr(item, "text", None)
if isinstance(text, str) and text:
return text.strip()
return ""
# ---------------------------------------------------------------------------
# Full response normalization
# ---------------------------------------------------------------------------
def _normalize_codex_response(response: Any) -> tuple[Any, str]:
"""Normalize a Responses API object to an assistant_message-like object."""
output = getattr(response, "output", None)
if not isinstance(output, list) or not output:
# The Codex backend can return empty output when the answer was
# delivered entirely via stream events. Check output_text as a
# last-resort fallback before raising.
out_text = getattr(response, "output_text", None)
if isinstance(out_text, str) and out_text.strip():
logger.debug(
"Codex response has empty output but output_text is present (%d chars); "
"synthesizing output item.", len(out_text.strip()),
)
output = [SimpleNamespace(
type="message", role="assistant", status="completed",
content=[SimpleNamespace(type="output_text", text=out_text.strip())],
)]
response.output = output
else:
raise RuntimeError("Responses API returned no output items")
response_status = getattr(response, "status", None)
if isinstance(response_status, str):
response_status = response_status.strip().lower()
else:
response_status = None
if response_status in {"failed", "cancelled"}:
error_obj = getattr(response, "error", None)
if isinstance(error_obj, dict):
error_msg = error_obj.get("message") or str(error_obj)
else:
error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'"
raise RuntimeError(error_msg)
content_parts: List[str] = []
reasoning_parts: List[str] = []
reasoning_items_raw: List[Dict[str, Any]] = []
message_items_raw: List[Dict[str, Any]] = []
tool_calls: List[Any] = []
has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
saw_commentary_phase = False
saw_final_answer_phase = False
for item in output:
item_type = getattr(item, "type", None)
item_status = getattr(item, "status", None)
if isinstance(item_status, str):
item_status = item_status.strip().lower()
else:
item_status = None
if item_status in {"queued", "in_progress", "incomplete"}:
has_incomplete_items = True
if item_type == "message":
item_phase = getattr(item, "phase", None)
normalized_phase = None
if isinstance(item_phase, str):
normalized_phase = item_phase.strip().lower()
if normalized_phase in {"commentary", "analysis"}:
saw_commentary_phase = True
elif normalized_phase in {"final_answer", "final"}:
saw_final_answer_phase = True
message_text = _extract_responses_message_text(item)
if message_text:
content_parts.append(message_text)
raw_message_item: Dict[str, Any] = {
"type": "message",
"role": "assistant",
"status": _normalize_responses_message_status(item_status),
"content": [{"type": "output_text", "text": message_text}],
}
item_id = getattr(item, "id", None)
if isinstance(item_id, str) and item_id:
raw_message_item["id"] = item_id
if normalized_phase:
raw_message_item["phase"] = normalized_phase
message_items_raw.append(raw_message_item)
elif item_type == "reasoning":
reasoning_text = _extract_responses_reasoning_text(item)
if reasoning_text:
reasoning_parts.append(reasoning_text)
# Capture the full reasoning item for multi-turn continuity.
# encrypted_content is an opaque blob the API needs back on
# subsequent turns to maintain coherent reasoning chains.
encrypted = getattr(item, "encrypted_content", None)
if isinstance(encrypted, str) and encrypted:
raw_item = {"type": "reasoning", "encrypted_content": encrypted}
item_id = getattr(item, "id", None)
if isinstance(item_id, str) and item_id:
raw_item["id"] = item_id
# Capture summary — required by the API when replaying reasoning items
summary = getattr(item, "summary", None)
if isinstance(summary, list):
raw_summary = []
for part in summary:
text = getattr(part, "text", None)
if isinstance(text, str):
raw_summary.append({"type": "summary_text", "text": text})
raw_item["summary"] = raw_summary
reasoning_items_raw.append(raw_item)
elif item_type == "function_call":
if item_status in {"queued", "in_progress", "incomplete"}:
continue
fn_name = getattr(item, "name", "") or ""
arguments = getattr(item, "arguments", "{}")
if not isinstance(arguments, str):
arguments = json.dumps(arguments, ensure_ascii=False)
raw_call_id = getattr(item, "call_id", None)
raw_item_id = getattr(item, "id", None)
embedded_call_id, _ = _split_responses_tool_id(raw_item_id)
call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
if not isinstance(call_id, str) or not call_id.strip():
call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls))
call_id = call_id.strip()
response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
response_item_id = _derive_responses_function_call_id(call_id, response_item_id)
tool_calls.append(SimpleNamespace(
id=call_id,
call_id=call_id,
response_item_id=response_item_id,
type="function",
function=SimpleNamespace(name=fn_name, arguments=arguments),
))
elif item_type == "custom_tool_call":
fn_name = getattr(item, "name", "") or ""
arguments = getattr(item, "input", "{}")
if not isinstance(arguments, str):
arguments = json.dumps(arguments, ensure_ascii=False)
raw_call_id = getattr(item, "call_id", None)
raw_item_id = getattr(item, "id", None)
embedded_call_id, _ = _split_responses_tool_id(raw_item_id)
call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
if not isinstance(call_id, str) or not call_id.strip():
call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls))
call_id = call_id.strip()
response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
response_item_id = _derive_responses_function_call_id(call_id, response_item_id)
tool_calls.append(SimpleNamespace(
id=call_id,
call_id=call_id,
response_item_id=response_item_id,
type="function",
function=SimpleNamespace(name=fn_name, arguments=arguments),
))
final_text = "\n".join([p for p in content_parts if p]).strip()
if not final_text and hasattr(response, "output_text"):
out_text = getattr(response, "output_text", "")
if isinstance(out_text, str):
final_text = out_text.strip()
# ── Tool-call leak recovery ──────────────────────────────────
# gpt-5.x on the Codex Responses API sometimes degenerates and emits
# what should be a structured `function_call` item as plain assistant
# text using the Harmony/Codex serialization (``to=functions.foo
# {json}`` or ``assistant to=functions.foo {json}``). The model
# intended to call a tool, but the intent never made it into
# ``response.output`` as a ``function_call`` item, so ``tool_calls``
# is empty here. If we pass this through, the parent sees a
# confident-looking summary with no audit trail (empty ``tool_trace``)
# and no tools actually ran — the Taiwan-embassy-email incident.
#
# Detection: leaked tokens always contain ``to=functions.<name>`` and
# the assistant message has no real tool calls. Treat it as incomplete
# so the existing Codex-incomplete continuation path (3 retries,
# handled in run_agent.py) gets a chance to re-elicit a proper
# ``function_call`` item. The existing loop already handles message
# append, dedup, and retry budget.
leaked_tool_call_text = False
if final_text and not tool_calls and _TOOL_CALL_LEAK_PATTERN.search(final_text):
leaked_tool_call_text = True
logger.warning(
"Codex response contains leaked tool-call text in assistant content "
"(no structured function_call items). Treating as incomplete so the "
"continuation path can re-elicit a proper tool call. Leaked snippet: %r",
final_text[:300],
)
# Clear the text so downstream code doesn't surface the garbage as
# a summary. The encrypted reasoning items (if any) are preserved
# so the model keeps its chain-of-thought on the retry.
final_text = ""
assistant_message = SimpleNamespace(
content=final_text,
tool_calls=tool_calls,
reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None,
reasoning_content=None,
reasoning_details=None,
codex_reasoning_items=reasoning_items_raw or None,
codex_message_items=message_items_raw or None,
)
if tool_calls:
finish_reason = "tool_calls"
elif leaked_tool_call_text:
finish_reason = "incomplete"
elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
finish_reason = "incomplete"
elif reasoning_items_raw and not final_text:
# Response contains only reasoning (encrypted thinking state) with
# no visible content or tool calls. The model is still thinking and
# needs another turn to produce the actual answer. Marking this as
# "stop" would send it into the empty-content retry loop which burns
# 3 retries then fails — treat it as incomplete instead so the Codex
# continuation path handles it correctly.
finish_reason = "incomplete"
else:
finish_reason = "stop"
return assistant_message, finish_reason
+18 -216
View File
@@ -31,7 +31,6 @@ from agent.model_metadata import (
get_model_context_length,
estimate_messages_tokens_rough,
)
from agent.redact import redact_sensitive_text
logger = logging.getLogger(__name__)
@@ -61,93 +60,9 @@ _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]"
# Chars per token rough estimate
_CHARS_PER_TOKEN = 4
# Flat token cost per attached image part. Real cost varies by provider and
# dimensions (Anthropic ≈ width×height/750, GPT-4o up to ~1700 for
# high-detail 2048×2048, Gemini 258/tile), but 1600 is a realistic ceiling
# that keeps compression budgeting honest for multi-image conversations.
# Matches Claude Code's IMAGE_TOKEN_ESTIMATE constant.
_IMAGE_TOKEN_ESTIMATE = 1600
# Same figure expressed in the char-budget currency the rest of the
# compressor speaks in. Used when accumulating message "content length"
# for tail-cut decisions.
_IMAGE_CHAR_EQUIVALENT = _IMAGE_TOKEN_ESTIMATE * _CHARS_PER_TOKEN
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
def _content_length_for_budget(raw_content: Any) -> int:
"""Return the effective char-length of a message's content for token budgeting.
Plain strings: ``len(content)``. Multimodal lists: sum of text-part
``len(text)`` plus a flat ``_IMAGE_CHAR_EQUIVALENT`` per image part
(``image_url`` / ``input_image`` / Anthropic-style ``image``). This
keeps the compressor from treating a turn with 5 attached images as
near-zero tokens just because the text part is empty.
"""
if isinstance(raw_content, str):
return len(raw_content)
if not isinstance(raw_content, list):
return len(str(raw_content or ""))
total = 0
for p in raw_content:
if isinstance(p, str):
total += len(p)
continue
if not isinstance(p, dict):
total += len(str(p))
continue
ptype = p.get("type")
if ptype in {"image_url", "input_image", "image"}:
total += _IMAGE_CHAR_EQUIVALENT
else:
# text / input_text / tool_result-with-text / anything else with
# a text field. Ignore the raw base64 payload inside image_url
# dicts — dimensions don't matter, only whether it's an image.
total += len(p.get("text", "") or "")
return total
def _content_text_for_contains(content: Any) -> str:
"""Return a best-effort text view of message content.
Used only for substring checks when we need to know whether we've already
appended a note to a message. Keeps multimodal lists intact elsewhere.
"""
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
parts: list[str] = []
for item in content:
if isinstance(item, str):
parts.append(item)
elif isinstance(item, dict):
text = item.get("text")
if isinstance(text, str):
parts.append(text)
return "\n".join(part for part in parts if part)
return str(content)
def _append_text_to_content(content: Any, text: str, *, prepend: bool = False) -> Any:
"""Append or prepend plain text to message content safely.
Compression sometimes needs to add a note or merge a summary into an
existing message. Message content may be plain text or a multimodal list of
blocks, so direct string concatenation is not always safe.
"""
if content is None:
return text
if isinstance(content, str):
return text + content if prepend else content + text
if isinstance(content, list):
text_block = {"type": "text", "text": text}
return [text_block, *content] if prepend else [*content, text_block]
rendered = str(content)
return text + rendered if prepend else rendered + text
def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
"""Shrink long string values inside a tool-call arguments JSON blob while
preserving JSON validity.
@@ -337,11 +252,6 @@ class ContextCompressor(ContextEngine):
self._context_probed = False
self._context_probe_persistable = False
self._previous_summary = None
self._last_summary_error = None
self._last_summary_dropped_count = 0
self._last_summary_fallback_used = False
self._last_aux_model_failure_error = None
self._last_aux_model_failure_model = None
self._last_compression_savings_pct = 100.0
self._ineffective_compression_count = 0
@@ -365,13 +275,6 @@ class ContextCompressor(ContextEngine):
int(context_length * self.threshold_percent),
MINIMUM_CONTEXT_LENGTH,
)
# Recalculate token budgets for the new context length so the
# compressor stays calibrated after a model switch (e.g. 200K → 32K).
target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
self.tail_token_budget = target_tokens
self.max_summary_tokens = min(
int(context_length * 0.05), _SUMMARY_TOKENS_CEILING,
)
def __init__(
self,
@@ -444,18 +347,6 @@ class ContextCompressor(ContextEngine):
self._last_compression_savings_pct: float = 100.0
self._ineffective_compression_count: int = 0
self._summary_failure_cooldown_until: float = 0.0
self._last_summary_error: Optional[str] = None
# When summary generation fails and a static fallback is inserted,
# record how many turns were unrecoverably dropped so callers
# (gateway hygiene, /compress) can surface a visible warning.
self._last_summary_dropped_count: int = 0
self._last_summary_fallback_used: bool = False
# When a user-configured summary model fails and we recover by
# retrying on the main model, record the failure so gateway /
# CLI callers can still warn the user even though compression
# succeeded. Silent recovery would hide the broken config.
self._last_aux_model_failure_error: Optional[str] = None
self._last_aux_model_failure_model: Optional[str] = None
def update_from_response(self, usage: Dict[str, Any]):
"""Update tracked token usage from API response."""
@@ -542,7 +433,7 @@ class ContextCompressor(ContextEngine):
for i in range(len(result) - 1, -1, -1):
msg = result[i]
raw_content = msg.get("content") or ""
content_len = _content_length_for_budget(raw_content)
content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
msg_tokens = content_len // _CHARS_PER_TOKEN + 10
for tc in msg.get("tool_calls") or []:
if isinstance(tc, dict):
@@ -659,15 +550,11 @@ class ContextCompressor(ContextEngine):
Includes tool call arguments and result content (up to
``_CONTENT_MAX`` chars per message) so the summarizer can preserve
specific details like file paths, commands, and outputs.
All content is redacted before serialization to prevent secrets
(API keys, tokens, passwords) from leaking into the summary that
gets sent to the auxiliary model and persisted across compactions.
"""
parts = []
for msg in turns:
role = msg.get("role", "unknown")
content = redact_sensitive_text(msg.get("content") or "")
content = msg.get("content") or ""
# Tool results: keep enough content for the summarizer
if role == "tool":
@@ -688,7 +575,7 @@ class ContextCompressor(ContextEngine):
if isinstance(tc, dict):
fn = tc.get("function", {})
name = fn.get("name", "?")
args = redact_sensitive_text(fn.get("arguments", ""))
args = fn.get("arguments", "")
# Truncate long arguments but keep enough for context
if len(args) > self._TOOL_ARGS_MAX:
args = args[:self._TOOL_ARGS_HEAD] + "..."
@@ -746,13 +633,7 @@ class ContextCompressor(ContextEngine):
"assistant that continues the conversation. "
"Do NOT respond to any questions or requests in the conversation — "
"only output the structured summary. "
"Do NOT include any preamble, greeting, or prefix. "
"Write the summary in the same language the user was using in the "
"conversation — do not translate or switch to English. "
"NEVER include API keys, tokens, passwords, secrets, credentials, "
"or connection strings in the summary — replace any that appear "
"with [REDACTED]. Note that the user had credentials present, but "
"do not preserve their values."
"Do NOT include any preamble, greeting, or prefix."
)
# Shared structured template (used by both paths).
@@ -809,7 +690,7 @@ Be specific with file paths, commands, line numbers, and results.]
[What remains to be done — framed as context, not instructions]
## Critical Context
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation. NEVER include API keys, tokens, passwords, or credentials — write [REDACTED] instead.]
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.
@@ -849,7 +730,7 @@ Use this exact structure:
prompt += f"""
FOCUS TOPIC: "{focus_topic}"
The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED]."""
The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget."""
try:
call_kwargs = {
@@ -872,19 +753,15 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Handle cases where content is not a string (e.g., dict from llama.cpp)
if not isinstance(content, str):
content = str(content) if content else ""
# Redact the summary output as well — the summarizer LLM may
# ignore prompt instructions and echo back secrets verbatim.
summary = redact_sensitive_text(content.strip())
summary = content.strip()
# Store for iterative updates on next compaction
self._previous_summary = summary
self._summary_failure_cooldown_until = 0.0
self._summary_model_fallen_back = False
self._last_summary_error = None
return self._with_summary_prefix(summary)
except RuntimeError:
# No provider configured — long cooldown, unlikely to self-resolve
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
self._last_summary_error = "no auxiliary LLM provider configured"
logging.warning("Context compression: no provider available for "
"summary. Middle turns will be dropped without summary "
"for %d seconds.",
@@ -915,57 +792,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
"Falling back to main model '%s' for compression.",
self.summary_model, e, self.model,
)
# Record the aux-model failure so callers can warn the user
# even if the retry-on-main succeeds — a misconfigured aux
# model is something the user needs to fix.
_err_text = str(e).strip() or e.__class__.__name__
if len(_err_text) > 220:
_err_text = _err_text[:217].rstrip() + "..."
self._last_aux_model_failure_error = _err_text
self._last_aux_model_failure_model = self.summary_model
self.summary_model = "" # empty = use main model
self._summary_failure_cooldown_until = 0.0 # no cooldown
return self._generate_summary(turns_to_summarize, focus_topic=focus_topic) # retry immediately
# Unknown-error best-effort retry on main model. Losing N turns of
# context is almost always worse than one extra summary attempt, so
# if we haven't already fallen back and the summary model differs
# from the main model, try once more on main before entering
# cooldown. Errors that DID match _is_model_not_found above are
# already handled by the fast-path retry; this branch catches
# everything else (400s, provider-specific "no route" strings,
# aggregator rejections, etc.) where auto-retry is still safer
# than dropping the turns.
if (
self.summary_model
and self.summary_model != self.model
and not getattr(self, "_summary_model_fallen_back", False)
):
self._summary_model_fallen_back = True
logging.warning(
"Summary model '%s' failed (%s). "
"Retrying on main model '%s' before giving up.",
self.summary_model, e, self.model,
)
# Record the aux-model failure (see 404 branch above) — user
# should know their configured model is broken even if main
# recovers the call.
_err_text = str(e).strip() or e.__class__.__name__
if len(_err_text) > 220:
_err_text = _err_text[:217].rstrip() + "..."
self._last_aux_model_failure_error = _err_text
self._last_aux_model_failure_model = self.summary_model
self.summary_model = "" # empty = use main model
self._summary_failure_cooldown_until = 0.0
return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)
return self._generate_summary(messages, summary_budget) # retry immediately
# Transient errors (timeout, rate limit, network) — shorter cooldown
_transient_cooldown = 60
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
err_text = str(e).strip() or e.__class__.__name__
if len(err_text) > 220:
err_text = err_text[:217].rstrip() + "..."
self._last_summary_error = err_text
logging.warning(
"Failed to generate context summary: %s. "
"Further summary attempts paused for %d seconds.",
@@ -1180,9 +1013,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
for i in range(n - 1, head_end - 1, -1):
msg = messages[i]
raw_content = msg.get("content") or ""
content_len = _content_length_for_budget(raw_content)
msg_tokens = content_len // _CHARS_PER_TOKEN + 10 # +10 for role/metadata
content = msg.get("content") or ""
msg_tokens = len(content) // _CHARS_PER_TOKEN + 10 # +10 for role/metadata
# Include tool call arguments in estimate
for tc in msg.get("tool_calls") or []:
if isinstance(tc, dict):
@@ -1213,21 +1045,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
return max(cut_idx, head_end + 1)
# ------------------------------------------------------------------
# ContextEngine: manual /compress preflight
# ------------------------------------------------------------------
def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
"""Return True if there is a non-empty middle region to compact.
Overrides the ABC default so the gateway ``/compress`` guard can
skip the LLM call when the transcript is still entirely inside
the protected head/tail.
"""
compress_start = self._align_boundary_forward(messages, self.protect_first_n)
compress_end = self._find_tail_cut_by_tokens(messages, compress_start)
return compress_start < compress_end
# ------------------------------------------------------------------
# Main compression entry point
# ------------------------------------------------------------------
@@ -1251,13 +1068,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
related to this topic and be more aggressive about compressing
everything else. Inspired by Claude Code's ``/compact``.
"""
# Reset per-call summary failure state — callers inspect these fields
# after compress() returns to decide whether to surface a warning.
self._last_summary_dropped_count = 0
self._last_summary_fallback_used = False
self._last_summary_error = None
self._last_aux_model_failure_error = None
self._last_aux_model_failure_model = None
n_messages = len(messages)
# Only need head + 3 tail messages minimum (token budget decides the real tail size)
_min_for_compress = self.protect_first_n + 3 + 1
@@ -1321,13 +1131,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
for i in range(compress_start):
msg = messages[i].copy()
if i == 0 and msg.get("role") == "system":
existing = msg.get("content")
existing = msg.get("content") or ""
_compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
if _compression_note not in _content_text_for_contains(existing):
msg["content"] = _append_text_to_content(
existing,
"\n\n" + _compression_note if isinstance(existing, str) and existing else _compression_note,
)
if _compression_note not in existing:
msg["content"] = existing + "\n\n" + _compression_note
compressed.append(msg)
# If LLM summary failed, insert a static fallback so the model
@@ -1336,13 +1143,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
if not self.quiet_mode:
logger.warning("Summary generation failed — inserting static fallback context marker")
n_dropped = compress_end - compress_start
self._last_summary_dropped_count = n_dropped
self._last_summary_fallback_used = True
summary = (
f"{SUMMARY_PREFIX}\n"
f"Summary generation was unavailable. {n_dropped} message(s) were "
f"Summary generation was unavailable. {n_dropped} conversation turns were "
f"removed to free context space but could not be summarized. The removed "
f"messages contained earlier work in this session. Continue based on the "
f"turns contained earlier work in this session. Continue based on the "
f"recent messages below and the current state of any files or resources."
)
@@ -1373,15 +1178,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
for i in range(compress_end, n_messages):
msg = messages[i].copy()
if _merge_summary_into_tail and i == compress_end:
merged_prefix = (
original = msg.get("content") or ""
msg["content"] = (
summary
+ "\n\n--- END OF CONTEXT SUMMARY — "
"respond to the message below, not the summary above ---\n\n"
)
msg["content"] = _append_text_to_content(
msg.get("content"),
merged_prefix,
prepend=True,
+ original
)
_merge_summary_into_tail = False
compressed.append(msg)
-22
View File
@@ -78,7 +78,6 @@ class ContextEngine(ABC):
self,
messages: List[Dict[str, Any]],
current_tokens: int = None,
focus_topic: str = None,
) -> List[Dict[str, Any]]:
"""Compact the message list and return the new message list.
@@ -87,12 +86,6 @@ class ContextEngine(ABC):
context budget. The implementation is free to summarize, build a
DAG, or do anything else — as long as the returned list is a valid
OpenAI-format message sequence.
Args:
focus_topic: Optional topic string from manual ``/compress <focus>``.
Engines that support guided compression should prioritise
preserving information related to this topic. Engines that
don't support it may simply ignore this argument.
"""
# -- Optional: pre-flight check ----------------------------------------
@@ -105,21 +98,6 @@ class ContextEngine(ABC):
"""
return False
# -- Optional: manual /compress preflight ------------------------------
def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
"""Quick check: is there anything in ``messages`` that can be compacted?
Used by the gateway ``/compress`` command as a preflight guard —
returning False lets the gateway report "nothing to compress yet"
without making an LLM call.
Default returns True (always attempt). Engines with a cheap way
to introspect their own head/tail boundaries should override this
to return False when the transcript is still entirely protected.
"""
return True
# -- Optional: session lifecycle ---------------------------------------
def on_session_start(self, session_id: str, **kwargs) -> None:
+3 -1
View File
@@ -483,7 +483,9 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
text=True,
timeout=10,
)
except (FileNotFoundError, OSError, subprocess.TimeoutExpired):
except FileNotFoundError:
return None
except subprocess.TimeoutExpired:
return None
if result.returncode != 0:
return None
+583 -5
View File
@@ -1,8 +1,586 @@
"""Backward-compatibility shim.
"""OpenAI-compatible shim that forwards Hermes requests to `copilot --acp`.
CopilotACPClient has moved to acp_adapter/copilot_client.py.
This module re-exports it so existing callers continue to work.
This adapter lets Hermes treat the GitHub Copilot ACP server as a chat-style
backend. Each request starts a short-lived ACP session, sends the formatted
conversation as a single prompt, collects text chunks, and converts the result
back into the minimal shape Hermes expects from an OpenAI client.
"""
from acp_adapter.copilot_client import CopilotACPClient # noqa: F401
__all__ = ["CopilotACPClient"]
from __future__ import annotations
import json
import os
import queue
import re
import shlex
import subprocess
import threading
import time
from collections import deque
from pathlib import Path
from types import SimpleNamespace
from typing import Any
ACP_MARKER_BASE_URL = "acp://copilot"
_DEFAULT_TIMEOUT_SECONDS = 900.0
_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
_TOOL_CALL_JSON_RE = re.compile(r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}", re.DOTALL)
def _resolve_command() -> str:
return (
os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
or os.getenv("COPILOT_CLI_PATH", "").strip()
or "copilot"
)
def _resolve_args() -> list[str]:
raw = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
if not raw:
return ["--acp", "--stdio"]
return shlex.split(raw)
def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
return {
"jsonrpc": "2.0",
"id": message_id,
"error": {
"code": code,
"message": message,
},
}
def _format_messages_as_prompt(
messages: list[dict[str, Any]],
model: str | None = None,
tools: list[dict[str, Any]] | None = None,
tool_choice: Any = None,
) -> str:
sections: list[str] = [
"You are being used as the active ACP agent backend for Hermes.",
"Use ACP capabilities to complete tasks.",
"IMPORTANT: If you take an action with a tool, you MUST output tool calls using <tool_call>{...}</tool_call> blocks with JSON exactly in OpenAI function-call shape.",
"If no tool is needed, answer normally.",
]
if model:
sections.append(f"Hermes requested model hint: {model}")
if isinstance(tools, list) and tools:
tool_specs: list[dict[str, Any]] = []
for t in tools:
if not isinstance(t, dict):
continue
fn = t.get("function") or {}
if not isinstance(fn, dict):
continue
name = fn.get("name")
if not isinstance(name, str) or not name.strip():
continue
tool_specs.append(
{
"name": name.strip(),
"description": fn.get("description", ""),
"parameters": fn.get("parameters", {}),
}
)
if tool_specs:
sections.append(
"Available tools (OpenAI function schema). "
"When using a tool, emit ONLY <tool_call>{...}</tool_call> with one JSON object "
"containing id/type/function{name,arguments}. arguments must be a JSON string.\n"
+ json.dumps(tool_specs, ensure_ascii=False)
)
if tool_choice is not None:
sections.append(f"Tool choice hint: {json.dumps(tool_choice, ensure_ascii=False)}")
transcript: list[str] = []
for message in messages:
if not isinstance(message, dict):
continue
role = str(message.get("role") or "unknown").strip().lower()
if role == "tool":
role = "tool"
elif role not in {"system", "user", "assistant"}:
role = "context"
content = message.get("content")
rendered = _render_message_content(content)
if not rendered:
continue
label = {
"system": "System",
"user": "User",
"assistant": "Assistant",
"tool": "Tool",
"context": "Context",
}.get(role, role.title())
transcript.append(f"{label}:\n{rendered}")
if transcript:
sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript))
sections.append("Continue the conversation from the latest user request.")
return "\n\n".join(section.strip() for section in sections if section and section.strip())
def _render_message_content(content: Any) -> str:
if content is None:
return ""
if isinstance(content, str):
return content.strip()
if isinstance(content, dict):
if "text" in content:
return str(content.get("text") or "").strip()
if "content" in content and isinstance(content.get("content"), str):
return str(content.get("content") or "").strip()
return json.dumps(content, ensure_ascii=True)
if isinstance(content, list):
parts: list[str] = []
for item in content:
if isinstance(item, str):
parts.append(item)
elif isinstance(item, dict):
text = item.get("text")
if isinstance(text, str) and text.strip():
parts.append(text.strip())
return "\n".join(parts).strip()
return str(content).strip()
def _extract_tool_calls_from_text(text: str) -> tuple[list[SimpleNamespace], str]:
if not isinstance(text, str) or not text.strip():
return [], ""
extracted: list[SimpleNamespace] = []
consumed_spans: list[tuple[int, int]] = []
def _try_add_tool_call(raw_json: str) -> None:
try:
obj = json.loads(raw_json)
except Exception:
return
if not isinstance(obj, dict):
return
fn = obj.get("function")
if not isinstance(fn, dict):
return
fn_name = fn.get("name")
if not isinstance(fn_name, str) or not fn_name.strip():
return
fn_args = fn.get("arguments", "{}")
if not isinstance(fn_args, str):
fn_args = json.dumps(fn_args, ensure_ascii=False)
call_id = obj.get("id")
if not isinstance(call_id, str) or not call_id.strip():
call_id = f"acp_call_{len(extracted)+1}"
extracted.append(
SimpleNamespace(
id=call_id,
call_id=call_id,
response_item_id=None,
type="function",
function=SimpleNamespace(name=fn_name.strip(), arguments=fn_args),
)
)
for m in _TOOL_CALL_BLOCK_RE.finditer(text):
raw = m.group(1)
_try_add_tool_call(raw)
consumed_spans.append((m.start(), m.end()))
# Only try bare-JSON fallback when no XML blocks were found.
if not extracted:
for m in _TOOL_CALL_JSON_RE.finditer(text):
raw = m.group(0)
_try_add_tool_call(raw)
consumed_spans.append((m.start(), m.end()))
if not consumed_spans:
return extracted, text.strip()
consumed_spans.sort()
merged: list[tuple[int, int]] = []
for start, end in consumed_spans:
if not merged or start > merged[-1][1]:
merged.append((start, end))
else:
merged[-1] = (merged[-1][0], max(merged[-1][1], end))
parts: list[str] = []
cursor = 0
for start, end in merged:
if cursor < start:
parts.append(text[cursor:start])
cursor = max(cursor, end)
if cursor < len(text):
parts.append(text[cursor:])
cleaned = "\n".join(p.strip() for p in parts if p and p.strip()).strip()
return extracted, cleaned
def _ensure_path_within_cwd(path_text: str, cwd: str) -> Path:
candidate = Path(path_text)
if not candidate.is_absolute():
raise PermissionError("ACP file-system paths must be absolute.")
resolved = candidate.resolve()
root = Path(cwd).resolve()
try:
resolved.relative_to(root)
except ValueError as exc:
raise PermissionError(f"Path '{resolved}' is outside the session cwd '{root}'.") from exc
return resolved
class _ACPChatCompletions:
def __init__(self, client: "CopilotACPClient"):
self._client = client
def create(self, **kwargs: Any) -> Any:
return self._client._create_chat_completion(**kwargs)
class _ACPChatNamespace:
def __init__(self, client: "CopilotACPClient"):
self.completions = _ACPChatCompletions(client)
class CopilotACPClient:
"""Minimal OpenAI-client-compatible facade for Copilot ACP."""
def __init__(
self,
*,
api_key: str | None = None,
base_url: str | None = None,
default_headers: dict[str, str] | None = None,
acp_command: str | None = None,
acp_args: list[str] | None = None,
acp_cwd: str | None = None,
command: str | None = None,
args: list[str] | None = None,
**_: Any,
):
self.api_key = api_key or "copilot-acp"
self.base_url = base_url or ACP_MARKER_BASE_URL
self._default_headers = dict(default_headers or {})
self._acp_command = acp_command or command or _resolve_command()
self._acp_args = list(acp_args or args or _resolve_args())
self._acp_cwd = str(Path(acp_cwd or os.getcwd()).resolve())
self.chat = _ACPChatNamespace(self)
self.is_closed = False
self._active_process: subprocess.Popen[str] | None = None
self._active_process_lock = threading.Lock()
def close(self) -> None:
proc: subprocess.Popen[str] | None
with self._active_process_lock:
proc = self._active_process
self._active_process = None
self.is_closed = True
if proc is None:
return
try:
proc.terminate()
proc.wait(timeout=2)
except Exception:
try:
proc.kill()
except Exception:
pass
def _create_chat_completion(
self,
*,
model: str | None = None,
messages: list[dict[str, Any]] | None = None,
timeout: float | None = None,
tools: list[dict[str, Any]] | None = None,
tool_choice: Any = None,
**_: Any,
) -> Any:
prompt_text = _format_messages_as_prompt(
messages or [],
model=model,
tools=tools,
tool_choice=tool_choice,
)
# Normalise timeout: run_agent.py may pass an httpx.Timeout object
# (used natively by the OpenAI SDK) rather than a plain float.
if timeout is None:
_effective_timeout = _DEFAULT_TIMEOUT_SECONDS
elif isinstance(timeout, (int, float)):
_effective_timeout = float(timeout)
else:
# httpx.Timeout or similar — pick the largest component so the
# subprocess has enough wall-clock time for the full response.
_candidates = [
getattr(timeout, attr, None)
for attr in ("read", "write", "connect", "pool", "timeout")
]
_numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
_effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
response_text, reasoning_text = self._run_prompt(
prompt_text,
timeout_seconds=_effective_timeout,
)
tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)
usage = SimpleNamespace(
prompt_tokens=0,
completion_tokens=0,
total_tokens=0,
prompt_tokens_details=SimpleNamespace(cached_tokens=0),
)
assistant_message = SimpleNamespace(
content=cleaned_text,
tool_calls=tool_calls,
reasoning=reasoning_text or None,
reasoning_content=reasoning_text or None,
reasoning_details=None,
)
finish_reason = "tool_calls" if tool_calls else "stop"
choice = SimpleNamespace(message=assistant_message, finish_reason=finish_reason)
return SimpleNamespace(
choices=[choice],
usage=usage,
model=model or "copilot-acp",
)
def _run_prompt(self, prompt_text: str, *, timeout_seconds: float) -> tuple[str, str]:
try:
proc = subprocess.Popen(
[self._acp_command] + self._acp_args,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
bufsize=1,
cwd=self._acp_cwd,
)
except FileNotFoundError as exc:
raise RuntimeError(
f"Could not start Copilot ACP command '{self._acp_command}'. "
"Install GitHub Copilot CLI or set HERMES_COPILOT_ACP_COMMAND/COPILOT_CLI_PATH."
) from exc
if proc.stdin is None or proc.stdout is None:
proc.kill()
raise RuntimeError("Copilot ACP process did not expose stdin/stdout pipes.")
self.is_closed = False
with self._active_process_lock:
self._active_process = proc
inbox: queue.Queue[dict[str, Any]] = queue.Queue()
stderr_tail: deque[str] = deque(maxlen=40)
def _stdout_reader() -> None:
for line in proc.stdout:
try:
inbox.put(json.loads(line))
except Exception:
inbox.put({"raw": line.rstrip("\n")})
def _stderr_reader() -> None:
if proc.stderr is None:
return
for line in proc.stderr:
stderr_tail.append(line.rstrip("\n"))
out_thread = threading.Thread(target=_stdout_reader, daemon=True)
err_thread = threading.Thread(target=_stderr_reader, daemon=True)
out_thread.start()
err_thread.start()
next_id = 0
def _request(method: str, params: dict[str, Any], *, text_parts: list[str] | None = None, reasoning_parts: list[str] | None = None) -> Any:
nonlocal next_id
next_id += 1
request_id = next_id
payload = {
"jsonrpc": "2.0",
"id": request_id,
"method": method,
"params": params,
}
proc.stdin.write(json.dumps(payload) + "\n")
proc.stdin.flush()
deadline = time.time() + timeout_seconds
while time.time() < deadline:
if proc.poll() is not None:
break
try:
msg = inbox.get(timeout=0.1)
except queue.Empty:
continue
if self._handle_server_message(
msg,
process=proc,
cwd=self._acp_cwd,
text_parts=text_parts,
reasoning_parts=reasoning_parts,
):
continue
if msg.get("id") != request_id:
continue
if "error" in msg:
err = msg.get("error") or {}
raise RuntimeError(
f"Copilot ACP {method} failed: {err.get('message') or err}"
)
return msg.get("result")
stderr_text = "\n".join(stderr_tail).strip()
if proc.poll() is not None and stderr_text:
raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
raise TimeoutError(f"Timed out waiting for Copilot ACP response to {method}.")
try:
_request(
"initialize",
{
"protocolVersion": 1,
"clientCapabilities": {
"fs": {
"readTextFile": True,
"writeTextFile": True,
}
},
"clientInfo": {
"name": "hermes-agent",
"title": "Hermes Agent",
"version": "0.0.0",
},
},
)
session = _request(
"session/new",
{
"cwd": self._acp_cwd,
"mcpServers": [],
},
) or {}
session_id = str(session.get("sessionId") or "").strip()
if not session_id:
raise RuntimeError("Copilot ACP did not return a sessionId.")
text_parts: list[str] = []
reasoning_parts: list[str] = []
_request(
"session/prompt",
{
"sessionId": session_id,
"prompt": [
{
"type": "text",
"text": prompt_text,
}
],
},
text_parts=text_parts,
reasoning_parts=reasoning_parts,
)
return "".join(text_parts), "".join(reasoning_parts)
finally:
self.close()
def _handle_server_message(
self,
msg: dict[str, Any],
*,
process: subprocess.Popen[str],
cwd: str,
text_parts: list[str] | None,
reasoning_parts: list[str] | None,
) -> bool:
method = msg.get("method")
if not isinstance(method, str):
return False
if method == "session/update":
params = msg.get("params") or {}
update = params.get("update") or {}
kind = str(update.get("sessionUpdate") or "").strip()
content = update.get("content") or {}
chunk_text = ""
if isinstance(content, dict):
chunk_text = str(content.get("text") or "")
if kind == "agent_message_chunk" and chunk_text and text_parts is not None:
text_parts.append(chunk_text)
elif kind == "agent_thought_chunk" and chunk_text and reasoning_parts is not None:
reasoning_parts.append(chunk_text)
return True
if process.stdin is None:
return True
message_id = msg.get("id")
params = msg.get("params") or {}
if method == "session/request_permission":
response = {
"jsonrpc": "2.0",
"id": message_id,
"result": {
"outcome": {
"outcome": "allow_once",
}
},
}
elif method == "fs/read_text_file":
try:
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
content = path.read_text() if path.exists() else ""
line = params.get("line")
limit = params.get("limit")
if isinstance(line, int) and line > 1:
lines = content.splitlines(keepends=True)
start = line - 1
end = start + limit if isinstance(limit, int) and limit > 0 else None
content = "".join(lines[start:end])
response = {
"jsonrpc": "2.0",
"id": message_id,
"result": {
"content": content,
},
}
except Exception as exc:
response = _jsonrpc_error(message_id, -32602, str(exc))
elif method == "fs/write_text_file":
try:
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(str(params.get("content") or ""))
response = {
"jsonrpc": "2.0",
"id": message_id,
"result": None,
}
except Exception as exc:
response = _jsonrpc_error(message_id, -32602, str(exc))
else:
response = _jsonrpc_error(
message_id,
-32601,
f"ACP client method '{method}' is not supported by Hermes yet.",
)
process.stdin.write(json.dumps(response) + "\n")
process.stdin.flush()
return True
+74 -204
View File
@@ -14,7 +14,6 @@ from datetime import datetime
from typing import Any, Dict, List, Optional, Set, Tuple
from hermes_constants import OPENROUTER_BASE_URL
from hermes_cli.config import get_env_value
import hermes_cli.auth as auth_mod
from hermes_cli.auth import (
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
@@ -456,61 +455,6 @@ class CredentialPool:
logger.debug("Failed to sync from credentials file: %s", exc)
return entry
def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
"""Sync a Nous pool entry from auth.json if tokens differ.
Nous OAuth refresh tokens are single-use. When another process
(e.g. a concurrent cron) refreshes the token via
``resolve_nous_runtime_credentials``, it writes fresh tokens to
auth.json under ``_auth_store_lock``. The pool entry's tokens
become stale. This method detects that and adopts the newer pair,
avoiding a "refresh token reuse" revocation on the Nous Portal.
"""
if self.provider != "nous" or entry.source != "device_code":
return entry
try:
with _auth_store_lock():
auth_store = _load_auth_store()
state = _load_provider_state(auth_store, "nous")
if not state:
return entry
store_refresh = state.get("refresh_token", "")
store_access = state.get("access_token", "")
if store_refresh and store_refresh != entry.refresh_token:
logger.debug(
"Pool entry %s: syncing tokens from auth.json (Nous refresh token changed)",
entry.id,
)
field_updates: Dict[str, Any] = {
"access_token": store_access,
"refresh_token": store_refresh,
"last_status": None,
"last_status_at": None,
"last_error_code": None,
}
if state.get("expires_at"):
field_updates["expires_at"] = state["expires_at"]
if state.get("agent_key"):
field_updates["agent_key"] = state["agent_key"]
if state.get("agent_key_expires_at"):
field_updates["agent_key_expires_at"] = state["agent_key_expires_at"]
if state.get("inference_base_url"):
field_updates["inference_base_url"] = state["inference_base_url"]
extra_updates = dict(entry.extra)
for extra_key in ("obtained_at", "expires_in", "agent_key_id",
"agent_key_expires_in", "agent_key_reused",
"agent_key_obtained_at"):
val = state.get(extra_key)
if val is not None:
extra_updates[extra_key] = val
updated = replace(entry, extra=extra_updates, **field_updates)
self._replace_entry(entry, updated)
self._persist()
return updated
except Exception as exc:
logger.debug("Failed to sync Nous entry from auth.json: %s", exc)
return entry
def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None:
"""Write refreshed pool entry tokens back to auth.json providers.
@@ -617,9 +561,6 @@ class CredentialPool:
last_refresh=refreshed.get("last_refresh"),
)
elif self.provider == "nous":
synced = self._sync_nous_entry_from_auth_store(entry)
if synced is not entry:
entry = synced
nous_state = {
"access_token": entry.access_token,
"refresh_token": entry.refresh_token,
@@ -694,26 +635,6 @@ class CredentialPool:
# Credentials file had a valid (non-expired) token — use it directly
logger.debug("Credentials file has valid token, using without refresh")
return synced
# For nous: another process may have consumed the refresh token
# between our proactive sync and the HTTP call. Re-sync from
# auth.json and adopt the fresh tokens if available.
if self.provider == "nous":
synced = self._sync_nous_entry_from_auth_store(entry)
if synced.refresh_token != entry.refresh_token:
logger.debug("Nous refresh failed but auth.json has newer tokens — adopting")
updated = replace(
synced,
last_status=STATUS_OK,
last_status_at=None,
last_error_code=None,
last_error_reason=None,
last_error_message=None,
last_error_reset_at=None,
)
self._replace_entry(synced, updated)
self._persist()
self._sync_device_code_entry_to_auth_store(updated)
return updated
self._mark_exhausted(entry, None)
return None
@@ -777,17 +698,6 @@ class CredentialPool:
if synced is not entry:
entry = synced
cleared_any = True
# For nous entries, sync from auth.json before status checks.
# Another process may have successfully refreshed via
# resolve_nous_runtime_credentials(), making this entry's
# exhausted status stale.
if (self.provider == "nous"
and entry.source == "device_code"
and entry.last_status == STATUS_EXHAUSTED):
synced = self._sync_nous_entry_from_auth_store(entry)
if synced is not entry:
entry = synced
cleared_any = True
if entry.last_status == STATUS_EXHAUSTED:
exhausted_until = _exhausted_until(entry)
if exhausted_until is not None and now < exhausted_until:
@@ -829,11 +739,8 @@ class CredentialPool:
if self._strategy == STRATEGY_LEAST_USED and len(available) > 1:
entry = min(available, key=lambda e: e.request_count)
# Increment usage counter so subsequent selections distribute load
updated = replace(entry, request_count=entry.request_count + 1)
self._replace_entry(entry, updated)
self._current_id = entry.id
return updated
return entry
if self._strategy == STRATEGY_ROUND_ROBIN and len(available) > 1:
entry = available[0]
@@ -1076,14 +983,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
active_sources: Set[str] = set()
auth_store = _load_auth_store()
# Shared suppression gate — used at every upsert site so
# `hermes auth remove <provider> <N>` is stable across all source types.
try:
from hermes_cli.auth import is_source_suppressed as _is_suppressed
except ImportError:
def _is_suppressed(_p, _s): # type: ignore[misc]
return False
if provider == "anthropic":
# Only auto-discover external credentials (Claude Code, Hermes PKCE)
# when the user has explicitly configured anthropic as their provider.
@@ -1103,8 +1002,13 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
("claude_code", read_claude_code_credentials()),
):
if creds and creds.get("accessToken"):
if _is_suppressed(provider, source_name):
continue
# Check if user explicitly removed this source
try:
from hermes_cli.auth import is_source_suppressed
if is_source_suppressed(provider, source_name):
continue
except ImportError:
pass
active_sources.add(source_name)
changed |= _upsert_entry(
entries,
@@ -1122,7 +1026,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
elif provider == "nous":
state = _load_provider_state(auth_store, "nous")
if state and not _is_suppressed(provider, "device_code"):
if state:
active_sources.add("device_code")
# Prefer a user-supplied label embedded in the singleton state
# (set by persist_nous_credentials(label=...) when the user ran
@@ -1149,18 +1053,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
"inference_base_url": state.get("inference_base_url"),
"agent_key": state.get("agent_key"),
"agent_key_expires_at": state.get("agent_key_expires_at"),
# Carry the mint/refresh timestamps into the pool so
# freshness-sensitive consumers (self-heal hooks, pool
# pruning by age) can distinguish just-minted credentials
# from stale ones. Without these, fresh device_code
# entries get obtained_at=None and look older than they
# are (#15099).
"obtained_at": state.get("obtained_at"),
"expires_in": state.get("expires_in"),
"agent_key_id": state.get("agent_key_id"),
"agent_key_expires_in": state.get("agent_key_expires_in"),
"agent_key_reused": state.get("agent_key_reused"),
"agent_key_obtained_at": state.get("agent_key_obtained_at"),
"tls": state.get("tls") if isinstance(state.get("tls"), dict) else None,
"label": seeded_label,
},
@@ -1171,26 +1063,24 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
# env vars (COPILOT_GITHUB_TOKEN / GH_TOKEN). They don't live in
# the auth store or credential pool, so we resolve them here.
try:
from hermes_cli.copilot_auth import resolve_copilot_token, get_copilot_api_token
from hermes_cli.copilot_auth import resolve_copilot_token
token, source = resolve_copilot_token()
if token:
api_token = get_copilot_api_token(token)
source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
if not _is_suppressed(provider, source_name):
active_sources.add(source_name)
pconfig = PROVIDER_REGISTRY.get(provider)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": api_token,
"base_url": pconfig.inference_base_url if pconfig else "",
"label": source,
},
)
active_sources.add(source_name)
pconfig = PROVIDER_REGISTRY.get(provider)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": token,
"base_url": pconfig.inference_base_url if pconfig else "",
"label": source,
},
)
except Exception as exc:
logger.debug("Copilot token seed failed: %s", exc)
@@ -1206,21 +1096,20 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
token = creds.get("api_key", "")
if token:
source_name = creds.get("source", "qwen-cli")
if not _is_suppressed(provider, source_name):
active_sources.add(source_name)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_OAUTH,
"access_token": token,
"expires_at_ms": creds.get("expires_at_ms"),
"base_url": creds.get("base_url", ""),
"label": creds.get("auth_file", source_name),
},
)
active_sources.add(source_name)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_OAUTH,
"access_token": token,
"expires_at_ms": creds.get("expires_at_ms"),
"base_url": creds.get("base_url", ""),
"label": creds.get("auth_file", source_name),
},
)
except Exception as exc:
logger.debug("Qwen OAuth token seed failed: %s", exc)
@@ -1229,7 +1118,13 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
# the device_code source as suppressed so it won't be re-seeded from
# the Hermes auth store. Without this gate the removal is instantly
# undone on the next load_pool() call.
if _is_suppressed(provider, "device_code"):
codex_suppressed = False
try:
from hermes_cli.auth import is_source_suppressed
codex_suppressed = is_source_suppressed(provider, "device_code")
except ImportError:
pass
if codex_suppressed:
return changed, active_sources
state = _load_provider_state(auth_store, "openai-codex")
@@ -1263,23 +1158,10 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
changed = False
active_sources: Set[str] = set()
# Honour user suppression — `hermes auth remove <provider> <N>` for an
# env-seeded credential marks the env:<VAR> source as suppressed so it
# won't be re-seeded from the user's shell environment or ~/.hermes/.env.
# Without this gate the removal is silently undone on the next
# load_pool() call whenever the var is still exported by the shell.
try:
from hermes_cli.auth import is_source_suppressed as _is_source_suppressed
except ImportError:
def _is_source_suppressed(_p, _s): # type: ignore[misc]
return False
if provider == "openrouter":
# Check both os.environ and ~/.hermes/.env file
token = (get_env_value("OPENROUTER_API_KEY") or "").strip()
token = os.getenv("OPENROUTER_API_KEY", "").strip()
if token:
source = "env:OPENROUTER_API_KEY"
if _is_source_suppressed(provider, source):
return changed, active_sources
active_sources.add(source)
changed |= _upsert_entry(
entries,
@@ -1301,7 +1183,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
env_url = ""
if pconfig.base_url_env_var:
env_url = (get_env_value(pconfig.base_url_env_var) or "").strip().rstrip("/")
env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
env_vars = list(pconfig.api_key_env_vars)
if provider == "anthropic":
@@ -1312,13 +1194,10 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
]
for env_var in env_vars:
# Check both os.environ and ~/.hermes/.env file
token = (get_env_value(env_var) or "").strip()
token = os.getenv(env_var, "").strip()
if not token:
continue
source = f"env:{env_var}"
if _is_source_suppressed(provider, source):
continue
active_sources.add(source)
auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
base_url = env_url or pconfig.inference_base_url
@@ -1363,13 +1242,6 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
changed = False
active_sources: Set[str] = set()
# Shared suppression gate — same pattern as _seed_from_env/_seed_from_singletons.
try:
from hermes_cli.auth import is_source_suppressed as _is_suppressed
except ImportError:
def _is_suppressed(_p, _s): # type: ignore[misc]
return False
# Seed from the custom_providers config entry's api_key field
cp_config = _get_custom_provider_config(pool_key)
if cp_config:
@@ -1378,20 +1250,19 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
name = str(cp_config.get("name") or "").strip()
if api_key:
source = f"config:{name}"
if not _is_suppressed(pool_key, source):
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": api_key,
"base_url": base_url,
"label": name or source,
},
)
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": api_key,
"base_url": base_url,
"label": name or source,
},
)
# Seed from model.api_key if model.provider=='custom' and model.base_url matches
try:
@@ -1411,20 +1282,19 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
matched_key = get_custom_provider_pool_key(model_base_url)
if matched_key == pool_key:
source = "model_config"
if not _is_suppressed(pool_key, source):
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": model_api_key,
"base_url": model_base_url,
"label": "model_config",
},
)
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": model_api_key,
"base_url": model_base_url,
"label": "model_config",
},
)
except Exception:
pass
-401
View File
@@ -1,401 +0,0 @@
"""Unified removal contract for every credential source Hermes reads from.
Hermes seeds its credential pool from many places:
env:<VAR> os.environ / ~/.hermes/.env
claude_code ~/.claude/.credentials.json
hermes_pkce ~/.hermes/.anthropic_oauth.json
device_code auth.json providers.<provider> (nous, openai-codex, ...)
qwen-cli ~/.qwen/oauth_creds.json
gh_cli gh auth token
config:<name> custom_providers config entry
model_config model.api_key when model.provider == "custom"
manual user ran `hermes auth add`
Each source has its own reader inside ``agent.credential_pool._seed_from_*``
(which keep their existing shape we haven't restructured them). What we
unify here is **removal**:
``hermes auth remove <provider> <N>`` must make the pool entry stay gone.
Before this module, every source had an ad-hoc removal branch in
``auth_remove_command``, and several sources had no branch at all so
``auth remove`` silently reverted on the next ``load_pool()`` call for
qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and
custom-config sources.
Now every source registers a ``RemovalStep`` that does exactly three things
in the same shape:
1. Clean up whatever externally-readable state the source reads from
(.env line, auth.json block, OAuth file, etc.)
2. Suppress the ``(provider, source_id)`` in auth.json so the
corresponding ``_seed_from_*`` branch skips the upsert on re-load
3. Return ``RemovalResult`` describing what was cleaned and any
diagnostic hints the user should see (shell-exported env vars,
external credential files we deliberately don't delete, etc.)
Adding a new credential source is:
- wire up a reader branch in ``_seed_from_*`` (existing pattern)
- gate that reader behind ``is_source_suppressed(provider, source_id)``
- register a ``RemovalStep`` here
No more per-source if/elif chain in ``auth_remove_command``.
"""
from __future__ import annotations
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable, List, Optional
@dataclass
class RemovalResult:
"""Outcome of removing a credential source.
Attributes:
cleaned: Short strings describing external state that was actually
mutated (``"Cleared XAI_API_KEY from .env"``,
``"Cleared openai-codex OAuth tokens from auth store"``).
Printed as plain lines to the user.
hints: Diagnostic lines ABOUT state the user may need to clean up
themselves or is deliberately left intact (shell-exported env
var, Claude Code credential file we don't delete, etc.).
Printed as plain lines to the user. Always non-destructive.
suppress: Whether to call ``suppress_credential_source`` after
cleanup so future ``load_pool`` calls skip this source.
Default True almost every source needs this to stay sticky.
The only legitimate False is ``manual`` entries, which aren't
seeded from anywhere external.
"""
cleaned: List[str] = field(default_factory=list)
hints: List[str] = field(default_factory=list)
suppress: bool = True
@dataclass
class RemovalStep:
"""How to remove one specific credential source cleanly.
Attributes:
provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...).
Special value ``"*"`` means "matches any provider" used for
sources like ``manual`` that aren't provider-specific.
source_id: Source identifier as it appears in
``PooledCredential.source``. May be a literal (``"claude_code"``)
or a prefix pattern matched via ``match_fn``.
match_fn: Optional predicate overriding literal ``source_id``
matching. Gets the removed entry's source string. Used for
``env:*`` (any env-seeded key), ``config:*`` (any custom
pool), and ``manual:*`` (any manual-source variant).
remove_fn: ``(provider, removed_entry) -> RemovalResult``. Does the
actual cleanup and returns what happened for the user.
description: One-line human-readable description for docs / tests.
"""
provider: str
source_id: str
remove_fn: Callable[..., RemovalResult]
match_fn: Optional[Callable[[str], bool]] = None
description: str = ""
def matches(self, provider: str, source: str) -> bool:
if self.provider != "*" and self.provider != provider:
return False
if self.match_fn is not None:
return self.match_fn(source)
return source == self.source_id
_REGISTRY: List[RemovalStep] = []
def register(step: RemovalStep) -> RemovalStep:
_REGISTRY.append(step)
return step
def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]:
"""Return the first matching RemovalStep, or None if unregistered.
Unregistered sources fall through to the default remove path in
``auth_remove_command``: the pool entry is already gone (that happens
before dispatch), no external cleanup, no suppression. This is the
correct behaviour for ``manual`` entries they were only ever stored
in the pool, nothing external to clean up.
"""
for step in _REGISTRY:
if step.matches(provider, source):
return step
return None
# ---------------------------------------------------------------------------
# Individual RemovalStep implementations — one per source.
# ---------------------------------------------------------------------------
# Each remove_fn is intentionally small and single-purpose. Adding a new
# credential source means adding ONE entry here — no other changes to
# auth_remove_command.
def _remove_env_source(provider: str, removed) -> RemovalResult:
"""env:<VAR> — the most common case.
Handles three user situations:
1. Var lives only in ~/.hermes/.env clear it
2. Var lives only in the user's shell (shell profile, systemd
EnvironmentFile, launchd plist) hint them where to unset it
3. Var lives in both clear from .env, hint about shell
"""
from hermes_cli.config import get_env_path, remove_env_value
result = RemovalResult()
env_var = removed.source[len("env:"):]
if not env_var:
return result
# Detect shell vs .env BEFORE remove_env_value pops os.environ.
env_in_process = bool(os.getenv(env_var))
env_in_dotenv = False
try:
env_path = get_env_path()
if env_path.exists():
env_in_dotenv = any(
line.strip().startswith(f"{env_var}=")
for line in env_path.read_text(errors="replace").splitlines()
)
except OSError:
pass
shell_exported = env_in_process and not env_in_dotenv
cleared = remove_env_value(env_var)
if cleared:
result.cleaned.append(f"Cleared {env_var} from .env")
if shell_exported:
result.hints.extend([
f"Note: {env_var} is still set in your shell environment "
f"(not in ~/.hermes/.env).",
" Unset it there (shell profile, systemd EnvironmentFile, "
"launchd plist, etc.) or it will keep being visible to Hermes.",
f" The pool entry is now suppressed — Hermes will ignore "
f"{env_var} until you run `hermes auth add {provider}`.",
])
else:
result.hints.append(
f"Suppressed env:{env_var} — it will not be re-seeded even "
f"if the variable is re-exported later."
)
return result
def _remove_claude_code(provider: str, removed) -> RemovalResult:
"""~/.claude/.credentials.json is owned by Claude Code itself.
We don't delete it — the user's Claude Code install still needs to
work. We just suppress it so Hermes stops reading it.
"""
return RemovalResult(hints=[
"Suppressed claude_code credential — it will not be re-seeded.",
"Note: Claude Code credentials still live in ~/.claude/.credentials.json",
"Run `hermes auth add anthropic` to re-enable if needed.",
])
def _remove_hermes_pkce(provider: str, removed) -> RemovalResult:
"""~/.hermes/.anthropic_oauth.json is ours — delete it outright."""
from hermes_constants import get_hermes_home
result = RemovalResult()
oauth_file = get_hermes_home() / ".anthropic_oauth.json"
if oauth_file.exists():
try:
oauth_file.unlink()
result.cleaned.append("Cleared Hermes Anthropic OAuth credentials")
except OSError as exc:
result.hints.append(f"Could not delete {oauth_file}: {exc}")
return result
def _clear_auth_store_provider(provider: str) -> bool:
"""Delete auth_store.providers[provider]. Returns True if deleted."""
from hermes_cli.auth import (
_auth_store_lock,
_load_auth_store,
_save_auth_store,
)
with _auth_store_lock():
auth_store = _load_auth_store()
providers_dict = auth_store.get("providers")
if isinstance(providers_dict, dict) and provider in providers_dict:
del providers_dict[provider]
_save_auth_store(auth_store)
return True
return False
def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
"""Nous OAuth lives in auth.json providers.nous — clear it and suppress.
We suppress in addition to clearing because nothing else stops the
user's next `hermes login` run from writing providers.nous again
before they decide to. Suppression forces them to go through
`hermes auth add nous` to re-engage, which is the documented re-add
path and clears the suppression atomically.
"""
result = RemovalResult()
if _clear_auth_store_provider(provider):
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
return result
def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
"""Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
refresh_codex_oauth_pure() writes both every time, so clearing only
the Hermes auth store is not enough _seed_from_singletons() would
re-import from ~/.codex/auth.json on the next load_pool() call and
the removal would be instantly undone. We suppress instead of
deleting Codex CLI's file, so the Codex CLI itself keeps working.
The canonical source name in ``_seed_from_singletons`` is
``"device_code"`` (no prefix). Entries may show up in the pool as
either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added
via ``hermes auth add openai-codex``), but in both cases the re-seed
gate lives at the ``"device_code"`` suppression key. We suppress
that canonical key here; the central dispatcher also suppresses
``removed.source`` which is fine belt-and-suspenders, idempotent.
"""
from hermes_cli.auth import suppress_credential_source
result = RemovalResult()
if _clear_auth_store_provider(provider):
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
# Suppress the canonical re-seed source, not just whatever source the
# removed entry had. Otherwise `manual:device_code` removals wouldn't
# block the `device_code` re-seed path.
suppress_credential_source(provider, "device_code")
result.hints.extend([
"Suppressed openai-codex device_code source — it will not be re-seeded.",
"Note: Codex CLI credentials still live in ~/.codex/auth.json",
"Run `hermes auth add openai-codex` to re-enable if needed.",
])
return result
def _remove_qwen_cli(provider: str, removed) -> RemovalResult:
"""~/.qwen/oauth_creds.json is owned by the Qwen CLI.
Same pattern as claude_code suppress, don't delete. The user's
Qwen CLI install still reads from that file.
"""
return RemovalResult(hints=[
"Suppressed qwen-cli credential — it will not be re-seeded.",
"Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json",
"Run `hermes auth add qwen-oauth` to re-enable if needed.",
])
def _remove_copilot_gh(provider: str, removed) -> RemovalResult:
"""Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN.
Copilot is special: the same token can be seeded as multiple source
entries (gh_cli from ``_seed_from_singletons`` plus env:<VAR> from
``_seed_from_env``), so removing one entry without suppressing the
others lets the duplicates resurrect. We suppress ALL known copilot
sources here so removal is stable regardless of which entry the
user clicked.
We don't touch the user's gh CLI or shell state just suppress so
Hermes stops picking the token up.
"""
# Suppress ALL copilot source variants up-front so no path resurrects
# the pool entry. The central dispatcher in auth_remove_command will
# ALSO suppress removed.source, but it's idempotent so double-calling
# is harmless.
from hermes_cli.auth import suppress_credential_source
suppress_credential_source(provider, "gh_cli")
for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"):
suppress_credential_source(provider, f"env:{env_var}")
return RemovalResult(hints=[
"Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.",
"Note: Your gh CLI / shell environment is unchanged.",
"Run `hermes auth add copilot` to re-enable if needed.",
])
def _remove_custom_config(provider: str, removed) -> RemovalResult:
"""Custom provider pools are seeded from custom_providers config or
model.api_key. Both are in config.yaml modifying that from here
is more invasive than suppression. We suppress; the user can edit
config.yaml if they want to remove the key from disk entirely.
"""
source_label = removed.source
return RemovalResult(hints=[
f"Suppressed {source_label} — it will not be re-seeded.",
"Note: The underlying value in config.yaml is unchanged. Edit it "
"directly if you want to remove the credential from disk.",
])
def _register_all_sources() -> None:
"""Called once on module import.
ORDER MATTERS ``find_removal_step`` returns the first match. Put
provider-specific steps before the generic ``env:*`` step so that e.g.
copilot's ``env:GH_TOKEN`` goes through the copilot removal (which
doesn't touch the user's shell), not the generic env-var removal
(which would try to clear .env).
"""
register(RemovalStep(
provider="copilot", source_id="gh_cli",
match_fn=lambda src: src == "gh_cli" or src.startswith("env:"),
remove_fn=_remove_copilot_gh,
description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
))
register(RemovalStep(
provider="*", source_id="env:",
match_fn=lambda src: src.startswith("env:"),
remove_fn=_remove_env_source,
description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
))
register(RemovalStep(
provider="anthropic", source_id="claude_code",
remove_fn=_remove_claude_code,
description="~/.claude/.credentials.json",
))
register(RemovalStep(
provider="anthropic", source_id="hermes_pkce",
remove_fn=_remove_hermes_pkce,
description="~/.hermes/.anthropic_oauth.json",
))
register(RemovalStep(
provider="nous", source_id="device_code",
remove_fn=_remove_nous_device_code,
description="auth.json providers.nous",
))
register(RemovalStep(
provider="openai-codex", source_id="device_code",
match_fn=lambda src: src == "device_code" or src.endswith(":device_code"),
remove_fn=_remove_codex_device_code,
description="auth.json providers.openai-codex + ~/.codex/auth.json",
))
register(RemovalStep(
provider="qwen-oauth", source_id="qwen-cli",
remove_fn=_remove_qwen_cli,
description="~/.qwen/oauth_creds.json",
))
register(RemovalStep(
provider="*", source_id="config:",
match_fn=lambda src: src.startswith("config:") or src == "model_config",
remove_fn=_remove_custom_config,
description="Custom provider config.yaml api_key field",
))
_register_all_sources()
+4 -10
View File
@@ -225,11 +225,9 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
content = _oneline(args.get("content", ""))
return f"+{target}: \"{content[:25]}{'...' if len(content) > 25 else ''}\""
elif action == "replace":
old = _oneline(args.get("old_text") or "") or "<missing old_text>"
return f"~{target}: \"{old[:20]}\""
return f"~{target}: \"{_oneline(args.get('old_text', '')[:20])}\""
elif action == "remove":
old = _oneline(args.get("old_text") or "") or "<missing old_text>"
return f"-{target}: \"{old[:20]}\""
return f"-{target}: \"{_oneline(args.get('old_text', '')[:20])}\""
return action
if tool_name == "send_message":
@@ -941,13 +939,9 @@ def get_cute_tool_message(
if action == "add":
return _wrap(f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}")
elif action == "replace":
old = args.get("old_text") or ""
old = old if old else "<missing old_text>"
return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(old, 20)}\" {dur}")
return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
elif action == "remove":
old = args.get("old_text") or ""
old = old if old else "<missing old_text>"
return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(old, 20)}\" {dur}")
return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
return _wrap(f"┊ 🧠 memory {action} {dur}")
if tool_name == "skills_list":
return _wrap(f"┊ 📚 skills list {args.get('category', 'all')} {dur}")
+15 -165
View File
@@ -42,11 +42,9 @@ class FailoverReason(enum.Enum):
# Context / payload
context_overflow = "context_overflow" # Context too large — compress, not failover
payload_too_large = "payload_too_large" # 413 — compress payload
image_too_large = "image_too_large" # Native image part exceeds provider's per-image limit — shrink and retry
# Model
model_not_found = "model_not_found" # 404 or invalid model — fallback to different model
provider_policy_blocked = "provider_policy_blocked" # Aggregator (e.g. OpenRouter) blocked the only endpoint due to account data/privacy policy
# Request format
format_error = "format_error" # 400 bad request — abort or strip + retry
@@ -148,20 +146,6 @@ _PAYLOAD_TOO_LARGE_PATTERNS = [
"error code: 413",
]
# Image-size patterns. Matched against 400 bodies (not 413) because most
# providers return a 400 with a specific image-too-big message before the
# whole request hits the 413 size limit. Anthropic's wording is the most
# important here (hard 5 MB per image, returned as
# "messages.N.content.K.image.source.base64: image exceeds 5 MB maximum").
_IMAGE_TOO_LARGE_PATTERNS = [
"image exceeds", # Anthropic: "image exceeds 5 MB maximum"
"image too large", # generic
"image_too_large", # error_code variant
"image size exceeds", # variant
# "request_too_large" on a request known to contain an image → image is
# the likely culprit; we still try the shrink path before giving up.
]
# Context overflow patterns
_CONTEXT_OVERFLOW_PATTERNS = [
"context length",
@@ -210,29 +194,6 @@ _MODEL_NOT_FOUND_PATTERNS = [
"unsupported model",
]
# OpenRouter aggregator policy-block patterns.
#
# When a user's OpenRouter account privacy setting (or a per-request
# `provider.data_collection: deny` preference) excludes the only endpoint
# serving a model, OpenRouter returns 404 with a *specific* message that is
# distinct from "model not found":
#
# "No endpoints available matching your guardrail restrictions and
# data policy. Configure: https://openrouter.ai/settings/privacy"
#
# We classify this as `provider_policy_blocked` rather than
# `model_not_found` because:
# - The model *exists* — model_not_found is misleading in logs
# - Provider fallback won't help: the account-level setting applies to
# every call on the same OpenRouter account
# - The error body already contains the fix URL, so the user gets
# actionable guidance without us rewriting the message
_PROVIDER_POLICY_BLOCKED_PATTERNS = [
"no endpoints available matching your guardrail",
"no endpoints available matching your data policy",
"no endpoints found matching your data policy",
]
# Auth patterns (non-status-code signals)
_AUTH_PATTERNS = [
"invalid api key",
@@ -259,25 +220,12 @@ _TRANSPORT_ERROR_TYPES = frozenset({
"ConnectionAbortedError", "BrokenPipeError",
"TimeoutError", "ReadError",
"ServerDisconnectedError",
# SSL/TLS transport errors — transient mid-stream handshake/record
# failures that should retry rather than surface as a stalled session.
# ssl.SSLError subclasses OSError (caught by isinstance) but we list
# the type names here so provider-wrapped SSL errors (e.g. when the
# SDK re-raises without preserving the exception chain) still classify
# as transport rather than falling through to the unknown bucket.
"SSLError", "SSLZeroReturnError", "SSLWantReadError",
"SSLWantWriteError", "SSLEOFError", "SSLSyscallError",
# OpenAI SDK errors (not subclasses of Python builtins)
"APIConnectionError",
"APITimeoutError",
})
# Server disconnect patterns (no status code, but transport-level).
# These are the "ambiguous" patterns — a plain connection close could be
# transient transport hiccup OR server-side context overflow rejection
# (common when the API gateway disconnects instead of returning an HTTP
# error for oversized requests). A large session + one of these patterns
# triggers the context-overflow-with-compression recovery path.
# Server disconnect patterns (no status code, but transport-level)
_SERVER_DISCONNECT_PATTERNS = [
"server disconnected",
"peer closed connection",
@@ -288,40 +236,6 @@ _SERVER_DISCONNECT_PATTERNS = [
"incomplete chunked read",
]
# SSL/TLS transient failure patterns — intentionally distinct from
# _SERVER_DISCONNECT_PATTERNS above.
#
# An SSL alert mid-stream is almost always a transport-layer hiccup
# (flaky network, mid-session TLS renegotiation failure, load balancer
# dropping the connection) — NOT a server-side context overflow signal.
# So we want the retry path but NOT the compression path; lumping these
# into _SERVER_DISCONNECT_PATTERNS would trigger unnecessary (and
# expensive) context compression on any large-session SSL hiccup.
#
# The OpenSSL library constructs error codes by prepending a format string
# to the uppercased alert reason; OpenSSL 3.x changed the separator
# (e.g. `SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
# which silently stopped matching anything explicit. Matching on the
# stable substrings (`bad record mac`, `ssl alert`, `tls alert`, etc.)
# survives future OpenSSL format churn without code changes.
_SSL_TRANSIENT_PATTERNS = [
# Space-separated (human-readable form, Python ssl module, most SDKs)
"bad record mac",
"ssl alert",
"tls alert",
"ssl handshake failure",
"tlsv1 alert",
"sslv3 alert",
# Underscore-separated (OpenSSL error code tokens, e.g.
# `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC`, `SSLV3_ALERT_BAD_RECORD_MAC`)
"bad_record_mac",
"ssl_alert",
"tls_alert",
"tls_alert_internal_error",
# Python ssl module prefix, e.g. "[SSL: BAD_RECORD_MAC]"
"[ssl:",
]
# ── Classification pipeline ─────────────────────────────────────────────
@@ -341,10 +255,9 @@ def classify_api_error(
2. HTTP status code + message-aware refinement
3. Error code classification (from body)
4. Message pattern matching (billing vs rate_limit vs context vs auth)
5. SSL/TLS transient alert patterns retry as timeout
5. Transport error heuristics
6. Server disconnect + large session context overflow
7. Transport error heuristics
8. Fallback: unknown (retryable with backoff)
7. Fallback: unknown (retryable with backoff)
Args:
error: The exception from the API call.
@@ -358,11 +271,6 @@ def classify_api_error(
"""
status_code = _extract_status_code(error)
error_type = type(error).__name__
# Copilot/GitHub Models RateLimitError may not set .status_code; force 429
# so downstream rate-limit handling (classifier reason, pool rotation,
# fallback gating) fires correctly instead of misclassifying as generic.
if status_code is None and error_type == "RateLimitError":
status_code = 429
body = _extract_error_body(error)
error_code = _extract_error_code(body)
@@ -382,7 +290,7 @@ def classify_api_error(
if isinstance(body, dict):
_err_obj = body.get("error", {})
if isinstance(_err_obj, dict):
_body_msg = str(_err_obj.get("message") or "").lower()
_body_msg = (_err_obj.get("message") or "").lower()
# Parse metadata.raw for wrapped provider errors
_metadata = _err_obj.get("metadata", {})
if isinstance(_metadata, dict):
@@ -394,11 +302,11 @@ def classify_api_error(
if isinstance(_inner, dict):
_inner_err = _inner.get("error", {})
if isinstance(_inner_err, dict):
_metadata_msg = str(_inner_err.get("message") or "").lower()
_metadata_msg = (_inner_err.get("message") or "").lower()
except (json.JSONDecodeError, TypeError):
pass
if not _body_msg:
_body_msg = str(body.get("message") or "").lower()
_body_msg = (body.get("message") or "").lower()
# Combine all message sources for pattern matching
parts = [_raw_msg]
if _body_msg and _body_msg not in _raw_msg:
@@ -480,18 +388,7 @@ def classify_api_error(
if classified is not None:
return classified
# ── 5. SSL/TLS transient errors → retry as timeout (not compression) ──
# SSL alerts mid-stream are transport hiccups, not server-side context
# overflow signals. Classify before the disconnect check so a large
# session doesn't incorrectly trigger context compression when the real
# cause is a flaky TLS handshake. Also matches when the error is
# wrapped in a generic exception whose message string carries the SSL
# alert text but the type isn't ssl.SSLError (happens with some SDKs
# that re-raise without chaining).
if any(p in error_msg for p in _SSL_TRANSIENT_PATTERNS):
return _result(FailoverReason.timeout, retryable=True)
# ── 6. Server disconnect + large session → context overflow ─────
# ── 5. Server disconnect + large session → context overflow ─────
# Must come BEFORE generic transport error catch — a disconnect on
# a large session is more likely context overflow than a transient
# transport hiccup. Without this ordering, RemoteProtocolError
@@ -508,12 +405,12 @@ def classify_api_error(
)
return _result(FailoverReason.timeout, retryable=True)
# ── 7. Transport / timeout heuristics ───────────────────────────
# ── 6. Transport / timeout heuristics ───────────────────────────
if error_type in _TRANSPORT_ERROR_TYPES or isinstance(error, (TimeoutError, ConnectionError, OSError)):
return _result(FailoverReason.timeout, retryable=True)
# ── 8. Fallback: unknown ────────────────────────────────────────
# ── 7. Fallback: unknown ────────────────────────────────────────
return _result(FailoverReason.unknown, retryable=True)
@@ -567,33 +464,17 @@ def _classify_by_status(
return _classify_402(error_msg, result_fn)
if status_code == 404:
# OpenRouter policy-block 404 — distinct from "model not found".
# The model exists; the user's account privacy setting excludes the
# only endpoint serving it. Falling back to another provider won't
# help (same account setting applies). The error body already
# contains the fix URL, so just surface it.
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
return result_fn(
FailoverReason.provider_policy_blocked,
retryable=False,
should_fallback=False,
)
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
return result_fn(
FailoverReason.model_not_found,
retryable=False,
should_fallback=True,
)
# Generic 404 with no "model not found" signal — could be a wrong
# endpoint path (common with local llama.cpp / Ollama / vLLM when
# the URL is slightly misconfigured), a proxy routing glitch, or
# a transient backend issue. Classifying these as model_not_found
# silently falls back to a different provider and tells the model
# the model is missing, which is wrong and wastes a turn. Treat
# as unknown so the retry loop surfaces the real error instead.
# Generic 404 — could be model or endpoint
return result_fn(
FailoverReason.unknown,
retryable=True,
FailoverReason.model_not_found,
retryable=False,
should_fallback=True,
)
if status_code == 413:
@@ -686,15 +567,6 @@ def _classify_400(
) -> ClassifiedError:
"""Classify 400 Bad Request — context overflow, format error, or generic."""
# Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
# Must be checked BEFORE context_overflow because messages can trip both
# patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
return result_fn(
FailoverReason.image_too_large,
retryable=True,
)
# Context overflow from 400
if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
return result_fn(
@@ -704,12 +576,6 @@ def _classify_400(
)
# Some providers return model-not-found as 400 instead of 404 (e.g. OpenRouter).
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
return result_fn(
FailoverReason.provider_policy_blocked,
retryable=False,
should_fallback=False,
)
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
return result_fn(
FailoverReason.model_not_found,
@@ -740,10 +606,10 @@ def _classify_400(
if isinstance(body, dict):
err_obj = body.get("error", {})
if isinstance(err_obj, dict):
err_body_msg = str(err_obj.get("message") or "").strip().lower()
err_body_msg = (err_obj.get("message") or "").strip().lower()
# Responses API (and some providers) use flat body: {"message": "..."}
if not err_body_msg:
err_body_msg = str(body.get("message") or "").strip().lower()
err_body_msg = (body.get("message") or "").strip().lower()
is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "")
is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80
@@ -822,13 +688,6 @@ def _classify_by_message(
should_compress=True,
)
# Image-too-large patterns (from message text when no status_code)
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
return result_fn(
FailoverReason.image_too_large,
retryable=True,
)
# Usage-limit patterns need the same disambiguation as 402: some providers
# surface "usage limit" errors without an HTTP status code. A transient
# signal ("try again", "resets at", …) means it's a periodic quota, not
@@ -889,15 +748,6 @@ def _classify_by_message(
should_fallback=True,
)
# Provider policy-block (aggregator-side guardrail) — check before
# model_not_found so we don't mis-label as a missing model.
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
return result_fn(
FailoverReason.provider_policy_blocked,
retryable=False,
should_fallback=False,
)
# Model not found patterns
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
return result_fn(
-111
View File
@@ -1,111 +0,0 @@
"""Shared file safety rules used by both tools and ACP shims."""
from __future__ import annotations
import os
from pathlib import Path
from typing import Optional
def _hermes_home_path() -> Path:
"""Resolve the active HERMES_HOME (profile-aware) without circular imports."""
try:
from hermes_constants import get_hermes_home # local import to avoid cycles
return get_hermes_home()
except Exception:
return Path(os.path.expanduser("~/.hermes"))
def build_write_denied_paths(home: str) -> set[str]:
"""Return exact sensitive paths that must never be written."""
hermes_home = _hermes_home_path()
return {
os.path.realpath(p)
for p in [
os.path.join(home, ".ssh", "authorized_keys"),
os.path.join(home, ".ssh", "id_rsa"),
os.path.join(home, ".ssh", "id_ed25519"),
os.path.join(home, ".ssh", "config"),
str(hermes_home / ".env"),
os.path.join(home, ".bashrc"),
os.path.join(home, ".zshrc"),
os.path.join(home, ".profile"),
os.path.join(home, ".bash_profile"),
os.path.join(home, ".zprofile"),
os.path.join(home, ".netrc"),
os.path.join(home, ".pgpass"),
os.path.join(home, ".npmrc"),
os.path.join(home, ".pypirc"),
"/etc/sudoers",
"/etc/passwd",
"/etc/shadow",
]
}
def build_write_denied_prefixes(home: str) -> list[str]:
"""Return sensitive directory prefixes that must never be written."""
return [
os.path.realpath(p) + os.sep
for p in [
os.path.join(home, ".ssh"),
os.path.join(home, ".aws"),
os.path.join(home, ".gnupg"),
os.path.join(home, ".kube"),
"/etc/sudoers.d",
"/etc/systemd",
os.path.join(home, ".docker"),
os.path.join(home, ".azure"),
os.path.join(home, ".config", "gh"),
]
]
def get_safe_write_root() -> Optional[str]:
"""Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset."""
root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
if not root:
return None
try:
return os.path.realpath(os.path.expanduser(root))
except Exception:
return None
def is_write_denied(path: str) -> bool:
"""Return True if path is blocked by the write denylist or safe root."""
home = os.path.realpath(os.path.expanduser("~"))
resolved = os.path.realpath(os.path.expanduser(str(path)))
if resolved in build_write_denied_paths(home):
return True
for prefix in build_write_denied_prefixes(home):
if resolved.startswith(prefix):
return True
safe_root = get_safe_write_root()
if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
return True
return False
def get_read_block_error(path: str) -> Optional[str]:
"""Return an error message when a read targets internal Hermes cache files."""
resolved = Path(path).expanduser().resolve()
hermes_home = _hermes_home_path().resolve()
blocked_dirs = [
hermes_home / "skills" / ".hub" / "index-cache",
hermes_home / "skills" / ".hub",
]
for blocked in blocked_dirs:
try:
resolved.relative_to(blocked)
except ValueError:
continue
return (
f"Access denied: {path} is an internal Hermes cache file "
"and cannot be read directly to prevent prompt injection. "
"Use the skills_list or skill_view tools instead."
)
return None
+8 -18
View File
@@ -39,7 +39,6 @@ from typing import Any, Dict, Iterator, List, Optional
import httpx
from agent import google_oauth
from agent.gemini_schema import sanitize_gemini_tool_parameters
from agent.google_code_assist import (
CODE_ASSIST_ENDPOINT,
FREE_TIER_ID,
@@ -206,7 +205,7 @@ def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]:
decl["description"] = str(fn["description"])
params = fn.get("parameters")
if isinstance(params, dict):
decl["parameters"] = sanitize_gemini_tool_parameters(params)
decl["parameters"] = params
declarations.append(decl)
if not declarations:
return []
@@ -505,16 +504,9 @@ def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]:
def _translate_stream_event(
event: Dict[str, Any],
model: str,
tool_call_counter: List[int],
tool_call_indices: Dict[str, int],
) -> List[_GeminiStreamChunk]:
"""Unwrap Code Assist envelope and emit OpenAI-shaped chunk(s).
``tool_call_counter`` is a single-element list used as a mutable counter
across events in the same stream. Each ``functionCall`` part gets a
fresh, unique OpenAI ``index`` keying by function name would collide
whenever the model issues parallel calls to the same tool (e.g. reading
three files in one turn).
"""
"""Unwrap Code Assist envelope and emit OpenAI-shaped chunk(s)."""
inner = event.get("response") if isinstance(event.get("response"), dict) else event
candidates = inner.get("candidates") or []
if not candidates:
@@ -540,8 +532,7 @@ def _translate_stream_event(
fc = part.get("functionCall")
if isinstance(fc, dict) and fc.get("name"):
name = str(fc["name"])
idx = tool_call_counter[0]
tool_call_counter[0] += 1
idx = tool_call_indices.setdefault(name, len(tool_call_indices))
try:
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
except (TypeError, ValueError):
@@ -558,7 +549,7 @@ def _translate_stream_event(
finish_reason_raw = str(cand.get("finishReason") or "")
if finish_reason_raw:
mapped = _map_gemini_finish_reason(finish_reason_raw)
if tool_call_counter[0] > 0:
if tool_call_indices:
mapped = "tool_calls"
chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
return chunks
@@ -742,9 +733,9 @@ class GeminiCloudCodeClient:
# Materialize error body for better diagnostics
response.read()
raise _gemini_http_error(response)
tool_call_counter: List[int] = [0]
tool_call_indices: Dict[str, int] = {}
for event in _iter_sse_events(response):
for chunk in _translate_stream_event(event, model, tool_call_counter):
for chunk in _translate_stream_event(event, model, tool_call_indices):
yield chunk
except httpx.HTTPError as exc:
raise CodeAssistError(
@@ -799,8 +790,7 @@ def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
err_obj = {}
err_status = str(err_obj.get("status") or "").strip()
err_message = str(err_obj.get("message") or "").strip()
_raw_details = err_obj.get("details")
err_details_list = _raw_details if isinstance(_raw_details, list) else []
err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []
# Extract google.rpc.ErrorInfo reason + metadata. There may be more
# than one ErrorInfo (rare), so we pick the first one with a reason.
-951
View File
@@ -1,951 +0,0 @@
"""OpenAI-compatible facade over Google AI Studio's native Gemini API.
Hermes keeps ``api_mode='chat_completions'`` for the ``gemini`` provider so the
main agent loop can keep using its existing OpenAI-shaped message flow.
This adapter is the transport shim that converts those OpenAI-style
``messages[]`` / ``tools[]`` requests into Gemini's native
``models/{model}:generateContent`` schema and converts the responses back.
Why this exists
---------------
Google's OpenAI-compatible endpoint has been brittle for Hermes's multi-turn
agent/tool loop (auth churn, tool-call replay quirks, thought-signature
requirements). The native Gemini API is the canonical path and avoids the
OpenAI-compat layer entirely.
"""
from __future__ import annotations
import asyncio
import base64
import json
import logging
import time
import uuid
from types import SimpleNamespace
from typing import Any, Dict, Iterator, List, Optional
import httpx
from agent.gemini_schema import sanitize_gemini_tool_parameters
logger = logging.getLogger(__name__)
DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
def is_native_gemini_base_url(base_url: str) -> bool:
"""Return True when the endpoint speaks Gemini's native REST API."""
normalized = str(base_url or "").strip().rstrip("/").lower()
if not normalized:
return False
if "generativelanguage.googleapis.com" not in normalized:
return False
return not normalized.endswith("/openai")
def probe_gemini_tier(
api_key: str,
base_url: str = DEFAULT_GEMINI_BASE_URL,
*,
model: str = "gemini-2.5-flash",
timeout: float = 10.0,
) -> str:
"""Probe a Google AI Studio API key and return its tier.
Returns one of:
- ``"free"`` -- key is on the free tier (unusable with Hermes)
- ``"paid"`` -- key is on a paid tier
- ``"unknown"`` -- probe failed; callers should proceed without blocking.
"""
key = (api_key or "").strip()
if not key:
return "unknown"
normalized_base = str(base_url or DEFAULT_GEMINI_BASE_URL).strip().rstrip("/")
if not normalized_base:
normalized_base = DEFAULT_GEMINI_BASE_URL
if normalized_base.lower().endswith("/openai"):
normalized_base = normalized_base[: -len("/openai")]
url = f"{normalized_base}/models/{model}:generateContent"
payload = {
"contents": [{"role": "user", "parts": [{"text": "hi"}]}],
"generationConfig": {"maxOutputTokens": 1},
}
try:
with httpx.Client(timeout=timeout) as client:
resp = client.post(
url,
params={"key": key},
json=payload,
headers={"Content-Type": "application/json"},
)
except Exception as exc:
logger.debug("probe_gemini_tier: network error: %s", exc)
return "unknown"
headers_lower = {k.lower(): v for k, v in resp.headers.items()}
rpd_header = headers_lower.get("x-ratelimit-limit-requests-per-day")
if rpd_header:
try:
rpd_val = int(rpd_header)
except (TypeError, ValueError):
rpd_val = None
# Published free-tier daily caps (Dec 2025):
# gemini-2.5-pro: 100, gemini-2.5-flash: 250, flash-lite: 1000
# Tier 1 starts at ~1500+ for Flash. We treat <= 1000 as free.
if rpd_val is not None and rpd_val <= 1000:
return "free"
if rpd_val is not None and rpd_val > 1000:
return "paid"
if resp.status_code == 429:
body_text = ""
try:
body_text = resp.text or ""
except Exception:
body_text = ""
if "free_tier" in body_text.lower():
return "free"
return "paid"
if 200 <= resp.status_code < 300:
return "paid"
return "unknown"
def is_free_tier_quota_error(error_message: str) -> bool:
"""Return True when a Gemini 429 message indicates free-tier exhaustion."""
if not error_message:
return False
return "free_tier" in error_message.lower()
_FREE_TIER_GUIDANCE = (
"\n\nYour Google API key is on the free tier (<= 250 requests/day for "
"gemini-2.5-flash). Hermes typically makes 3-10 API calls per user turn, "
"so the free tier is exhausted in a handful of messages and cannot sustain "
"an agent session. Enable billing on your Google Cloud project and "
"regenerate the key in a billing-enabled project: "
"https://aistudio.google.com/apikey"
)
class GeminiAPIError(Exception):
"""Error shape compatible with Hermes retry/error classification."""
def __init__(
self,
message: str,
*,
code: str = "gemini_api_error",
status_code: Optional[int] = None,
response: Optional[httpx.Response] = None,
retry_after: Optional[float] = None,
details: Optional[Dict[str, Any]] = None,
) -> None:
super().__init__(message)
self.code = code
self.status_code = status_code
self.response = response
self.retry_after = retry_after
self.details = details or {}
def _coerce_content_to_text(content: Any) -> str:
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
pieces: List[str] = []
for part in content:
if isinstance(part, str):
pieces.append(part)
elif isinstance(part, dict) and part.get("type") == "text":
text = part.get("text")
if isinstance(text, str):
pieces.append(text)
return "\n".join(pieces)
return str(content)
def _extract_multimodal_parts(content: Any) -> List[Dict[str, Any]]:
if not isinstance(content, list):
text = _coerce_content_to_text(content)
return [{"text": text}] if text else []
parts: List[Dict[str, Any]] = []
for item in content:
if isinstance(item, str):
parts.append({"text": item})
continue
if not isinstance(item, dict):
continue
ptype = item.get("type")
if ptype == "text":
text = item.get("text")
if isinstance(text, str) and text:
parts.append({"text": text})
elif ptype == "image_url":
url = ((item.get("image_url") or {}).get("url") or "")
if not isinstance(url, str) or not url.startswith("data:"):
continue
try:
header, encoded = url.split(",", 1)
mime = header.split(":", 1)[1].split(";", 1)[0]
raw = base64.b64decode(encoded)
except Exception:
continue
parts.append(
{
"inlineData": {
"mimeType": mime,
"data": base64.b64encode(raw).decode("ascii"),
}
}
)
return parts
def _tool_call_extra_signature(tool_call: Dict[str, Any]) -> Optional[str]:
extra = tool_call.get("extra_content") or {}
if not isinstance(extra, dict):
return None
google = extra.get("google") or extra.get("thought_signature")
if isinstance(google, dict):
sig = google.get("thought_signature") or google.get("thoughtSignature")
return str(sig) if isinstance(sig, str) and sig else None
if isinstance(google, str) and google:
return google
return None
def _translate_tool_call_to_gemini(tool_call: Dict[str, Any]) -> Dict[str, Any]:
fn = tool_call.get("function") or {}
args_raw = fn.get("arguments", "")
try:
args = json.loads(args_raw) if isinstance(args_raw, str) and args_raw else {}
except json.JSONDecodeError:
args = {"_raw": args_raw}
if not isinstance(args, dict):
args = {"_value": args}
part: Dict[str, Any] = {
"functionCall": {
"name": str(fn.get("name") or ""),
"args": args,
}
}
thought_signature = _tool_call_extra_signature(tool_call)
if thought_signature:
part["thoughtSignature"] = thought_signature
return part
def _translate_tool_result_to_gemini(
message: Dict[str, Any],
tool_name_by_call_id: Optional[Dict[str, str]] = None,
) -> Dict[str, Any]:
tool_name_by_call_id = tool_name_by_call_id or {}
tool_call_id = str(message.get("tool_call_id") or "")
name = str(
message.get("name")
or tool_name_by_call_id.get(tool_call_id)
or tool_call_id
or "tool"
)
content = _coerce_content_to_text(message.get("content"))
try:
parsed = json.loads(content) if content.strip().startswith(("{", "[")) else None
except json.JSONDecodeError:
parsed = None
response = parsed if isinstance(parsed, dict) else {"output": content}
return {
"functionResponse": {
"name": name,
"response": response,
}
}
def _build_gemini_contents(messages: List[Dict[str, Any]]) -> tuple[List[Dict[str, Any]], Optional[Dict[str, Any]]]:
system_text_parts: List[str] = []
contents: List[Dict[str, Any]] = []
tool_name_by_call_id: Dict[str, str] = {}
for msg in messages:
if not isinstance(msg, dict):
continue
role = str(msg.get("role") or "user")
if role == "system":
system_text_parts.append(_coerce_content_to_text(msg.get("content")))
continue
if role in {"tool", "function"}:
contents.append(
{
"role": "user",
"parts": [
_translate_tool_result_to_gemini(
msg,
tool_name_by_call_id=tool_name_by_call_id,
)
],
}
)
continue
gemini_role = "model" if role == "assistant" else "user"
parts: List[Dict[str, Any]] = []
content_parts = _extract_multimodal_parts(msg.get("content"))
parts.extend(content_parts)
tool_calls = msg.get("tool_calls") or []
if isinstance(tool_calls, list):
for tool_call in tool_calls:
if isinstance(tool_call, dict):
tool_call_id = str(tool_call.get("id") or tool_call.get("call_id") or "")
tool_name = str(((tool_call.get("function") or {}).get("name") or ""))
if tool_call_id and tool_name:
tool_name_by_call_id[tool_call_id] = tool_name
parts.append(_translate_tool_call_to_gemini(tool_call))
if parts:
contents.append({"role": gemini_role, "parts": parts})
system_instruction = None
joined_system = "\n".join(part for part in system_text_parts if part).strip()
if joined_system:
system_instruction = {"parts": [{"text": joined_system}]}
return contents, system_instruction
def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]:
if not isinstance(tools, list):
return []
declarations: List[Dict[str, Any]] = []
for tool in tools:
if not isinstance(tool, dict):
continue
fn = tool.get("function") or {}
if not isinstance(fn, dict):
continue
name = fn.get("name")
if not isinstance(name, str) or not name:
continue
decl: Dict[str, Any] = {"name": name}
description = fn.get("description")
if isinstance(description, str) and description:
decl["description"] = description
parameters = fn.get("parameters")
if isinstance(parameters, dict):
decl["parameters"] = sanitize_gemini_tool_parameters(parameters)
declarations.append(decl)
return [{"functionDeclarations": declarations}] if declarations else []
def _translate_tool_choice_to_gemini(tool_choice: Any) -> Optional[Dict[str, Any]]:
if tool_choice is None:
return None
if isinstance(tool_choice, str):
if tool_choice == "auto":
return {"functionCallingConfig": {"mode": "AUTO"}}
if tool_choice == "required":
return {"functionCallingConfig": {"mode": "ANY"}}
if tool_choice == "none":
return {"functionCallingConfig": {"mode": "NONE"}}
if isinstance(tool_choice, dict):
fn = tool_choice.get("function") or {}
name = fn.get("name")
if isinstance(name, str) and name:
return {"functionCallingConfig": {"mode": "ANY", "allowedFunctionNames": [name]}}
return None
def _normalize_thinking_config(config: Any) -> Optional[Dict[str, Any]]:
if not isinstance(config, dict) or not config:
return None
budget = config.get("thinkingBudget", config.get("thinking_budget"))
include = config.get("includeThoughts", config.get("include_thoughts"))
level = config.get("thinkingLevel", config.get("thinking_level"))
normalized: Dict[str, Any] = {}
if isinstance(budget, (int, float)):
normalized["thinkingBudget"] = int(budget)
if isinstance(include, bool):
normalized["includeThoughts"] = include
if isinstance(level, str) and level.strip():
normalized["thinkingLevel"] = level.strip().lower()
return normalized or None
def build_gemini_request(
*,
messages: List[Dict[str, Any]],
tools: Any = None,
tool_choice: Any = None,
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
top_p: Optional[float] = None,
stop: Any = None,
thinking_config: Any = None,
) -> Dict[str, Any]:
contents, system_instruction = _build_gemini_contents(messages)
request: Dict[str, Any] = {"contents": contents}
if system_instruction:
request["systemInstruction"] = system_instruction
gemini_tools = _translate_tools_to_gemini(tools)
if gemini_tools:
request["tools"] = gemini_tools
tool_config = _translate_tool_choice_to_gemini(tool_choice)
if tool_config:
request["toolConfig"] = tool_config
generation_config: Dict[str, Any] = {}
if temperature is not None:
generation_config["temperature"] = temperature
if max_tokens is not None:
generation_config["maxOutputTokens"] = max_tokens
if top_p is not None:
generation_config["topP"] = top_p
if stop:
generation_config["stopSequences"] = stop if isinstance(stop, list) else [str(stop)]
normalized_thinking = _normalize_thinking_config(thinking_config)
if normalized_thinking:
generation_config["thinkingConfig"] = normalized_thinking
if generation_config:
request["generationConfig"] = generation_config
return request
def _map_gemini_finish_reason(reason: str) -> str:
mapping = {
"STOP": "stop",
"MAX_TOKENS": "length",
"SAFETY": "content_filter",
"RECITATION": "content_filter",
"OTHER": "stop",
}
return mapping.get(str(reason or "").upper(), "stop")
def _tool_call_extra_from_part(part: Dict[str, Any]) -> Optional[Dict[str, Any]]:
sig = part.get("thoughtSignature")
if isinstance(sig, str) and sig:
return {"google": {"thought_signature": sig}}
return None
def _empty_response(model: str) -> SimpleNamespace:
message = SimpleNamespace(
role="assistant",
content="",
tool_calls=None,
reasoning=None,
reasoning_content=None,
reasoning_details=None,
)
choice = SimpleNamespace(index=0, message=message, finish_reason="stop")
usage = SimpleNamespace(
prompt_tokens=0,
completion_tokens=0,
total_tokens=0,
prompt_tokens_details=SimpleNamespace(cached_tokens=0),
)
return SimpleNamespace(
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
object="chat.completion",
created=int(time.time()),
model=model,
choices=[choice],
usage=usage,
)
def translate_gemini_response(resp: Dict[str, Any], model: str) -> SimpleNamespace:
candidates = resp.get("candidates") or []
if not isinstance(candidates, list) or not candidates:
return _empty_response(model)
cand = candidates[0] if isinstance(candidates[0], dict) else {}
content_obj = cand.get("content") if isinstance(cand, dict) else {}
parts = content_obj.get("parts") if isinstance(content_obj, dict) else []
text_pieces: List[str] = []
reasoning_pieces: List[str] = []
tool_calls: List[SimpleNamespace] = []
for index, part in enumerate(parts or []):
if not isinstance(part, dict):
continue
if part.get("thought") is True and isinstance(part.get("text"), str):
reasoning_pieces.append(part["text"])
continue
if isinstance(part.get("text"), str):
text_pieces.append(part["text"])
continue
fc = part.get("functionCall")
if isinstance(fc, dict) and fc.get("name"):
try:
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
except (TypeError, ValueError):
args_str = "{}"
tool_call = SimpleNamespace(
id=f"call_{uuid.uuid4().hex[:12]}",
type="function",
index=index,
function=SimpleNamespace(name=str(fc["name"]), arguments=args_str),
)
extra_content = _tool_call_extra_from_part(part)
if extra_content:
tool_call.extra_content = extra_content
tool_calls.append(tool_call)
finish_reason = "tool_calls" if tool_calls else _map_gemini_finish_reason(str(cand.get("finishReason") or ""))
usage_meta = resp.get("usageMetadata") or {}
usage = SimpleNamespace(
prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
total_tokens=int(usage_meta.get("totalTokenCount") or 0),
prompt_tokens_details=SimpleNamespace(
cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
),
)
reasoning = "".join(reasoning_pieces) or None
message = SimpleNamespace(
role="assistant",
content="".join(text_pieces) if text_pieces else None,
tool_calls=tool_calls or None,
reasoning=reasoning,
reasoning_content=reasoning,
reasoning_details=None,
)
choice = SimpleNamespace(index=0, message=message, finish_reason=finish_reason)
return SimpleNamespace(
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
object="chat.completion",
created=int(time.time()),
model=model,
choices=[choice],
usage=usage,
)
class _GeminiStreamChunk(SimpleNamespace):
pass
def _make_stream_chunk(
*,
model: str,
content: str = "",
tool_call_delta: Optional[Dict[str, Any]] = None,
finish_reason: Optional[str] = None,
reasoning: str = "",
) -> _GeminiStreamChunk:
delta_kwargs: Dict[str, Any] = {
"role": "assistant",
"content": None,
"tool_calls": None,
"reasoning": None,
"reasoning_content": None,
}
if content:
delta_kwargs["content"] = content
if tool_call_delta is not None:
tool_delta = SimpleNamespace(
index=tool_call_delta.get("index", 0),
id=tool_call_delta.get("id") or f"call_{uuid.uuid4().hex[:12]}",
type="function",
function=SimpleNamespace(
name=tool_call_delta.get("name") or "",
arguments=tool_call_delta.get("arguments") or "",
),
)
extra_content = tool_call_delta.get("extra_content")
if isinstance(extra_content, dict):
tool_delta.extra_content = extra_content
delta_kwargs["tool_calls"] = [tool_delta]
if reasoning:
delta_kwargs["reasoning"] = reasoning
delta_kwargs["reasoning_content"] = reasoning
delta = SimpleNamespace(**delta_kwargs)
choice = SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason)
return _GeminiStreamChunk(
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
object="chat.completion.chunk",
created=int(time.time()),
model=model,
choices=[choice],
usage=None,
)
def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]:
buffer = ""
for chunk in response.iter_text():
if not chunk:
continue
buffer += chunk
while "\n" in buffer:
line, buffer = buffer.split("\n", 1)
line = line.rstrip("\r")
if not line:
continue
if not line.startswith("data: "):
continue
data = line[6:]
if data == "[DONE]":
return
try:
payload = json.loads(data)
except json.JSONDecodeError:
logger.debug("Non-JSON Gemini SSE line: %s", data[:200])
continue
if isinstance(payload, dict):
yield payload
def translate_stream_event(event: Dict[str, Any], model: str, tool_call_indices: Dict[str, Dict[str, Any]]) -> List[_GeminiStreamChunk]:
candidates = event.get("candidates") or []
if not candidates:
return []
cand = candidates[0] if isinstance(candidates[0], dict) else {}
parts = ((cand.get("content") or {}).get("parts") or []) if isinstance(cand, dict) else []
chunks: List[_GeminiStreamChunk] = []
for part_index, part in enumerate(parts):
if not isinstance(part, dict):
continue
if part.get("thought") is True and isinstance(part.get("text"), str):
chunks.append(_make_stream_chunk(model=model, reasoning=part["text"]))
continue
if isinstance(part.get("text"), str) and part["text"]:
chunks.append(_make_stream_chunk(model=model, content=part["text"]))
fc = part.get("functionCall")
if isinstance(fc, dict) and fc.get("name"):
name = str(fc["name"])
try:
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False, sort_keys=True)
except (TypeError, ValueError):
args_str = "{}"
thought_signature = part.get("thoughtSignature") if isinstance(part.get("thoughtSignature"), str) else ""
call_key = json.dumps(
{
"part_index": part_index,
"name": name,
"thought_signature": thought_signature,
},
sort_keys=True,
)
slot = tool_call_indices.get(call_key)
if slot is None:
slot = {
"index": len(tool_call_indices),
"id": f"call_{uuid.uuid4().hex[:12]}",
"last_arguments": "",
}
tool_call_indices[call_key] = slot
emitted_arguments = args_str
last_arguments = str(slot.get("last_arguments") or "")
if last_arguments:
if args_str == last_arguments:
emitted_arguments = ""
elif args_str.startswith(last_arguments):
emitted_arguments = args_str[len(last_arguments):]
slot["last_arguments"] = args_str
chunks.append(
_make_stream_chunk(
model=model,
tool_call_delta={
"index": slot["index"],
"id": slot["id"],
"name": name,
"arguments": emitted_arguments,
"extra_content": _tool_call_extra_from_part(part),
},
)
)
finish_reason_raw = str(cand.get("finishReason") or "")
if finish_reason_raw:
mapped = "tool_calls" if tool_call_indices else _map_gemini_finish_reason(finish_reason_raw)
chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
return chunks
def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
status = response.status_code
body_text = ""
body_json: Dict[str, Any] = {}
try:
body_text = response.text
except Exception:
body_text = ""
if body_text:
try:
parsed = json.loads(body_text)
if isinstance(parsed, dict):
body_json = parsed
except (ValueError, TypeError):
body_json = {}
err_obj = body_json.get("error") if isinstance(body_json, dict) else None
if not isinstance(err_obj, dict):
err_obj = {}
err_status = str(err_obj.get("status") or "").strip()
err_message = str(err_obj.get("message") or "").strip()
_raw_details = err_obj.get("details")
details_list = _raw_details if isinstance(_raw_details, list) else []
reason = ""
retry_after: Optional[float] = None
metadata: Dict[str, Any] = {}
for detail in details_list:
if not isinstance(detail, dict):
continue
type_url = str(detail.get("@type") or "")
if not reason and type_url.endswith("/google.rpc.ErrorInfo"):
reason_value = detail.get("reason")
if isinstance(reason_value, str):
reason = reason_value
md = detail.get("metadata")
if isinstance(md, dict):
metadata = md
header_retry = response.headers.get("Retry-After") or response.headers.get("retry-after")
if header_retry:
try:
retry_after = float(header_retry)
except (TypeError, ValueError):
retry_after = None
code = f"gemini_http_{status}"
if status == 401:
code = "gemini_unauthorized"
elif status == 429:
code = "gemini_rate_limited"
elif status == 404:
code = "gemini_model_not_found"
if err_message:
message = f"Gemini HTTP {status} ({err_status or 'error'}): {err_message}"
else:
message = f"Gemini returned HTTP {status}: {body_text[:500]}"
# Free-tier quota exhaustion -> append actionable guidance so users who
# bypassed the setup wizard (direct GOOGLE_API_KEY in .env) still learn
# that the free tier cannot sustain an agent session.
if status == 429 and is_free_tier_quota_error(err_message or body_text):
message = message + _FREE_TIER_GUIDANCE
return GeminiAPIError(
message,
code=code,
status_code=status,
response=response,
retry_after=retry_after,
details={
"status": err_status,
"reason": reason,
"metadata": metadata,
"message": err_message,
},
)
class _GeminiChatCompletions:
def __init__(self, client: "GeminiNativeClient"):
self._client = client
def create(self, **kwargs: Any) -> Any:
return self._client._create_chat_completion(**kwargs)
class _AsyncGeminiChatCompletions:
def __init__(self, client: "AsyncGeminiNativeClient"):
self._client = client
async def create(self, **kwargs: Any) -> Any:
return await self._client._create_chat_completion(**kwargs)
class _GeminiChatNamespace:
def __init__(self, client: "GeminiNativeClient"):
self.completions = _GeminiChatCompletions(client)
class _AsyncGeminiChatNamespace:
def __init__(self, client: "AsyncGeminiNativeClient"):
self.completions = _AsyncGeminiChatCompletions(client)
class GeminiNativeClient:
"""Minimal OpenAI-SDK-compatible facade over Gemini's native REST API."""
def __init__(
self,
*,
api_key: str,
base_url: Optional[str] = None,
default_headers: Optional[Dict[str, str]] = None,
timeout: Any = None,
http_client: Optional[httpx.Client] = None,
**_: Any,
) -> None:
if not (api_key or "").strip():
raise RuntimeError(
"Gemini native client requires an API key, but none was provided. "
"Set GOOGLE_API_KEY or GEMINI_API_KEY in your environment / ~/.hermes/.env "
"(get one at https://aistudio.google.com/app/apikey), or run `hermes setup` "
"to configure the Google provider."
)
self.api_key = api_key
normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/")
if normalized_base.endswith("/openai"):
normalized_base = normalized_base[: -len("/openai")]
self.base_url = normalized_base
self._default_headers = dict(default_headers or {})
self.chat = _GeminiChatNamespace(self)
self.is_closed = False
self._http = http_client or httpx.Client(
timeout=timeout or httpx.Timeout(connect=15.0, read=600.0, write=30.0, pool=30.0)
)
def close(self) -> None:
self.is_closed = True
try:
self._http.close()
except Exception:
pass
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
def _headers(self) -> Dict[str, str]:
headers = {
"Content-Type": "application/json",
"Accept": "application/json",
"x-goog-api-key": self.api_key,
"User-Agent": "hermes-agent (gemini-native)",
}
headers.update(self._default_headers)
return headers
@staticmethod
def _advance_stream_iterator(iterator: Iterator[_GeminiStreamChunk]) -> tuple[bool, Optional[_GeminiStreamChunk]]:
try:
return False, next(iterator)
except StopIteration:
return True, None
def _create_chat_completion(
self,
*,
model: str = "gemini-2.5-flash",
messages: Optional[List[Dict[str, Any]]] = None,
stream: bool = False,
tools: Any = None,
tool_choice: Any = None,
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
top_p: Optional[float] = None,
stop: Any = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Any = None,
**_: Any,
) -> Any:
thinking_config = None
if isinstance(extra_body, dict):
thinking_config = extra_body.get("thinking_config") or extra_body.get("thinkingConfig")
request = build_gemini_request(
messages=messages or [],
tools=tools,
tool_choice=tool_choice,
temperature=temperature,
max_tokens=max_tokens,
top_p=top_p,
stop=stop,
thinking_config=thinking_config,
)
if stream:
return self._stream_completion(model=model, request=request, timeout=timeout)
url = f"{self.base_url}/models/{model}:generateContent"
response = self._http.post(url, json=request, headers=self._headers(), timeout=timeout)
if response.status_code != 200:
raise gemini_http_error(response)
try:
payload = response.json()
except ValueError as exc:
raise GeminiAPIError(
f"Invalid JSON from Gemini native API: {exc}",
code="gemini_invalid_json",
status_code=response.status_code,
response=response,
) from exc
return translate_gemini_response(payload, model=model)
def _stream_completion(self, *, model: str, request: Dict[str, Any], timeout: Any = None) -> Iterator[_GeminiStreamChunk]:
url = f"{self.base_url}/models/{model}:streamGenerateContent?alt=sse"
stream_headers = dict(self._headers())
stream_headers["Accept"] = "text/event-stream"
def _generator() -> Iterator[_GeminiStreamChunk]:
try:
with self._http.stream("POST", url, json=request, headers=stream_headers, timeout=timeout) as response:
if response.status_code != 200:
response.read()
raise gemini_http_error(response)
tool_call_indices: Dict[str, Dict[str, Any]] = {}
for event in _iter_sse_events(response):
for chunk in translate_stream_event(event, model, tool_call_indices):
yield chunk
except httpx.HTTPError as exc:
raise GeminiAPIError(
f"Gemini streaming request failed: {exc}",
code="gemini_stream_error",
) from exc
return _generator()
class AsyncGeminiNativeClient:
"""Async wrapper used by auxiliary_client for native Gemini calls."""
def __init__(self, sync_client: GeminiNativeClient):
self._sync = sync_client
self.api_key = sync_client.api_key
self.base_url = sync_client.base_url
self.chat = _AsyncGeminiChatNamespace(self)
async def _create_chat_completion(self, **kwargs: Any) -> Any:
stream = bool(kwargs.get("stream"))
result = await asyncio.to_thread(self._sync.chat.completions.create, **kwargs)
if not stream:
return result
async def _async_stream() -> Any:
while True:
done, chunk = await asyncio.to_thread(self._sync._advance_stream_iterator, result)
if done:
break
yield chunk
return _async_stream()
async def close(self) -> None:
await asyncio.to_thread(self._sync.close)
-99
View File
@@ -1,99 +0,0 @@
"""Helpers for translating OpenAI-style tool schemas to Gemini's schema subset."""
from __future__ import annotations
from typing import Any, Dict, List
# Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema``
# object, which is only a subset of OpenAPI 3.0 / JSON Schema. Strip fields
# outside that subset before sending Hermes tool schemas to Google.
_GEMINI_SCHEMA_ALLOWED_KEYS = {
"type",
"format",
"title",
"description",
"nullable",
"enum",
"maxItems",
"minItems",
"properties",
"required",
"minProperties",
"maxProperties",
"minLength",
"maxLength",
"pattern",
"example",
"anyOf",
"propertyOrdering",
"default",
"items",
"minimum",
"maximum",
}
def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]:
"""Return a Gemini-compatible copy of a tool parameter schema.
Hermes tool schemas are OpenAI-flavored JSON Schema and may contain keys
such as ``$schema`` or ``additionalProperties`` that Google's Gemini
``Schema`` object rejects. This helper preserves the documented Gemini
subset and recursively sanitizes nested ``properties`` / ``items`` /
``anyOf`` definitions.
"""
if not isinstance(schema, dict):
return {}
cleaned: Dict[str, Any] = {}
for key, value in schema.items():
if key not in _GEMINI_SCHEMA_ALLOWED_KEYS:
continue
if key == "properties":
if not isinstance(value, dict):
continue
props: Dict[str, Any] = {}
for prop_name, prop_schema in value.items():
if not isinstance(prop_name, str):
continue
props[prop_name] = sanitize_gemini_schema(prop_schema)
cleaned[key] = props
continue
if key == "items":
cleaned[key] = sanitize_gemini_schema(value)
continue
if key == "anyOf":
if not isinstance(value, list):
continue
cleaned[key] = [
sanitize_gemini_schema(item)
for item in value
if isinstance(item, dict)
]
continue
cleaned[key] = value
# Gemini's Schema validator requires every ``enum`` entry to be a string,
# even when the parent ``type`` is ``integer`` / ``number`` / ``boolean``.
# OpenAI / OpenRouter / Anthropic accept typed enums (e.g. Discord's
# ``auto_archive_duration: {type: integer, enum: [60, 1440, 4320, 10080]}``),
# so we only drop the ``enum`` when it would collide with Gemini's rule.
# Keeping ``type: integer`` plus the human-readable description gives the
# model enough guidance; the tool handler still validates the value.
enum_val = cleaned.get("enum")
type_val = cleaned.get("type")
if isinstance(enum_val, list) and type_val in {"integer", "number", "boolean"}:
if any(not isinstance(item, str) for item in enum_val):
cleaned.pop("enum", None)
return cleaned
def sanitize_gemini_tool_parameters(parameters: Any) -> Dict[str, Any]:
"""Normalize tool parameters to a valid Gemini object schema."""
cleaned = sanitize_gemini_schema(parameters)
if not cleaned:
return {"type": "object", "properties": {}}
return cleaned
-242
View File
@@ -1,242 +0,0 @@
"""
Image Generation Provider ABC
=============================
Defines the pluggable-backend interface for image generation. Providers register
instances via ``PluginContext.register_image_gen_provider()``; the active one
(selected via ``image_gen.provider`` in ``config.yaml``) services every
``image_generate`` tool call.
Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
via ``plugins.enabled``).
Response shape
--------------
All providers return a dict that :func:`success_response` / :func:`error_response`
produce. The tool wrapper JSON-serializes it. Keys:
success bool
image str | None URL or absolute file path
model str provider-specific model identifier
prompt str echoed prompt
aspect_ratio str "landscape" | "square" | "portrait"
provider str provider name (for diagnostics)
error str only when success=False
error_type str only when success=False
"""
from __future__ import annotations
import abc
import base64
import datetime
import logging
import uuid
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait")
DEFAULT_ASPECT_RATIO = "landscape"
# ---------------------------------------------------------------------------
# ABC
# ---------------------------------------------------------------------------
class ImageGenProvider(abc.ABC):
"""Abstract base class for an image generation backend.
Subclasses must implement :meth:`generate`. Everything else has sane
defaults override only what your provider needs.
"""
@property
@abc.abstractmethod
def name(self) -> str:
"""Stable short identifier used in ``image_gen.provider`` config.
Lowercase, no spaces. Examples: ``fal``, ``openai``, ``replicate``.
"""
@property
def display_name(self) -> str:
"""Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``."""
return self.name.title()
def is_available(self) -> bool:
"""Return True when this provider can service calls.
Typically checks for a required API key. Default: True
(providers with no external dependencies are always available).
"""
return True
def list_models(self) -> List[Dict[str, Any]]:
"""Return catalog entries for ``hermes tools`` model picker.
Each entry::
{
"id": "gpt-image-1.5", # required
"display": "GPT Image 1.5", # optional; defaults to id
"speed": "~10s", # optional
"strengths": "...", # optional
"price": "$...", # optional
}
Default: empty list (provider has no user-selectable models).
"""
return []
def get_setup_schema(self) -> Dict[str, Any]:
"""Return provider metadata for the ``hermes tools`` picker.
Used by ``tools_config.py`` to inject this provider as a row in
the Image Generation provider list. Shape::
{
"name": "OpenAI", # picker label
"badge": "paid", # optional short tag
"tag": "One-line description...", # optional subtitle
"env_vars": [ # keys to prompt for
{"key": "OPENAI_API_KEY",
"prompt": "OpenAI API key",
"url": "https://platform.openai.com/api-keys"},
],
}
Default: minimal entry derived from ``display_name``. Override to
expose API key prompts and custom badges.
"""
return {
"name": self.display_name,
"badge": "",
"tag": "",
"env_vars": [],
}
def default_model(self) -> Optional[str]:
"""Return the default model id, or None if not applicable."""
models = self.list_models()
if models:
return models[0].get("id")
return None
@abc.abstractmethod
def generate(
self,
prompt: str,
aspect_ratio: str = DEFAULT_ASPECT_RATIO,
**kwargs: Any,
) -> Dict[str, Any]:
"""Generate an image.
Implementations should return the dict from :func:`success_response`
or :func:`error_response`. ``kwargs`` may contain forward-compat
parameters future versions of the schema will expose implementations
should ignore unknown keys.
"""
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def resolve_aspect_ratio(value: Optional[str]) -> str:
"""Clamp an aspect_ratio value to the valid set, defaulting to landscape.
Invalid values are coerced rather than rejected so the tool surface is
forgiving of agent mistakes.
"""
if not isinstance(value, str):
return DEFAULT_ASPECT_RATIO
v = value.strip().lower()
if v in VALID_ASPECT_RATIOS:
return v
return DEFAULT_ASPECT_RATIO
def _images_cache_dir() -> Path:
"""Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
from hermes_constants import get_hermes_home
path = get_hermes_home() / "cache" / "images"
path.mkdir(parents=True, exist_ok=True)
return path
def save_b64_image(
b64_data: str,
*,
prefix: str = "image",
extension: str = "png",
) -> Path:
"""Decode base64 image data and write it under ``$HERMES_HOME/cache/images/``.
Returns the absolute :class:`Path` to the saved file.
Filename format: ``<prefix>_<YYYYMMDD_HHMMSS>_<short-uuid>.<ext>``.
"""
raw = base64.b64decode(b64_data)
ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
short = uuid.uuid4().hex[:8]
path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"
path.write_bytes(raw)
return path
def success_response(
*,
image: str,
model: str,
prompt: str,
aspect_ratio: str,
provider: str,
extra: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
"""Build a uniform success response dict.
``image`` may be an HTTP URL or an absolute filesystem path (for b64
providers like OpenAI). Callers that need to pass through additional
backend-specific fields can supply ``extra``.
"""
payload: Dict[str, Any] = {
"success": True,
"image": image,
"model": model,
"prompt": prompt,
"aspect_ratio": aspect_ratio,
"provider": provider,
}
if extra:
for k, v in extra.items():
payload.setdefault(k, v)
return payload
def error_response(
*,
error: str,
error_type: str = "provider_error",
provider: str = "",
model: str = "",
prompt: str = "",
aspect_ratio: str = DEFAULT_ASPECT_RATIO,
) -> Dict[str, Any]:
"""Build a uniform error response dict."""
return {
"success": False,
"image": None,
"error": error,
"error_type": error_type,
"model": model,
"prompt": prompt,
"aspect_ratio": aspect_ratio,
"provider": provider,
}
-120
View File
@@ -1,120 +0,0 @@
"""
Image Generation Provider Registry
==================================
Central map of registered providers. Populated by plugins at import-time via
``PluginContext.register_image_gen_provider()``; consumed by the
``image_generate`` tool to dispatch each call to the active backend.
Active selection
----------------
The active provider is chosen by ``image_gen.provider`` in ``config.yaml``.
If unset, :func:`get_active_provider` applies fallback logic:
1. If exactly one provider is registered, use it.
2. Otherwise if a provider named ``fal`` is registered, use it (legacy
default matches pre-plugin behavior).
3. Otherwise return ``None`` (the tool surfaces a helpful error pointing
the user at ``hermes tools``).
"""
from __future__ import annotations
import logging
import threading
from typing import Dict, List, Optional
from agent.image_gen_provider import ImageGenProvider
logger = logging.getLogger(__name__)
_providers: Dict[str, ImageGenProvider] = {}
_lock = threading.Lock()
def register_provider(provider: ImageGenProvider) -> None:
"""Register an image generation provider.
Re-registration (same ``name``) overwrites the previous entry and logs
a debug message this makes hot-reload scenarios (tests, dev loops)
behave predictably.
"""
if not isinstance(provider, ImageGenProvider):
raise TypeError(
f"register_provider() expects an ImageGenProvider instance, "
f"got {type(provider).__name__}"
)
name = provider.name
if not isinstance(name, str) or not name.strip():
raise ValueError("Image gen provider .name must be a non-empty string")
with _lock:
existing = _providers.get(name)
_providers[name] = provider
if existing is not None:
logger.debug("Image gen provider '%s' re-registered (was %r)", name, type(existing).__name__)
else:
logger.debug("Registered image gen provider '%s' (%s)", name, type(provider).__name__)
def list_providers() -> List[ImageGenProvider]:
"""Return all registered providers, sorted by name."""
with _lock:
items = list(_providers.values())
return sorted(items, key=lambda p: p.name)
def get_provider(name: str) -> Optional[ImageGenProvider]:
"""Return the provider registered under *name*, or None."""
if not isinstance(name, str):
return None
with _lock:
return _providers.get(name.strip())
def get_active_provider() -> Optional[ImageGenProvider]:
"""Resolve the currently-active provider.
Reads ``image_gen.provider`` from config.yaml; falls back per the
module docstring.
"""
configured: Optional[str] = None
try:
from hermes_cli.config import load_config
cfg = load_config()
section = cfg.get("image_gen") if isinstance(cfg, dict) else None
if isinstance(section, dict):
raw = section.get("provider")
if isinstance(raw, str) and raw.strip():
configured = raw.strip()
except Exception as exc:
logger.debug("Could not read image_gen.provider from config: %s", exc)
with _lock:
snapshot = dict(_providers)
if configured:
provider = snapshot.get(configured)
if provider is not None:
return provider
logger.debug(
"image_gen.provider='%s' configured but not registered; falling back",
configured,
)
# Fallback: single-provider case
if len(snapshot) == 1:
return next(iter(snapshot.values()))
# Fallback: prefer legacy FAL for backward compat
if "fal" in snapshot:
return snapshot["fal"]
return None
def _reset_for_tests() -> None:
"""Clear the registry. **Test-only.**"""
with _lock:
_providers.clear()
-236
View File
@@ -1,236 +0,0 @@
"""Routing helpers for inbound user-attached images.
Two modes:
native attach images as OpenAI-style ``image_url`` content parts on the
user turn. Provider adapters (Anthropic, Gemini, Bedrock, Codex,
OpenAI chat.completions) already translate these into their
vendor-specific multimodal formats.
text run ``vision_analyze`` on each image up-front and prepend the
description to the user's text. The model never sees the pixels;
it only sees a lossy text summary. This is the pre-existing
behaviour and still the right choice for non-vision models.
The decision is made once per message turn by :func:`decide_image_input_mode`.
It reads ``agent.image_input_mode`` from config.yaml (``auto`` | ``native``
| ``text``, default ``auto``) and the active model's capability metadata.
In ``auto`` mode:
- If the user has explicitly configured ``auxiliary.vision.provider``
(i.e. not ``auto`` and not empty), we assume they want the text pipeline
regardless of the main model they've opted in to a specific vision
backend for a reason (cost, quality, local-only, etc.).
- Otherwise, if the active model reports ``supports_vision=True`` in its
models.dev metadata, we attach natively.
- Otherwise (non-vision model, no explicit override), we fall back to text.
This keeps ``vision_analyze`` surfaced as a tool in every session skills
and agent flows that chain it (browser screenshots, deeper inspection of
URL-referenced images, style-gating loops) keep working. The routing only
affects *how user-attached images on the current turn* are presented to the
main model.
"""
from __future__ import annotations
import base64
import logging
import mimetypes
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
_VALID_MODES = frozenset({"auto", "native", "text"})
def _coerce_mode(raw: Any) -> str:
"""Normalize a config value into one of the valid modes."""
if not isinstance(raw, str):
return "auto"
val = raw.strip().lower()
if val in _VALID_MODES:
return val
return "auto"
def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
"""True when the user configured a specific auxiliary vision backend.
An explicit override means the user *wants* the text pipeline (they're
paying for a dedicated vision model), so we don't silently bypass it.
"""
if not isinstance(cfg, dict):
return False
aux = cfg.get("auxiliary") or {}
if not isinstance(aux, dict):
return False
vision = aux.get("vision") or {}
if not isinstance(vision, dict):
return False
provider = str(vision.get("provider") or "").strip().lower()
model = str(vision.get("model") or "").strip()
base_url = str(vision.get("base_url") or "").strip()
# "auto" / "" / blank = not explicit
if provider in ("", "auto") and not model and not base_url:
return False
return True
def _lookup_supports_vision(provider: str, model: str) -> Optional[bool]:
"""Return True/False if we can resolve caps, None if unknown."""
if not provider or not model:
return None
try:
from agent.models_dev import get_model_capabilities
caps = get_model_capabilities(provider, model)
except Exception as exc: # pragma: no cover - defensive
logger.debug("image_routing: caps lookup failed for %s:%s%s", provider, model, exc)
return None
if caps is None:
return None
return bool(caps.supports_vision)
def decide_image_input_mode(
provider: str,
model: str,
cfg: Optional[Dict[str, Any]],
) -> str:
"""Return ``"native"`` or ``"text"`` for the given turn.
Args:
provider: active inference provider ID (e.g. ``"anthropic"``, ``"openrouter"``).
model: active model slug as it would be sent to the provider.
cfg: loaded config.yaml dict, or None. When None, behaves as auto.
"""
mode_cfg = "auto"
if isinstance(cfg, dict):
agent_cfg = cfg.get("agent") or {}
if isinstance(agent_cfg, dict):
mode_cfg = _coerce_mode(agent_cfg.get("image_input_mode"))
if mode_cfg == "native":
return "native"
if mode_cfg == "text":
return "text"
# auto
if _explicit_aux_vision_override(cfg):
return "text"
supports = _lookup_supports_vision(provider, model)
if supports is True:
return "native"
return "text"
# Image size handling is REACTIVE rather than proactive: we attempt native
# attachment at full size regardless of provider, and rely on
# ``run_agent._try_shrink_image_parts_in_messages`` to shrink + retry if
# the provider rejects the request (e.g. Anthropic's hard 5 MB per-image
# ceiling returned as HTTP 400 "image exceeds 5 MB maximum").
#
# Why reactive: our knowledge of provider ceilings is partial and evolving
# (OpenAI accepts 49 MB+, Anthropic 5 MB, Gemini 100 MB, others unknown).
# A proactive per-provider table would be stale the moment a provider raises
# or lowers its limit, and silently degrading quality for users on providers
# that would have accepted the full image is the worse failure mode.
# The shrink-on-reject path loses 1 API call + maybe 1s of Pillow work when
# it fires, which is cheaper than permanent quality loss.
def _guess_mime(path: Path) -> str:
mime, _ = mimetypes.guess_type(str(path))
if mime and mime.startswith("image/"):
return mime
# mimetypes on some Linux distros mis-maps .jpg; default to jpeg when
# the suffix looks imagey.
suffix = path.suffix.lower()
return {
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".png": "image/png",
".gif": "image/gif",
".webp": "image/webp",
".bmp": "image/bmp",
}.get(suffix, "image/jpeg")
def _file_to_data_url(path: Path) -> Optional[str]:
"""Encode a local image as a base64 data URL at its native size.
Size limits are NOT enforced here the agent retry loop
(``run_agent._try_shrink_image_parts_in_messages``) shrinks on the
provider's first rejection. Keeping this simple means providers that
accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
quality tax just because one other provider is stricter.
Returns None only if the file can't be read (missing, permission
denied, etc.); the caller reports those paths in ``skipped``.
"""
try:
raw = path.read_bytes()
except Exception as exc:
logger.warning("image_routing: failed to read %s%s", path, exc)
return None
mime = _guess_mime(path)
b64 = base64.b64encode(raw).decode("ascii")
return f"data:{mime};base64,{b64}"
def build_native_content_parts(
user_text: str,
image_paths: List[str],
) -> Tuple[List[Dict[str, Any]], List[str]]:
"""Build an OpenAI-style ``content`` list for a user turn.
Shape:
[{"type": "text", "text": "..."},
{"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
...]
Images are attached at their native size. If a provider rejects the
request because an image is too large (e.g. Anthropic's 5 MB per-image
ceiling), the agent's retry loop transparently shrinks and retries
once see ``run_agent._try_shrink_image_parts_in_messages``.
Returns (content_parts, skipped_paths). Skipped paths are files that
couldn't be read from disk.
"""
parts: List[Dict[str, Any]] = []
skipped: List[str] = []
text = (user_text or "").strip()
if text:
parts.append({"type": "text", "text": text})
for raw_path in image_paths:
p = Path(raw_path)
if not p.exists() or not p.is_file():
skipped.append(str(raw_path))
continue
data_url = _file_to_data_url(p)
if not data_url:
skipped.append(str(raw_path))
continue
parts.append({
"type": "image_url",
"image_url": {"url": data_url},
})
# If the text was empty, add a neutral prompt so the turn isn't just images.
if not text and any(p.get("type") == "image_url" for p in parts):
parts.insert(0, {"type": "text", "text": "What do you see in this image?"})
return parts, skipped
__all__ = [
"decide_image_input_mode",
"build_native_content_parts",
]
-162
View File
@@ -124,7 +124,6 @@ class InsightsEngine:
# Gather raw data
sessions = self._get_sessions(cutoff, source)
tool_usage = self._get_tool_usage(cutoff, source)
skill_usage = self._get_skill_usage(cutoff, source)
message_stats = self._get_message_stats(cutoff, source)
if not sessions:
@@ -136,15 +135,6 @@ class InsightsEngine:
"models": [],
"platforms": [],
"tools": [],
"skills": {
"summary": {
"total_skill_loads": 0,
"total_skill_edits": 0,
"total_skill_actions": 0,
"distinct_skills_used": 0,
},
"top_skills": [],
},
"activity": {},
"top_sessions": [],
}
@@ -154,7 +144,6 @@ class InsightsEngine:
models = self._compute_model_breakdown(sessions)
platforms = self._compute_platform_breakdown(sessions)
tools = self._compute_tool_breakdown(tool_usage)
skills = self._compute_skill_breakdown(skill_usage)
activity = self._compute_activity_patterns(sessions)
top_sessions = self._compute_top_sessions(sessions)
@@ -167,7 +156,6 @@ class InsightsEngine:
"models": models,
"platforms": platforms,
"tools": tools,
"skills": skills,
"activity": activity,
"top_sessions": top_sessions,
}
@@ -296,82 +284,6 @@ class InsightsEngine:
for name, count in tool_counts.most_common()
]
def _get_skill_usage(self, cutoff: float, source: str = None) -> List[Dict]:
"""Extract per-skill usage from assistant tool calls."""
skill_counts: Dict[str, Dict[str, Any]] = {}
if source:
cursor = self._conn.execute(
"""SELECT m.tool_calls, m.timestamp
FROM messages m
JOIN sessions s ON s.id = m.session_id
WHERE s.started_at >= ? AND s.source = ?
AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
(cutoff, source),
)
else:
cursor = self._conn.execute(
"""SELECT m.tool_calls, m.timestamp
FROM messages m
JOIN sessions s ON s.id = m.session_id
WHERE s.started_at >= ?
AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
(cutoff,),
)
for row in cursor.fetchall():
try:
calls = row["tool_calls"]
if isinstance(calls, str):
calls = json.loads(calls)
if not isinstance(calls, list):
continue
except (json.JSONDecodeError, TypeError):
continue
timestamp = row["timestamp"]
for call in calls:
if not isinstance(call, dict):
continue
func = call.get("function", {})
tool_name = func.get("name")
if tool_name not in {"skill_view", "skill_manage"}:
continue
args = func.get("arguments")
if isinstance(args, str):
try:
args = json.loads(args)
except (json.JSONDecodeError, TypeError):
continue
if not isinstance(args, dict):
continue
skill_name = args.get("name")
if not isinstance(skill_name, str) or not skill_name.strip():
continue
entry = skill_counts.setdefault(
skill_name,
{
"skill": skill_name,
"view_count": 0,
"manage_count": 0,
"last_used_at": None,
},
)
if tool_name == "skill_view":
entry["view_count"] += 1
else:
entry["manage_count"] += 1
if timestamp is not None and (
entry["last_used_at"] is None or timestamp > entry["last_used_at"]
):
entry["last_used_at"] = timestamp
return list(skill_counts.values())
def _get_message_stats(self, cutoff: float, source: str = None) -> Dict:
"""Get aggregate message statistics."""
if source:
@@ -563,46 +475,6 @@ class InsightsEngine:
})
return result
def _compute_skill_breakdown(self, skill_usage: List[Dict]) -> Dict[str, Any]:
"""Process per-skill usage into summary + ranked list."""
total_skill_loads = sum(s["view_count"] for s in skill_usage) if skill_usage else 0
total_skill_edits = sum(s["manage_count"] for s in skill_usage) if skill_usage else 0
total_skill_actions = total_skill_loads + total_skill_edits
top_skills = []
for skill in skill_usage:
total_count = skill["view_count"] + skill["manage_count"]
percentage = (total_count / total_skill_actions * 100) if total_skill_actions else 0
top_skills.append({
"skill": skill["skill"],
"view_count": skill["view_count"],
"manage_count": skill["manage_count"],
"total_count": total_count,
"percentage": percentage,
"last_used_at": skill.get("last_used_at"),
})
top_skills.sort(
key=lambda s: (
s["total_count"],
s["view_count"],
s["manage_count"],
s["last_used_at"] or 0,
s["skill"],
),
reverse=True,
)
return {
"summary": {
"total_skill_loads": total_skill_loads,
"total_skill_edits": total_skill_edits,
"total_skill_actions": total_skill_actions,
"distinct_skills_used": len(skill_usage),
},
"top_skills": top_skills,
}
def _compute_activity_patterns(self, sessions: List[Dict]) -> Dict:
"""Analyze activity patterns by day of week and hour."""
day_counts = Counter() # 0=Monday ... 6=Sunday
@@ -798,28 +670,6 @@ class InsightsEngine:
lines.append(f" ... and {len(report['tools']) - 15} more tools")
lines.append("")
# Skill usage
skills = report.get("skills", {})
top_skills = skills.get("top_skills", [])
if top_skills:
lines.append(" 🧠 Top Skills")
lines.append(" " + "" * 56)
lines.append(f" {'Skill':<28} {'Loads':>7} {'Edits':>7} {'Last used':>11}")
for skill in top_skills[:10]:
last_used = ""
if skill.get("last_used_at"):
last_used = datetime.fromtimestamp(skill["last_used_at"]).strftime("%b %d")
lines.append(
f" {skill['skill'][:28]:<28} {skill['view_count']:>7,} {skill['manage_count']:>7,} {last_used:>11}"
)
summary = skills.get("summary", {})
lines.append(
f" Distinct skills: {summary.get('distinct_skills_used', 0)} "
f"Loads: {summary.get('total_skill_loads', 0):,} "
f"Edits: {summary.get('total_skill_edits', 0):,}"
)
lines.append("")
# Activity patterns
act = report.get("activity", {})
if act.get("by_day"):
@@ -903,18 +753,6 @@ class InsightsEngine:
lines.append(f" {t['tool']}{t['count']:,} calls ({t['percentage']:.1f}%)")
lines.append("")
skills = report.get("skills", {})
if skills.get("top_skills"):
lines.append("**🧠 Top Skills:**")
for skill in skills["top_skills"][:5]:
suffix = ""
if skill.get("last_used_at"):
suffix = f", last used {datetime.fromtimestamp(skill['last_used_at']).strftime('%b %d')}"
lines.append(
f" {skill['skill']}{skill['view_count']:,} loads, {skill['manage_count']:,} edits{suffix}"
)
lines.append("")
# Activity summary
act = report.get("activity", {})
if act.get("busiest_day") and act.get("busiest_hour"):
+7 -157
View File
@@ -31,7 +31,6 @@ from __future__ import annotations
import json
import logging
import re
import inspect
from typing import Any, Dict, List, Optional
from agent.memory_provider import MemoryProvider
@@ -63,124 +62,15 @@ def sanitize_context(text: str) -> str:
return text
class StreamingContextScrubber:
"""Stateful scrubber for streaming text that may contain split memory-context spans.
The one-shot ``sanitize_context`` regex cannot survive chunk boundaries:
a ``<memory-context>`` opened in one delta and closed in a later delta
leaks its payload to the UI because the non-greedy block regex needs
both tags in one string. This scrubber runs a small state machine
across deltas, holding back partial-tag tails and discarding
everything inside a span (including the system-note line).
Usage::
scrubber = StreamingContextScrubber()
for delta in stream:
visible = scrubber.feed(delta)
if visible:
emit(visible)
trailing = scrubber.flush() # at end of stream
if trailing:
emit(trailing)
The scrubber is re-entrant per agent instance. Callers building new
top-level responses (new turn) should create a fresh scrubber or call
``reset()``.
"""
_OPEN_TAG = "<memory-context>"
_CLOSE_TAG = "</memory-context>"
def __init__(self) -> None:
self._in_span: bool = False
self._buf: str = ""
def reset(self) -> None:
self._in_span = False
self._buf = ""
def feed(self, text: str) -> str:
"""Return the visible portion of ``text`` after scrubbing.
Any trailing fragment that could be the start of an open/close tag
is held back in the internal buffer and surfaced on the next
``feed()`` call or discarded/emitted by ``flush()``.
"""
if not text:
return ""
buf = self._buf + text
self._buf = ""
out: list[str] = []
while buf:
if self._in_span:
idx = buf.lower().find(self._CLOSE_TAG)
if idx == -1:
# Hold back a potential partial close tag; drop the rest
held = self._max_partial_suffix(buf, self._CLOSE_TAG)
self._buf = buf[-held:] if held else ""
return "".join(out)
# Found close — skip span content + tag, continue
buf = buf[idx + len(self._CLOSE_TAG):]
self._in_span = False
else:
idx = buf.lower().find(self._OPEN_TAG)
if idx == -1:
# No open tag — hold back a potential partial open tag
held = self._max_partial_suffix(buf, self._OPEN_TAG)
if held:
out.append(buf[:-held])
self._buf = buf[-held:]
else:
out.append(buf)
return "".join(out)
# Emit text before the tag, enter span
if idx > 0:
out.append(buf[:idx])
buf = buf[idx + len(self._OPEN_TAG):]
self._in_span = True
return "".join(out)
def flush(self) -> str:
"""Emit any held-back buffer at end-of-stream.
If we're still inside an unterminated span the remaining content is
discarded (safer: leaking partial memory context is worse than a
truncated answer). Otherwise the held-back partial-tag tail is
emitted verbatim (it turned out not to be a real tag).
"""
if self._in_span:
self._buf = ""
self._in_span = False
return ""
tail = self._buf
self._buf = ""
return tail
@staticmethod
def _max_partial_suffix(buf: str, tag: str) -> int:
"""Return the length of the longest buf-suffix that is a tag-prefix.
Case-insensitive. Returns 0 if no suffix could start the tag.
"""
tag_lower = tag.lower()
buf_lower = buf.lower()
max_check = min(len(buf_lower), len(tag_lower) - 1)
for i in range(max_check, 0, -1):
if tag_lower.startswith(buf_lower[-i:]):
return i
return 0
def build_memory_context_block(raw_context: str) -> str:
"""Wrap prefetched memory in a fenced block with system note."""
"""Wrap prefetched memory in a fenced block with system note.
The fence prevents the model from treating recalled context as user
discourse. Injected at API-call time only never persisted.
"""
if not raw_context or not raw_context.strip():
return ""
clean = sanitize_context(raw_context)
if clean != raw_context:
logger.warning("memory provider returned pre-wrapped context; stripped")
return (
"<memory-context>\n"
"[System note: The following is recalled memory context, "
@@ -422,39 +312,7 @@ class MemoryManager:
)
return "\n\n".join(parts)
@staticmethod
def _provider_memory_write_metadata_mode(provider: MemoryProvider) -> str:
"""Return how to pass metadata to a provider's memory-write hook."""
try:
signature = inspect.signature(provider.on_memory_write)
except (TypeError, ValueError):
return "keyword"
params = list(signature.parameters.values())
if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params):
return "keyword"
if "metadata" in signature.parameters:
return "keyword"
accepted = [
p for p in params
if p.kind in (
inspect.Parameter.POSITIONAL_ONLY,
inspect.Parameter.POSITIONAL_OR_KEYWORD,
inspect.Parameter.KEYWORD_ONLY,
)
]
if len(accepted) >= 4:
return "positional"
return "legacy"
def on_memory_write(
self,
action: str,
target: str,
content: str,
metadata: Optional[Dict[str, Any]] = None,
) -> None:
def on_memory_write(self, action: str, target: str, content: str) -> None:
"""Notify external providers when the built-in memory tool writes.
Skips the builtin provider itself (it's the source of the write).
@@ -463,15 +321,7 @@ class MemoryManager:
if provider.name == "builtin":
continue
try:
metadata_mode = self._provider_memory_write_metadata_mode(provider)
if metadata_mode == "keyword":
provider.on_memory_write(
action, target, content, metadata=dict(metadata or {})
)
elif metadata_mode == "positional":
provider.on_memory_write(action, target, content, dict(metadata or {}))
else:
provider.on_memory_write(action, target, content)
provider.on_memory_write(action, target, content)
except Exception as e:
logger.debug(
"Memory provider '%s' on_memory_write failed: %s",
+3 -12
View File
@@ -26,7 +26,7 @@ Optional hooks (override to opt in):
on_turn_start(turn, message, **kwargs) per-turn tick with runtime context
on_session_end(messages) end-of-session extraction
on_pre_compress(messages) -> str extract before context compression
on_memory_write(action, target, content, metadata=None) mirror built-in memory writes
on_memory_write(action, target, content) mirror built-in memory writes
on_delegation(task, result, **kwargs) parent-side observation of subagent work
"""
@@ -34,7 +34,7 @@ from __future__ import annotations
import logging
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List
logger = logging.getLogger(__name__)
@@ -220,21 +220,12 @@ class MemoryProvider(ABC):
should all have ``env_var`` set and this method stays no-op).
"""
def on_memory_write(
self,
action: str,
target: str,
content: str,
metadata: Optional[Dict[str, Any]] = None,
) -> None:
def on_memory_write(self, action: str, target: str, content: str) -> None:
"""Called when the built-in memory tool writes an entry.
action: 'add', 'replace', or 'remove'
target: 'memory' or 'user'
content: the entry content
metadata: structured provenance for the write, when available. Common
keys include ``write_origin``, ``execution_context``, ``session_id``,
``parent_session_id``, ``platform``, and ``tool_name``.
Use to mirror built-in memory writes to your backend.
"""
+58 -417
View File
@@ -4,9 +4,7 @@ Pure utility functions with no AIAgent dependency. Used by ContextCompressor
and run_agent.py for pre-flight context checks.
"""
import ipaddress
import logging
import os
import re
import time
from pathlib import Path
@@ -16,52 +14,29 @@ from urllib.parse import urlparse
import requests
import yaml
from utils import base_url_host_matches, base_url_hostname
from hermes_constants import OPENROUTER_MODELS_URL
logger = logging.getLogger(__name__)
def _resolve_requests_verify() -> bool | str:
"""Resolve SSL verify setting for `requests` calls from env vars.
The `requests` library only honours REQUESTS_CA_BUNDLE / CURL_CA_BUNDLE
by default. Hermes also honours HERMES_CA_BUNDLE (its own convention)
and SSL_CERT_FILE (used by the stdlib `ssl` module and by httpx), so
that a single env var can cover both `requests` and `httpx` callsites
inside the same process.
Returns either a filesystem path to a CA bundle, or True to defer to
the requests default (certifi).
"""
for env_var in ("HERMES_CA_BUNDLE", "REQUESTS_CA_BUNDLE", "SSL_CERT_FILE"):
val = os.getenv(env_var)
if val and os.path.isfile(val):
return val
return True
# Provider names that can appear as a "provider:" prefix before a model ID.
# Only these are stripped — Ollama-style "model:tag" colons (e.g. "qwen3.5:27b")
# are preserved so the full model name reaches cache lookups and server queries.
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
"qwen-oauth",
"xiaomi",
"arcee",
"gmi",
"custom", "local",
# Common aliases
"google", "google-gemini", "google-ai-studio",
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
"github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
"ollama",
"stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
"opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
"mimo", "xiaomi-mimo",
"arcee-ai", "arceeai",
"gmi-cloud", "gmicloud",
"xai", "x-ai", "x.ai", "grok",
"nvidia", "nim", "nvidia-nim", "nemotron",
"qwen-portal",
@@ -74,13 +49,6 @@ _OLLAMA_TAG_PATTERN = re.compile(
)
# Tailscale's CGNAT range (RFC 6598). `ipaddress.is_private` excludes this
# block, so without an explicit check Ollama reached over Tailscale (e.g.
# `http://100.77.243.5:11434`) wouldn't be treated as local and its stream
# read / stale timeouts wouldn't get auto-bumped. Built once at import time.
_TAILSCALE_CGNAT = ipaddress.IPv4Network("100.64.0.0/10")
def _strip_provider_prefix(model: str) -> str:
"""Strip a recognised provider prefix from a model string.
@@ -108,11 +76,9 @@ _endpoint_model_metadata_cache_time: Dict[str, float] = {}
_ENDPOINT_MODEL_CACHE_TTL = 300
# Descending tiers for context length probing when the model is unknown.
# We start at 256K (covers GPT-5.x, many current large-context models) and
# step down on context-length errors until one works. Tier[0] is also the
# default fallback when no detection method succeeds.
# We start at 128K (a safe default for most modern models) and step down
# on context-length errors until one works.
CONTEXT_PROBE_TIERS = [
256_000,
128_000,
64_000,
32_000,
@@ -147,14 +113,10 @@ DEFAULT_CONTEXT_LENGTHS = {
"claude": 200000,
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
# Source: https://developers.openai.com/api/docs/models
# GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
# ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
# provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
# This hardcoded value is only reached when every probe misses.
"gpt-5.5": 1050000,
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
"gpt-5.3-codex-spark": 128000, # Spark variant has reduced 128k context
"gpt-5.1-chat": 128000, # Chat variant has 128k context
"gpt-5": 400000, # GPT-5.x base, mini, codex variants (400k)
"gpt-4.1": 1047576,
@@ -162,22 +124,10 @@ DEFAULT_CONTEXT_LENGTHS = {
# Google
"gemini": 1048576,
# Gemma (open models served via AI Studio)
"gemma-4": 256000, # Gemma 4 family
"gemma4": 256000, # Ollama-style naming (e.g. gemma4:31b-cloud)
"gemma-4-31b": 256000,
"gemma-3": 131072,
"gemma": 8192, # fallback for older gemma models
# DeepSeek — V4 family ships with a 1M context window. The legacy
# aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
# mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
# and inherit the same 1M window. The ``deepseek`` substring entry
# below remains as a 128K fallback for older / unknown DeepSeek model
# ids (e.g. via custom endpoints).
# https://api-docs.deepseek.com/zh-cn/quick_start/pricing
"deepseek-v4-pro": 1_000_000,
"deepseek-v4-flash": 1_000_000,
"deepseek-chat": 1_000_000,
"deepseek-reasoner": 1_000_000,
# DeepSeek
"deepseek": 128000,
# Meta
"llama": 131072,
@@ -219,15 +169,12 @@ DEFAULT_CONTEXT_LENGTHS = {
"Qwen/Qwen3.5-35B-A3B": 131072,
"deepseek-ai/DeepSeek-V3.2": 65536,
"moonshotai/Kimi-K2.5": 262144,
"moonshotai/Kimi-K2.6": 262144,
"moonshotai/Kimi-K2-Thinking": 262144,
"MiniMaxAI/MiniMax-M2.5": 204800,
"XiaomiMiMo/MiMo-V2-Flash": 262144,
"mimo-v2-pro": 1048576,
"mimo-v2.5-pro": 1048576,
"mimo-v2.5": 1048576,
"mimo-v2-omni": 262144,
"mimo-v2-flash": 262144,
"XiaomiMiMo/MiMo-V2-Flash": 256000,
"mimo-v2-pro": 1000000,
"mimo-v2-omni": 256000,
"mimo-v2-flash": 256000,
"zai-org/GLM-5": 202752,
}
@@ -242,7 +189,6 @@ _CONTEXT_LENGTH_KEYS = (
"max_seq_len",
"n_ctx_train",
"n_ctx",
"ctx_size",
)
_MAX_COMPLETION_KEYS = (
@@ -265,15 +211,8 @@ def _normalize_base_url(base_url: str) -> str:
return (base_url or "").strip().rstrip("/")
def _auth_headers(api_key: str = "") -> Dict[str, str]:
token = str(api_key or "").strip()
if not token:
return {}
return {"Authorization": f"Bearer {token}"}
def _is_openrouter_base_url(base_url: str) -> bool:
return base_url_host_matches(base_url, "openrouter.ai")
return "openrouter.ai" in _normalize_base_url(base_url).lower()
def _is_custom_endpoint(base_url: str) -> bool:
@@ -286,12 +225,9 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"chatgpt.com": "openai",
"api.anthropic.com": "anthropic",
"api.z.ai": "zai",
"open.bigmodel.cn": "zai",
"api.moonshot.ai": "kimi-coding",
"api.moonshot.cn": "kimi-coding-cn",
"api.kimi.com": "kimi-coding",
"api.stepfun.ai": "stepfun",
"api.stepfun.com": "stepfun",
"api.arcee.ai": "arcee",
"api.minimax": "minimax",
"dashscope.aliyuncs.com": "alibaba",
@@ -309,21 +245,9 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"integrate.api.nvidia.com": "nvidia",
"api.xiaomimimo.com": "xiaomi",
"xiaomimimo.com": "xiaomi",
"api.gmi-serving.com": "gmi",
"ollama.com": "ollama-cloud",
}
# Auto-extend with hostnames derived from provider profiles.
# Any provider with a base_url not already in the map gets added automatically.
try:
from providers import list_providers as _list_providers
for _pp in _list_providers():
_host = _pp.get_hostname()
if _host and _host not in _URL_TO_PROVIDER:
_URL_TO_PROVIDER[_host] = _pp.name
except Exception:
pass
def _infer_provider_from_url(base_url: str) -> Optional[str]:
"""Infer the models.dev provider name from a base URL.
@@ -348,15 +272,7 @@ def _is_known_provider_base_url(base_url: str) -> bool:
def is_local_endpoint(base_url: str) -> bool:
"""Return True if base_url points to a local machine.
Recognises loopback (``localhost``, ``127.0.0.0/8``, ``::1``),
container-internal DNS names (``host.docker.internal`` et al.),
RFC-1918 private ranges (``10/8``, ``172.16/12``, ``192.168/16``),
link-local, and Tailscale CGNAT (``100.64.0.0/10``). Tailscale CGNAT
is included so remote-but-trusted Ollama boxes reached over a
Tailscale mesh get the same timeout auto-bumps as localhost Ollama.
"""
"""Return True if base_url points to a local machine (localhost / RFC-1918 / WSL)."""
normalized = _normalize_base_url(base_url)
if not normalized:
return False
@@ -371,17 +287,14 @@ def is_local_endpoint(base_url: str) -> bool:
# Docker / Podman / Lima internal DNS names (e.g. host.docker.internal)
if any(host.endswith(suffix) for suffix in _CONTAINER_LOCAL_SUFFIXES):
return True
# RFC-1918 private ranges, link-local, and Tailscale CGNAT
# RFC-1918 private ranges and link-local
import ipaddress
try:
addr = ipaddress.ip_address(host)
if addr.is_private or addr.is_loopback or addr.is_link_local:
return True
if isinstance(addr, ipaddress.IPv4Address) and addr in _TAILSCALE_CGNAT:
return True
return addr.is_private or addr.is_loopback or addr.is_link_local
except ValueError:
pass
# Bare IP that looks like a private range (e.g. 172.26.x.x for WSL)
# or Tailscale CGNAT (100.64.x.x100.127.x.x).
parts = host.split(".")
if len(parts) == 4:
try:
@@ -392,14 +305,12 @@ def is_local_endpoint(base_url: str) -> bool:
return True
if first == 192 and second == 168:
return True
if first == 100 and 64 <= second <= 127:
return True
except ValueError:
pass
return False
def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
def detect_local_server_type(base_url: str) -> Optional[str]:
"""Detect which local server is running at base_url by probing known endpoints.
Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None.
@@ -411,10 +322,8 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
if server_url.endswith("/v1"):
server_url = server_url[:-3]
headers = _auth_headers(api_key)
try:
with httpx.Client(timeout=2.0, headers=headers) as client:
with httpx.Client(timeout=2.0) as client:
# LM Studio exposes /api/v1/models — check first (most specific)
try:
r = client.get(f"{server_url}/api/v1/models")
@@ -542,7 +451,7 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
return _model_metadata_cache
try:
response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify())
response = requests.get(OPENROUTER_MODELS_URL, timeout=10)
response.raise_for_status()
data = response.json()
@@ -601,64 +510,10 @@ def fetch_endpoint_model_metadata(
headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
last_error: Optional[Exception] = None
if is_local_endpoint(normalized):
try:
if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
response = requests.get(
server_url.rstrip("/") + "/api/v1/models",
headers=headers,
timeout=10,
verify=_resolve_requests_verify(),
)
response.raise_for_status()
payload = response.json()
cache: Dict[str, Dict[str, Any]] = {}
for model in payload.get("models", []):
if not isinstance(model, dict):
continue
model_id = model.get("key") or model.get("id")
if not model_id:
continue
entry: Dict[str, Any] = {"name": model.get("name", model_id)}
context_length = None
for inst in model.get("loaded_instances", []) or []:
if not isinstance(inst, dict):
continue
cfg = inst.get("config", {})
ctx = cfg.get("context_length") if isinstance(cfg, dict) else None
if isinstance(ctx, int) and ctx > 0:
context_length = ctx
break
if context_length is None:
context_length = _extract_context_length(model)
if context_length is not None:
entry["context_length"] = context_length
max_completion_tokens = _extract_max_completion_tokens(model)
if max_completion_tokens is not None:
entry["max_completion_tokens"] = max_completion_tokens
pricing = _extract_pricing(model)
if pricing:
entry["pricing"] = pricing
_add_model_aliases(cache, model_id, entry)
alt_id = model.get("id")
if isinstance(alt_id, str) and alt_id and alt_id != model_id:
_add_model_aliases(cache, alt_id, entry)
_endpoint_model_metadata_cache[normalized] = cache
_endpoint_model_metadata_cache_time[normalized] = time.time()
return cache
except Exception as exc:
last_error = exc
for candidate in candidates:
url = candidate.rstrip("/") + "/models"
try:
response = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify())
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
payload = response.json()
cache: Dict[str, Dict[str, Any]] = {}
@@ -689,10 +544,9 @@ def fetch_endpoint_model_metadata(
try:
# Try /v1/props first (current llama.cpp); fall back to /props for older builds
base = candidate.rstrip("/").replace("/v1", "")
_verify = _resolve_requests_verify()
props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5, verify=_verify)
props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5)
if not props_resp.ok:
props_resp = requests.get(base + "/props", headers=headers, timeout=5, verify=_verify)
props_resp = requests.get(base + "/props", headers=headers, timeout=5)
if props_resp.ok:
props = props_resp.json()
gen_settings = props.get("default_generation_settings", {})
@@ -716,29 +570,6 @@ def fetch_endpoint_model_metadata(
return {}
def _resolve_endpoint_context_length(
model: str,
base_url: str,
api_key: str = "",
) -> Optional[int]:
"""Resolve context length from an endpoint's live ``/models`` metadata."""
endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
matched = endpoint_metadata.get(model)
if not matched:
if len(endpoint_metadata) == 1:
matched = next(iter(endpoint_metadata.values()))
else:
for key, entry in endpoint_metadata.items():
if model in key or key in model:
matched = entry
break
if matched:
context_length = matched.get("context_length")
if isinstance(context_length, int):
return context_length
return None
def _get_context_cache_path() -> Path:
"""Return path to the persistent context length cache file."""
from hermes_constants import get_hermes_home
@@ -787,22 +618,6 @@ def get_cached_context_length(model: str, base_url: str) -> Optional[int]:
return cache.get(key)
def _invalidate_cached_context_length(model: str, base_url: str) -> None:
"""Drop a stale cache entry so it gets re-resolved on the next lookup."""
key = f"{model}@{base_url}"
cache = _load_context_cache()
if key not in cache:
return
del cache[key]
path = _get_context_cache_path()
try:
path.parent.mkdir(parents=True, exist_ok=True)
with open(path, "w") as f:
yaml.dump({"context_lengths": cache}, f, default_flow_style=False)
except Exception as e:
logger.debug("Failed to invalidate context length cache entry %s: %s", key, e)
def get_next_probe_tier(current_length: int) -> Optional[int]:
"""Return the next lower probe tier, or None if already at minimum."""
for tier in CONTEXT_PROBE_TIERS:
@@ -901,7 +716,7 @@ def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
return False
def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Optional[int]:
def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
"""Query an Ollama server for the model's context length.
Returns the model's maximum context from GGUF metadata via ``/api/show``,
@@ -919,16 +734,14 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
server_url = server_url[:-3]
try:
server_type = detect_local_server_type(base_url, api_key=api_key)
server_type = detect_local_server_type(base_url)
except Exception:
return None
if server_type != "ollama":
return None
headers = _auth_headers(api_key)
try:
with httpx.Client(timeout=3.0, headers=headers) as client:
with httpx.Client(timeout=3.0) as client:
resp = client.post(f"{server_url}/api/show", json={"name": bare_model})
if resp.status_code != 200:
return None
@@ -956,7 +769,7 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
return None
def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]:
def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
"""Query a local server for the model's context length."""
import httpx
@@ -969,15 +782,13 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
if server_url.endswith("/v1"):
server_url = server_url[:-3]
headers = _auth_headers(api_key)
try:
server_type = detect_local_server_type(base_url, api_key=api_key)
server_type = detect_local_server_type(base_url)
except Exception:
server_type = None
try:
with httpx.Client(timeout=3.0, headers=headers) as client:
with httpx.Client(timeout=3.0) as client:
# Ollama: /api/show returns model details with context info
if server_type == "ollama":
resp = client.post(f"{server_url}/api/show", json={"name": model})
@@ -1080,7 +891,7 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
"x-api-key": api_key,
"anthropic-version": "2023-06-01",
}
resp = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify())
resp = requests.get(url, headers=headers, timeout=10)
if resp.status_code != 200:
return None
data = resp.json()
@@ -1094,116 +905,6 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
return None
# Known ChatGPT Codex OAuth context windows (observed via live
# chatgpt.com/backend-api/codex/models probe, Apr 2026). These are the
# `context_window` values, which are what Codex actually enforces — the
# direct OpenAI API has larger limits for the same slugs, but Codex OAuth
# caps lower (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex).
#
# Used as a fallback when the live probe fails (no token, network error).
# Longest keys first so substring match picks the most specific entry.
_CODEX_OAUTH_CONTEXT_FALLBACK: Dict[str, int] = {
"gpt-5.1-codex-max": 272_000,
"gpt-5.1-codex-mini": 272_000,
"gpt-5.3-codex": 272_000,
"gpt-5.2-codex": 272_000,
"gpt-5.4-mini": 272_000,
"gpt-5.5": 272_000,
"gpt-5.4": 272_000,
"gpt-5.2": 272_000,
"gpt-5": 272_000,
}
_codex_oauth_context_cache: Dict[str, int] = {}
_codex_oauth_context_cache_time: float = 0.0
_CODEX_OAUTH_CONTEXT_CACHE_TTL = 3600 # 1 hour
def _fetch_codex_oauth_context_lengths(access_token: str) -> Dict[str, int]:
"""Probe the ChatGPT Codex /models endpoint for per-slug context windows.
Codex OAuth imposes its own context limits that differ from the direct
OpenAI API (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex). The
`context_window` field in each model entry is the authoritative source.
Returns a ``{slug: context_window}`` dict. Empty on failure.
"""
global _codex_oauth_context_cache, _codex_oauth_context_cache_time
now = time.time()
if (
_codex_oauth_context_cache
and now - _codex_oauth_context_cache_time < _CODEX_OAUTH_CONTEXT_CACHE_TTL
):
return _codex_oauth_context_cache
try:
resp = requests.get(
"https://chatgpt.com/backend-api/codex/models?client_version=1.0.0",
headers={"Authorization": f"Bearer {access_token}"},
timeout=10,
verify=_resolve_requests_verify(),
)
if resp.status_code != 200:
logger.debug(
"Codex /models probe returned HTTP %s; falling back to hardcoded defaults",
resp.status_code,
)
return {}
data = resp.json()
except Exception as exc:
logger.debug("Codex /models probe failed: %s", exc)
return {}
entries = data.get("models", []) if isinstance(data, dict) else []
result: Dict[str, int] = {}
for item in entries:
if not isinstance(item, dict):
continue
slug = item.get("slug")
ctx = item.get("context_window")
if isinstance(slug, str) and isinstance(ctx, int) and ctx > 0:
result[slug.strip()] = ctx
if result:
_codex_oauth_context_cache = result
_codex_oauth_context_cache_time = now
return result
def _resolve_codex_oauth_context_length(
model: str, access_token: str = ""
) -> Optional[int]:
"""Resolve a Codex OAuth model's real context window.
Prefers a live probe of chatgpt.com/backend-api/codex/models (when we
have a bearer token), then falls back to ``_CODEX_OAUTH_CONTEXT_FALLBACK``.
"""
model_bare = _strip_provider_prefix(model).strip()
if not model_bare:
return None
if access_token:
live = _fetch_codex_oauth_context_lengths(access_token)
if model_bare in live:
return live[model_bare]
# Case-insensitive match in case casing drifts
model_lower = model_bare.lower()
for slug, ctx in live.items():
if slug.lower() == model_lower:
return ctx
# Fallback: longest-key-first substring match over hardcoded defaults.
model_lower = model_bare.lower()
for slug, ctx in sorted(
_CODEX_OAUTH_CONTEXT_FALLBACK.items(), key=lambda x: len(x[0]), reverse=True
):
if slug in model_lower:
return ctx
return None
def _resolve_nous_context_length(model: str) -> Optional[int]:
"""Resolve Nous Portal model context length via OpenRouter metadata.
@@ -1243,14 +944,12 @@ def get_model_context_length(
api_key: str = "",
config_context_length: int | None = None,
provider: str = "",
custom_providers: list | None = None,
) -> int:
"""Get the context length for a model.
Resolution order:
0. Explicit config override (model.context_length or custom_providers per-model)
1. Persistent cache (previously discovered via probing)
1b. AWS Bedrock static table (must precede custom-endpoint probe)
2. Active endpoint metadata (/models for explicit custom endpoints)
3. Local server query (for local endpoints)
4. Anthropic /v1/models API (API-key users only, not OAuth)
@@ -1264,23 +963,6 @@ def get_model_context_length(
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
return config_context_length
# 0b. custom_providers per-model override — check before any probe.
# This closes the gap where /model switch and display paths used to fall
# back to 128K despite the user having a per-model context_length set.
# See #15779.
if custom_providers and base_url and model:
try:
from hermes_cli.config import get_custom_provider_context_length
cp_ctx = get_custom_provider_context_length(
model=model,
base_url=base_url,
custom_providers=custom_providers,
)
if cp_ctx:
return cp_ctx
except Exception:
pass # fall through to probing
# Normalise provider-prefixed model names (e.g. "local:model-name" →
# "model-name") so cache lookups and server queries use the bare ID that
# local servers actually know about. Ollama "model:tag" colons are preserved.
@@ -1290,41 +972,7 @@ def get_model_context_length(
if base_url:
cached = get_cached_context_length(model, base_url)
if cached is not None:
# Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds
# resolved gpt-5.x to the direct-API value (e.g. 1.05M) via
# models.dev and persisted it. Codex OAuth caps at 272K for every
# slug, so any cached Codex entry at or above 400K is a leftover
# from the old resolution path. Drop it and fall through to the
# live /models probe in step 5 below.
if provider == "openai-codex" and cached >= 400_000:
logger.info(
"Dropping stale Codex cache entry %s@%s -> %s (pre-fix value); "
"re-resolving via live /models probe",
model, base_url, f"{cached:,}",
)
_invalidate_cached_context_length(model, base_url)
else:
return cached
# 1b. AWS Bedrock — use static context length table.
# Bedrock's ListFoundationModels API doesn't expose context window sizes,
# so we maintain a curated table in bedrock_adapter.py that reflects
# AWS-imposed limits (e.g. 200K for Claude models vs 1M on the native
# Anthropic API). This must run BEFORE the custom-endpoint probe at
# step 2 — bedrock-runtime.<region>.amazonaws.com is not in
# _URL_TO_PROVIDER, so it would otherwise be treated as a custom endpoint,
# fail the /models probe (Bedrock doesn't expose that shape), and fall
# back to the 128K default before reaching the original step 4b branch.
if provider == "bedrock" or (
base_url
and base_url_hostname(base_url).startswith("bedrock-runtime.")
and base_url_host_matches(base_url, "amazonaws.com")
):
try:
from agent.bedrock_adapter import get_bedrock_context_length
return get_bedrock_context_length(model)
except ImportError:
pass # boto3 not installed — fall through to generic resolution
return cached
# 2. Active endpoint metadata for truly custom/unknown endpoints.
# Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their
@@ -1332,13 +980,26 @@ def get_model_context_length(
# returns 128k) instead of the model's full context (400k). models.dev
# has the correct per-provider values and is checked at step 5+.
if _is_custom_endpoint(base_url) and not _is_known_provider_base_url(base_url):
context_length = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
if context_length is not None:
return context_length
endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
matched = endpoint_metadata.get(model)
if not matched:
# Single-model servers: if only one model is loaded, use it
if len(endpoint_metadata) == 1:
matched = next(iter(endpoint_metadata.values()))
else:
# Fuzzy match: substring in either direction
for key, entry in endpoint_metadata.items():
if model in key or key in model:
matched = entry
break
if matched:
context_length = matched.get("context_length")
if isinstance(context_length, int):
return context_length
if not _is_known_provider_base_url(base_url):
# 3. Try querying local server directly
if is_local_endpoint(base_url):
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
local_ctx = _query_local_context_length(model, base_url)
if local_ctx and local_ctx > 0:
save_context_length(model, base_url, local_ctx)
return local_ctx
@@ -1352,13 +1013,21 @@ def get_model_context_length(
# 4. Anthropic /v1/models API (only for regular API keys, not OAuth)
if provider == "anthropic" or (
base_url and base_url_hostname(base_url) == "api.anthropic.com"
base_url and "api.anthropic.com" in base_url
):
ctx = _query_anthropic_context_length(model, base_url or "https://api.anthropic.com", api_key)
if ctx:
return ctx
# 4b. (Bedrock handled earlier at step 1b — before custom-endpoint probe.)
# 4b. AWS Bedrock — use static context length table.
# Bedrock's ListFoundationModels doesn't expose context window sizes,
# so we maintain a curated table in bedrock_adapter.py.
if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url):
try:
from agent.bedrock_adapter import get_bedrock_context_length
return get_bedrock_context_length(model)
except ImportError:
pass # boto3 not installed — fall through to generic resolution
# 5. Provider-aware lookups (before generic OpenRouter cache)
# These are provider-specific and take priority over the generic OR cache,
@@ -1372,38 +1041,10 @@ def get_model_context_length(
if inferred:
effective_provider = inferred
# 5a. Copilot live /models API — max_prompt_tokens from the user's account.
# This catches account-specific models (e.g. claude-opus-4.6-1m) that
# don't exist in models.dev. For models that ARE in models.dev, this
# returns the provider-enforced limit which is what users can actually use.
if effective_provider in ("copilot", "copilot-acp", "github-copilot"):
try:
from hermes_cli.models import get_copilot_model_context
ctx = get_copilot_model_context(model, api_key=api_key)
if ctx:
return ctx
except Exception:
pass # Fall through to models.dev
if effective_provider == "nous":
ctx = _resolve_nous_context_length(model)
if ctx:
return ctx
if effective_provider == "openai-codex":
# Codex OAuth enforces lower context limits than the direct OpenAI
# API for the same slug (e.g. gpt-5.5 is 1.05M on the API but 272K
# on Codex). Authoritative source is Codex's own /models endpoint.
codex_ctx = _resolve_codex_oauth_context_length(model, access_token=api_key or "")
if codex_ctx:
if base_url:
save_context_length(model, base_url, codex_ctx)
return codex_ctx
if effective_provider == "gmi" and base_url:
# GMI exposes authoritative context_length via /models, but it is not
# in models.dev yet. Preserve that higher-fidelity endpoint lookup.
ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
if ctx is not None:
return ctx
if effective_provider:
from agent.models_dev import lookup_models_dev_context
ctx = lookup_models_dev_context(effective_provider, model)
@@ -1413,7 +1054,7 @@ def get_model_context_length(
# 6. OpenRouter live API metadata (provider-unaware fallback)
metadata = fetch_model_metadata()
if model in metadata:
return metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)
return metadata[model].get("context_length", 128000)
# 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
# Only check `default_model in model` (is the key a substring of the input).
@@ -1428,7 +1069,7 @@ def get_model_context_length(
# 9. Query local server as last resort
if base_url and is_local_endpoint(base_url):
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
local_ctx = _query_local_context_length(model, base_url)
if local_ctx and local_ctx > 0:
save_context_length(model, base_url, local_ctx)
return local_ctx
-4
View File
@@ -146,7 +146,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
"openai-codex": "openai",
"zai": "zai",
"kimi-coding": "kimi-for-coding",
"stepfun": "stepfun",
"kimi-coding-cn": "kimi-for-coding",
"minimax": "minimax",
"minimax-cn": "minimax-cn",
@@ -418,9 +417,6 @@ def list_provider_models(provider: str) -> List[str]:
Returns an empty list if the provider is unknown or has no data.
"""
from hermes_cli.models import normalize_provider
provider = normalize_provider(provider) or provider
models = _get_provider_models(provider)
if models is None:
return []
-190
View File
@@ -1,190 +0,0 @@
"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset.
Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI
tool calling. Requests that violate it fail with HTTP 400:
tools.function.parameters is not a valid moonshot flavored json schema,
details: <...>
Known rejection modes documented at
https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102
and MoonshotAI/kimi-cli#1595:
1. Every property schema must carry a ``type``. Standard JSON Schema allows
type to be omitted (the value is then unconstrained); Moonshot refuses.
2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
the parent. Presence of both causes "type should be defined in anyOf
items instead of the parent schema".
The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
applies at MCP registration time for all providers.
"""
from __future__ import annotations
import copy
from typing import Any, Dict, List
# Keys whose values are maps of name → schema (not schemas themselves).
# When we recurse, we walk the values of these maps as schemas, but we do
# NOT apply the missing-type repair to the map itself.
_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"})
# Keys whose values are lists of schemas.
_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"})
# Keys whose values are a single nested schema.
_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"})
def _repair_schema(node: Any, is_schema: bool = True) -> Any:
"""Recursively apply Moonshot repairs to a schema node.
``is_schema=True`` means this dict is a JSON Schema node and gets the
missing-type + anyOf-parent repairs applied. ``is_schema=False`` means
it's a container map (e.g. the value of ``properties``) and we only
recurse into its values.
"""
if isinstance(node, list):
# Lists only show up under schema-list keys (anyOf/oneOf/allOf), so
# every element is itself a schema.
return [_repair_schema(item, is_schema=True) for item in node]
if not isinstance(node, dict):
return node
# Walk the dict, deciding per-key whether recursion is into a schema
# node, a container map, or a scalar.
repaired: Dict[str, Any] = {}
for key, value in node.items():
if key in _SCHEMA_MAP_KEYS and isinstance(value, dict):
# Map of name → schema. Don't treat the map itself as a schema
# (it has no type / properties of its own), but each value is.
repaired[key] = {
sub_key: _repair_schema(sub_val, is_schema=True)
for sub_key, sub_val in value.items()
}
elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
elif key in _SCHEMA_NODE_KEYS:
# items / not / additionalProperties: single nested schema.
# additionalProperties can also be a bool — leave those alone.
if isinstance(value, dict):
repaired[key] = _repair_schema(value, is_schema=True)
else:
repaired[key] = value
else:
# Scalars (description, title, format, enum values, etc.) pass through.
repaired[key] = value
if not is_schema:
return repaired
# Rule 2: when anyOf is present, type belongs only on the children.
if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
repaired.pop("type", None)
return repaired
# Rule 1: property schemas without type need one. $ref nodes are exempt
# — their type comes from the referenced definition.
if "$ref" in repaired:
return repaired
return _fill_missing_type(repaired)
def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
"""Infer a reasonable ``type`` if this schema node has none."""
if "type" in node and node["type"] not in (None, ""):
return node
# Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
# → type of first enum value, else fall back to ``string`` (safest scalar).
if "properties" in node or "required" in node or "additionalProperties" in node:
inferred = "object"
elif "items" in node or "prefixItems" in node:
inferred = "array"
elif "enum" in node and isinstance(node["enum"], list) and node["enum"]:
sample = node["enum"][0]
if isinstance(sample, bool):
inferred = "boolean"
elif isinstance(sample, int):
inferred = "integer"
elif isinstance(sample, float):
inferred = "number"
else:
inferred = "string"
else:
inferred = "string"
return {**node, "type": inferred}
def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]:
"""Normalize tool parameters to a Moonshot-compatible object schema.
Returns a deep-copied schema with the two flavored-JSON-Schema repairs
applied. Input is not mutated.
"""
if not isinstance(parameters, dict):
return {"type": "object", "properties": {}}
repaired = _repair_schema(copy.deepcopy(parameters), is_schema=True)
if not isinstance(repaired, dict):
return {"type": "object", "properties": {}}
# Top-level must be an object schema
if repaired.get("type") != "object":
repaired["type"] = "object"
if "properties" not in repaired:
repaired["properties"] = {}
return repaired
def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters."""
if not tools:
return tools
sanitized: List[Dict[str, Any]] = []
any_change = False
for tool in tools:
if not isinstance(tool, dict):
sanitized.append(tool)
continue
fn = tool.get("function")
if not isinstance(fn, dict):
sanitized.append(tool)
continue
params = fn.get("parameters")
repaired = sanitize_moonshot_tool_parameters(params)
if repaired is not params:
any_change = True
new_fn = {**fn, "parameters": repaired}
sanitized.append({**tool, "function": new_fn})
else:
sanitized.append(tool)
return sanitized if any_change else tools
def is_moonshot_model(model: str | None) -> bool:
"""True for any Kimi / Moonshot model slug, regardless of aggregator prefix.
Matches bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) and aggregator-
prefixed slugs (``nous/moonshotai/kimi-k2.6``, ``openrouter/moonshotai/...``).
Detection by model name covers Nous / OpenRouter / other aggregators that
route to Moonshot's inference, where the base URL is the aggregator's, not
``api.moonshot.ai``.
"""
if not model:
return False
bare = model.strip().lower()
# Last path segment (covers aggregator-prefixed slugs)
tail = bare.rsplit("/", 1)[-1]
if tail.startswith("kimi-") or tail == "kimi":
return True
# Vendor-prefixed forms commonly used on aggregators
if "moonshot" in bare or "/kimi" in bare or bare.startswith("kimi"):
return True
return False
-142
View File
@@ -180,145 +180,3 @@ def format_remaining(seconds: float) -> str:
h, remainder = divmod(s, 3600)
m = remainder // 60
return f"{h}h {m}m" if m else f"{h}h"
# Buckets with reset windows shorter than this are treated as transient
# (upstream jitter, secondary throttling) rather than a genuine quota
# exhaustion worth a cross-session breaker trip.
_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
def is_genuine_nous_rate_limit(
*,
headers: Optional[Mapping[str, str]] = None,
last_known_state: Optional[Any] = None,
) -> bool:
"""Decide whether a 429 from Nous Portal is a real account rate limit.
Nous Portal multiplexes multiple upstream providers (DeepSeek, Kimi,
MiMo, Hermes, ...) behind one endpoint. A 429 can mean either:
(a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is
exhausted a genuine rate limit that will last until the
bucket resets.
(b) The upstream provider is out of capacity for a specific model
transient, clears in seconds, and has nothing to do with
the caller's quota on Nous.
Tripping the cross-session breaker on (b) blocks ALL Nous requests
(and all models, since Nous is one provider key) for minutes even
though the caller's account is healthy and a different model would
have worked. That's the bug users hit when DeepSeek V4 Pro 429s
trigger a breaker that then blocks Kimi 2.6 and MiMo V2.5 Pro.
We tell the two apart by looking at:
1. The 429 response's own ``x-ratelimit-*`` headers. Nous emits
the full suite on every response including 429s. An exhausted
bucket (``remaining == 0`` with a reset window >= 60s) is
proof of (a).
2. The last-known-good rate-limit state captured by
``_capture_rate_limits()`` on the previous successful
response. If any bucket there was already near-exhausted with
a substantial reset window, the current 429 is almost
certainly (a) continuing from that condition.
If neither signal fires, we treat the 429 as (b): fail the single
request, let the retry loop or model-switch proceed, and do NOT
write the cross-session breaker file.
Returns True when the evidence points at (a).
"""
# Signal 1: current 429 response headers.
state = _parse_buckets_from_headers(headers)
if _has_exhausted_bucket(state):
return True
# Signal 2: last-known-good state from a recent successful response.
# Accepts either a RateLimitState (dataclass from rate_limit_tracker)
# or a dict of bucket snapshots.
if last_known_state is not None and _has_exhausted_bucket_in_object(last_known_state):
return True
return False
def _parse_buckets_from_headers(
headers: Optional[Mapping[str, str]],
) -> dict[str, tuple[Optional[int], Optional[float]]]:
"""Extract (remaining, reset_seconds) per bucket from x-ratelimit-* headers.
Returns empty dict when no rate-limit headers are present.
"""
if not headers:
return {}
lowered = {k.lower(): v for k, v in headers.items()}
if not any(k.startswith("x-ratelimit-") for k in lowered):
return {}
def _maybe_int(raw: Optional[str]) -> Optional[int]:
if raw is None:
return None
try:
return int(float(raw))
except (TypeError, ValueError):
return None
def _maybe_float(raw: Optional[str]) -> Optional[float]:
if raw is None:
return None
try:
return float(raw)
except (TypeError, ValueError):
return None
result: dict[str, tuple[Optional[int], Optional[float]]] = {}
for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
if remaining is not None or reset is not None:
result[tag] = (remaining, reset)
return result
def _has_exhausted_bucket(
buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
) -> bool:
"""Return True when any bucket has remaining == 0 AND a meaningful reset window."""
for remaining, reset in buckets.values():
if remaining is None or remaining > 0:
continue
if reset is None:
continue
if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
return True
return False
def _has_exhausted_bucket_in_object(state: Any) -> bool:
"""Check a RateLimitState-like object for an exhausted bucket.
Accepts the dataclass from ``agent.rate_limit_tracker`` (buckets
exposed as attributes ``requests_min``, ``requests_hour``,
``tokens_min``, ``tokens_hour``) and falls back gracefully for any
object missing those attributes.
"""
for attr in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"):
bucket = getattr(state, attr, None)
if bucket is None:
continue
limit = getattr(bucket, "limit", 0) or 0
remaining = getattr(bucket, "remaining", 0) or 0
# Prefer the adjusted "remaining_seconds_now" property when present;
# fall back to raw reset_seconds.
reset = getattr(bucket, "remaining_seconds_now", None)
if reset is None:
reset = getattr(bucket, "reset_seconds", 0.0) or 0.0
if limit <= 0:
continue
if remaining > 0:
continue
if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
return True
return False
-191
View File
@@ -1,191 +0,0 @@
"""
Contextual first-touch onboarding hints.
Instead of blocking first-run questionnaires, show a one-time hint the *first*
time a user hits a behavior fork message-while-running, first long-running
tool, etc. Each hint is shown once per install (tracked in ``config.yaml`` under
``onboarding.seen.<flag>``) and then never again.
Keep this module tiny and dependency-free so both the CLI and gateway can import
it without pulling in heavy modules.
"""
from __future__ import annotations
import logging
from pathlib import Path
from typing import Any, Mapping, Optional
logger = logging.getLogger(__name__)
# -------------------------------------------------------------------------
# Flag names (stable — used as config.yaml keys under onboarding.seen)
# -------------------------------------------------------------------------
BUSY_INPUT_FLAG = "busy_input_prompt"
TOOL_PROGRESS_FLAG = "tool_progress_prompt"
OPENCLAW_RESIDUE_FLAG = "openclaw_residue_cleanup"
# -------------------------------------------------------------------------
# Hint content
# -------------------------------------------------------------------------
def busy_input_hint_gateway(mode: str) -> str:
"""Hint shown the first time a user messages while the agent is busy.
``mode`` is the effective busy_input_mode that was just applied, so the
message matches reality ("I just interrupted…" vs "I just queued…").
"""
if mode == "queue":
return (
"💡 First-time tip — I queued your message instead of interrupting. "
"Send `/busy interrupt` to make new messages stop the current task "
"immediately, or `/busy status` to check. This notice won't appear again."
)
if mode == "steer":
return (
"💡 First-time tip — I steered your message into the current run; "
"it will arrive after the next tool call instead of interrupting. "
"Send `/busy interrupt` or `/busy queue` to change this, or "
"`/busy status` to check. This notice won't appear again."
)
return (
"💡 First-time tip — I just interrupted my current task to answer you. "
"Send `/busy queue` to queue follow-ups for after the current task instead, "
"`/busy steer` to inject them mid-run without interrupting, or "
"`/busy status` to check. This notice won't appear again."
)
def busy_input_hint_cli(mode: str) -> str:
"""CLI version of the busy-input hint (plain text, no markdown)."""
if mode == "queue":
return (
"(tip) Your message was queued for the next turn. "
"Use /busy interrupt to make Enter stop the current run instead, "
"or /busy steer to inject mid-run. This tip only shows once."
)
if mode == "steer":
return (
"(tip) Your message was steered into the current run; it arrives "
"after the next tool call. Use /busy interrupt or /busy queue to "
"change this. This tip only shows once."
)
return (
"(tip) Your message interrupted the current run. "
"Use /busy queue to queue messages for the next turn instead, "
"or /busy steer to inject mid-run. This tip only shows once."
)
def tool_progress_hint_gateway() -> str:
return (
"💡 First-time tip — that tool took a while and I'm streaming every step. "
"If the progress messages feel noisy, send `/verbose` to cycle modes "
"(all → new → off). This notice won't appear again."
)
def tool_progress_hint_cli() -> str:
return (
"(tip) That tool ran for a while. Use /verbose to cycle tool-progress "
"display modes (all -> new -> off -> verbose). This tip only shows once."
)
def openclaw_residue_hint_cli() -> str:
"""Banner shown the first time Hermes starts and finds ``~/.openclaw/``.
OpenClaw-era config, memory, and skill paths in ``~/.openclaw/`` will
otherwise attract the agent (memory entries like ``~/.openclaw/config.yaml``
get carried forward and the agent dutifully reads them). ``hermes claw
cleanup`` renames the directory so the agent stops finding it.
"""
return (
"Heads up — an OpenClaw workspace was detected at ~/.openclaw/.\n"
"After migrating, the agent can still get confused and read that "
"directory's config/memory instead of Hermes's.\n"
"Run `hermes claw cleanup` to archive it (rename → .openclaw.pre-migration). "
"This tip only shows once; rerun it any time with `hermes claw cleanup`."
)
def detect_openclaw_residue(home: Optional[Path] = None) -> bool:
"""Return True if an OpenClaw workspace directory is present in ``$HOME``.
Pure filesystem check no side effects. ``home`` override exists for tests.
"""
base = home or Path.home()
try:
return (base / ".openclaw").is_dir()
except OSError:
return False
# -------------------------------------------------------------------------
# State read / write
# -------------------------------------------------------------------------
def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
onboarding = config.get("onboarding") if isinstance(config, Mapping) else None
if not isinstance(onboarding, Mapping):
return {}
seen = onboarding.get("seen")
return seen if isinstance(seen, Mapping) else {}
def is_seen(config: Mapping[str, Any], flag: str) -> bool:
"""Return True if the user has already been shown this first-touch hint."""
return bool(_get_seen_dict(config).get(flag))
def mark_seen(config_path: Path, flag: str) -> bool:
"""Persist ``onboarding.seen.<flag> = True`` to ``config_path``.
Uses the atomic YAML writer so a concurrent process can't observe a
partially-written file. Returns True on success, False on any error
(including the config file being absent onboarding is best-effort).
"""
try:
import yaml
from utils import atomic_yaml_write
except Exception as e: # pragma: no cover — dependency issue
logger.debug("onboarding: failed to import yaml/utils: %s", e)
return False
try:
cfg: dict = {}
if config_path.exists():
with open(config_path, encoding="utf-8") as f:
cfg = yaml.safe_load(f) or {}
if not isinstance(cfg.get("onboarding"), dict):
cfg["onboarding"] = {}
seen = cfg["onboarding"].get("seen")
if not isinstance(seen, dict):
seen = {}
cfg["onboarding"]["seen"] = seen
if seen.get(flag) is True:
return True # already marked — nothing to do
seen[flag] = True
atomic_yaml_write(config_path, cfg)
return True
except Exception as e:
logger.debug("onboarding: failed to mark flag %s: %s", flag, e)
return False
__all__ = [
"BUSY_INPUT_FLAG",
"TOOL_PROGRESS_FLAG",
"OPENCLAW_RESIDUE_FLAG",
"busy_input_hint_gateway",
"busy_input_hint_cli",
"tool_progress_hint_gateway",
"tool_progress_hint_cli",
"openclaw_residue_hint_cli",
"detect_openclaw_residue",
"is_seen",
"mark_seen",
]
+4 -76
View File
@@ -141,12 +141,6 @@ DEFAULT_AGENT_IDENTITY = (
"Be targeted and efficient in your exploration and investigations."
)
HERMES_AGENT_HELP_GUIDANCE = (
"If the user asks about configuring, setting up, or using Hermes Agent "
"itself, load the `hermes-agent` skill with skill_view(name='hermes-agent') "
"before answering. Docs: https://hermes-agent.nousresearch.com/docs"
)
MEMORY_GUIDANCE = (
"You have persistent memory across sessions. Save durable facts using the memory "
"tool: user preferences, environment details, tool quirks, and stable conventions. "
@@ -158,13 +152,7 @@ MEMORY_GUIDANCE = (
"Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO "
"state to memory; use session_search to recall those from past transcripts. "
"If you've discovered a new way to do something, solved a problem that could be "
"necessary later, save it as a skill with the skill tool.\n"
"Write memories as declarative facts, not instructions to yourself. "
"'User prefers concise responses' ✓ — 'Always respond concisely' ✗. "
"'Project uses pytest with xdist' ✓ — 'Run tests with pytest -n 4' ✗. "
"Imperative phrasing gets re-read as a directive in later sessions and can "
"cause repeated work or override the user's current request. Procedures and "
"workflows belong in skills, not memory."
"necessary later, save it as a skill with the skill tool."
)
SESSION_SEARCH_GUIDANCE = (
@@ -356,13 +344,7 @@ PLATFORM_HINTS = {
),
"cli": (
"You are a CLI AI Agent. Try not to use markdown but simple text "
"renderable inside a terminal. "
"File delivery: there is no attachment channel — the user reads your "
"response directly in their terminal. Do NOT emit MEDIA:/path tags "
"(those are only intercepted on messaging platforms like Telegram, "
"Discord, Slack, etc.; on the CLI they render as literal text). "
"When referring to a file you created or changed, just state its "
"absolute path in plain text; the user can open it from there."
"renderable inside a terminal."
),
"sms": (
"You are communicating via SMS. Keep responses concise and use plain text "
@@ -376,32 +358,6 @@ PLATFORM_HINTS = {
"MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
".heic) appear as photos and other files arrive as attachments."
),
"mattermost": (
"You are in a Mattermost workspace communicating with your user. "
"Mattermost renders standard Markdown — headings, bold, italic, code "
"blocks, and tables all work. "
"You can send media files natively: include MEDIA:/absolute/path/to/file "
"in your response. Images (.jpg, .png, .webp) are uploaded as photo "
"attachments, audio and video as file attachments. "
"Image URLs in markdown format ![alt](url) are rendered as inline previews automatically."
),
"matrix": (
"You are in a Matrix room communicating with your user. "
"Matrix renders Markdown — bold, italic, code blocks, and links work; "
"the adapter converts your Markdown to HTML for rich display. "
"You can send media files natively: include MEDIA:/absolute/path/to/file "
"in your response. Images (.jpg, .png, .webp) are sent as inline photos, "
"audio (.ogg, .mp3) as voice/audio messages, video (.mp4) inline, "
"and other files as downloadable attachments."
),
"feishu": (
"You are in a Feishu (Lark) workspace communicating with your user. "
"Feishu renders Markdown in messages — bold, italic, code blocks, and "
"links are supported. "
"You can send media files natively: include MEDIA:/absolute/path/to/file "
"in your response. Images (.jpg, .png, .webp) are uploaded and displayed "
"inline, audio files as voice messages, and other files as attachments."
),
"weixin": (
"You are on Weixin/WeChat. Markdown formatting is supported, so you may use it when "
"it improves readability, but keep the message compact and chat-friendly. You can send media files natively: "
@@ -428,29 +384,6 @@ PLATFORM_HINTS = {
"your response. Images are sent as native photos, and other files arrive as downloadable "
"documents."
),
"yuanbao": (
"You are on Yuanbao (腾讯元宝), a Chinese AI assistant platform. "
"Markdown formatting is supported (code blocks, tables, bold/italic). "
"You CAN send media files natively — to deliver a file to the user, include "
"MEDIA:/absolute/path/to/file in your response. The file will be sent as a native "
"Yuanbao attachment: images (.jpg, .png, .webp, .gif) are sent as photos, "
"and other files (.pdf, .docx, .txt, .zip, etc.) arrive as downloadable documents "
"(max 50 MB). You can also include image URLs in markdown format ![alt](url) and "
"they will be downloaded and sent as native photos. "
"Do NOT tell the user you lack file-sending capability — use MEDIA: syntax "
"whenever a file delivery is appropriate.\n\n"
"Stickers (贴纸 / 表情包 / TIM face): Yuanbao has a built-in sticker catalogue. "
"When the user sends a sticker (you see '[emoji: 名称]' in their message) or asks "
"you to send/reply-with a 贴纸/表情/表情包, you MUST use the sticker tools:\n"
" 1. Call yb_search_sticker with a Chinese keyword (e.g. '666', '比心', '吃瓜', "
" '捂脸', '合十') to discover matching sticker_ids.\n"
" 2. Call yb_send_sticker with the chosen sticker_id or name — this sends a real "
" TIMFaceElem that renders as a native sticker in the chat.\n"
"DO NOT draw sticker-like PNGs with execute_code/Pillow/matplotlib and then send "
"them via MEDIA: or send_image_file. That produces a fake low-quality 'sticker' "
"image and is the WRONG path. Bare Unicode emoji in text is also not a substitute "
"— when a sticker is the right response, use yb_send_sticker."
),
}
# ---------------------------------------------------------------------------
@@ -680,14 +613,12 @@ def build_skills_system_prompt(
or get_session_env("HERMES_SESSION_PLATFORM")
or ""
)
disabled = get_disabled_skill_names()
cache_key = (
str(skills_dir.resolve()),
tuple(str(d) for d in external_dirs),
tuple(sorted(str(t) for t in (available_tools or set()))),
tuple(sorted(str(ts) for ts in (available_toolsets or set()))),
_platform_hint,
tuple(sorted(disabled)),
)
with _SKILLS_PROMPT_CACHE_LOCK:
cached = _SKILLS_PROMPT_CACHE.get(cache_key)
@@ -695,6 +626,8 @@ def build_skills_system_prompt(
_SKILLS_PROMPT_CACHE.move_to_end(cache_key)
return cached
disabled = get_disabled_skill_names()
# ── Layer 2: disk snapshot ────────────────────────────────────────
snapshot = _load_skills_snapshot(skills_dir)
@@ -854,11 +787,6 @@ def build_skills_system_prompt(
"Skills also encode the user's preferred approach, conventions, and quality standards "
"for tasks like code review, planning, and testing — load them even for tasks you "
"already know how to do, because the skill defines how it should be done here.\n"
"Whenever the user asks you to configure, set up, install, enable, disable, modify, "
"or troubleshoot Hermes Agent itself — its CLI, config, models, providers, tools, "
"skills, voice, gateway, plugins, or any feature — load the `hermes-agent` skill "
"first. It has the actual commands (e.g. `hermes config set …`, `hermes tools`, "
"`hermes setup`) so you don't have to guess or invent workarounds.\n"
"If a skill has issues, fix it with skill_manage(action='patch').\n"
"After difficult/iterative tasks, offer to save as a skill. "
"If a skill you loaded was missing steps, had wrong commands, or needed "
-142
View File
@@ -13,48 +13,6 @@ import re
logger = logging.getLogger(__name__)
# Sensitive query-string parameter names (case-insensitive exact match).
# Ported from nearai/ironclaw#2529 — catches tokens whose values don't match
# any known vendor prefix regex (e.g. opaque tokens, short OAuth codes).
_SENSITIVE_QUERY_PARAMS = frozenset({
"access_token",
"refresh_token",
"id_token",
"token",
"api_key",
"apikey",
"client_secret",
"password",
"auth",
"jwt",
"session",
"secret",
"key",
"code", # OAuth authorization codes
"signature", # pre-signed URL signatures
"x-amz-signature",
})
# Sensitive form-urlencoded / JSON body key names (case-insensitive exact match).
# Exact match, NOT substring — "token_count" and "session_id" must NOT match.
# Ported from nearai/ironclaw#2529.
_SENSITIVE_BODY_KEYS = frozenset({
"access_token",
"refresh_token",
"id_token",
"token",
"api_key",
"apikey",
"client_secret",
"password",
"auth",
"jwt",
"secret",
"private_key",
"authorization",
"key",
})
# Snapshot at import time so runtime env mutations (e.g. LLM-generated
# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
@@ -150,30 +108,6 @@ _DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
# Negative lookahead prevents matching hex strings or identifiers
_SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
# URLs containing query strings — matches `scheme://...?...[# or end]`.
# Used to scan text for URLs whose query params may contain secrets.
# Ported from nearai/ironclaw#2529.
_URL_WITH_QUERY_RE = re.compile(
r"(https?|wss?|ftp)://" # scheme
r"([^\s/?#]+)" # authority (may include userinfo)
r"([^\s?#]*)" # path
r"\?([^\s#]+)" # query (required)
r"(#\S*)?", # optional fragment
)
# URLs containing userinfo — `scheme://user:password@host` for ANY scheme
# (not just DB protocols already covered by _DB_CONNSTR_RE above).
# Catches things like `https://user:token@api.example.com/v1/foo`.
_URL_USERINFO_RE = re.compile(
r"(https?|wss?|ftp)://([^/\s:@]+):([^/\s@]+)@",
)
# Form-urlencoded body detection: conservative — only applies when the entire
# text looks like a query string (k=v&k=v pattern with no newlines).
_FORM_BODY_RE = re.compile(
r"^[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*(?:&[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*)+$"
)
# Compile known prefix patterns into one alternation
_PREFIX_RE = re.compile(
r"(?<![A-Za-z0-9_-])(" + "|".join(_PREFIX_PATTERNS) + r")(?![A-Za-z0-9_-])"
@@ -187,72 +121,6 @@ def _mask_token(token: str) -> str:
return f"{token[:6]}...{token[-4:]}"
def _redact_query_string(query: str) -> str:
"""Redact sensitive parameter values in a URL query string.
Handles `k=v&k=v` format. Sensitive keys (case-insensitive) have values
replaced with `***`. Non-sensitive keys pass through unchanged.
Empty or malformed pairs are preserved as-is.
"""
if not query:
return query
parts = []
for pair in query.split("&"):
if "=" not in pair:
parts.append(pair)
continue
key, _, value = pair.partition("=")
if key.lower() in _SENSITIVE_QUERY_PARAMS:
parts.append(f"{key}=***")
else:
parts.append(pair)
return "&".join(parts)
def _redact_url_query_params(text: str) -> str:
"""Scan text for URLs with query strings and redact sensitive params.
Catches opaque tokens that don't match vendor prefix regexes, e.g.
`https://example.com/cb?code=ABC123&state=xyz` `...?code=***&state=xyz`.
"""
def _sub(m: re.Match) -> str:
scheme = m.group(1)
authority = m.group(2)
path = m.group(3)
query = _redact_query_string(m.group(4))
fragment = m.group(5) or ""
return f"{scheme}://{authority}{path}?{query}{fragment}"
return _URL_WITH_QUERY_RE.sub(_sub, text)
def _redact_url_userinfo(text: str) -> str:
"""Strip `user:password@` from HTTP/WS/FTP URLs.
DB protocols (postgres, mysql, mongodb, redis, amqp) are handled
separately by `_DB_CONNSTR_RE`.
"""
return _URL_USERINFO_RE.sub(
lambda m: f"{m.group(1)}://{m.group(2)}:***@",
text,
)
def _redact_form_body(text: str) -> str:
"""Redact sensitive values in a form-urlencoded body.
Only applies when the entire input looks like a pure form body
(k=v&k=v with no newlines, no other text). Single-line non-form
text passes through unchanged. This is a conservative pass the
`_redact_url_query_params` function handles embedded query strings.
"""
if not text or "\n" in text or "&" not in text:
return text
# The body-body form check is strict: only trigger on clean k=v&k=v.
if not _FORM_BODY_RE.match(text.strip()):
return text
return _redact_query_string(text.strip())
def redact_sensitive_text(text: str) -> str:
"""Apply all redaction patterns to a block of text.
@@ -305,16 +173,6 @@ def redact_sensitive_text(text: str) -> str:
# JWT tokens (eyJ... — base64-encoded JSON headers)
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
# URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes.
# DB schemes are handled above by _DB_CONNSTR_RE.
text = _redact_url_userinfo(text)
# URL query params containing opaque tokens (?access_token=…&code=…)
text = _redact_url_query_params(text)
# Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
text = _redact_form_body(text)
# Discord user/role mentions (<@snowflake_id>)
text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)
-835
View File
@@ -1,835 +0,0 @@
"""
Shell-script hooks bridge.
Reads the ``hooks:`` block from ``cli-config.yaml``, prompts the user for
consent on first use of each ``(event, command)`` pair, and registers
callbacks on the existing plugin hook manager so every existing
``invoke_hook()`` site dispatches to the configured shell scripts with
zero changes to call sites.
Design notes
------------
* Python plugins and shell hooks compose naturally: both flow through
:func:`hermes_cli.plugins.invoke_hook` and its aggregators. Python
plugins are registered first (via ``discover_and_load()``) so their
block decisions win ties over shell-hook blocks.
* Subprocess execution uses ``shlex.split(os.path.expanduser(command))``
with ``shell=False`` no shell injection footguns. Users that need
pipes/redirection wrap their logic in a script.
* First-use consent is gated by the allowlist under
``~/.hermes/shell-hooks-allowlist.json``. Non-TTY callers must pass
``accept_hooks=True`` (resolved from ``--accept-hooks``,
``HERMES_ACCEPT_HOOKS``, or ``hooks_auto_accept: true`` in config)
for registration to succeed without a prompt.
* Registration is idempotent safe to invoke from both the CLI entry
point (``hermes_cli/main.py``) and the gateway entry point
(``gateway/run.py``).
Wire protocol
-------------
**stdin** (JSON, piped to the script)::
{
"hook_event_name": "pre_tool_call",
"tool_name": "terminal",
"tool_input": {"command": "rm -rf /"},
"session_id": "sess_abc123",
"cwd": "/home/user/project",
"extra": {...} # event-specific kwargs
}
**stdout** (JSON, optional anything else is ignored)::
# Block a pre_tool_call (either shape accepted; normalised internally):
{"decision": "block", "reason": "Forbidden command"} # Claude-Code-style
{"action": "block", "message": "Forbidden command"} # Hermes-canonical
# Inject context for pre_llm_call:
{"context": "Today is Friday"}
# Silent no-op:
<empty or any non-matching JSON object>
"""
from __future__ import annotations
import difflib
import json
import logging
import os
import re
import shlex
import subprocess
import sys
import tempfile
import threading
import time
from contextlib import contextmanager
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple
try:
import fcntl # POSIX only; Windows falls back to best-effort without flock.
except ImportError: # pragma: no cover
fcntl = None # type: ignore[assignment]
from hermes_constants import get_hermes_home
logger = logging.getLogger(__name__)
DEFAULT_TIMEOUT_SECONDS = 60
MAX_TIMEOUT_SECONDS = 300
ALLOWLIST_FILENAME = "shell-hooks-allowlist.json"
# (event, matcher, command) triples that have been wired to the plugin
# manager in the current process. Matcher is part of the key because
# the same script can legitimately register for different matchers under
# the same event (e.g. one entry per tool the user wants to gate).
# Second registration attempts for the exact same triple become no-ops
# so the CLI and gateway can both call register_from_config() safely.
_registered: Set[Tuple[str, Optional[str], str]] = set()
_registered_lock = threading.Lock()
# Intra-process lock for allowlist read-modify-write on platforms that
# lack ``fcntl`` (non-POSIX). Kept separate from ``_registered_lock``
# because ``register_from_config`` already holds ``_registered_lock`` when
# it triggers ``_record_approval`` — reusing it here would self-deadlock
# (``threading.Lock`` is non-reentrant). POSIX callers use the sibling
# ``.lock`` file via ``fcntl.flock`` and bypass this.
_allowlist_write_lock = threading.Lock()
@dataclass
class ShellHookSpec:
"""Parsed and validated representation of a single ``hooks:`` entry."""
event: str
command: str
matcher: Optional[str] = None
timeout: int = DEFAULT_TIMEOUT_SECONDS
compiled_matcher: Optional[re.Pattern] = field(default=None, repr=False)
def __post_init__(self) -> None:
# Strip whitespace introduced by YAML quirks (e.g. multi-line string
# folding) — a matcher of " terminal" would otherwise silently fail
# to match "terminal" without any diagnostic.
if isinstance(self.matcher, str):
stripped = self.matcher.strip()
self.matcher = stripped if stripped else None
if self.matcher:
try:
self.compiled_matcher = re.compile(self.matcher)
except re.error as exc:
logger.warning(
"shell hook matcher %r is invalid (%s) — treating as "
"literal equality", self.matcher, exc,
)
self.compiled_matcher = None
def matches_tool(self, tool_name: Optional[str]) -> bool:
if not self.matcher:
return True
if tool_name is None:
return False
if self.compiled_matcher is not None:
return self.compiled_matcher.fullmatch(tool_name) is not None
# compiled_matcher is None only when the regex failed to compile,
# in which case we already warned and fall back to literal equality.
return tool_name == self.matcher
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def register_from_config(
cfg: Optional[Dict[str, Any]],
*,
accept_hooks: bool = False,
) -> List[ShellHookSpec]:
"""Register every configured shell hook on the plugin manager.
``cfg`` is the full parsed config dict (``hermes_cli.config.load_config``
output). The ``hooks:`` key is read out of it. Missing, empty, or
non-dict ``hooks`` is treated as zero configured hooks.
``accept_hooks=True`` skips the TTY consent prompt the caller is
promising that the user has opted in via a flag, env var, or config
setting. ``HERMES_ACCEPT_HOOKS=1`` and ``hooks_auto_accept: true`` are
also honored inside this function so either CLI or gateway call sites
pick them up.
Returns the list of :class:`ShellHookSpec` entries that ended up wired
up on the plugin manager. Skipped entries (unknown events, malformed,
not allowlisted, already registered) are logged but not returned.
"""
if not isinstance(cfg, dict):
return []
effective_accept = _resolve_effective_accept(cfg, accept_hooks)
specs = _parse_hooks_block(cfg.get("hooks"))
if not specs:
return []
registered: List[ShellHookSpec] = []
# Import lazily — avoids circular imports at module-load time.
from hermes_cli.plugins import get_plugin_manager
manager = get_plugin_manager()
# Idempotence + allowlist read happen under the lock; the TTY
# prompt runs outside so other threads aren't parked on a blocking
# input(). Mutation re-takes the lock with a defensive idempotence
# re-check in case two callers ever race through the prompt.
for spec in specs:
key = (spec.event, spec.matcher, spec.command)
with _registered_lock:
if key in _registered:
continue
already_allowlisted = _is_allowlisted(spec.event, spec.command)
if not already_allowlisted:
if not _prompt_and_record(
spec.event, spec.command, accept_hooks=effective_accept,
):
logger.warning(
"shell hook for %s (%s) not allowlisted — skipped. "
"Use --accept-hooks / HERMES_ACCEPT_HOOKS=1 / "
"hooks_auto_accept: true, or approve at the TTY "
"prompt next run.",
spec.event, spec.command,
)
continue
with _registered_lock:
if key in _registered:
continue
manager._hooks.setdefault(spec.event, []).append(_make_callback(spec))
_registered.add(key)
registered.append(spec)
logger.info(
"shell hook registered: %s -> %s (matcher=%s, timeout=%ds)",
spec.event, spec.command, spec.matcher, spec.timeout,
)
return registered
def iter_configured_hooks(cfg: Optional[Dict[str, Any]]) -> List[ShellHookSpec]:
"""Return the parsed ``ShellHookSpec`` entries from config without
registering anything. Used by ``hermes hooks list`` and ``doctor``."""
if not isinstance(cfg, dict):
return []
return _parse_hooks_block(cfg.get("hooks"))
def reset_for_tests() -> None:
"""Clear the idempotence set. Test-only helper."""
with _registered_lock:
_registered.clear()
# ---------------------------------------------------------------------------
# Config parsing
# ---------------------------------------------------------------------------
def _parse_hooks_block(hooks_cfg: Any) -> List[ShellHookSpec]:
"""Normalise the ``hooks:`` dict into a flat list of ``ShellHookSpec``.
Malformed entries warn-and-skip we never raise from config parsing
because a broken hook must not crash the agent.
"""
from hermes_cli.plugins import VALID_HOOKS
if not isinstance(hooks_cfg, dict):
return []
specs: List[ShellHookSpec] = []
for event_name, entries in hooks_cfg.items():
if event_name not in VALID_HOOKS:
suggestion = difflib.get_close_matches(
str(event_name), VALID_HOOKS, n=1, cutoff=0.6,
)
if suggestion:
logger.warning(
"unknown hook event %r in hooks: config — did you mean %r?",
event_name, suggestion[0],
)
else:
logger.warning(
"unknown hook event %r in hooks: config (valid: %s)",
event_name, ", ".join(sorted(VALID_HOOKS)),
)
continue
if entries is None:
continue
if not isinstance(entries, list):
logger.warning(
"hooks.%s must be a list of hook definitions; got %s",
event_name, type(entries).__name__,
)
continue
for i, raw in enumerate(entries):
spec = _parse_single_entry(event_name, i, raw)
if spec is not None:
specs.append(spec)
return specs
def _parse_single_entry(
event: str, index: int, raw: Any,
) -> Optional[ShellHookSpec]:
if not isinstance(raw, dict):
logger.warning(
"hooks.%s[%d] must be a mapping with a 'command' key; got %s",
event, index, type(raw).__name__,
)
return None
command = raw.get("command")
if not isinstance(command, str) or not command.strip():
logger.warning(
"hooks.%s[%d] is missing a non-empty 'command' field",
event, index,
)
return None
matcher = raw.get("matcher")
if matcher is not None and not isinstance(matcher, str):
logger.warning(
"hooks.%s[%d].matcher must be a string regex; ignoring",
event, index,
)
matcher = None
if matcher is not None and event not in ("pre_tool_call", "post_tool_call"):
logger.warning(
"hooks.%s[%d].matcher=%r will be ignored at runtime — the "
"matcher field is only honored for pre_tool_call / "
"post_tool_call. The hook will fire on every %s event.",
event, index, matcher, event,
)
matcher = None
timeout_raw = raw.get("timeout", DEFAULT_TIMEOUT_SECONDS)
try:
timeout = int(timeout_raw)
except (TypeError, ValueError):
logger.warning(
"hooks.%s[%d].timeout must be an int (got %r); using default %ds",
event, index, timeout_raw, DEFAULT_TIMEOUT_SECONDS,
)
timeout = DEFAULT_TIMEOUT_SECONDS
if timeout < 1:
logger.warning(
"hooks.%s[%d].timeout must be >=1; using default %ds",
event, index, DEFAULT_TIMEOUT_SECONDS,
)
timeout = DEFAULT_TIMEOUT_SECONDS
if timeout > MAX_TIMEOUT_SECONDS:
logger.warning(
"hooks.%s[%d].timeout=%ds exceeds max %ds; clamping",
event, index, timeout, MAX_TIMEOUT_SECONDS,
)
timeout = MAX_TIMEOUT_SECONDS
return ShellHookSpec(
event=event,
command=command.strip(),
matcher=matcher,
timeout=timeout,
)
# ---------------------------------------------------------------------------
# Subprocess callback
# ---------------------------------------------------------------------------
_TOP_LEVEL_PAYLOAD_KEYS = {"tool_name", "args", "session_id", "parent_session_id"}
def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
"""Run ``spec.command`` as a subprocess with ``stdin_json`` on stdin.
Returns a diagnostic dict with the same keys for every outcome
(``returncode``, ``stdout``, ``stderr``, ``timed_out``,
``elapsed_seconds``, ``error``). This is the single place the
subprocess is actually invoked both the live callback path
(:func:`_make_callback`) and the CLI test helper (:func:`run_once`)
go through it.
"""
result: Dict[str, Any] = {
"returncode": None,
"stdout": "",
"stderr": "",
"timed_out": False,
"elapsed_seconds": 0.0,
"error": None,
}
try:
argv = shlex.split(os.path.expanduser(spec.command))
except ValueError as exc:
result["error"] = f"command {spec.command!r} cannot be parsed: {exc}"
return result
if not argv:
result["error"] = "empty command"
return result
t0 = time.monotonic()
try:
proc = subprocess.run(
argv,
input=stdin_json,
capture_output=True,
timeout=spec.timeout,
text=True,
shell=False,
)
except subprocess.TimeoutExpired:
result["timed_out"] = True
result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
return result
except FileNotFoundError:
result["error"] = "command not found"
return result
except PermissionError:
result["error"] = "command not executable"
return result
except Exception as exc: # pragma: no cover — defensive
result["error"] = str(exc)
return result
result["returncode"] = proc.returncode
result["stdout"] = proc.stdout or ""
result["stderr"] = proc.stderr or ""
result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
return result
def _make_callback(spec: ShellHookSpec) -> Callable[..., Optional[Dict[str, Any]]]:
"""Build the closure that ``invoke_hook()`` will call per firing."""
def _callback(**kwargs: Any) -> Optional[Dict[str, Any]]:
# Matcher gate — only meaningful for tool-scoped events.
if spec.event in ("pre_tool_call", "post_tool_call"):
if not spec.matches_tool(kwargs.get("tool_name")):
return None
r = _spawn(spec, _serialize_payload(spec.event, kwargs))
if r["error"]:
logger.warning(
"shell hook failed (event=%s command=%s): %s",
spec.event, spec.command, r["error"],
)
return None
if r["timed_out"]:
logger.warning(
"shell hook timed out after %.2fs (event=%s command=%s)",
r["elapsed_seconds"], spec.event, spec.command,
)
return None
stderr = r["stderr"].strip()
if stderr:
logger.debug(
"shell hook stderr (event=%s command=%s): %s",
spec.event, spec.command, stderr[:400],
)
# Non-zero exits: log but still parse stdout so scripts that
# signal failure via exit code can also return a block directive.
if r["returncode"] != 0:
logger.warning(
"shell hook exited %d (event=%s command=%s); stderr=%s",
r["returncode"], spec.event, spec.command, stderr[:400],
)
return _parse_response(spec.event, r["stdout"])
_callback.__name__ = f"shell_hook[{spec.event}:{spec.command}]"
_callback.__qualname__ = _callback.__name__
return _callback
def _serialize_payload(event: str, kwargs: Dict[str, Any]) -> str:
"""Render the stdin JSON payload. Unserialisable values are
stringified via ``default=str`` rather than dropped."""
extras = {k: v for k, v in kwargs.items() if k not in _TOP_LEVEL_PAYLOAD_KEYS}
try:
cwd = str(Path.cwd())
except OSError:
cwd = ""
payload = {
"hook_event_name": event,
"tool_name": kwargs.get("tool_name"),
"tool_input": kwargs.get("args") if isinstance(kwargs.get("args"), dict) else None,
"session_id": kwargs.get("session_id") or kwargs.get("parent_session_id") or "",
"cwd": cwd,
"extra": extras,
}
return json.dumps(payload, ensure_ascii=False, default=str)
def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]:
"""Translate stdout JSON into a Hermes wire-shape dict.
For ``pre_tool_call`` the Claude-Code-style ``{"decision": "block",
"reason": "..."}`` payload is translated into the canonical Hermes
``{"action": "block", "message": "..."}`` shape expected by
:func:`hermes_cli.plugins.get_pre_tool_call_block_message`. This is
the single most important correctness invariant in this module
skipping the translation silently breaks every ``pre_tool_call``
block directive.
For ``pre_llm_call``, ``{"context": "..."}`` is passed through
unchanged to match the existing plugin-hook contract.
Anything else returns ``None``.
"""
stdout = (stdout or "").strip()
if not stdout:
return None
try:
data = json.loads(stdout)
except json.JSONDecodeError:
logger.warning(
"shell hook stdout was not valid JSON (event=%s): %s",
event, stdout[:200],
)
return None
if not isinstance(data, dict):
return None
if event == "pre_tool_call":
if data.get("action") == "block":
message = data.get("message") or data.get("reason") or ""
if isinstance(message, str) and message:
return {"action": "block", "message": message}
if data.get("decision") == "block":
message = data.get("reason") or data.get("message") or ""
if isinstance(message, str) and message:
return {"action": "block", "message": message}
return None
context = data.get("context")
if isinstance(context, str) and context.strip():
return {"context": context}
return None
# ---------------------------------------------------------------------------
# Allowlist / consent
# ---------------------------------------------------------------------------
def allowlist_path() -> Path:
"""Path to the per-user shell-hook allowlist file."""
return get_hermes_home() / ALLOWLIST_FILENAME
def load_allowlist() -> Dict[str, Any]:
"""Return the parsed allowlist, or an empty skeleton if absent."""
try:
raw = json.loads(allowlist_path().read_text())
except (FileNotFoundError, json.JSONDecodeError, OSError):
return {"approvals": []}
if not isinstance(raw, dict):
return {"approvals": []}
approvals = raw.get("approvals")
if not isinstance(approvals, list):
raw["approvals"] = []
return raw
def save_allowlist(data: Dict[str, Any]) -> None:
"""Atomically persist the allowlist via per-process ``mkstemp`` +
``os.replace``. Cross-process read-modify-write races are handled
by :func:`_locked_update_approvals` (``fcntl.flock``). On OSError
the failure is logged; the in-process hook still registers but
the approval won't survive across runs."""
p = allowlist_path()
try:
p.parent.mkdir(parents=True, exist_ok=True)
fd, tmp_path = tempfile.mkstemp(
prefix=f"{p.name}.", suffix=".tmp", dir=str(p.parent),
)
try:
with os.fdopen(fd, "w") as fh:
fh.write(json.dumps(data, indent=2, sort_keys=True))
os.replace(tmp_path, p)
except Exception:
try:
os.unlink(tmp_path)
except OSError:
pass
raise
except OSError as exc:
logger.warning(
"Failed to persist shell hook allowlist to %s: %s. "
"The approval is in-memory for this run, but the next "
"startup will re-prompt (or skip registration on non-TTY "
"runs without --accept-hooks / HERMES_ACCEPT_HOOKS).",
p, exc,
)
def _is_allowlisted(event: str, command: str) -> bool:
data = load_allowlist()
return any(
isinstance(e, dict)
and e.get("event") == event
and e.get("command") == command
for e in data.get("approvals", [])
)
@contextmanager
def _locked_update_approvals() -> Iterator[Dict[str, Any]]:
"""Serialise read-modify-write on the allowlist across processes.
Holds an exclusive ``flock`` on a sibling lock file for the duration
of the update so concurrent ``_record_approval``/``revoke`` callers
cannot clobber each other's changes (the race Codex reproduced with
2050 simultaneous writers). Falls back to an in-process lock on
platforms without ``fcntl``.
"""
p = allowlist_path()
p.parent.mkdir(parents=True, exist_ok=True)
lock_path = p.with_suffix(p.suffix + ".lock")
if fcntl is None: # pragma: no cover — non-POSIX fallback
with _allowlist_write_lock:
data = load_allowlist()
yield data
save_allowlist(data)
return
with open(lock_path, "a+") as lock_fh:
fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)
try:
data = load_allowlist()
yield data
save_allowlist(data)
finally:
fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN)
def _prompt_and_record(
event: str, command: str, *, accept_hooks: bool,
) -> bool:
"""Decide whether to approve an unseen ``(event, command)`` pair.
Returns ``True`` iff the approval was granted and recorded.
"""
if accept_hooks:
_record_approval(event, command)
logger.info(
"shell hook auto-approved via --accept-hooks / env / config: "
"%s -> %s", event, command,
)
return True
if not sys.stdin.isatty():
return False
print(
f"\n⚠ Hermes is about to register a shell hook that will run a\n"
f" command on your behalf.\n\n"
f" Event: {event}\n"
f" Command: {command}\n\n"
f" Commands run with your full user credentials. Only approve\n"
f" commands you trust."
)
try:
answer = input("Allow this hook to run? [y/N]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
print() # keep the terminal tidy after ^C
return False
if answer in ("y", "yes"):
_record_approval(event, command)
return True
return False
def _record_approval(event: str, command: str) -> None:
entry = {
"event": event,
"command": command,
"approved_at": _utc_now_iso(),
"script_mtime_at_approval": script_mtime_iso(command),
}
with _locked_update_approvals() as data:
data["approvals"] = [
e for e in data.get("approvals", [])
if not (
isinstance(e, dict)
and e.get("event") == event
and e.get("command") == command
)
] + [entry]
def _utc_now_iso() -> str:
return datetime.now(tz=timezone.utc).isoformat().replace("+00:00", "Z")
def revoke(command: str) -> int:
"""Remove every allowlist entry matching ``command``.
Returns the number of entries removed. Does not unregister any
callbacks that are already live on the plugin manager in the current
process restart the CLI / gateway to drop them.
"""
with _locked_update_approvals() as data:
before = len(data.get("approvals", []))
data["approvals"] = [
e for e in data.get("approvals", [])
if not (isinstance(e, dict) and e.get("command") == command)
]
after = len(data["approvals"])
return before - after
_SCRIPT_EXTENSIONS: Tuple[str, ...] = (
".sh", ".bash", ".zsh", ".fish",
".py", ".pyw",
".rb", ".pl", ".lua",
".js", ".mjs", ".cjs", ".ts",
)
def _command_script_path(command: str) -> str:
"""Return the script path from ``command`` for doctor / drift checks.
Prefers a token ending in a known script extension, then a token
containing ``/`` or leading ``~``, then the first token. Handles
``python3 /path/hook.py``, ``/usr/bin/env bash hook.sh``, and the
common bare-path form.
"""
try:
parts = shlex.split(command)
except ValueError:
return command
if not parts:
return command
for part in parts:
if part.lower().endswith(_SCRIPT_EXTENSIONS):
return part
for part in parts:
if "/" in part or part.startswith("~"):
return part
return parts[0]
# ---------------------------------------------------------------------------
# Helpers for accept-hooks resolution
# ---------------------------------------------------------------------------
def _resolve_effective_accept(
cfg: Dict[str, Any], accept_hooks_arg: bool,
) -> bool:
"""Combine all three opt-in channels into a single boolean.
Precedence (any truthy source flips us on):
1. ``--accept-hooks`` flag (CLI) / explicit argument
2. ``HERMES_ACCEPT_HOOKS`` env var
3. ``hooks_auto_accept: true`` in ``cli-config.yaml``
"""
if accept_hooks_arg:
return True
env = os.environ.get("HERMES_ACCEPT_HOOKS", "").strip().lower()
if env in ("1", "true", "yes", "on"):
return True
cfg_val = cfg.get("hooks_auto_accept", False)
if isinstance(cfg_val, bool):
return cfg_val
if isinstance(cfg_val, str):
return cfg_val.strip().lower() in ("1", "true", "yes", "on")
return False
# ---------------------------------------------------------------------------
# Introspection (used by `hermes hooks` CLI)
# ---------------------------------------------------------------------------
def allowlist_entry_for(event: str, command: str) -> Optional[Dict[str, Any]]:
"""Return the allowlist record for this pair, if any."""
for e in load_allowlist().get("approvals", []):
if (
isinstance(e, dict)
and e.get("event") == event
and e.get("command") == command
):
return e
return None
def script_mtime_iso(command: str) -> Optional[str]:
"""ISO-8601 mtime of the resolved script path, or ``None`` if the
script is missing."""
path = _command_script_path(command)
if not path:
return None
try:
expanded = os.path.expanduser(path)
return datetime.fromtimestamp(
os.path.getmtime(expanded), tz=timezone.utc,
).isoformat().replace("+00:00", "Z")
except OSError:
return None
def script_is_executable(command: str) -> bool:
"""Return ``True`` iff ``command`` is runnable as configured.
For a bare invocation (``/path/hook.sh``) the script itself must be
executable. For interpreter-prefixed commands (``python3
/path/hook.py``, ``/usr/bin/env bash hook.sh``) the script just has
to be readable the interpreter doesn't care about the ``X_OK``
bit. Mirrors what ``_spawn`` would actually do at runtime."""
path = _command_script_path(command)
if not path:
return False
expanded = os.path.expanduser(path)
if not os.path.isfile(expanded):
return False
try:
argv = shlex.split(command)
except ValueError:
return False
is_bare_invocation = bool(argv) and argv[0] == path
required = os.X_OK if is_bare_invocation else os.R_OK
return os.access(expanded, required)
def run_once(
spec: ShellHookSpec, kwargs: Dict[str, Any],
) -> Dict[str, Any]:
"""Fire a single shell-hook invocation with a synthetic payload.
Used by ``hermes hooks test`` and ``hermes hooks doctor``.
``kwargs`` is the same dict that :func:`hermes_cli.plugins.invoke_hook`
would pass at runtime. It is routed through :func:`_serialize_payload`
so the synthetic stdin exactly matches what a real hook firing would
produce otherwise scripts tested via ``hermes hooks test`` could
diverge silently from production behaviour.
Returns the :func:`_spawn` diagnostic dict plus a ``parsed`` field
holding the canonical Hermes-wire-shape response."""
stdin_json = _serialize_payload(spec.event, kwargs)
result = _spawn(spec, stdin_json)
result["parsed"] = _parse_response(spec.event, result["stdout"])
return result
+35 -43
View File
@@ -1,29 +1,49 @@
"""Shared slash command helpers for skills.
"""Shared slash command helpers for skills and built-in prompt-style modes.
Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
can invoke skills via /skill-name commands.
can invoke skills via /skill-name commands and prompt-only built-ins like
/plan.
"""
import json
import logging
import re
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional
from hermes_constants import display_hermes_home
from agent.skill_preprocessing import (
expand_inline_shell as _expand_inline_shell,
load_skills_config as _load_skills_config,
substitute_template_vars as _substitute_template_vars,
)
logger = logging.getLogger(__name__)
_skill_commands: Dict[str, Dict[str, Any]] = {}
_PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
# Patterns for sanitizing skill names into clean hyphen-separated slugs.
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
def build_plan_path(
user_instruction: str = "",
*,
now: datetime | None = None,
) -> Path:
"""Return the default workspace-relative markdown path for a /plan invocation.
Relative paths are intentional: file tools are task/backend-aware and resolve
them against the active working directory for local, docker, ssh, modal,
daytona, and similar terminal backends. That keeps the plan with the active
workspace instead of the Hermes host's global home directory.
"""
slug_source = (user_instruction or "").strip().splitlines()[0] if user_instruction else ""
slug = _PLAN_SLUG_RE.sub("-", slug_source.lower()).strip("-")
if slug:
slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-")
slug = slug or "conversation-plan"
timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S")
return Path(".hermes") / "plans" / f"{timestamp}-{slug}.md"
def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
"""Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
raw_identifier = (skill_identifier or "").strip()
@@ -42,9 +62,7 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
else:
normalized = raw_identifier.lstrip("/")
loaded_skill = json.loads(
skill_view(normalized, task_id=task_id, preprocess=False)
)
loaded_skill = json.loads(skill_view(normalized, task_id=task_id))
except Exception:
return None
@@ -115,36 +133,14 @@ def _build_skill_message(
activation_note: str,
user_instruction: str = "",
runtime_note: str = "",
session_id: str | None = None,
) -> str:
"""Format a loaded skill into a user/system message payload."""
from tools.skills_tool import SKILLS_DIR
content = str(loaded_skill.get("content") or "")
# ── Template substitution and inline-shell expansion ──
# Done before anything else so downstream blocks (setup notes,
# supporting-file hints) see the expanded content.
skills_cfg = _load_skills_config()
if skills_cfg.get("template_vars", True):
content = _substitute_template_vars(content, skill_dir, session_id)
if skills_cfg.get("inline_shell", False):
timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10)
content = _expand_inline_shell(content, skill_dir, timeout)
parts = [activation_note, "", content.strip()]
# ── Inject the absolute skill directory so the agent can reference
# bundled scripts without an extra skill_view() round-trip. ──
if skill_dir:
parts.append("")
parts.append(f"[Skill directory: {skill_dir}]")
parts.append(
"Resolve any relative paths in this skill (e.g. `scripts/foo.js`, "
"`templates/config.yaml`) against that directory, then run them "
"with the terminal tool using the absolute path."
)
# ── Inject resolved skill config values ──
_inject_skill_config(loaded_skill, parts)
@@ -192,13 +188,11 @@ def _build_skill_message(
# Skill is from an external dir — use the skill name instead
skill_view_target = skill_dir.name
parts.append("")
parts.append("[This skill has supporting files:]")
parts.append("[This skill has supporting files you can load with the skill_view tool:]")
for sf in supporting:
parts.append(f"- {sf} -> {skill_dir / sf}")
parts.append(f"- {sf}")
parts.append(
f'\nLoad any of these with skill_view(name="{skill_view_target}", '
f'file_path="<path>"), or run scripts directly by absolute path '
f"(e.g. `node {skill_dir}/scripts/foo.js`)."
f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="<path>")'
)
if user_instruction:
@@ -222,7 +216,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
_skill_commands = {}
try:
from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
from agent.skill_utils import get_external_skills_dirs, iter_skill_index_files
from agent.skill_utils import get_external_skills_dirs
disabled = _get_disabled_skill_names()
seen_names: set = set()
@@ -233,7 +227,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
dirs_to_scan.extend(get_external_skills_dirs())
for scan_dir in dirs_to_scan:
for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
for skill_md in scan_dir.rglob("SKILL.md"):
if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
continue
try:
@@ -329,7 +323,7 @@ def build_skill_invocation_message(
loaded_skill, skill_dir, skill_name = loaded
activation_note = (
f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want '
f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want '
"you to follow its instructions. The full skill content is loaded below.]"
)
return _build_skill_message(
@@ -338,7 +332,6 @@ def build_skill_invocation_message(
activation_note,
user_instruction=user_instruction,
runtime_note=runtime_note,
session_id=task_id,
)
@@ -368,7 +361,7 @@ def build_preloaded_skills_prompt(
loaded_skill, skill_dir, skill_name = loaded
activation_note = (
f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill '
f'[SYSTEM: The user launched this CLI session with the "{skill_name}" skill '
"preloaded. Treat its instructions as active guidance for the duration of this "
"session unless the user overrides them.]"
)
@@ -377,7 +370,6 @@ def build_preloaded_skills_prompt(
loaded_skill,
skill_dir,
activation_note,
session_id=task_id,
)
)
loaded_names.append(skill_name)
-131
View File
@@ -1,131 +0,0 @@
"""Shared SKILL.md preprocessing helpers."""
import logging
import re
import subprocess
from pathlib import Path
logger = logging.getLogger(__name__)
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
# left as-is so the user can debug them.
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
# Matches inline shell snippets like: !`date +%Y-%m-%d`
# Non-greedy, single-line only -- no newlines inside the backticks.
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
# Cap inline-shell output so a runaway command can't blow out the context.
_INLINE_SHELL_MAX_OUTPUT = 4000
def load_skills_config() -> dict:
"""Load the ``skills`` section of config.yaml (best-effort)."""
try:
from hermes_cli.config import load_config
cfg = load_config() or {}
skills_cfg = cfg.get("skills")
if isinstance(skills_cfg, dict):
return skills_cfg
except Exception:
logger.debug("Could not read skills config", exc_info=True)
return {}
def substitute_template_vars(
content: str,
skill_dir: Path | None,
session_id: str | None,
) -> str:
"""Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
Only substitutes tokens for which a concrete value is available --
unresolved tokens are left in place so the author can spot them.
"""
if not content:
return content
skill_dir_str = str(skill_dir) if skill_dir else None
def _replace(match: re.Match) -> str:
token = match.group(1)
if token == "HERMES_SKILL_DIR" and skill_dir_str:
return skill_dir_str
if token == "HERMES_SESSION_ID" and session_id:
return str(session_id)
return match.group(0)
return _SKILL_TEMPLATE_RE.sub(_replace, content)
def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
"""Execute a single inline-shell snippet and return its stdout (trimmed).
Failures return a short ``[inline-shell error: ...]`` marker instead of
raising, so one bad snippet can't wreck the whole skill message.
"""
try:
completed = subprocess.run(
["bash", "-c", command],
cwd=str(cwd) if cwd else None,
capture_output=True,
text=True,
timeout=max(1, int(timeout)),
check=False,
)
except subprocess.TimeoutExpired:
return f"[inline-shell timeout after {timeout}s: {command}]"
except FileNotFoundError:
return "[inline-shell error: bash not found]"
except Exception as exc:
return f"[inline-shell error: {exc}]"
output = (completed.stdout or "").rstrip("\n")
if not output and completed.stderr:
output = completed.stderr.rstrip("\n")
if len(output) > _INLINE_SHELL_MAX_OUTPUT:
output = output[:_INLINE_SHELL_MAX_OUTPUT] + "...[truncated]"
return output
def expand_inline_shell(
content: str,
skill_dir: Path | None,
timeout: int,
) -> str:
"""Replace every !`cmd` snippet in ``content`` with its stdout.
Runs each snippet with the skill directory as CWD so relative paths in
the snippet work the way the author expects.
"""
if "!`" not in content:
return content
def _replace(match: re.Match) -> str:
cmd = match.group(1).strip()
if not cmd:
return ""
return run_inline_shell(cmd, skill_dir, timeout)
return _INLINE_SHELL_RE.sub(_replace, content)
def preprocess_skill_content(
content: str,
skill_dir: Path | None,
session_id: str | None = None,
skills_cfg: dict | None = None,
) -> str:
"""Apply configured SKILL.md template and inline-shell preprocessing."""
if not content:
return content
cfg = skills_cfg if isinstance(skills_cfg, dict) else load_skills_config()
if cfg.get("template_vars", True):
content = substitute_template_vars(content, skill_dir, session_id)
if cfg.get("inline_shell", False):
timeout = int(cfg.get("inline_shell_timeout", 10) or 10)
content = expand_inline_shell(content, skill_dir, timeout)
return content
+1 -1
View File
@@ -435,7 +435,7 @@ def iter_skill_index_files(skills_dir: Path, filename: str):
Excludes ``.git``, ``.github``, ``.hub`` directories.
"""
matches = []
for root, dirs, files in os.walk(skills_dir, followlinks=True):
for root, dirs, files in os.walk(skills_dir):
dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS]
if filename in files:
matches.append(Path(root) / filename)
+195
View File
@@ -0,0 +1,195 @@
"""Helpers for optional cheap-vs-strong model routing."""
from __future__ import annotations
import os
import re
from typing import Any, Dict, Optional
from utils import is_truthy_value
_COMPLEX_KEYWORDS = {
"debug",
"debugging",
"implement",
"implementation",
"refactor",
"patch",
"traceback",
"stacktrace",
"exception",
"error",
"analyze",
"analysis",
"investigate",
"architecture",
"design",
"compare",
"benchmark",
"optimize",
"optimise",
"review",
"terminal",
"shell",
"tool",
"tools",
"pytest",
"test",
"tests",
"plan",
"planning",
"delegate",
"subagent",
"cron",
"docker",
"kubernetes",
}
_URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE)
def _coerce_bool(value: Any, default: bool = False) -> bool:
return is_truthy_value(value, default=default)
def _coerce_int(value: Any, default: int) -> int:
try:
return int(value)
except (TypeError, ValueError):
return default
def choose_cheap_model_route(user_message: str, routing_config: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
"""Return the configured cheap-model route when a message looks simple.
Conservative by design: if the message has signs of code/tool/debugging/
long-form work, keep the primary model.
"""
cfg = routing_config or {}
if not _coerce_bool(cfg.get("enabled"), False):
return None
cheap_model = cfg.get("cheap_model") or {}
if not isinstance(cheap_model, dict):
return None
provider = str(cheap_model.get("provider") or "").strip().lower()
model = str(cheap_model.get("model") or "").strip()
if not provider or not model:
return None
text = (user_message or "").strip()
if not text:
return None
max_chars = _coerce_int(cfg.get("max_simple_chars"), 160)
max_words = _coerce_int(cfg.get("max_simple_words"), 28)
if len(text) > max_chars:
return None
if len(text.split()) > max_words:
return None
if text.count("\n") > 1:
return None
if "```" in text or "`" in text:
return None
if _URL_RE.search(text):
return None
lowered = text.lower()
words = {token.strip(".,:;!?()[]{}\"'`") for token in lowered.split()}
if words & _COMPLEX_KEYWORDS:
return None
route = dict(cheap_model)
route["provider"] = provider
route["model"] = model
route["routing_reason"] = "simple_turn"
return route
def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any]], primary: Dict[str, Any]) -> Dict[str, Any]:
"""Resolve the effective model/runtime for one turn.
Returns a dict with model/runtime/signature/label fields.
"""
route = choose_cheap_model_route(user_message, routing_config)
if not route:
return {
"model": primary.get("model"),
"runtime": {
"api_key": primary.get("api_key"),
"base_url": primary.get("base_url"),
"provider": primary.get("provider"),
"api_mode": primary.get("api_mode"),
"command": primary.get("command"),
"args": list(primary.get("args") or []),
"credential_pool": primary.get("credential_pool"),
},
"label": None,
"signature": (
primary.get("model"),
primary.get("provider"),
primary.get("base_url"),
primary.get("api_mode"),
primary.get("command"),
tuple(primary.get("args") or ()),
),
}
from hermes_cli.runtime_provider import resolve_runtime_provider
explicit_api_key = None
api_key_env = str(route.get("api_key_env") or "").strip()
if api_key_env:
explicit_api_key = os.getenv(api_key_env) or None
try:
runtime = resolve_runtime_provider(
requested=route.get("provider"),
explicit_api_key=explicit_api_key,
explicit_base_url=route.get("base_url"),
)
except Exception:
return {
"model": primary.get("model"),
"runtime": {
"api_key": primary.get("api_key"),
"base_url": primary.get("base_url"),
"provider": primary.get("provider"),
"api_mode": primary.get("api_mode"),
"command": primary.get("command"),
"args": list(primary.get("args") or []),
"credential_pool": primary.get("credential_pool"),
},
"label": None,
"signature": (
primary.get("model"),
primary.get("provider"),
primary.get("base_url"),
primary.get("api_mode"),
primary.get("command"),
tuple(primary.get("args") or ()),
),
}
return {
"model": route.get("model"),
"runtime": {
"api_key": runtime.get("api_key"),
"base_url": runtime.get("base_url"),
"provider": runtime.get("provider"),
"api_mode": runtime.get("api_mode"),
"command": runtime.get("command"),
"args": list(runtime.get("args") or []),
"credential_pool": runtime.get("credential_pool"),
},
"label": f"smart route → {route.get('model')} ({runtime.get('provider')})",
"signature": (
route.get("model"),
runtime.get("provider"),
runtime.get("base_url"),
runtime.get("api_mode"),
runtime.get("command"),
tuple(runtime.get("args") or ()),
),
}
+5 -34
View File
@@ -6,18 +6,12 @@ adds latency to the user-facing reply.
import logging
import threading
from typing import Callable, Optional
from typing import Optional
from agent.auxiliary_client import call_llm
logger = logging.getLogger(__name__)
# Callback signature: (task_name, exception) -> None. Used to surface
# auxiliary failures to the user through AIAgent._emit_auxiliary_failure
# so silent-drops (e.g. OpenRouter 402 exhausting the fallback chain)
# become visible instead of piling up as NULL session titles.
FailureCallback = Callable[[str, BaseException], None]
_TITLE_PROMPT = (
"Generate a short, descriptive title (3-7 words) for a conversation that starts with the "
"following exchange. The title should capture the main topic or intent. "
@@ -25,21 +19,11 @@ _TITLE_PROMPT = (
)
def generate_title(
user_message: str,
assistant_response: str,
timeout: float = 30.0,
failure_callback: Optional[FailureCallback] = None,
) -> Optional[str]:
def generate_title(user_message: str, assistant_response: str, timeout: float = 30.0) -> Optional[str]:
"""Generate a session title from the first exchange.
Uses the auxiliary LLM client (cheapest/fastest available model).
Returns the title string or None on failure.
``failure_callback`` is invoked with ``(task, exception)`` when the
auxiliary call raises the caller typically wires this to
``AIAgent._emit_auxiliary_failure`` so the user sees a warning instead
of silently accumulating untitled sessions.
"""
# Truncate long messages to keep the request small
user_snippet = user_message[:500] if user_message else ""
@@ -54,7 +38,7 @@ def generate_title(
response = call_llm(
task="title_generation",
messages=messages,
max_tokens=500,
max_tokens=30,
temperature=0.3,
timeout=timeout,
)
@@ -68,15 +52,7 @@ def generate_title(
title = title[:77] + "..."
return title if title else None
except Exception as e:
# Log at WARNING so this shows up in agent.log without debug mode.
# Full detail at debug level for operators who need the stack.
logger.warning("Title generation failed: %s", e)
logger.debug("Title generation traceback", exc_info=True)
if failure_callback is not None:
try:
failure_callback("title generation", e)
except Exception:
logger.debug("Title generation failure_callback raised", exc_info=True)
logger.debug("Title generation failed: %s", e)
return None
@@ -85,7 +61,6 @@ def auto_title_session(
session_id: str,
user_message: str,
assistant_response: str,
failure_callback: Optional[FailureCallback] = None,
) -> None:
"""Generate and set a session title if one doesn't already exist.
@@ -106,9 +81,7 @@ def auto_title_session(
except Exception:
return
title = generate_title(
user_message, assistant_response, failure_callback=failure_callback
)
title = generate_title(user_message, assistant_response)
if not title:
return
@@ -125,7 +98,6 @@ def maybe_auto_title(
user_message: str,
assistant_response: str,
conversation_history: list,
failure_callback: Optional[FailureCallback] = None,
) -> None:
"""Fire-and-forget title generation after the first exchange.
@@ -147,7 +119,6 @@ def maybe_auto_title(
thread = threading.Thread(
target=auto_title_session,
args=(session_db, session_id, user_message, assistant_response),
kwargs={"failure_callback": failure_callback},
daemon=True,
name="auto-title",
)
-68
View File
@@ -1,68 +0,0 @@
"""Transport layer types and registry for provider response normalization.
Usage:
from agent.transports import get_transport
transport = get_transport("anthropic_messages")
result = transport.normalize_response(raw_response)
"""
from agent.transports.types import (
NormalizedResponse,
ToolCall,
Usage,
build_tool_call,
map_finish_reason,
) # noqa: F401
_REGISTRY: dict = {}
_discovered: bool = False
def register_transport(api_mode: str, transport_cls: type) -> None:
"""Register a transport class for an api_mode string."""
_REGISTRY[api_mode] = transport_cls
def get_transport(api_mode: str):
"""Get a transport instance for the given api_mode.
Returns None if no transport is registered for this api_mode.
This allows gradual migration call sites can check for None
and fall back to the legacy code path.
"""
global _discovered
if not _discovered:
_discover_transports()
cls = _REGISTRY.get(api_mode)
if cls is None:
# The registry can be partially populated when a specific transport
# module was imported directly (for example chat_completions before
# codex). Discover on misses, not only when the registry is empty, so
# test/order-dependent imports do not make valid api_modes unavailable.
_discover_transports()
cls = _REGISTRY.get(api_mode)
if cls is None:
return None
return cls()
def _discover_transports() -> None:
"""Import all transport modules to trigger auto-registration."""
global _discovered
_discovered = True
try:
import agent.transports.anthropic # noqa: F401
except ImportError:
pass
try:
import agent.transports.codex # noqa: F401
except ImportError:
pass
try:
import agent.transports.chat_completions # noqa: F401
except ImportError:
pass
try:
import agent.transports.bedrock # noqa: F401
except ImportError:
pass
-177
View File
@@ -1,177 +0,0 @@
"""Anthropic Messages API transport.
Delegates to the existing adapter functions in agent/anthropic_adapter.py.
This transport owns format conversion and normalization NOT client lifecycle.
"""
from typing import Any, Dict, List, Optional
from agent.transports.base import ProviderTransport
from agent.transports.types import NormalizedResponse
class AnthropicTransport(ProviderTransport):
"""Transport for api_mode='anthropic_messages'.
Wraps the existing functions in anthropic_adapter.py behind the
ProviderTransport ABC. Each method delegates no logic is duplicated.
"""
@property
def api_mode(self) -> str:
return "anthropic_messages"
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
"""Convert OpenAI messages to Anthropic (system, messages) tuple.
kwargs:
base_url: Optional[str] affects thinking signature handling.
"""
from agent.anthropic_adapter import convert_messages_to_anthropic
base_url = kwargs.get("base_url")
return convert_messages_to_anthropic(messages, base_url=base_url)
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
"""Convert OpenAI tool schemas to Anthropic input_schema format."""
from agent.anthropic_adapter import convert_tools_to_anthropic
return convert_tools_to_anthropic(tools)
def build_kwargs(
self,
model: str,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
**params,
) -> Dict[str, Any]:
"""Build Anthropic messages.create() kwargs.
Calls convert_messages and convert_tools internally.
params (all optional):
max_tokens: int
reasoning_config: dict | None
tool_choice: str | None
is_oauth: bool
preserve_dots: bool
context_length: int | None
base_url: str | None
fast_mode: bool
"""
from agent.anthropic_adapter import build_anthropic_kwargs
return build_anthropic_kwargs(
model=model,
messages=messages,
tools=tools,
max_tokens=params.get("max_tokens", 16384),
reasoning_config=params.get("reasoning_config"),
tool_choice=params.get("tool_choice"),
is_oauth=params.get("is_oauth", False),
preserve_dots=params.get("preserve_dots", False),
context_length=params.get("context_length"),
base_url=params.get("base_url"),
fast_mode=params.get("fast_mode", False),
)
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize Anthropic response to NormalizedResponse.
Parses content blocks (text, thinking, tool_use), maps stop_reason
to OpenAI finish_reason, and collects reasoning_details in provider_data.
"""
import json
from agent.anthropic_adapter import _to_plain_data
from agent.transports.types import ToolCall
strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
_MCP_PREFIX = "mcp_"
text_parts = []
reasoning_parts = []
reasoning_details = []
tool_calls = []
for block in response.content:
if block.type == "text":
text_parts.append(block.text)
elif block.type == "thinking":
reasoning_parts.append(block.thinking)
block_dict = _to_plain_data(block)
if isinstance(block_dict, dict):
reasoning_details.append(block_dict)
elif block.type == "tool_use":
name = block.name
if strip_tool_prefix and name.startswith(_MCP_PREFIX):
name = name[len(_MCP_PREFIX):]
tool_calls.append(
ToolCall(
id=block.id,
name=name,
arguments=json.dumps(block.input),
)
)
finish_reason = self._STOP_REASON_MAP.get(response.stop_reason, "stop")
provider_data = {}
if reasoning_details:
provider_data["reasoning_details"] = reasoning_details
return NormalizedResponse(
content="\n".join(text_parts) if text_parts else None,
tool_calls=tool_calls or None,
finish_reason=finish_reason,
reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
usage=None,
provider_data=provider_data or None,
)
def validate_response(self, response: Any) -> bool:
"""Check Anthropic response structure is valid.
An empty content list is legitimate when ``stop_reason == "end_turn"``
the model's canonical way of signalling "nothing more to add" after
a tool turn that already delivered the user-facing text. Treating it
as invalid falsely retries a completed response.
"""
if response is None:
return False
content_blocks = getattr(response, "content", None)
if not isinstance(content_blocks, list):
return False
if not content_blocks:
return getattr(response, "stop_reason", None) == "end_turn"
return True
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
"""Extract Anthropic cache_read and cache_creation token counts."""
usage = getattr(response, "usage", None)
if usage is None:
return None
cached = getattr(usage, "cache_read_input_tokens", 0) or 0
written = getattr(usage, "cache_creation_input_tokens", 0) or 0
if cached or written:
return {"cached_tokens": cached, "creation_tokens": written}
return None
# Promote the adapter's canonical mapping to module level so it's shared
_STOP_REASON_MAP = {
"end_turn": "stop",
"tool_use": "tool_calls",
"max_tokens": "length",
"stop_sequence": "stop",
"refusal": "content_filter",
"model_context_window_exceeded": "length",
}
def map_finish_reason(self, raw_reason: str) -> str:
"""Map Anthropic stop_reason to OpenAI finish_reason."""
return self._STOP_REASON_MAP.get(raw_reason, "stop")
# Auto-register on import
from agent.transports import register_transport # noqa: E402
register_transport("anthropic_messages", AnthropicTransport)
-89
View File
@@ -1,89 +0,0 @@
"""Abstract base for provider transports.
A transport owns the data path for one api_mode:
convert_messages convert_tools build_kwargs normalize_response
It does NOT own: client construction, streaming, credential refresh,
prompt caching, interrupt handling, or retry logic. Those stay on AIAgent.
"""
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
from agent.transports.types import NormalizedResponse
class ProviderTransport(ABC):
"""Base class for provider-specific format conversion and normalization."""
@property
@abstractmethod
def api_mode(self) -> str:
"""The api_mode string this transport handles (e.g. 'anthropic_messages')."""
...
@abstractmethod
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
"""Convert OpenAI-format messages to provider-native format.
Returns provider-specific structure (e.g. (system, messages) for Anthropic,
or the messages list unchanged for chat_completions).
"""
...
@abstractmethod
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
"""Convert OpenAI-format tool definitions to provider-native format.
Returns provider-specific tool list (e.g. Anthropic input_schema format).
"""
...
@abstractmethod
def build_kwargs(
self,
model: str,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
**params,
) -> Dict[str, Any]:
"""Build the complete API call kwargs dict.
This is the primary entry point it typically calls convert_messages()
and convert_tools() internally, then adds model-specific config.
Returns a dict ready to be passed to the provider's SDK client.
"""
...
@abstractmethod
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize a raw provider response to the shared NormalizedResponse type.
This is the only method that returns a transport-layer type.
"""
...
def validate_response(self, response: Any) -> bool:
"""Optional: check if the raw response is structurally valid.
Returns True if valid, False if the response should be treated as invalid.
Default implementation always returns True.
"""
return True
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
"""Optional: extract provider-specific cache hit/creation stats.
Returns dict with 'cached_tokens' and 'creation_tokens', or None.
Default returns None.
"""
return None
def map_finish_reason(self, raw_reason: str) -> str:
"""Optional: map provider-specific stop reason to OpenAI equivalent.
Default returns the raw reason unchanged. Override for providers
with different stop reason vocabularies.
"""
return raw_reason
-154
View File
@@ -1,154 +0,0 @@
"""AWS Bedrock Converse API transport.
Delegates to the existing adapter functions in agent/bedrock_adapter.py.
Bedrock uses its own boto3 client (not the OpenAI SDK), so the transport
owns format conversion and normalization, while client construction and
boto3 calls stay on AIAgent.
"""
from typing import Any, Dict, List, Optional
from agent.transports.base import ProviderTransport
from agent.transports.types import NormalizedResponse, ToolCall, Usage
class BedrockTransport(ProviderTransport):
"""Transport for api_mode='bedrock_converse'."""
@property
def api_mode(self) -> str:
return "bedrock_converse"
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
"""Convert OpenAI messages to Bedrock Converse format."""
from agent.bedrock_adapter import convert_messages_to_converse
return convert_messages_to_converse(messages)
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
"""Convert OpenAI tool schemas to Bedrock Converse toolConfig."""
from agent.bedrock_adapter import convert_tools_to_converse
return convert_tools_to_converse(tools)
def build_kwargs(
self,
model: str,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
**params,
) -> Dict[str, Any]:
"""Build Bedrock converse() kwargs.
Calls convert_messages and convert_tools internally.
params:
max_tokens: int output token limit (default 4096)
temperature: float | None
guardrail_config: dict | None Bedrock guardrails
region: str AWS region (default 'us-east-1')
"""
from agent.bedrock_adapter import build_converse_kwargs
region = params.get("region", "us-east-1")
guardrail = params.get("guardrail_config")
kwargs = build_converse_kwargs(
model=model,
messages=messages,
tools=tools,
max_tokens=params.get("max_tokens", 4096),
temperature=params.get("temperature"),
guardrail_config=guardrail,
)
# Sentinel keys for dispatch — agent pops these before the boto3 call
kwargs["__bedrock_converse__"] = True
kwargs["__bedrock_region__"] = region
return kwargs
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize Bedrock response to NormalizedResponse.
Handles two shapes:
1. Raw boto3 dict (from direct converse() calls)
2. Already-normalized SimpleNamespace with .choices (from dispatch site)
"""
from agent.bedrock_adapter import normalize_converse_response
# Normalize to OpenAI-compatible SimpleNamespace
if hasattr(response, "choices") and response.choices:
# Already normalized at dispatch site
ns = response
else:
# Raw boto3 dict
ns = normalize_converse_response(response)
choice = ns.choices[0]
msg = choice.message
finish_reason = choice.finish_reason or "stop"
tool_calls = None
if msg.tool_calls:
tool_calls = [
ToolCall(
id=tc.id,
name=tc.function.name,
arguments=tc.function.arguments,
)
for tc in msg.tool_calls
]
usage = None
if hasattr(ns, "usage") and ns.usage:
u = ns.usage
usage = Usage(
prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
completion_tokens=getattr(u, "completion_tokens", 0) or 0,
total_tokens=getattr(u, "total_tokens", 0) or 0,
)
reasoning = getattr(msg, "reasoning", None) or getattr(msg, "reasoning_content", None)
return NormalizedResponse(
content=msg.content,
tool_calls=tool_calls,
finish_reason=finish_reason,
reasoning=reasoning,
usage=usage,
)
def validate_response(self, response: Any) -> bool:
"""Check Bedrock response structure.
After normalize_converse_response, the response has OpenAI-compatible
.choices same check as chat_completions.
"""
if response is None:
return False
# Raw Bedrock dict response — check for 'output' key
if isinstance(response, dict):
return "output" in response
# Already-normalized SimpleNamespace
if hasattr(response, "choices"):
return bool(response.choices)
return False
def map_finish_reason(self, raw_reason: str) -> str:
"""Map Bedrock stop reason to OpenAI finish_reason.
The adapter already does this mapping inside normalize_converse_response,
so this is only used for direct access to raw responses.
"""
_MAP = {
"end_turn": "stop",
"tool_use": "tool_calls",
"max_tokens": "length",
"stop_sequence": "stop",
"guardrail_intervened": "content_filter",
"content_filtered": "content_filter",
}
return _MAP.get(raw_reason, "stop")
# Auto-register on import
from agent.transports import register_transport # noqa: E402
register_transport("bedrock_converse", BedrockTransport)
-402
View File
@@ -1,402 +0,0 @@
"""OpenAI Chat Completions transport.
Handles the default api_mode ('chat_completions') used by ~16 OpenAI-compatible
providers (OpenRouter, Nous, NVIDIA, Qwen, Ollama, DeepSeek, xAI, Kimi, etc.).
Messages and tools are already in OpenAI format convert_messages and
convert_tools are near-identity. The complexity lives in build_kwargs
which has provider-specific conditionals for max_tokens defaults,
reasoning configuration, temperature handling, and extra_body assembly.
"""
import copy
from typing import Any
from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
from agent.prompt_builder import DEVELOPER_ROLE_MODELS
from agent.transports.base import ProviderTransport
from agent.transports.types import NormalizedResponse, ToolCall, Usage
class ChatCompletionsTransport(ProviderTransport):
"""Transport for api_mode='chat_completions'.
The default path for OpenAI-compatible providers.
"""
@property
def api_mode(self) -> str:
return "chat_completions"
def convert_messages(
self, messages: list[dict[str, Any]], **kwargs
) -> list[dict[str, Any]]:
"""Messages are already in OpenAI format — sanitize Codex leaks only.
Strips Codex Responses API fields (``codex_reasoning_items`` /
``codex_message_items`` on the message, ``call_id``/``response_item_id``
on tool_calls) that strict chat-completions providers reject with 400/422.
"""
needs_sanitize = False
for msg in messages:
if not isinstance(msg, dict):
continue
if "codex_reasoning_items" in msg or "codex_message_items" in msg:
needs_sanitize = True
break
tool_calls = msg.get("tool_calls")
if isinstance(tool_calls, list):
for tc in tool_calls:
if isinstance(tc, dict) and (
"call_id" in tc or "response_item_id" in tc
):
needs_sanitize = True
break
if needs_sanitize:
break
if not needs_sanitize:
return messages
sanitized = copy.deepcopy(messages)
for msg in sanitized:
if not isinstance(msg, dict):
continue
msg.pop("codex_reasoning_items", None)
msg.pop("codex_message_items", None)
tool_calls = msg.get("tool_calls")
if isinstance(tool_calls, list):
for tc in tool_calls:
if isinstance(tc, dict):
tc.pop("call_id", None)
tc.pop("response_item_id", None)
return sanitized
def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Tools are already in OpenAI format — identity."""
return tools
def build_kwargs(
self,
model: str,
messages: list[dict[str, Any]],
tools: list[dict[str, Any]] | None = None,
**params,
) -> dict[str, Any]:
"""Build chat.completions.create() kwargs.
params (all optional):
timeout: float API call timeout
max_tokens: int | None user-configured max tokens
ephemeral_max_output_tokens: int | None one-shot override
max_tokens_param_fn: callable returns {max_tokens: N} or {max_completion_tokens: N}
reasoning_config: dict | None
request_overrides: dict | None
session_id: str | None
model_lower: str lowercase model name for pattern matching
# Provider profile path (all per-provider quirks live in providers/)
provider_profile: ProviderProfile | None when present, delegates to
_build_kwargs_from_profile(); all flag params below are bypassed.
# Remaining flags — only used by the legacy fallback for unregistered
# providers (i.e. get_provider_profile() returned None). Known
# providers all go through provider_profile.
qwen_session_metadata: dict | None
supports_reasoning: bool
anthropic_max_output: int | None
extra_body_additions: dict | None
"""
# Codex sanitization: drop reasoning_items / call_id / response_item_id
sanitized = self.convert_messages(messages)
# ── Provider profile: single-path when present ──────────────────
_profile = params.get("provider_profile")
if _profile:
return self._build_kwargs_from_profile(
_profile, model, sanitized, tools, params
)
# ── Legacy fallback (unregistered / unknown provider) ───────────
# Reached only when get_provider_profile() returned None.
# Known providers always go through the profile path above.
# Developer role swap for GPT-5/Codex models
model_lower = params.get("model_lower", (model or "").lower())
if (
sanitized
and isinstance(sanitized[0], dict)
and sanitized[0].get("role") == "system"
and any(p in model_lower for p in DEVELOPER_ROLE_MODELS)
):
sanitized = list(sanitized)
sanitized[0] = {**sanitized[0], "role": "developer"}
api_kwargs: dict[str, Any] = {
"model": model,
"messages": sanitized,
}
timeout = params.get("timeout")
if timeout is not None:
api_kwargs["timeout"] = timeout
# Tools
if tools:
# Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting
# tool parameters here keeps aggregator routes (Nous, OpenRouter,
# etc.) compatible, in addition to direct moonshot.ai endpoints.
if is_moonshot_model(model):
tools = sanitize_moonshot_tools(tools)
api_kwargs["tools"] = tools
# max_tokens resolution — priority: ephemeral > user > provider default
max_tokens_fn = params.get("max_tokens_param_fn")
ephemeral = params.get("ephemeral_max_output_tokens")
max_tokens = params.get("max_tokens")
anthropic_max_out = params.get("anthropic_max_output")
if ephemeral is not None and max_tokens_fn:
api_kwargs.update(max_tokens_fn(ephemeral))
elif max_tokens is not None and max_tokens_fn:
api_kwargs.update(max_tokens_fn(max_tokens))
elif anthropic_max_out is not None:
api_kwargs["max_tokens"] = anthropic_max_out
# extra_body assembly
extra_body: dict[str, Any] = {}
# Generic reasoning passthrough for unknown providers
if params.get("supports_reasoning", False):
reasoning_config = params.get("reasoning_config")
if reasoning_config is not None:
extra_body["reasoning"] = dict(reasoning_config)
else:
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
# Merge any pre-built extra_body additions
additions = params.get("extra_body_additions")
if additions:
extra_body.update(additions)
if extra_body:
api_kwargs["extra_body"] = extra_body
# Request overrides last (service_tier etc.)
overrides = params.get("request_overrides")
if overrides:
api_kwargs.update(overrides)
return api_kwargs
def _build_kwargs_from_profile(self, profile, model, sanitized, tools, params):
"""Build API kwargs using a ProviderProfile — single path, no legacy flags.
This method replaces the entire flag-based kwargs assembly when a
provider_profile is passed. Every quirk comes from the profile object.
"""
from providers.base import OMIT_TEMPERATURE
# Message preprocessing
sanitized = profile.prepare_messages(sanitized)
# Developer role swap — model-name-based, applies to all providers
_model_lower = (model or "").lower()
if (
sanitized
and isinstance(sanitized[0], dict)
and sanitized[0].get("role") == "system"
and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS)
):
sanitized = list(sanitized)
sanitized[0] = {**sanitized[0], "role": "developer"}
api_kwargs: dict[str, Any] = {
"model": model,
"messages": sanitized,
}
# Temperature
if profile.fixed_temperature is OMIT_TEMPERATURE:
pass # Don't include temperature at all
elif profile.fixed_temperature is not None:
api_kwargs["temperature"] = profile.fixed_temperature
else:
# Use caller's temperature if provided
temp = params.get("temperature")
if temp is not None:
api_kwargs["temperature"] = temp
# Timeout
timeout = params.get("timeout")
if timeout is not None:
api_kwargs["timeout"] = timeout
# Tools — apply Moonshot/Kimi schema sanitization regardless of path
if tools:
if is_moonshot_model(model):
tools = sanitize_moonshot_tools(tools)
api_kwargs["tools"] = tools
# max_tokens resolution — priority: ephemeral > user > profile default
max_tokens_fn = params.get("max_tokens_param_fn")
ephemeral = params.get("ephemeral_max_output_tokens")
user_max = params.get("max_tokens")
anthropic_max = params.get("anthropic_max_output")
if ephemeral is not None and max_tokens_fn:
api_kwargs.update(max_tokens_fn(ephemeral))
elif user_max is not None and max_tokens_fn:
api_kwargs.update(max_tokens_fn(user_max))
elif profile.default_max_tokens and max_tokens_fn:
api_kwargs.update(max_tokens_fn(profile.default_max_tokens))
elif anthropic_max is not None:
api_kwargs["max_tokens"] = anthropic_max
# Provider-specific api_kwargs extras (reasoning_effort, metadata, etc.)
reasoning_config = params.get("reasoning_config")
extra_body_from_profile, top_level_from_profile = (
profile.build_api_kwargs_extras(
reasoning_config=reasoning_config,
supports_reasoning=params.get("supports_reasoning", False),
qwen_session_metadata=params.get("qwen_session_metadata"),
model=model,
ollama_num_ctx=params.get("ollama_num_ctx"),
)
)
api_kwargs.update(top_level_from_profile)
# extra_body assembly
extra_body: dict[str, Any] = {}
# Profile's extra_body (tags, provider prefs, vl_high_resolution, etc.)
profile_body = profile.build_extra_body(
session_id=params.get("session_id"),
provider_preferences=params.get("provider_preferences"),
)
if profile_body:
extra_body.update(profile_body)
# Profile's reasoning/thinking extra_body entries
if extra_body_from_profile:
extra_body.update(extra_body_from_profile)
# Merge any pre-built extra_body additions from the caller
additions = params.get("extra_body_additions")
if additions:
extra_body.update(additions)
# Request overrides (user config)
overrides = params.get("request_overrides")
if overrides:
for k, v in overrides.items():
if k == "extra_body" and isinstance(v, dict):
extra_body.update(v)
else:
api_kwargs[k] = v
if extra_body:
api_kwargs["extra_body"] = extra_body
return api_kwargs
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize OpenAI ChatCompletion to NormalizedResponse.
For chat_completions, this is near-identity the response is already
in OpenAI format. extra_content on tool_calls (Gemini thought_signature)
is preserved via ToolCall.provider_data. reasoning_details (OpenRouter
unified format) and reasoning_content (DeepSeek/Moonshot) are also
preserved for downstream replay.
"""
choice = response.choices[0]
msg = choice.message
finish_reason = choice.finish_reason or "stop"
tool_calls = None
if msg.tool_calls:
tool_calls = []
for tc in msg.tool_calls:
# Preserve provider-specific extras on the tool call.
# Gemini 3 thinking models attach extra_content with
# thought_signature — without replay on the next turn the API
# rejects the request with 400.
tc_provider_data: dict[str, Any] = {}
extra = getattr(tc, "extra_content", None)
if extra is None and hasattr(tc, "model_extra"):
extra = (tc.model_extra or {}).get("extra_content")
if extra is not None:
if hasattr(extra, "model_dump"):
try:
extra = extra.model_dump()
except Exception:
pass
tc_provider_data["extra_content"] = extra
tool_calls.append(
ToolCall(
id=tc.id,
name=tc.function.name,
arguments=tc.function.arguments,
provider_data=tc_provider_data or None,
)
)
usage = None
if hasattr(response, "usage") and response.usage:
u = response.usage
usage = Usage(
prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
completion_tokens=getattr(u, "completion_tokens", 0) or 0,
total_tokens=getattr(u, "total_tokens", 0) or 0,
)
# Preserve reasoning fields separately. DeepSeek/Moonshot use
# ``reasoning_content``; others use ``reasoning``. Downstream code
# (_extract_reasoning, thinking-prefill retry) reads both distinctly,
# so keep them apart in provider_data rather than merging.
reasoning = getattr(msg, "reasoning", None)
reasoning_content = getattr(msg, "reasoning_content", None)
provider_data: dict[str, Any] = {}
if reasoning_content:
provider_data["reasoning_content"] = reasoning_content
rd = getattr(msg, "reasoning_details", None)
if rd:
provider_data["reasoning_details"] = rd
return NormalizedResponse(
content=msg.content,
tool_calls=tool_calls,
finish_reason=finish_reason,
reasoning=reasoning,
usage=usage,
provider_data=provider_data or None,
)
def validate_response(self, response: Any) -> bool:
"""Check that response has valid choices."""
if response is None:
return False
if not hasattr(response, "choices") or response.choices is None:
return False
if not response.choices:
return False
return True
def extract_cache_stats(self, response: Any) -> dict[str, int] | None:
"""Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
usage = getattr(response, "usage", None)
if usage is None:
return None
details = getattr(usage, "prompt_tokens_details", None)
if details is None:
return None
cached = getattr(details, "cached_tokens", 0) or 0
written = getattr(details, "cache_write_tokens", 0) or 0
if cached or written:
return {"cached_tokens": cached, "creation_tokens": written}
return None
# Auto-register on import
from agent.transports import register_transport # noqa: E402
register_transport("chat_completions", ChatCompletionsTransport)
-237
View File
@@ -1,237 +0,0 @@
"""OpenAI Responses API (Codex) transport.
Delegates to the existing adapter functions in agent/codex_responses_adapter.py.
This transport owns format conversion and normalization NOT client lifecycle,
streaming, or the _run_codex_stream() call path.
"""
from typing import Any, Dict, List, Optional
from agent.transports.base import ProviderTransport
from agent.transports.types import NormalizedResponse, ToolCall, Usage
class ResponsesApiTransport(ProviderTransport):
"""Transport for api_mode='codex_responses'.
Wraps the functions extracted into codex_responses_adapter.py (PR 1).
"""
@property
def api_mode(self) -> str:
return "codex_responses"
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
"""Convert OpenAI chat messages to Responses API input items."""
from agent.codex_responses_adapter import _chat_messages_to_responses_input
return _chat_messages_to_responses_input(messages)
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
"""Convert OpenAI tool schemas to Responses API function definitions."""
from agent.codex_responses_adapter import _responses_tools
return _responses_tools(tools)
def build_kwargs(
self,
model: str,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
**params,
) -> Dict[str, Any]:
"""Build Responses API kwargs.
Calls convert_messages and convert_tools internally.
params:
instructions: str system prompt (extracted from messages[0] if not given)
reasoning_config: dict | None {effort, enabled}
session_id: str | None used for prompt_cache_key + xAI conv header
max_tokens: int | None max_output_tokens
request_overrides: dict | None extra kwargs merged in
provider: str | None provider name for backend-specific logic
base_url: str | None endpoint URL
base_url_hostname: str | None hostname for backend detection
is_github_responses: bool Copilot/GitHub models backend
is_codex_backend: bool chatgpt.com/backend-api/codex
is_xai_responses: bool xAI/Grok backend
github_reasoning_extra: dict | None Copilot reasoning params
"""
from agent.codex_responses_adapter import (
_chat_messages_to_responses_input,
_responses_tools,
)
from run_agent import DEFAULT_AGENT_IDENTITY
instructions = params.get("instructions", "")
payload_messages = messages
if not instructions:
if messages and messages[0].get("role") == "system":
instructions = str(messages[0].get("content") or "").strip()
payload_messages = messages[1:]
if not instructions:
instructions = DEFAULT_AGENT_IDENTITY
is_github_responses = params.get("is_github_responses", False)
is_codex_backend = params.get("is_codex_backend", False)
is_xai_responses = params.get("is_xai_responses", False)
# Resolve reasoning effort
reasoning_effort = "medium"
reasoning_enabled = True
reasoning_config = params.get("reasoning_config")
if reasoning_config and isinstance(reasoning_config, dict):
if reasoning_config.get("enabled") is False:
reasoning_enabled = False
elif reasoning_config.get("effort"):
reasoning_effort = reasoning_config["effort"]
_effort_clamp = {"minimal": "low"}
reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)
kwargs = {
"model": model,
"instructions": instructions,
"input": _chat_messages_to_responses_input(payload_messages),
"tools": _responses_tools(tools),
"tool_choice": "auto",
"parallel_tool_calls": True,
"store": False,
}
session_id = params.get("session_id")
if not is_github_responses and session_id:
kwargs["prompt_cache_key"] = session_id
if reasoning_enabled and is_xai_responses:
kwargs["include"] = ["reasoning.encrypted_content"]
elif reasoning_enabled:
if is_github_responses:
github_reasoning = params.get("github_reasoning_extra")
if github_reasoning is not None:
kwargs["reasoning"] = github_reasoning
else:
kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
kwargs["include"] = ["reasoning.encrypted_content"]
elif not is_github_responses and not is_xai_responses:
kwargs["include"] = []
request_overrides = params.get("request_overrides")
if request_overrides:
kwargs.update(request_overrides)
if is_codex_backend:
prompt_cache_key = kwargs.get("prompt_cache_key")
cache_scope_id = str(prompt_cache_key or session_id or "").strip()
if cache_scope_id:
existing_extra_headers = kwargs.get("extra_headers")
merged_extra_headers: Dict[str, str] = {}
if isinstance(existing_extra_headers, dict):
merged_extra_headers.update(
{
str(key): str(value)
for key, value in existing_extra_headers.items()
if key and value is not None
}
)
merged_extra_headers["session_id"] = cache_scope_id
merged_extra_headers["x-client-request-id"] = cache_scope_id
kwargs["extra_headers"] = merged_extra_headers
max_tokens = params.get("max_tokens")
if max_tokens is not None and not is_codex_backend:
kwargs["max_output_tokens"] = max_tokens
if is_xai_responses and session_id:
kwargs["extra_headers"] = {"x-grok-conv-id": session_id}
return kwargs
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize Codex Responses API response to NormalizedResponse."""
from agent.codex_responses_adapter import (
_normalize_codex_response,
_extract_responses_message_text,
_extract_responses_reasoning_text,
)
# _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
msg, finish_reason = _normalize_codex_response(response)
tool_calls = None
if msg and msg.tool_calls:
tool_calls = []
for tc in msg.tool_calls:
provider_data = {}
if hasattr(tc, "call_id") and tc.call_id:
provider_data["call_id"] = tc.call_id
if hasattr(tc, "response_item_id") and tc.response_item_id:
provider_data["response_item_id"] = tc.response_item_id
tool_calls.append(ToolCall(
id=tc.id if hasattr(tc, "id") else (tc.function.name if hasattr(tc, "function") else None),
name=tc.function.name if hasattr(tc, "function") else getattr(tc, "name", ""),
arguments=tc.function.arguments if hasattr(tc, "function") else getattr(tc, "arguments", "{}"),
provider_data=provider_data or None,
))
# Extract reasoning items for provider_data
provider_data = {}
if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
if msg and hasattr(msg, "codex_message_items") and msg.codex_message_items:
provider_data["codex_message_items"] = msg.codex_message_items
if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
provider_data["reasoning_details"] = msg.reasoning_details
return NormalizedResponse(
content=msg.content if msg else None,
tool_calls=tool_calls,
finish_reason=finish_reason or "stop",
reasoning=msg.reasoning if msg and hasattr(msg, "reasoning") else None,
usage=None, # Codex usage is extracted separately in normalize_usage()
provider_data=provider_data or None,
)
def validate_response(self, response: Any) -> bool:
"""Check Codex Responses API response has valid output structure.
Returns True only if response.output is a non-empty list.
Does NOT check output_text fallback the caller handles that
with diagnostic logging for stream backfill recovery.
"""
if response is None:
return False
output = getattr(response, "output", None)
if not isinstance(output, list) or not output:
return False
return True
def preflight_kwargs(self, api_kwargs: Any, *, allow_stream: bool = False) -> dict:
"""Validate and sanitize Codex API kwargs before the call.
Normalizes input items, strips unsupported fields, validates structure.
"""
from agent.codex_responses_adapter import _preflight_codex_api_kwargs
return _preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream)
def map_finish_reason(self, raw_reason: str) -> str:
"""Map Codex response.status to OpenAI finish_reason.
Codex uses response.status ('completed', 'incomplete') +
response.incomplete_details.reason for granular mapping.
This method handles the simple status string; the caller
should check incomplete_details separately for 'max_output_tokens'.
"""
_MAP = {
"completed": "stop",
"incomplete": "length",
"failed": "stop",
"cancelled": "stop",
}
return _MAP.get(raw_reason, "stop")
# Auto-register on import
from agent.transports import register_transport # noqa: E402
register_transport("codex_responses", ResponsesApiTransport)
-162
View File
@@ -1,162 +0,0 @@
"""Shared types for normalized provider responses.
These dataclasses define the canonical shape that all provider adapters
normalize responses to. The shared surface is intentionally minimal
only fields that every downstream consumer reads are top-level.
Protocol-specific state goes in ``provider_data`` dicts (response-level
and per-tool-call) so that protocol-aware code paths can access it
without polluting the shared type.
"""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from typing import Any
@dataclass
class ToolCall:
"""A normalized tool call from any provider.
``id`` is the protocol's canonical identifier — what gets used in
``tool_call_id`` / ``tool_use_id`` when constructing tool result
messages. May be ``None`` when the provider omits it; the agent
fills it via ``_deterministic_call_id()`` before storing in history.
``provider_data`` carries per-tool-call protocol metadata that only
protocol-aware code reads:
* Codex: ``{"call_id": "call_XXX", "response_item_id": "fc_XXX"}``
* Gemini: ``{"extra_content": {"google": {"thought_signature": "..."}}}``
* Others: ``None``
"""
id: str | None
name: str
arguments: str # JSON string
provider_data: dict[str, Any] | None = field(default=None, repr=False)
# ── Backward compatibility ──────────────────────────────────
# The agent loop reads tc.function.name / tc.function.arguments
# throughout run_agent.py (45+ sites). These properties let
# NormalizedResponse pass through without the _nr_to_assistant_message
# shim, while keeping ToolCall's canonical fields flat.
@property
def type(self) -> str:
return "function"
@property
def function(self) -> ToolCall:
"""Return self so tc.function.name / tc.function.arguments work."""
return self
@property
def call_id(self) -> str | None:
"""Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
return (self.provider_data or {}).get("call_id")
@property
def response_item_id(self) -> str | None:
"""Codex response_item_id from provider_data."""
return (self.provider_data or {}).get("response_item_id")
@property
def extra_content(self) -> Optional[Dict[str, Any]]:
"""Gemini extra_content (thought_signature) from provider_data.
Gemini 3 thinking models attach ``extra_content`` with a
``thought_signature`` to each tool call. This signature must be
replayed on subsequent API calls without it the API rejects the
request with HTTP 400. The chat_completions transport stores this
in ``provider_data["extra_content"]``; this property exposes it so
``_build_assistant_message`` can ``getattr(tc, "extra_content")``
uniformly.
"""
return (self.provider_data or {}).get("extra_content")
@dataclass
class Usage:
"""Token usage from an API response."""
prompt_tokens: int = 0
completion_tokens: int = 0
total_tokens: int = 0
cached_tokens: int = 0
@dataclass
class NormalizedResponse:
"""Normalized API response from any provider.
Shared fields are truly cross-provider every caller can rely on
them without branching on api_mode. Protocol-specific state goes in
``provider_data`` so that only protocol-aware code paths read it.
Response-level ``provider_data`` examples:
* Anthropic: ``{"reasoning_details": [...]}``
* Codex: ``{"codex_reasoning_items": [...], "codex_message_items": [...]}``
* Others: ``None``
"""
content: str | None
tool_calls: list[ToolCall] | None
finish_reason: str # "stop", "tool_calls", "length", "content_filter"
reasoning: str | None = None
usage: Usage | None = None
provider_data: dict[str, Any] | None = field(default=None, repr=False)
# ── Backward compatibility ──────────────────────────────────
# The shim _nr_to_assistant_message() mapped these from provider_data.
# These properties let NormalizedResponse pass through directly.
@property
def reasoning_content(self) -> str | None:
pd = self.provider_data or {}
return pd.get("reasoning_content")
@property
def reasoning_details(self):
pd = self.provider_data or {}
return pd.get("reasoning_details")
@property
def codex_reasoning_items(self):
pd = self.provider_data or {}
return pd.get("codex_reasoning_items")
@property
def codex_message_items(self):
pd = self.provider_data or {}
return pd.get("codex_message_items")
# ---------------------------------------------------------------------------
# Factory helpers
# ---------------------------------------------------------------------------
def build_tool_call(
id: str | None,
name: str,
arguments: Any,
**provider_fields: Any,
) -> ToolCall:
"""Build a ``ToolCall``, auto-serialising *arguments* if it's a dict.
Any extra keyword arguments are collected into ``provider_data``.
"""
args_str = json.dumps(arguments) if isinstance(arguments, dict) else str(arguments)
pd = dict(provider_fields) if provider_fields else None
return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
def map_finish_reason(reason: str | None, mapping: dict[str, str]) -> str:
"""Translate a provider-specific stop reason to the normalised set.
Falls back to ``"stop"`` for unknown or ``None`` reasons.
"""
if reason is None:
return "stop"
return mapping.get(reason, "stop")
+1 -14
View File
@@ -6,7 +6,6 @@ from decimal import Decimal
from typing import Any, Dict, Literal, Optional
from agent.model_metadata import fetch_endpoint_model_metadata, fetch_model_metadata
from utils import base_url_host_matches
DEFAULT_PRICING = {"input": 0.0, "output": 0.0}
@@ -394,7 +393,7 @@ def resolve_billing_route(
if provider_name == "openai-codex":
return BillingRoute(provider="openai-codex", model=model, base_url=base_url or "", billing_mode="subscription_included")
if provider_name == "openrouter" or base_url_host_matches(base_url or "", "openrouter.ai"):
if provider_name == "openrouter" or "openrouter.ai" in base:
return BillingRoute(provider="openrouter", model=model, base_url=base_url or "", billing_mode="official_models_api")
if provider_name == "anthropic":
return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
@@ -533,22 +532,10 @@ def normalize_usage(
prompt_total = _to_int(getattr(response_usage, "prompt_tokens", 0))
output_tokens = _to_int(getattr(response_usage, "completion_tokens", 0))
details = getattr(response_usage, "prompt_tokens_details", None)
# Primary: OpenAI-style prompt_tokens_details. Fallback: Anthropic-style
# top-level fields that some OpenAI-compatible proxies (OpenRouter, Vercel
# AI Gateway, Cline) expose when routing Claude models — without this
# fallback, cache writes are undercounted as 0 and cache reads can be
# missed when the proxy only surfaces them at the top level.
# Port of cline/cline#10266.
cache_read_tokens = _to_int(getattr(details, "cached_tokens", 0) if details else 0)
if not cache_read_tokens:
cache_read_tokens = _to_int(getattr(response_usage, "cache_read_input_tokens", 0))
cache_write_tokens = _to_int(
getattr(details, "cache_write_tokens", 0) if details else 0
)
if not cache_write_tokens:
cache_write_tokens = _to_int(
getattr(response_usage, "cache_creation_input_tokens", 0)
)
input_tokens = max(0, prompt_total - cache_read_tokens - cache_write_tokens)
reasoning_tokens = 0
+10 -7
View File
@@ -444,7 +444,6 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]:
if not reasoning.get("has_any_reasoning", True):
print(f" 🚫 Prompt {prompt_index} discarded (no reasoning in any turn)")
discarded_no_reasoning += 1
completed_in_batch.append(prompt_index)
continue
# Get and normalize tool stats for consistent schema across all entries
@@ -951,9 +950,13 @@ class BatchRunner:
root_logger.setLevel(original_level)
# Aggregate all batch statistics and update checkpoint
all_completed_prompts = list(completed_prompts_set)
total_reasoning_stats = {"total_assistant_turns": 0, "turns_with_reasoning": 0, "turns_without_reasoning": 0}
for batch_result in results:
# Add newly completed prompts
all_completed_prompts.extend(batch_result.get("completed_prompts", []))
# Aggregate tool stats
for tool_name, stats in batch_result.get("tool_stats", {}).items():
if tool_name not in total_tool_stats:
@@ -973,7 +976,7 @@ class BatchRunner:
# Save final checkpoint (best-effort; incremental writes already happened)
try:
checkpoint_data["completed_prompts"] = sorted(completed_prompts_set)
checkpoint_data["completed_prompts"] = all_completed_prompts
self._save_checkpoint(checkpoint_data, lock=checkpoint_lock)
except Exception as ckpt_err:
print(f"⚠️ Warning: Failed to save final checkpoint: {ckpt_err}")
@@ -1186,12 +1189,12 @@ def main(
"""
# Handle list distributions
if list_distributions:
from toolset_distributions import print_distribution_info
from toolset_distributions import list_distributions as get_all_dists, print_distribution_info
print("📊 Available Toolset Distributions")
print("=" * 70)
all_dists = list_distributions()
all_dists = get_all_dists()
for dist_name in sorted(all_dists.keys()):
print_distribution_info(dist_name)
+25 -137
View File
@@ -63,38 +63,7 @@ model:
# Leave unset to use the model's native output ceiling (recommended).
# Set only if you want to deliberately limit individual response length.
#
# max_tokens: 8192
# Named provider overrides (optional)
# Use this for per-provider request timeouts, non-stream stale timeouts,
# and per-model exceptions.
# Applies to the primary turn client on every api_mode (OpenAI-wire, native
# Anthropic, and Anthropic-compatible providers), the fallback chain, and
# client rebuilds during credential rotation. For OpenAI-wire chat
# completions (streaming and non-streaming) the configured value is also
# used as the per-request ``timeout=`` kwarg so it wins over the legacy
# HERMES_API_TIMEOUT env var (which still applies when no config is set).
# ``stale_timeout_seconds`` controls the non-streaming stale-call detector and
# wins over the legacy HERMES_API_CALL_STALE_TIMEOUT env var. Leaving these
# unset keeps the legacy defaults (HERMES_API_TIMEOUT=1800s,
# HERMES_API_CALL_STALE_TIMEOUT=300s, native Anthropic 900s).
#
# Not currently wired for AWS Bedrock (bedrock_converse + AnthropicBedrock
# SDK paths) — those use boto3 with its own timeout configuration.
#
# providers:
# ollama-local:
# request_timeout_seconds: 300 # Longer timeout for local cold-starts
# stale_timeout_seconds: 900 # Explicitly re-enable stale detection on local endpoints
# anthropic:
# request_timeout_seconds: 30 # Fast-fail cloud requests
# models:
# claude-opus-4.6:
# timeout_seconds: 600 # Longer timeout for extended-thinking Opus calls
# openai-codex:
# models:
# gpt-5.4:
# stale_timeout_seconds: 1800 # Longer non-stream stale timeout for slow large-context turns
# max_tokens: 8192
# =============================================================================
# OpenRouter Provider Routing (only applies when using OpenRouter)
@@ -122,6 +91,20 @@ model:
# # Data policy: "allow" (default) or "deny" to exclude providers that may store data
# # data_collection: "deny"
# =============================================================================
# Smart Model Routing (optional)
# =============================================================================
# Use a cheaper model for short/simple turns while keeping your main model for
# more complex requests. Disabled by default.
#
# smart_model_routing:
# enabled: true
# max_simple_chars: 160
# max_simple_words: 28
# cheap_model:
# provider: openrouter
# model: google/gemini-2.5-flash
# =============================================================================
# Git Worktree Isolation
# =============================================================================
@@ -326,16 +309,6 @@ compression:
# To pin a specific model/provider for compression summaries, use the
# auxiliary section below (auxiliary.compression.provider / model).
# =============================================================================
# Anthropic prompt caching TTL
# =============================================================================
# When prompt caching is active (Claude via OpenRouter or native Anthropic),
# Anthropic supports two TTL tiers for cached prefixes: "5m" (default) and
# "1h". Other values are ignored and "5m" is used.
#
prompt_caching:
cache_ttl: "5m" # use "1h" for long sessions with pauses between turns
# =============================================================================
# Auxiliary Models (Advanced — Experimental)
# =============================================================================
@@ -384,18 +357,6 @@ prompt_caching:
# web_extract:
# provider: "auto"
# model: ""
#
# # Session search — summarizes matching past sessions
# session_search:
# provider: "auto"
# model: ""
# timeout: 30
# max_concurrency: 3 # Limit parallel summaries to reduce request-burst 429s
# extra_body: {} # Provider-specific OpenAI-compatible request fields
# # Example for providers that support request-body
# # reasoning controls:
# # extra_body:
# # enable_thinking: false
# =============================================================================
# Persistent Memory
@@ -517,13 +478,6 @@ agent:
# finish, then interrupts anything still running after this timeout.
# 0 = no drain, interrupt immediately.
# restart_drain_timeout: 60
# Max app-level retry attempts for API errors (connection drops, provider
# timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this
# to 1 if you use fallback providers and want fast failover on flaky
# primaries (default 3). The OpenAI SDK does its own low-level retries
# underneath this wrapper — this is the Hermes-level loop.
# api_max_retries: 3
# Enable verbose logging
verbose: false
@@ -606,7 +560,6 @@ platform_toolsets:
signal: [hermes-signal]
homeassistant: [hermes-homeassistant]
qqbot: [hermes-qqbot]
yuanbao: [hermes-yuanbao]
# =============================================================================
# Gateway Platform Settings
@@ -788,20 +741,10 @@ code_execution:
# Subagent Delegation
# =============================================================================
# The delegate_task tool spawns child agents with isolated context.
# Supports single tasks and batch mode (default 3 parallel, configurable).
# Supports single tasks and batch mode (up to 3 parallel).
delegation:
max_iterations: 50 # Max tool-calling turns per child (default: 50)
# max_concurrent_children: 3 # Max parallel child agents per batch (default: 3, floor: 1, no ceiling).
# WARNING: values above 10 multiply API cost linearly.
# max_spawn_depth: 1 # Delegation tree depth cap (range: 1-3, default: 1 = flat).
# Raise to 2 to allow workers to spawn their own subagents.
# Requires role="orchestrator" on intermediate agents.
# orchestrator_enabled: true # Kill switch for role="orchestrator" children (default: true).
# subagent_auto_approve: false # When a subagent hits a dangerous-command approval prompt, auto-deny (default: false)
# or auto-approve "once" (true) instead of blocking on stdin.
# The parent TUI owns stdin, so blocking would deadlock; non-interactive resolution is required.
# Both choices emit a logger.warning audit line. Flip to true only for cron/batch pipelines.
# inherit_mcp_toolsets: true # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
default_toolsets: ["terminal", "file", "web"] # Default toolsets for subagents
# model: "google/gemini-3-flash-preview" # Override model for subagents (empty = inherit parent)
# provider: "openrouter" # Override provider for subagents (empty = inherit parent)
# # Resolves full credentials (base_url, api_key) automatically.
@@ -825,9 +768,7 @@ delegation:
# Display
# =============================================================================
display:
# Use compact banner mode (hides the ASCII-art banner, shows a single line).
# true: Compact single-line banner
# false: Full ASCII banner with tool/skill summary (default)
# Use compact banner mode
compact: false
# Tool progress display level (CLI and gateway)
@@ -841,19 +782,12 @@ display:
# Gateway-only natural mid-turn assistant updates.
# When true, completed assistant status messages are sent as separate chat
# messages. This is independent of tool_progress and gateway streaming.
# true: Send mid-turn assistant updates as separate messages (default)
# false: Only send the final response
interim_assistant_messages: true
# What Enter does when Hermes is already busy (CLI and gateway platforms).
# What Enter does when Hermes is already busy in the CLI.
# interrupt: Interrupt the current run and redirect Hermes (default)
# queue: Queue your message for the next turn
# steer: Inject your message mid-run via /steer, arriving at the agent
# after the next tool call — no interrupt, no role violation.
# Falls back to 'queue' if the agent isn't running yet or if
# images are attached (steer only carries text).
# Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
# Toggle at runtime with /busy <interrupt|queue|steer>.
# Ctrl+C always interrupts regardless of this setting.
busy_input_mode: interrupt
# Background process notifications (gateway/messaging only).
@@ -869,22 +803,17 @@ display:
# Play terminal bell when agent finishes a response.
# Useful for long-running tasks — your terminal will ding when the agent is done.
# Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
# true: Ring the terminal bell on each response
# false: Silent (default)
bell_on_complete: false
# Show model reasoning/thinking before each response.
# When enabled, a dim box shows the model's thought process above the response.
# Toggle at runtime with /reasoning show or /reasoning hide.
# true: Show the reasoning box
# false: Hide reasoning (default)
show_reasoning: false
# Stream tokens to the terminal as they arrive instead of waiting for the
# full response. The response box opens on first token and text appears
# line-by-line. Tool calls are still captured silently.
# true: Stream tokens as they arrive (default)
# false: Wait for the full response before rendering
# Stream tokens to the terminal in real-time. Disable to wait for full responses.
streaming: true
# ───────────────────────────────────────────────────────────────────────────
@@ -894,15 +823,10 @@ display:
# response box label, and branding text. Change at runtime with /skin <name>.
#
# Built-in skins:
# default — Classic Hermes gold/kawaii
# ares — Crimson/bronze war-god theme with spinner wings
# mono — Clean grayscale monochrome
# slate — Cool blue developer-focused
# daylight — Bright light-mode theme
# warm-lightmode — Warm paper-tone light-mode theme
# poseidon — Sea-green/teal Olympian theme
# sisyphus — Earthy stone-and-moss theme
# charizard — Fiery orange dragon theme
# default — Classic Hermes gold/kawaii
# ares — Crimson/bronze war-god theme with spinner wings
# mono — Clean grayscale monochrome
# slate — Cool blue developer-focused
#
# Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
# Schema (all fields optional, missing values inherit from default):
@@ -964,39 +888,3 @@ display:
# # Names and usernames are NOT affected (user-chosen, publicly visible).
# # Routing/delivery still uses the original values internally.
# redact_pii: false
# =============================================================================
# Shell-script hooks
# =============================================================================
# Register shell scripts as plugin-hook callbacks. Each entry is executed as
# a subprocess (shell=False, shlex.split) with a JSON payload on stdin. On
# stdout the script may return JSON that either blocks the tool call or
# injects context into the next LLM call.
#
# Valid events (mirror hermes_cli.plugins.VALID_HOOKS):
# pre_tool_call, post_tool_call, pre_llm_call, post_llm_call,
# pre_api_request, post_api_request, on_session_start, on_session_end,
# on_session_finalize, on_session_reset, subagent_stop
#
# First-use consent: each (event, command) pair prompts once on a TTY, then
# is persisted to ~/.hermes/shell-hooks-allowlist.json. Non-interactive
# runs (gateway, cron) need --accept-hooks, HERMES_ACCEPT_HOOKS=1, or the
# hooks_auto_accept key below.
#
# See website/docs/user-guide/features/hooks.md for the full JSON wire
# protocol and worked examples.
#
# hooks:
# pre_tool_call:
# - matcher: "terminal"
# command: "~/.hermes/agent-hooks/block-rm-rf.sh"
# timeout: 10
# post_tool_call:
# - matcher: "write_file|patch"
# command: "~/.hermes/agent-hooks/auto-format.sh"
# pre_llm_call:
# - command: "~/.hermes/agent-hooks/inject-cwd-context.sh"
# subagent_stop:
# - command: "~/.hermes/agent-hooks/log-orchestration.sh"
#
# hooks_auto_accept: false
+532 -1343
View File
File diff suppressed because it is too large Load Diff
+47 -154
View File
@@ -9,14 +9,13 @@ import copy
import json
import logging
import tempfile
import threading
import os
import re
import uuid
from datetime import datetime, timedelta
from pathlib import Path
from hermes_constants import get_hermes_home
from typing import Optional, Dict, List, Any, Union
from typing import Optional, Dict, List, Any
logger = logging.getLogger(__name__)
@@ -35,11 +34,6 @@ except ImportError:
HERMES_DIR = get_hermes_home().resolve()
CRON_DIR = HERMES_DIR / "cron"
JOBS_FILE = CRON_DIR / "jobs.json"
# In-process lock protecting load_jobs→modify→save_jobs cycles.
# Required when tick() runs jobs in parallel threads — without this,
# concurrent mark_job_run / advance_next_run calls can clobber each other.
_jobs_file_lock = threading.Lock()
OUTPUT_DIR = CRON_DIR / "output"
ONESHOT_GRACE_SECONDS = 120
@@ -311,12 +305,6 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None
elif schedule["kind"] == "cron":
if not HAS_CRONITER:
logger.warning(
"Cannot compute next run for cron schedule %r: 'croniter' "
"is not installed. Install the 'cron' extra (pip install "
"'hermes-agent[cron]') to re-enable recurring cron jobs.",
schedule.get("expr"),
)
return None
cron = croniter(schedule["expr"], now)
next_run = cron.get_next(datetime)
@@ -377,39 +365,6 @@ def save_jobs(jobs: List[Dict[str, Any]]):
raise
def _normalize_workdir(workdir: Optional[str]) -> Optional[str]:
"""Normalize and validate a cron job workdir.
Rules:
- Empty / None None (feature off, preserves old behaviour).
- ``~`` is expanded. Relative paths are rejected cron jobs run detached
from any shell cwd, so relative paths have no stable meaning.
- The path must exist and be a directory at create/update time. We do
NOT re-check at run time (a user might briefly unmount the dir; the
scheduler will just fall back to old behaviour with a logged warning).
Returns the absolute path string, or None when disabled.
Raises ValueError on invalid input.
"""
if workdir is None:
return None
raw = str(workdir).strip()
if not raw:
return None
expanded = Path(raw).expanduser()
if not expanded.is_absolute():
raise ValueError(
f"Cron workdir must be an absolute path (got {raw!r}). "
f"Cron jobs run detached from any shell cwd, so relative paths are ambiguous."
)
resolved = expanded.resolve()
if not resolved.exists():
raise ValueError(f"Cron workdir does not exist: {resolved}")
if not resolved.is_dir():
raise ValueError(f"Cron workdir is not a directory: {resolved}")
return str(resolved)
def create_job(
prompt: str,
schedule: str,
@@ -423,9 +378,6 @@ def create_job(
provider: Optional[str] = None,
base_url: Optional[str] = None,
script: Optional[str] = None,
context_from: Optional[Union[str, List[str]]] = None,
enabled_toolsets: Optional[List[str]] = None,
workdir: Optional[str] = None,
) -> Dict[str, Any]:
"""
Create a new cron job.
@@ -445,18 +397,6 @@ def create_job(
script: Optional path to a Python script whose stdout is injected into the
prompt each run. The script runs before the agent turn, and its output
is prepended as context. Useful for data collection / change detection.
context_from: Optional job ID (or list of job IDs) whose most recent output
is injected into the prompt as context before each run.
Useful for chaining cron jobs: job A finds data, job B processes it.
enabled_toolsets: Optional list of toolset names to restrict the agent to.
When set, only tools from these toolsets are loaded, reducing
token overhead. When omitted, all default tools are loaded.
workdir: Optional absolute path. When set, the job runs as if launched
from that directory: AGENTS.md / CLAUDE.md / .cursorrules from
that directory are injected into the system prompt, and the
terminal/file/code_exec tools use it as their working directory
(via TERMINAL_CWD). When unset, the old behaviour is preserved
(no context files injected, tools use the scheduler's cwd).
Returns:
The created job dict
@@ -487,17 +427,6 @@ def create_job(
normalized_base_url = normalized_base_url or None
normalized_script = str(script).strip() if isinstance(script, str) else None
normalized_script = normalized_script or None
normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None
normalized_toolsets = normalized_toolsets or None
normalized_workdir = _normalize_workdir(workdir)
# Normalize context_from: accept str or list of str, store as list or None
if isinstance(context_from, str):
context_from = [context_from.strip()] if context_from.strip() else None
elif isinstance(context_from, list):
context_from = [str(j).strip() for j in context_from if str(j).strip()] or None
else:
context_from = None
label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
job = {
@@ -510,7 +439,6 @@ def create_job(
"provider": normalized_provider,
"base_url": normalized_base_url,
"script": normalized_script,
"context_from": context_from,
"schedule": parsed_schedule,
"schedule_display": parsed_schedule.get("display", schedule),
"repeat": {
@@ -530,8 +458,6 @@ def create_job(
# Delivery configuration
"deliver": deliver,
"origin": origin, # Tracks where job was created for "origin" delivery
"enabled_toolsets": normalized_toolsets,
"workdir": normalized_workdir,
}
jobs = load_jobs()
@@ -565,15 +491,6 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]
if job["id"] != job_id:
continue
# Validate / normalize workdir if present in updates. Empty string or
# None both mean "clear the field" (restore old behaviour).
if "workdir" in updates:
_wd = updates["workdir"]
if _wd in (None, "", False):
updates["workdir"] = None
else:
updates["workdir"] = _normalize_workdir(_wd)
updated = _apply_skill_fields({**job, **updates})
schedule_changed = "schedule" in updates
@@ -677,66 +594,43 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
``delivery_error`` is tracked separately from the agent error a job
can succeed (agent produced output) but fail delivery (platform down).
"""
with _jobs_file_lock:
jobs = load_jobs()
for i, job in enumerate(jobs):
if job["id"] == job_id:
now = _hermes_now().isoformat()
job["last_run_at"] = now
job["last_status"] = "ok" if success else "error"
job["last_error"] = error if not success else None
# Track delivery failures separately — cleared on successful delivery
job["last_delivery_error"] = delivery_error
jobs = load_jobs()
for i, job in enumerate(jobs):
if job["id"] == job_id:
now = _hermes_now().isoformat()
job["last_run_at"] = now
job["last_status"] = "ok" if success else "error"
job["last_error"] = error if not success else None
# Track delivery failures separately — cleared on successful delivery
job["last_delivery_error"] = delivery_error
# Increment completed count
if job.get("repeat"):
job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
# Increment completed count
if job.get("repeat"):
job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
# Check if we've hit the repeat limit
times = job["repeat"].get("times")
completed = job["repeat"]["completed"]
if times is not None and times > 0 and completed >= times:
# Remove the job (limit reached)
jobs.pop(i)
save_jobs(jobs)
return
# Compute next run
job["next_run_at"] = compute_next_run(job["schedule"], now)
# Check if we've hit the repeat limit
times = job["repeat"].get("times")
completed = job["repeat"]["completed"]
if times is not None and times > 0 and completed >= times:
# Remove the job (limit reached)
jobs.pop(i)
save_jobs(jobs)
return
# Compute next run
job["next_run_at"] = compute_next_run(job["schedule"], now)
# If no next run, decide whether this is terminal completion
# (one-shot) or a transient failure (recurring schedule couldn't
# compute — e.g. 'croniter' missing from the runtime env).
# Recurring jobs must NEVER be silently disabled: that turns a
# missing runtime dep into "job completed" and the user's
# schedule quietly goes off. See issue #16265.
if job["next_run_at"] is None:
kind = job.get("schedule", {}).get("kind")
if kind in ("cron", "interval"):
job["state"] = "error"
if not job.get("last_error"):
job["last_error"] = (
"Failed to compute next run for recurring "
"schedule (is the 'croniter' package "
"installed in the gateway's Python env?)"
)
logger.error(
"Job '%s' (%s) could not compute next_run_at; "
"leaving enabled and marking state=error so the "
"job is not silently disabled.",
job.get("name", job["id"]),
kind,
)
else:
job["enabled"] = False
job["state"] = "completed"
elif job.get("state") != "paused":
job["state"] = "scheduled"
# If no next run (one-shot completed), disable
if job["next_run_at"] is None:
job["enabled"] = False
job["state"] = "completed"
elif job.get("state") != "paused":
job["state"] = "scheduled"
save_jobs(jobs)
return
save_jobs(jobs)
return
logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
def advance_next_run(job_id: str) -> bool:
@@ -751,21 +645,20 @@ def advance_next_run(job_id: str) -> bool:
Returns True if next_run_at was advanced, False otherwise.
"""
with _jobs_file_lock:
jobs = load_jobs()
for job in jobs:
if job["id"] == job_id:
kind = job.get("schedule", {}).get("kind")
if kind not in ("cron", "interval"):
return False
now = _hermes_now().isoformat()
new_next = compute_next_run(job["schedule"], now)
if new_next and new_next != job.get("next_run_at"):
job["next_run_at"] = new_next
save_jobs(jobs)
return True
jobs = load_jobs()
for job in jobs:
if job["id"] == job_id:
kind = job.get("schedule", {}).get("kind")
if kind not in ("cron", "interval"):
return False
return False
now = _hermes_now().isoformat()
new_next = compute_next_run(job["schedule"], now)
if new_next and new_next != job.get("next_run_at"):
job["next_run_at"] = new_next
save_jobs(jobs)
return True
return False
return False
def get_due_jobs() -> List[Dict[str, Any]]:
+62 -266
View File
@@ -40,44 +40,13 @@ from hermes_time import now as _hermes_now
logger = logging.getLogger(__name__)
def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None:
"""Resolve the toolset list for a cron job.
Precedence:
1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update).
Keeps the agent's job-scoped toolset override intact — #6130.
2. Per-platform ``hermes tools`` config for the ``cron`` platform.
Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``)
so users can gate cron toolsets globally without recreating every job.
3. ``None`` on any lookup failure AIAgent loads the full default set
(legacy behavior before this change, preserved as the safety net).
_DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by
``_get_platform_tools`` for unconfigured platforms, so fresh installs
get cron WITHOUT ``moa`` by default (issue reported by Norbert
surprise $4.63 run).
"""
per_job = job.get("enabled_toolsets")
if per_job:
return per_job
try:
from hermes_cli.tools_config import _get_platform_tools # lazy: avoid heavy import at cron module load
return sorted(_get_platform_tools(cfg or {}, "cron"))
except Exception as exc:
logger.warning(
"Cron toolset resolution failed, falling back to full default toolset: %s",
exc,
)
return None
# Valid delivery platforms — used to validate user-supplied platform names
# in cron delivery targets, preventing env var enumeration via crafted names.
_KNOWN_DELIVERY_PLATFORMS = frozenset({
"telegram", "discord", "slack", "whatsapp", "signal",
"matrix", "mattermost", "homeassistant", "dingtalk", "feishu",
"wecom", "wecom_callback", "weixin", "sms", "email", "webhook", "bluebubbles",
"qqbot", "yuanbao",
"qqbot",
})
# Platforms that support a configured cron/notification home target, mapped to
@@ -283,11 +252,7 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)
future = asyncio.run_coroutine_threadsafe(coro, loop)
try:
result = future.result(timeout=30)
except TimeoutError:
future.cancel()
raise
result = future.result(timeout=30)
if result and not getattr(result, "success", True):
logger.warning(
"Job '%s': media send failed for %s: %s",
@@ -337,7 +302,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
"sms": Platform.SMS,
"bluebubbles": Platform.BLUEBUBBLES,
"qqbot": Platform.QQBOT,
"yuanbao": Platform.YUANBAO,
}
# Optionally wrap the content with a header/footer so the user knows this
@@ -418,11 +382,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
loop,
)
try:
send_result = future.result(timeout=60)
except TimeoutError:
future.cancel()
raise
send_result = future.result(timeout=60)
if send_result and not getattr(send_result, "success", True):
err = getattr(send_result, "error", "unknown")
logger.warning(
@@ -462,6 +422,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
# prevent "coroutine was never awaited" RuntimeWarning, then retry in a
# fresh thread that has no running loop.
coro.close()
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
result = future.result(timeout=30)
@@ -672,51 +633,10 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
f"{prompt}"
)
# Inject output from referenced cron jobs as context.
context_from = job.get("context_from")
if context_from:
from cron.jobs import OUTPUT_DIR
if isinstance(context_from, str):
context_from = [context_from]
for source_job_id in context_from:
# Guard against path traversal — valid job IDs are 12-char hex strings
if not source_job_id or not all(c in "0123456789abcdef" for c in source_job_id):
logger.warning("context_from: skipping invalid job_id %r", source_job_id)
continue
try:
job_output_dir = OUTPUT_DIR / source_job_id
if not job_output_dir.exists():
continue # silent skip — no output yet
output_files = sorted(
job_output_dir.glob("*.md"),
key=lambda f: f.stat().st_mtime,
reverse=True,
)
if not output_files:
continue # silent skip — no output yet
latest_output = output_files[0].read_text(encoding="utf-8").strip()
# Truncate to 8K characters to avoid prompt bloat
_MAX_CONTEXT_CHARS = 8000
if len(latest_output) > _MAX_CONTEXT_CHARS:
latest_output = latest_output[:_MAX_CONTEXT_CHARS] + "\n\n[... output truncated ...]"
if latest_output:
prompt = (
f"## Output from job '{source_job_id}'\n"
"The following is the most recent output from a preceding "
"cron job. Use it as context for your analysis.\n\n"
f"```\n{latest_output}\n```\n\n"
f"{prompt}"
)
else:
continue # silent skip — empty output
except (OSError, PermissionError) as e:
logger.warning("context_from: failed to read output for job %r: %s", source_job_id, e)
# silent skip — do not pollute the prompt with error messages
# Always prepend cron execution guidance so the agent knows how
# delivery works and can suppress delivery when appropriate.
cron_hint = (
"[IMPORTANT: You are running as a scheduled cron job. "
"[SYSTEM: You are running as a scheduled cron job. "
"DELIVERY: Your final response will be automatically delivered "
"to the user — do NOT use send_message or try to deliver "
"the output yourself. Just produce your report/output as your "
@@ -752,7 +672,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
parts.append("")
parts.extend(
[
f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
"",
content,
]
@@ -760,7 +680,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
if skipped:
notice = (
f"[IMPORTANT: The following skill(s) were listed for this job but could not be found "
f"[SYSTEM: The following skill(s) were listed for this job but could not be found "
f"and were skipped: {', '.join(skipped)}. "
f"Start your response with a brief notice so the user is aware, e.g.: "
f"'⚠️ Skill(s) not found and skipped: {', '.join(skipped)}']"
@@ -822,48 +742,19 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
logger.info("Running job '%s' (ID: %s)", job_name, job_id)
logger.info("Prompt: %s", prompt[:100])
agent = None
# Mark this as a cron session so the approval system can apply cron_mode.
# This env var is process-wide and persists for the lifetime of the
# scheduler process — every job this process runs is a cron job.
os.environ["HERMES_CRON_SESSION"] = "1"
# Use ContextVars for per-job session/delivery state so parallel jobs
# don't clobber each other's targets (os.environ is process-global).
from gateway.session_context import set_session_vars, clear_session_vars, _VAR_MAP
_ctx_tokens = set_session_vars(
platform=origin["platform"] if origin else "",
chat_id=str(origin["chat_id"]) if origin else "",
chat_name=origin.get("chat_name", "") if origin else "",
)
# Per-job working directory. When set (and validated at create/update
# time), we point TERMINAL_CWD at it so:
# - build_context_files_prompt() picks up AGENTS.md / CLAUDE.md /
# .cursorrules from the job's project dir, AND
# - the terminal, file, and code-exec tools run commands from there.
#
# tick() serializes workdir-jobs outside the parallel pool, so mutating
# os.environ["TERMINAL_CWD"] here is safe for those jobs. For workdir-less
# jobs we leave TERMINAL_CWD untouched — preserves the original behaviour
# (skip_context_files=True, tools use whatever cwd the scheduler has).
_job_workdir = (job.get("workdir") or "").strip() or None
if _job_workdir and not Path(_job_workdir).is_dir():
# Directory was removed between create-time validation and now. Log
# and drop back to old behaviour rather than crashing the job.
logger.warning(
"Job '%s': configured workdir %r no longer exists — running without it",
job_id, _job_workdir,
)
_job_workdir = None
_prior_terminal_cwd = os.environ.get("TERMINAL_CWD", "_UNSET_")
if _job_workdir:
os.environ["TERMINAL_CWD"] = _job_workdir
logger.info("Job '%s': using workdir %s", job_id, _job_workdir)
try:
# Inject origin context so the agent's send_message tool knows the chat.
# Must be INSIDE the try block so the finally cleanup always runs.
if origin:
os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
if origin.get("chat_name"):
os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
# Re-read .env and config.yaml fresh every run so provider/key
# changes take effect without a gateway restart.
from dotenv import load_dotenv
@@ -874,10 +765,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
delivery_target = _resolve_delivery_target(job)
if delivery_target:
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"])
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"]))
os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"]
os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"])
if delivery_target.get("thread_id") is not None:
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"]))
os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])
model = job.get("model") or os.getenv("HERMES_MODEL") or ""
@@ -916,13 +807,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
prefill_messages = None
prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "")
if prefill_file:
import json as _json
pfpath = Path(prefill_file).expanduser()
if not pfpath.is_absolute():
pfpath = _hermes_home / pfpath
if pfpath.exists():
try:
with open(pfpath, "r", encoding="utf-8") as _pf:
prefill_messages = json.load(_pf)
prefill_messages = _json.load(_pf)
if not isinstance(prefill_messages, list):
prefill_messages = None
except Exception as e:
@@ -934,12 +826,12 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
# Provider routing
pr = _cfg.get("provider_routing", {})
smart_routing = _cfg.get("smart_model_routing", {}) or {}
from hermes_cli.runtime_provider import (
resolve_runtime_provider,
format_runtime_provider_error,
)
from hermes_cli.auth import AuthError
try:
runtime_kwargs = {
"requested": job.get("provider") or os.getenv("HERMES_INFERENCE_PROVIDER"),
@@ -947,35 +839,28 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
if job.get("base_url"):
runtime_kwargs["explicit_base_url"] = job.get("base_url")
runtime = resolve_runtime_provider(**runtime_kwargs)
except AuthError as auth_exc:
# Primary provider auth failed — try fallback chain before giving up.
logger.warning("Job '%s': primary auth failed (%s), trying fallback", job_id, auth_exc)
fb = _cfg.get("fallback_providers") or _cfg.get("fallback_model")
fb_list = (fb if isinstance(fb, list) else [fb]) if fb else []
runtime = None
for entry in fb_list:
if not isinstance(entry, dict):
continue
try:
fb_kwargs = {"requested": entry.get("provider")}
if entry.get("base_url"):
fb_kwargs["explicit_base_url"] = entry["base_url"]
if entry.get("api_key"):
fb_kwargs["explicit_api_key"] = entry["api_key"]
runtime = resolve_runtime_provider(**fb_kwargs)
logger.info("Job '%s': fallback resolved to %s", job_id, runtime.get("provider"))
break
except Exception as fb_exc:
logger.debug("Job '%s': fallback %s failed: %s", job_id, entry.get("provider"), fb_exc)
if runtime is None:
raise RuntimeError(format_runtime_provider_error(auth_exc)) from auth_exc
except Exception as exc:
message = format_runtime_provider_error(exc)
raise RuntimeError(message) from exc
from agent.smart_model_routing import resolve_turn_route
turn_route = resolve_turn_route(
prompt,
smart_routing,
{
"model": model,
"api_key": runtime.get("api_key"),
"base_url": runtime.get("base_url"),
"provider": runtime.get("provider"),
"api_mode": runtime.get("api_mode"),
"command": runtime.get("command"),
"args": list(runtime.get("args") or []),
},
)
fallback_model = _cfg.get("fallback_providers") or _cfg.get("fallback_model") or None
credential_pool = None
runtime_provider = str(runtime.get("provider") or "").strip().lower()
runtime_provider = str(turn_route["runtime"].get("provider") or "").strip().lower()
if runtime_provider:
try:
from agent.credential_pool import load_pool
@@ -992,13 +877,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
logger.debug("Job '%s': failed to load credential pool for %s: %s", job_id, runtime_provider, e)
agent = AIAgent(
model=model,
api_key=runtime.get("api_key"),
base_url=runtime.get("base_url"),
provider=runtime.get("provider"),
api_mode=runtime.get("api_mode"),
acp_command=runtime.get("command"),
acp_args=runtime.get("args"),
model=turn_route["model"],
api_key=turn_route["runtime"].get("api_key"),
base_url=turn_route["runtime"].get("base_url"),
provider=turn_route["runtime"].get("provider"),
api_mode=turn_route["runtime"].get("api_mode"),
acp_command=turn_route["runtime"].get("command"),
acp_args=turn_route["runtime"].get("args"),
max_iterations=max_iterations,
reasoning_config=reasoning_config,
prefill_messages=prefill_messages,
@@ -1008,13 +893,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
providers_ignored=pr.get("ignore"),
providers_order=pr.get("order"),
provider_sort=pr.get("sort"),
enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
disabled_toolsets=["cronjob", "messaging", "clarify"],
quiet_mode=True,
# When a workdir is configured, inject AGENTS.md / CLAUDE.md /
# .cursorrules from that directory; otherwise preserve the old
# behaviour (don't inject SOUL.md/AGENTS.md from the scheduler cwd).
skip_context_files=not bool(_job_workdir),
skip_context_files=True, # Don't inject SOUL.md/AGENTS.md from scheduler cwd
skip_memory=True, # Cron system prompts would corrupt user representations
platform="cron",
session_id=_cron_session_id,
@@ -1098,12 +979,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
f"— last activity: {_last_desc}"
)
# Guard against non-dict returns from run_conversation under error conditions
if not isinstance(result, dict):
raise RuntimeError(
f"agent.run_conversation returned {type(result).__name__} instead of dict: {result!r}"
)
final_response = result.get("final_response", "") or ""
# Strip leaked placeholder text that upstream may inject on empty completions.
if final_response.strip() == "(No response generated)":
@@ -1153,16 +1028,16 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
return False, output, "", error_msg
finally:
# Restore TERMINAL_CWD to whatever it was before this job ran. We
# only ever mutate it when the job has a workdir; see the setup block
# at the top of run_job for the serialization guarantee.
if _job_workdir:
if _prior_terminal_cwd == "_UNSET_":
os.environ.pop("TERMINAL_CWD", None)
else:
os.environ["TERMINAL_CWD"] = _prior_terminal_cwd
# Clean up ContextVar session/delivery state for this job.
clear_session_vars(_ctx_tokens)
# Clean up injected env vars so they don't leak to other jobs
for key in (
"HERMES_SESSION_PLATFORM",
"HERMES_SESSION_CHAT_ID",
"HERMES_SESSION_CHAT_NAME",
"HERMES_CRON_AUTO_DELIVER_PLATFORM",
"HERMES_CRON_AUTO_DELIVER_CHAT_ID",
"HERMES_CRON_AUTO_DELIVER_THREAD_ID",
):
os.environ.pop(key, None)
if _session_db:
try:
_session_db.end_session(_cron_session_id, "cron_complete")
@@ -1172,24 +1047,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
_session_db.close()
except (Exception, KeyboardInterrupt) as e:
logger.debug("Job '%s': failed to close SQLite session store: %s", job_id, e)
# Release subprocesses, terminal sandboxes, browser daemons, and the
# main OpenAI/httpx client held by this ephemeral cron agent. Without
# this, a gateway that ticks cron every N minutes leaks fds per job
# until it hits EMFILE (#10200 / "too many open files").
try:
if agent is not None:
agent.close()
except (Exception, KeyboardInterrupt) as e:
logger.debug("Job '%s': failed to close agent resources: %s", job_id, e)
# Each cron run spins up a short-lived worker thread whose event loop
# dies as soon as the ``ThreadPoolExecutor`` shuts down. Any async
# httpx clients cached under that loop are now unusable — reap them
# so their transports don't accumulate in the process-global cache.
try:
from agent.auxiliary_client import cleanup_stale_async_clients
cleanup_stale_async_clients()
except Exception as e:
logger.debug("Job '%s': failed to reap stale auxiliary clients: %s", job_id, e)
def tick(verbose: bool = True, adapters=None, loop=None) -> int:
@@ -1233,41 +1090,15 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
if verbose:
logger.info("%s - %s job(s) due", _hermes_now().strftime('%H:%M:%S'), len(due_jobs))
# Advance next_run_at for all recurring jobs FIRST, under the file lock,
# before any execution begins. This preserves at-most-once semantics.
executed = 0
for job in due_jobs:
advance_next_run(job["id"])
# Resolve max parallel workers: env var > config.yaml > unbounded.
# Set HERMES_CRON_MAX_PARALLEL=1 to restore old serial behaviour.
_max_workers: Optional[int] = None
try:
_env_par = os.getenv("HERMES_CRON_MAX_PARALLEL", "").strip()
if _env_par:
_max_workers = int(_env_par) or None
except (ValueError, TypeError):
logger.warning("Invalid HERMES_CRON_MAX_PARALLEL value; defaulting to unbounded")
if _max_workers is None:
try:
_ucfg = load_config() or {}
_cfg_par = (
_ucfg.get("cron", {}) if isinstance(_ucfg, dict) else {}
).get("max_parallel_jobs")
if _cfg_par is not None:
_max_workers = int(_cfg_par) or None
except Exception:
pass
# For recurring jobs (cron/interval), advance next_run_at to the
# next future occurrence BEFORE execution. This way, if the
# process crashes mid-run, the job won't re-fire on restart.
# One-shot jobs are left alone so they can retry on restart.
advance_next_run(job["id"])
if verbose:
logger.info(
"Running %d job(s) in parallel (max_workers=%s)",
len(due_jobs),
_max_workers if _max_workers else "unbounded",
)
def _process_job(job: dict) -> bool:
"""Run one due job end-to-end: execute, save, deliver, mark."""
try:
success, output, final_response, error = run_job(job)
output_file = save_job_output(job["id"], output)
@@ -1299,48 +1130,13 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"
mark_job_run(job["id"], success, error, delivery_error=delivery_error)
return True
executed += 1
except Exception as e:
logger.error("Error processing job %s: %s", job['id'], e)
mark_job_run(job["id"], False, str(e))
return False
# Partition due jobs: those with a per-job workdir mutate
# os.environ["TERMINAL_CWD"] inside run_job, which is process-global —
# so they MUST run sequentially to avoid corrupting each other. Jobs
# without a workdir leave env untouched and stay parallel-safe.
workdir_jobs = [j for j in due_jobs if (j.get("workdir") or "").strip()]
parallel_jobs = [j for j in due_jobs if not (j.get("workdir") or "").strip()]
_results: list = []
# Sequential pass for workdir jobs.
for job in workdir_jobs:
_ctx = contextvars.copy_context()
_results.append(_ctx.run(_process_job, job))
# Parallel pass for the rest — same behaviour as before.
if parallel_jobs:
with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
_futures = []
for job in parallel_jobs:
_ctx = contextvars.copy_context()
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
_results.extend(f.result() for f in _futures)
# Best-effort sweep of MCP stdio subprocesses that survived their
# session teardown during this tick. Runs AFTER every job has
# finished so active sessions (including live user chats) are
# never touched — only PIDs explicitly detected as orphans in
# tools.mcp_tool._run_stdio's finally block are reaped.
try:
from tools.mcp_tool import _kill_orphaned_mcp_children
_kill_orphaned_mcp_children()
except Exception as _e:
logger.debug("Post-tick MCP orphan cleanup failed: %s", _e)
return sum(_results)
return executed
finally:
if fcntl:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
-52
View File
@@ -1,52 +0,0 @@
#
# docker-compose.yml for Hermes Agent
#
# Usage:
# HERMES_UID=$(id -u) HERMES_GID=$(id -g) docker compose up -d
#
# Set HERMES_UID / HERMES_GID to the host user that owns ~/.hermes so
# files created inside the container stay readable/writable on the host.
# The entrypoint remaps the internal `hermes` user to these values via
# usermod/groupmod + gosu.
#
# Security notes:
# - The dashboard service binds to 127.0.0.1 by default. It stores API
# keys; exposing it on LAN without auth is unsafe. If you want remote
# access, use an SSH tunnel or put it behind a reverse proxy that
# adds authentication — do NOT pass --insecure --host 0.0.0.0.
# - The gateway's API server is off unless you uncomment API_SERVER_KEY
# and API_SERVER_HOST. See docs/user-guide/api-server.md before doing
# this on an internet-facing host.
#
services:
gateway:
build: .
image: hermes-agent
container_name: hermes
restart: unless-stopped
network_mode: host
volumes:
- ~/.hermes:/opt/data
environment:
- HERMES_UID=${HERMES_UID:-10000}
- HERMES_GID=${HERMES_GID:-10000}
# To expose the OpenAI-compatible API server beyond localhost,
# uncomment BOTH lines (API_SERVER_KEY is mandatory for auth):
# - API_SERVER_HOST=0.0.0.0
# - API_SERVER_KEY=${API_SERVER_KEY}
command: ["gateway", "run"]
dashboard:
image: hermes-agent
container_name: hermes-dashboard
restart: unless-stopped
network_mode: host
depends_on:
- gateway
volumes:
- ~/.hermes:/opt/data
environment:
- HERMES_UID=${HERMES_UID:-10000}
- HERMES_GID=${HERMES_GID:-10000}
# Localhost-only. For remote access, tunnel via `ssh -L 9119:localhost:9119`.
command: ["dashboard", "--host", "127.0.0.1", "--no-open"]
+2 -35
View File
@@ -22,18 +22,9 @@ if [ "$(id -u)" = "0" ]; then
groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true
fi
# Fix ownership of the data volume. When HERMES_UID remaps the hermes user,
# files created by previous runs (under the old UID) become inaccessible.
# Always chown -R when UID was remapped; otherwise only if top-level is wrong.
actual_hermes_uid=$(id -u hermes)
needs_chown=false
if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "10000" ]; then
needs_chown=true
elif [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
needs_chown=true
fi
if [ "$needs_chown" = true ]; then
echo "Fixing ownership of $HERMES_HOME to hermes ($actual_hermes_uid)"
if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing"
# In rootless Podman the container's "root" is mapped to an unprivileged
# host UID — chown will fail. That's fine: the volume is already owned
# by the mapped user on the host side.
@@ -41,15 +32,6 @@ if [ "$(id -u)" = "0" ]; then
echo "Warning: chown failed (rootless container?) — continuing anyway"
fi
# Ensure config.yaml is readable by the hermes runtime user even if it was
# edited on the host after initial ownership setup. Must run here (as root)
# rather than after the gosu drop, otherwise a non-root caller like
# `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
if [ -f "$HERMES_HOME/config.yaml" ]; then
chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
fi
echo "Dropping root privileges"
exec gosu hermes "$0" "$@"
fi
@@ -86,19 +68,4 @@ if [ -d "$INSTALL_DIR/skills" ]; then
python3 "$INSTALL_DIR/tools/skills_sync.py"
fi
# Final exec: two supported invocation patterns.
#
# docker run <image> -> exec `hermes` with no args (legacy default)
# docker run <image> chat -q "..." -> exec `hermes chat -q "..."` (legacy wrap)
# docker run <image> sleep infinity -> exec `sleep infinity` directly
# docker run <image> bash -> exec `bash` directly
#
# If the first positional arg resolves to an executable on PATH, we assume the
# caller wants to run it directly (needed by the launcher which runs long-lived
# `sleep infinity` sandbox containers — see tools/environments/docker.py).
# Otherwise we treat the args as a hermes subcommand and wrap with `hermes`,
# preserving the documented `docker run <image> <subcommand>` behavior.
if [ $# -gt 0 ] && command -v "$1" >/dev/null 2>&1; then
exec "$@"
fi
exec hermes "$@"
+1
View File
@@ -53,6 +53,7 @@ def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str)
try:
loop = asyncio.get_running_loop()
# We're in an async context -- need to run in thread
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(
handle_function_call, tool_name, arguments, task_id
-1
View File
@@ -36,7 +36,6 @@
imports = [
./nix/packages.nix
./nix/overlays.nix
./nix/nixosModules.nix
./nix/checks.nix
./nix/devShell.nix
+14 -67
View File
@@ -57,7 +57,7 @@ def _session_entry_name(origin: Dict[str, Any]) -> str:
# Build / refresh
# ---------------------------------------------------------------------------
async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
"""
Build a channel directory from connected platform adapters and session data.
@@ -72,7 +72,7 @@ async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
if platform == Platform.DISCORD:
platforms["discord"] = _build_discord(adapter)
elif platform == Platform.SLACK:
platforms["slack"] = await _build_slack(adapter)
platforms["slack"] = _build_slack(adapter)
except Exception as e:
logger.warning("Channel directory: failed to build %s: %s", platform.value, e)
@@ -136,66 +136,21 @@ def _build_discord(adapter) -> List[Dict[str, str]]:
return channels
async def _build_slack(adapter) -> List[Dict[str, Any]]:
"""List Slack channels the bot has joined across all workspaces.
Uses ``users.conversations`` against each workspace's web client. Pulls
public + private channels the bot is a member of, then merges in DMs
discovered from session history (IMs aren't useful to enumerate
proactively).
"""
team_clients = getattr(adapter, "_team_clients", None) or {}
if not team_clients:
def _build_slack(adapter) -> List[Dict[str, str]]:
"""List Slack channels the bot has joined."""
# Slack adapter may expose a web client
client = getattr(adapter, "_app", None) or getattr(adapter, "_client", None)
if not client:
return _build_from_sessions("slack")
channels: List[Dict[str, Any]] = []
seen_ids: set = set()
try:
from tools.send_message_tool import _send_slack # noqa: F401
# Use the Slack Web API directly if available
except Exception:
pass
for team_id, client in team_clients.items():
try:
cursor: Optional[str] = None
for _page in range(20): # safety cap on pagination
response = await client.users_conversations(
types="public_channel,private_channel",
exclude_archived=True,
limit=200,
cursor=cursor,
)
if not response.get("ok"):
logger.warning(
"Channel directory: users.conversations not ok for team %s: %s",
team_id,
response.get("error", "unknown"),
)
break
for ch in response.get("channels", []):
cid = ch.get("id")
name = ch.get("name")
if not cid or not name or cid in seen_ids:
continue
seen_ids.add(cid)
channels.append({
"id": cid,
"name": name,
"type": "private" if ch.get("is_private") else "channel",
})
cursor = (response.get("response_metadata") or {}).get("next_cursor")
if not cursor:
break
except Exception as e:
logger.warning(
"Channel directory: failed to list Slack channels for team %s: %s",
team_id, e,
)
continue
# Merge in DM/group entries discovered from session history.
for entry in _build_from_sessions("slack"):
if entry.get("id") not in seen_ids:
channels.append(entry)
seen_ids.add(entry.get("id"))
return channels
# Fallback to session data
return _build_from_sessions("slack")
def _build_from_sessions(platform_name: str) -> List[Dict[str, str]]:
@@ -268,14 +223,6 @@ def resolve_channel_name(platform_name: str, name: str) -> Optional[str]:
if not channels:
return None
# 0. Exact ID match — case-sensitive, no normalization. Lets callers pass
# raw platform IDs (e.g. Slack "C0B0QV5434G") even when the format guard
# in _parse_target_ref hasn't recognized them as explicit.
raw = name.strip()
for ch in channels:
if ch.get("id") == raw:
return ch["id"]
query = _normalize_channel_query(name)
# 1. Exact name match, including the display labels shown by send_message(action="list")
+9 -117
View File
@@ -67,7 +67,6 @@ class Platform(Enum):
WEIXIN = "weixin"
BLUEBUBBLES = "bluebubbles"
QQBOT = "qqbot"
YUANBAO = "yuanbao"
@dataclass
@@ -136,7 +135,7 @@ class SessionResetPolicy:
mode=mode if mode is not None else "both",
at_hour=at_hour if at_hour is not None else 4,
idle_minutes=idle_minutes if idle_minutes is not None else 1440,
notify=_coerce_bool(notify, True),
notify=notify if notify is not None else True,
notify_exclude_platforms=tuple(exclude) if exclude is not None else ("api_server", "webhook"),
)
@@ -179,7 +178,7 @@ class PlatformConfig:
home_channel = HomeChannel.from_dict(data["home_channel"])
return cls(
enabled=_coerce_bool(data.get("enabled"), False),
enabled=data.get("enabled", False),
token=data.get("token"),
api_key=data.get("api_key"),
home_channel=home_channel,
@@ -196,14 +195,6 @@ class StreamingConfig:
edit_interval: float = 1.0 # Seconds between message edits (Telegram rate-limits at ~1/s)
buffer_threshold: int = 40 # Chars before forcing an edit
cursor: str = "" # Cursor shown during streaming
# Ported from openclaw/openclaw#72038. When >0, the final edit for
# a long-running streamed response is delivered as a fresh message
# if the original preview has been visible for at least this many
# seconds, so the platform's visible timestamp reflects completion
# time instead of the preview creation time. Currently applied to
# Telegram only (other platforms ignore the setting). Default 60s
# matches the OpenClaw rollout. Set to 0 to disable.
fresh_final_after_seconds: float = 60.0
def to_dict(self) -> Dict[str, Any]:
return {
@@ -212,7 +203,6 @@ class StreamingConfig:
"edit_interval": self.edit_interval,
"buffer_threshold": self.buffer_threshold,
"cursor": self.cursor,
"fresh_final_after_seconds": self.fresh_final_after_seconds,
}
@classmethod
@@ -225,9 +215,6 @@ class StreamingConfig:
edit_interval=float(data.get("edit_interval", 1.0)),
buffer_threshold=int(data.get("buffer_threshold", 40)),
cursor=data.get("cursor", ""),
fresh_final_after_seconds=float(
data.get("fresh_final_after_seconds", 60.0)
),
)
@@ -327,9 +314,6 @@ class GatewayConfig:
# QQBot uses extra dict for app credentials
elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"):
connected.append(platform)
# Yuanbao uses extra dict for app credentials
elif platform == Platform.YUANBAO and config.extra.get("app_id") and config.extra.get("app_secret"):
connected.append(platform)
# DingTalk uses client_id/client_secret from config.extra or env vars
elif platform == Platform.DINGTALK and (
config.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID")
@@ -451,7 +435,7 @@ class GatewayConfig:
reset_triggers=data.get("reset_triggers", ["/new", "/reset"]),
quick_commands=quick_commands,
sessions_dir=sessions_dir,
always_log_local=_coerce_bool(data.get("always_log_local"), True),
always_log_local=data.get("always_log_local", True),
stt_enabled=_coerce_bool(stt_enabled, True),
group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
@@ -566,8 +550,6 @@ def load_gateway_config() -> GatewayConfig:
existing = {}
# Deep-merge extra dicts so gateway.json defaults survive
merged_extra = {**existing.get("extra", {}), **plat_block.get("extra", {})}
if plat_name == Platform.SLACK.value and "enabled" in plat_block:
merged_extra["_enabled_explicit"] = True
merged = {**existing, **plat_block}
if merged_extra:
merged["extra"] = merged_extra
@@ -588,23 +570,13 @@ def load_gateway_config() -> GatewayConfig:
)
if "reply_prefix" in platform_cfg:
bridged["reply_prefix"] = platform_cfg["reply_prefix"]
if "reply_in_thread" in platform_cfg:
bridged["reply_in_thread"] = platform_cfg["reply_in_thread"]
if "require_mention" in platform_cfg:
bridged["require_mention"] = platform_cfg["require_mention"]
if "free_response_channels" in platform_cfg:
bridged["free_response_channels"] = platform_cfg["free_response_channels"]
if "mention_patterns" in platform_cfg:
bridged["mention_patterns"] = platform_cfg["mention_patterns"]
if "dm_policy" in platform_cfg:
bridged["dm_policy"] = platform_cfg["dm_policy"]
if "allow_from" in platform_cfg:
bridged["allow_from"] = platform_cfg["allow_from"]
if "group_policy" in platform_cfg:
bridged["group_policy"] = platform_cfg["group_policy"]
if "group_allow_from" in platform_cfg:
bridged["group_allow_from"] = platform_cfg["group_allow_from"]
if plat in (Platform.DISCORD, Platform.SLACK) and "channel_skill_bindings" in platform_cfg:
if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
if "channel_prompts" in platform_cfg:
channel_prompts = platform_cfg["channel_prompts"]
@@ -612,21 +584,16 @@ def load_gateway_config() -> GatewayConfig:
bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
else:
bridged["channel_prompts"] = channel_prompts
enabled_was_explicit = "enabled" in platform_cfg
if not bridged and not enabled_was_explicit:
if not bridged:
continue
plat_data = platforms_data.setdefault(plat.value, {})
if not isinstance(plat_data, dict):
plat_data = {}
platforms_data[plat.value] = plat_data
if enabled_was_explicit:
plat_data["enabled"] = platform_cfg["enabled"]
extra = plat_data.setdefault("extra", {})
if not isinstance(extra, dict):
extra = {}
plat_data["extra"] = extra
if plat == Platform.SLACK and enabled_was_explicit:
extra["_enabled_explicit"] = True
extra.update(bridged)
# Slack settings → env vars (env vars take precedence)
@@ -634,8 +601,6 @@ def load_gateway_config() -> GatewayConfig:
if isinstance(slack_cfg, dict):
if "require_mention" in slack_cfg and not os.getenv("SLACK_REQUIRE_MENTION"):
os.environ["SLACK_REQUIRE_MENTION"] = str(slack_cfg["require_mention"]).lower()
if "strict_mention" in slack_cfg and not os.getenv("SLACK_STRICT_MENTION"):
os.environ["SLACK_STRICT_MENTION"] = str(slack_cfg["strict_mention"]).lower()
if "allow_bots" in slack_cfg and not os.getenv("SLACK_ALLOW_BOTS"):
os.environ["SLACK_ALLOW_BOTS"] = str(slack_cfg["allow_bots"]).lower()
frc = slack_cfg.get("free_response_channels")
@@ -643,8 +608,6 @@ def load_gateway_config() -> GatewayConfig:
if isinstance(frc, list):
frc = ",".join(str(v) for v in frc)
os.environ["SLACK_FREE_RESPONSE_CHANNELS"] = str(frc)
if "reactions" in slack_cfg and not os.getenv("SLACK_REACTIONS"):
os.environ["SLACK_REACTIONS"] = str(slack_cfg["reactions"]).lower()
# Discord settings → env vars (env vars take precedence)
discord_cfg = yaml_cfg.get("discord", {})
@@ -699,7 +662,8 @@ def load_gateway_config() -> GatewayConfig:
if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"):
os.environ["TELEGRAM_REQUIRE_MENTION"] = str(telegram_cfg["require_mention"]).lower()
if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"):
os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"])
import json as _json
os.environ["TELEGRAM_MENTION_PATTERNS"] = _json.dumps(telegram_cfg["mention_patterns"])
frc = telegram_cfg.get("free_response_chats")
if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"):
if isinstance(frc, list):
@@ -714,11 +678,6 @@ def load_gateway_config() -> GatewayConfig:
os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
if "group_allowed_chats" in telegram_cfg and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
gac = telegram_cfg["group_allowed_chats"]
if isinstance(gac, list):
gac = ",".join(str(v) for v in gac)
os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(gac)
if "disable_link_previews" in telegram_cfg:
plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
if not isinstance(plat_data, dict):
@@ -741,20 +700,6 @@ def load_gateway_config() -> GatewayConfig:
if isinstance(frc, list):
frc = ",".join(str(v) for v in frc)
os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc)
if "dm_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_DM_POLICY"):
os.environ["WHATSAPP_DM_POLICY"] = str(whatsapp_cfg["dm_policy"]).lower()
af = whatsapp_cfg.get("allow_from")
if af is not None and not os.getenv("WHATSAPP_ALLOWED_USERS"):
if isinstance(af, list):
af = ",".join(str(v) for v in af)
os.environ["WHATSAPP_ALLOWED_USERS"] = str(af)
if "group_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_GROUP_POLICY"):
os.environ["WHATSAPP_GROUP_POLICY"] = str(whatsapp_cfg["group_policy"]).lower()
gaf = whatsapp_cfg.get("group_allow_from")
if gaf is not None and not os.getenv("WHATSAPP_GROUP_ALLOWED_USERS"):
if isinstance(gaf, list):
gaf = ",".join(str(v) for v in gaf)
os.environ["WHATSAPP_GROUP_ALLOWED_USERS"] = str(gaf)
# DingTalk settings → env vars (env vars take precedence)
dingtalk_cfg = yaml_cfg.get("dingtalk", {})
@@ -945,20 +890,8 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
slack_token = os.getenv("SLACK_BOT_TOKEN")
if slack_token:
if Platform.SLACK not in config.platforms:
# No yaml config for Slack — env-only setup, enable it
config.platforms[Platform.SLACK] = PlatformConfig()
config.platforms[Platform.SLACK].enabled = True
else:
slack_config = config.platforms[Platform.SLACK]
enabled_was_explicit = bool(slack_config.extra.pop("_enabled_explicit", False))
if not slack_config.enabled and not enabled_was_explicit:
# Top-level Slack settings such as channel prompts should not
# turn an env-token setup into a disabled platform. Only an
# explicit slack.enabled/platforms.slack.enabled false should.
slack_config.enabled = True
# If yaml config exists, respect its enabled flag (don't override
# explicit enabled: false). Token is still stored so skills that
# send Slack messages can use it without activating the gateway adapter.
config.platforms[Platform.SLACK].enabled = True
config.platforms[Platform.SLACK].token = slack_token
slack_home = os.getenv("SLACK_HOME_CHANNEL")
if slack_home and Platform.SLACK in config.platforms:
@@ -1304,6 +1237,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
if legacy_home:
qq_home = legacy_home
qq_home_name_env = "QQ_HOME_CHANNEL_NAME"
import logging
logging.getLogger(__name__).warning(
"QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL "
"in your .env for consistency with the platform key."
@@ -1315,48 +1249,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"),
)
# Yuanbao — YUANBAO_APP_ID preferred
yuanbao_app_id = os.getenv("YUANBAO_APP_ID") or os.getenv("YUANBAO_APP_KEY")
yuanbao_app_secret = os.getenv("YUANBAO_APP_SECRET")
if yuanbao_app_id and yuanbao_app_secret:
if Platform.YUANBAO not in config.platforms:
config.platforms[Platform.YUANBAO] = PlatformConfig()
config.platforms[Platform.YUANBAO].enabled = True
extra = config.platforms[Platform.YUANBAO].extra
extra["app_id"] = yuanbao_app_id
extra["app_secret"] = yuanbao_app_secret
yuanbao_bot_id = os.getenv("YUANBAO_BOT_ID")
if yuanbao_bot_id:
extra["bot_id"] = yuanbao_bot_id
yuanbao_ws_url = os.getenv("YUANBAO_WS_URL")
if yuanbao_ws_url:
extra["ws_url"] = yuanbao_ws_url
yuanbao_api_domain = os.getenv("YUANBAO_API_DOMAIN")
if yuanbao_api_domain:
extra["api_domain"] = yuanbao_api_domain
yuanbao_route_env = os.getenv("YUANBAO_ROUTE_ENV")
if yuanbao_route_env:
extra["route_env"] = yuanbao_route_env
yuanbao_home = os.getenv("YUANBAO_HOME_CHANNEL")
if yuanbao_home:
config.platforms[Platform.YUANBAO].home_channel = HomeChannel(
platform=Platform.YUANBAO,
chat_id=yuanbao_home,
name=os.getenv("YUANBAO_HOME_CHANNEL_NAME", "Home"),
)
yuanbao_dm_policy = os.getenv("YUANBAO_DM_POLICY")
if yuanbao_dm_policy:
extra["dm_policy"] = yuanbao_dm_policy.strip().lower()
yuanbao_dm_allow_from = os.getenv("YUANBAO_DM_ALLOW_FROM")
if yuanbao_dm_allow_from:
extra["dm_allow_from"] = yuanbao_dm_allow_from
yuanbao_group_policy = os.getenv("YUANBAO_GROUP_POLICY")
if yuanbao_group_policy:
extra["group_policy"] = yuanbao_group_policy.strip().lower()
yuanbao_group_allow_from = os.getenv("YUANBAO_GROUP_ALLOW_FROM")
if yuanbao_group_allow_from:
extra["group_allow_from"] = yuanbao_group_allow_from
# Session settings
idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
if idle_minutes:
+1 -3
View File
@@ -79,9 +79,7 @@ _PLATFORM_DEFAULTS: dict[str, dict[str, Any]] = {
"discord": _TIER_HIGH,
# Tier 2 — edit support, often customer/workspace channels
# Slack: tool_progress off by default — Bolt posts cannot be edited like CLI;
# "new"/"all" spam permanent lines in channels (hermes-agent#14663).
"slack": {**_TIER_MEDIUM, "tool_progress": "off"},
"slack": _TIER_MEDIUM,
"mattermost": _TIER_MEDIUM,
"matrix": _TIER_MEDIUM,
"feishu": _TIER_MEDIUM,
+11 -44
View File
@@ -135,22 +135,9 @@ class HookRegistry:
except Exception as e:
print(f"[hooks] Error loading hook {hook_dir.name}: {e}", flush=True)
def _resolve_handlers(self, event_type: str) -> List[Callable]:
"""Return all handlers that should fire for ``event_type``.
Exact matches fire first, followed by wildcard matches (e.g.
``command:*`` matches ``command:reset``).
"""
handlers = list(self._handlers.get(event_type, []))
if ":" in event_type:
base = event_type.split(":")[0]
wildcard_key = f"{base}:*"
handlers.extend(self._handlers.get(wildcard_key, []))
return handlers
async def emit(self, event_type: str, context: Optional[Dict[str, Any]] = None) -> None:
"""
Fire all handlers registered for an event, discarding return values.
Fire all handlers registered for an event.
Supports wildcard matching: handlers registered for "command:*" will
fire for any "command:..." event. Handlers registered for a base type
@@ -164,7 +151,16 @@ class HookRegistry:
if context is None:
context = {}
for fn in self._resolve_handlers(event_type):
# Collect handlers: exact match + wildcard match
handlers = list(self._handlers.get(event_type, []))
# Check for wildcard patterns (e.g., "command:*" matches "command:reset")
if ":" in event_type:
base = event_type.split(":")[0]
wildcard_key = f"{base}:*"
handlers.extend(self._handlers.get(wildcard_key, []))
for fn in handlers:
try:
result = fn(event_type, context)
# Support both sync and async handlers
@@ -172,32 +168,3 @@ class HookRegistry:
await result
except Exception as e:
print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
async def emit_collect(
self,
event_type: str,
context: Optional[Dict[str, Any]] = None,
) -> List[Any]:
"""Fire handlers and return their non-None return values in order.
Like :meth:`emit` but captures each handler's return value. Used for
decision-style hooks (e.g. ``command:<name>`` policies that want to
allow/deny/rewrite the command before normal dispatch).
Exceptions from individual handlers are logged but do not abort the
remaining handlers.
"""
if context is None:
context = {}
results: List[Any] = []
for fn in self._resolve_handlers(event_type):
try:
result = fn(event_type, context)
if asyncio.iscoroutine(result):
result = await result
if result is not None:
results.append(result)
except Exception as e:
print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
return results
+11 -57
View File
@@ -28,7 +28,6 @@ def mirror_to_session(
message_text: str,
source_label: str = "cli",
thread_id: Optional[str] = None,
user_id: Optional[str] = None,
) -> bool:
"""
Append a delivery-mirror message to the target session's transcript.
@@ -40,20 +39,9 @@ def mirror_to_session(
All errors are caught -- this is never fatal.
"""
try:
session_id = _find_session_id(
platform,
str(chat_id),
thread_id=thread_id,
user_id=user_id,
)
session_id = _find_session_id(platform, str(chat_id), thread_id=thread_id)
if not session_id:
logger.debug(
"Mirror: no session found for %s:%s:%s:%s",
platform,
chat_id,
thread_id,
user_id,
)
logger.debug("Mirror: no session found for %s:%s:%s", platform, chat_id, thread_id)
return False
mirror_msg = {
@@ -71,33 +59,17 @@ def mirror_to_session(
return True
except Exception as e:
logger.debug(
"Mirror failed for %s:%s:%s:%s: %s",
platform,
chat_id,
thread_id,
user_id,
e,
)
logger.debug("Mirror failed for %s:%s:%s: %s", platform, chat_id, thread_id, e)
return False
def _find_session_id(
platform: str,
chat_id: str,
thread_id: Optional[str] = None,
user_id: Optional[str] = None,
) -> Optional[str]:
def _find_session_id(platform: str, chat_id: str, thread_id: Optional[str] = None) -> Optional[str]:
"""
Find the active session_id for a platform + chat_id pair.
Scans sessions.json entries and matches where origin.chat_id == chat_id
on the right platform. DM session keys don't embed the chat_id
(e.g. "agent:main:telegram:dm"), so we check the origin dict.
When *user_id* is provided, prefer exact sender matches. If multiple
same-chat candidates exist and none matches the user, return None instead
of guessing and contaminating another participant's session.
"""
if not _SESSIONS_INDEX.exists():
return None
@@ -109,7 +81,8 @@ def _find_session_id(
return None
platform_lower = platform.lower()
candidates = []
best_match = None
best_updated = ""
for _key, entry in data.items():
origin = entry.get("origin") or {}
@@ -123,31 +96,12 @@ def _find_session_id(
origin_thread_id = origin.get("thread_id")
if thread_id is not None and str(origin_thread_id or "") != str(thread_id):
continue
candidates.append(entry)
updated = entry.get("updated_at", "")
if updated > best_updated:
best_updated = updated
best_match = entry.get("session_id")
if not candidates:
return None
if user_id:
exact_user_matches = [
entry for entry in candidates
if str((entry.get("origin") or {}).get("user_id") or "") == str(user_id)
]
if exact_user_matches:
candidates = exact_user_matches
elif len(candidates) > 1:
return None
elif len(candidates) > 1:
distinct_user_ids = {
str((entry.get("origin") or {}).get("user_id") or "").strip()
for entry in candidates
if str((entry.get("origin") or {}).get("user_id") or "").strip()
}
if len(distinct_user_ids) > 1:
return None
best_entry = max(candidates, key=lambda entry: entry.get("updated_at", ""))
return best_entry.get("session_id")
return best_match
def _append_to_jsonl(session_id: str, message: dict) -> None:
-2
View File
@@ -10,12 +10,10 @@ Each adapter handles:
from .base import BasePlatformAdapter, MessageEvent, SendResult
from .qqbot import QQAdapter
from .yuanbao import YuanbaoAdapter
__all__ = [
"BasePlatformAdapter",
"MessageEvent",
"SendResult",
"QQAdapter",
"YuanbaoAdapter",
]
+82 -387
View File
@@ -9,7 +9,6 @@ Exposes an HTTP server with endpoints:
- GET /v1/models lists hermes-agent as an available model
- POST /v1/runs start a run, returns run_id immediately (202)
- GET /v1/runs/{run_id}/events SSE stream of structured lifecycle events
- POST /v1/runs/{run_id}/stop interrupt a running agent
- GET /health health check
- GET /health/detailed rich status for cross-container dashboard probing
@@ -118,160 +117,6 @@ def _normalize_chat_content(
return ""
# Content part type aliases used by the OpenAI Chat Completions and Responses
# APIs. We accept both spellings on input and emit a single canonical internal
# shape (``{"type": "text", ...}`` / ``{"type": "image_url", ...}``) that the
# rest of the agent pipeline already understands.
_TEXT_PART_TYPES = frozenset({"text", "input_text", "output_text"})
_IMAGE_PART_TYPES = frozenset({"image_url", "input_image"})
_FILE_PART_TYPES = frozenset({"file", "input_file"})
def _normalize_multimodal_content(content: Any) -> Any:
"""Validate and normalize multimodal content for the API server.
Returns a plain string when the content is text-only, or a list of
``{"type": "text"|"image_url", ...}`` parts when images are present.
The output shape is the native OpenAI Chat Completions vision format,
which the agent pipeline accepts verbatim (OpenAI-wire providers) or
converts (``_preprocess_anthropic_content`` for Anthropic).
Raises ``ValueError`` with an OpenAI-style code on invalid input:
* ``unsupported_content_type`` file/input_file/file_id parts, or
non-image ``data:`` URLs.
* ``invalid_image_url`` missing URL or unsupported scheme.
* ``invalid_content_part`` malformed text/image objects.
Callers translate the ValueError into a 400 response.
"""
# Scalar passthrough mirrors ``_normalize_chat_content``.
if content is None:
return ""
if isinstance(content, str):
return content[:MAX_NORMALIZED_TEXT_LENGTH] if len(content) > MAX_NORMALIZED_TEXT_LENGTH else content
if not isinstance(content, list):
# Mirror the legacy text-normalizer's fallback so callers that
# pre-existed image support still get a string back.
return _normalize_chat_content(content)
items = content[:MAX_CONTENT_LIST_SIZE] if len(content) > MAX_CONTENT_LIST_SIZE else content
normalized_parts: List[Dict[str, Any]] = []
text_accum_len = 0
for part in items:
if isinstance(part, str):
if part:
trimmed = part[:MAX_NORMALIZED_TEXT_LENGTH]
normalized_parts.append({"type": "text", "text": trimmed})
text_accum_len += len(trimmed)
continue
if not isinstance(part, dict):
# Ignore unknown scalars for forward compatibility with future
# Responses API additions (e.g. ``refusal``). The same policy
# the text normalizer applies.
continue
raw_type = part.get("type")
part_type = str(raw_type or "").strip().lower()
if part_type in _TEXT_PART_TYPES:
text = part.get("text")
if text is None:
continue
if not isinstance(text, str):
text = str(text)
if text:
trimmed = text[:MAX_NORMALIZED_TEXT_LENGTH]
normalized_parts.append({"type": "text", "text": trimmed})
text_accum_len += len(trimmed)
continue
if part_type in _IMAGE_PART_TYPES:
detail = part.get("detail")
image_ref = part.get("image_url")
# OpenAI Responses sends ``input_image`` with a top-level
# ``image_url`` string; Chat Completions sends ``image_url`` as
# ``{"url": "...", "detail": "..."}``. Support both.
if isinstance(image_ref, dict):
url_value = image_ref.get("url")
detail = image_ref.get("detail", detail)
else:
url_value = image_ref
if not isinstance(url_value, str) or not url_value.strip():
raise ValueError("invalid_image_url:Image parts must include a non-empty image URL.")
url_value = url_value.strip()
lowered = url_value.lower()
if lowered.startswith("data:"):
if not lowered.startswith("data:image/") or "," not in url_value:
raise ValueError(
"unsupported_content_type:Only image data URLs are supported. "
"Non-image data payloads are not supported."
)
elif not (lowered.startswith("http://") or lowered.startswith("https://")):
raise ValueError(
"invalid_image_url:Image inputs must use http(s) URLs or data:image/... URLs."
)
image_part: Dict[str, Any] = {"type": "image_url", "image_url": {"url": url_value}}
if detail is not None:
if not isinstance(detail, str) or not detail.strip():
raise ValueError("invalid_content_part:Image detail must be a non-empty string when provided.")
image_part["image_url"]["detail"] = detail.strip()
normalized_parts.append(image_part)
continue
if part_type in _FILE_PART_TYPES:
raise ValueError(
"unsupported_content_type:Inline image inputs are supported, "
"but uploaded files and document inputs are not supported on this endpoint."
)
# Unknown part type — reject explicitly so clients get a clear error
# instead of a silently dropped turn.
raise ValueError(
f"unsupported_content_type:Unsupported content part type {raw_type!r}. "
"Only text and image_url/input_image parts are supported."
)
if not normalized_parts:
return ""
# Text-only: collapse to a plain string so downstream logging/trajectory
# code sees the native shape and prompt caching on text-only turns is
# unaffected.
if all(p.get("type") == "text" for p in normalized_parts):
return "\n".join(p["text"] for p in normalized_parts if p.get("text"))
return normalized_parts
def _content_has_visible_payload(content: Any) -> bool:
"""True when content has any text or image attachment. Used to reject empty turns."""
if isinstance(content, str):
return bool(content.strip())
if isinstance(content, list):
for part in content:
if isinstance(part, dict):
ptype = str(part.get("type") or "").strip().lower()
if ptype in _TEXT_PART_TYPES and str(part.get("text") or "").strip():
return True
if ptype in _IMAGE_PART_TYPES:
return True
return False
def _multimodal_validation_error(exc: ValueError, *, param: str) -> "web.Response":
"""Translate a ``_normalize_multimodal_content`` ValueError into a 400 response."""
raw = str(exc)
code, _, message = raw.partition(":")
if not message:
code, message = "invalid_content_part", raw
return web.json_response(
_openai_error(message, code=code, param=param),
status=400,
)
def check_api_server_requirements() -> bool:
"""Check if API server dependencies are available."""
return AIOHTTP_AVAILABLE
@@ -324,6 +169,7 @@ class ResponseStore:
).fetchone()
if row is None:
return None
import time
self._conn.execute(
"UPDATE responses SET accessed_at = ? WHERE response_id = ?",
(time.time(), response_id),
@@ -333,6 +179,7 @@ class ResponseStore:
def put(self, response_id: str, data: Dict[str, Any]) -> None:
"""Store a response, evicting the oldest if at capacity."""
import time
self._conn.execute(
"INSERT OR REPLACE INTO responses (response_id, data, accessed_at) VALUES (?, ?, ?)",
(response_id, json.dumps(data, default=str), time.time()),
@@ -468,12 +315,12 @@ class _IdempotencyCache:
def __init__(self, max_items: int = 1000, ttl_seconds: int = 300):
from collections import OrderedDict
self._store = OrderedDict()
self._inflight: Dict[tuple[str, str], "asyncio.Task[Any]"] = {}
self._ttl = ttl_seconds
self._max = max_items
def _purge(self):
now = time.time()
import time as _t
now = _t.time()
expired = [k for k, v in self._store.items() if now - v["ts"] > self._ttl]
for k in expired:
self._store.pop(k, None)
@@ -485,27 +332,11 @@ class _IdempotencyCache:
item = self._store.get(key)
if item and item["fp"] == fingerprint:
return item["resp"]
inflight_key = (key, fingerprint)
task = self._inflight.get(inflight_key)
if task is None:
async def _compute_and_store():
resp = await compute_coro()
import time as _t
self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
self._purge()
return resp
task = asyncio.create_task(_compute_and_store())
self._inflight[inflight_key] = task
def _clear_inflight(done_task: "asyncio.Task[Any]") -> None:
if self._inflight.get(inflight_key) is done_task:
self._inflight.pop(inflight_key, None)
task.add_done_callback(_clear_inflight)
return await asyncio.shield(task)
resp = await compute_coro()
import time as _t
self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
self._purge()
return resp
_idem_cache = _IdempotencyCache()
@@ -535,30 +366,6 @@ def _derive_chat_session_id(
return f"api-{digest}"
_CRON_AVAILABLE = False
try:
from cron.jobs import (
list_jobs as _cron_list,
get_job as _cron_get,
create_job as _cron_create,
update_job as _cron_update,
remove_job as _cron_remove,
pause_job as _cron_pause,
resume_job as _cron_resume,
trigger_job as _cron_trigger,
)
_CRON_AVAILABLE = True
except ImportError:
_cron_list = None
_cron_get = None
_cron_create = None
_cron_update = None
_cron_remove = None
_cron_pause = None
_cron_resume = None
_cron_trigger = None
class APIServerAdapter(BasePlatformAdapter):
"""
OpenAI-compatible HTTP API server adapter.
@@ -587,9 +394,6 @@ class APIServerAdapter(BasePlatformAdapter):
self._run_streams: Dict[str, "asyncio.Queue[Optional[Dict]]"] = {}
# Creation timestamps for orphaned-run TTL sweep
self._run_streams_created: Dict[str, float] = {}
# Active run agent/task references for stop support
self._active_run_agents: Dict[str, Any] = {}
self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
self._session_db: Optional[Any] = None # Lazy-init SessionDB for session continuity
@staticmethod
@@ -833,32 +637,26 @@ class APIServerAdapter(BasePlatformAdapter):
system_prompt = None
conversation_messages: List[Dict[str, str]] = []
for idx, msg in enumerate(messages):
for msg in messages:
role = msg.get("role", "")
raw_content = msg.get("content", "")
content = _normalize_chat_content(msg.get("content", ""))
if role == "system":
# System messages don't support images (Anthropic rejects, OpenAI
# text-model systems don't render them). Flatten to text.
content = _normalize_chat_content(raw_content)
# Accumulate system messages
if system_prompt is None:
system_prompt = content
else:
system_prompt = system_prompt + "\n" + content
elif role in ("user", "assistant"):
try:
content = _normalize_multimodal_content(raw_content)
except ValueError as exc:
return _multimodal_validation_error(exc, param=f"messages[{idx}].content")
conversation_messages.append({"role": role, "content": content})
# Extract the last user message as the primary input
user_message: Any = ""
user_message = ""
history = []
if conversation_messages:
user_message = conversation_messages[-1].get("content", "")
history = conversation_messages[:-1]
if not _content_has_visible_payload(user_message):
if not user_message:
return web.json_response(
{"error": {"message": "No user message found in messages", "type": "invalid_request_error"}},
status=400,
@@ -1208,12 +1006,10 @@ class APIServerAdapter(BasePlatformAdapter):
If the client disconnects mid-stream, ``agent.interrupt()`` is
called so the agent stops issuing upstream LLM calls, then the
asyncio task is cancelled. When ``store=True`` an initial
``in_progress`` snapshot is persisted immediately after
``response.created`` and disconnects update it to an
``incomplete`` snapshot so GET /v1/responses/{id} and
``previous_response_id`` chaining still have something to
recover from.
asyncio task is cancelled. When ``store=True`` the full response
is persisted to the ResponseStore in a ``finally`` block so GET
/v1/responses/{id} and ``previous_response_id`` chaining work the
same as the batch path.
"""
import queue as _q
@@ -1275,60 +1071,6 @@ class APIServerAdapter(BasePlatformAdapter):
final_response_text = ""
agent_error: Optional[str] = None
usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
terminal_snapshot_persisted = False
def _persist_response_snapshot(
response_env: Dict[str, Any],
*,
conversation_history_snapshot: Optional[List[Dict[str, Any]]] = None,
) -> None:
if not store:
return
if conversation_history_snapshot is None:
conversation_history_snapshot = list(conversation_history)
conversation_history_snapshot.append({"role": "user", "content": user_message})
self._response_store.put(response_id, {
"response": response_env,
"conversation_history": conversation_history_snapshot,
"instructions": instructions,
"session_id": session_id,
})
if conversation:
self._response_store.set_conversation(conversation, response_id)
def _persist_incomplete_if_needed() -> None:
"""Persist an ``incomplete`` snapshot if no terminal one was written.
Called from both the client-disconnect (``ConnectionResetError``)
and server-cancellation (``asyncio.CancelledError``) paths so
GET /v1/responses/{id} and ``previous_response_id`` chaining keep
working after abrupt stream termination.
"""
if not store or terminal_snapshot_persisted:
return
incomplete_text = "".join(final_text_parts) or final_response_text
incomplete_items: List[Dict[str, Any]] = list(emitted_items)
if incomplete_text:
incomplete_items.append({
"type": "message",
"role": "assistant",
"content": [{"type": "output_text", "text": incomplete_text}],
})
incomplete_env = _envelope("incomplete")
incomplete_env["output"] = incomplete_items
incomplete_env["usage"] = {
"input_tokens": usage.get("input_tokens", 0),
"output_tokens": usage.get("output_tokens", 0),
"total_tokens": usage.get("total_tokens", 0),
}
incomplete_history = list(conversation_history)
incomplete_history.append({"role": "user", "content": user_message})
if incomplete_text:
incomplete_history.append({"role": "assistant", "content": incomplete_text})
_persist_response_snapshot(
incomplete_env,
conversation_history_snapshot=incomplete_history,
)
try:
# response.created — initial envelope, status=in_progress
@@ -1338,7 +1080,6 @@ class APIServerAdapter(BasePlatformAdapter):
"type": "response.created",
"response": created_env,
})
_persist_response_snapshot(created_env)
last_activity = time.monotonic()
async def _open_message_item() -> None:
@@ -1595,18 +1336,6 @@ class APIServerAdapter(BasePlatformAdapter):
"output_tokens": usage.get("output_tokens", 0),
"total_tokens": usage.get("total_tokens", 0),
}
_failed_history = list(conversation_history)
_failed_history.append({"role": "user", "content": user_message})
if final_response_text or agent_error:
_failed_history.append({
"role": "assistant",
"content": final_response_text or agent_error,
})
_persist_response_snapshot(
failed_env,
conversation_history_snapshot=_failed_history,
)
terminal_snapshot_persisted = True
await _write_event("response.failed", {
"type": "response.failed",
"response": failed_env,
@@ -1619,24 +1348,30 @@ class APIServerAdapter(BasePlatformAdapter):
"output_tokens": usage.get("output_tokens", 0),
"total_tokens": usage.get("total_tokens", 0),
}
full_history = list(conversation_history)
full_history.append({"role": "user", "content": user_message})
if isinstance(result, dict) and result.get("messages"):
full_history.extend(result["messages"])
else:
full_history.append({"role": "assistant", "content": final_response_text})
_persist_response_snapshot(
completed_env,
conversation_history_snapshot=full_history,
)
terminal_snapshot_persisted = True
await _write_event("response.completed", {
"type": "response.completed",
"response": completed_env,
})
# Persist for future chaining / GET retrieval, mirroring
# the batch path behavior.
if store:
full_history = list(conversation_history)
full_history.append({"role": "user", "content": user_message})
if isinstance(result, dict) and result.get("messages"):
full_history.extend(result["messages"])
else:
full_history.append({"role": "assistant", "content": final_response_text})
self._response_store.put(response_id, {
"response": completed_env,
"conversation_history": full_history,
"instructions": instructions,
"session_id": session_id,
})
if conversation:
self._response_store.set_conversation(conversation, response_id)
except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError):
_persist_incomplete_if_needed()
# Client disconnected — interrupt the agent so it stops
# making upstream LLM calls, then cancel the task.
agent = agent_ref[0] if agent_ref else None
@@ -1652,22 +1387,6 @@ class APIServerAdapter(BasePlatformAdapter):
except (asyncio.CancelledError, Exception):
pass
logger.info("SSE client disconnected; interrupted agent task %s", response_id)
except asyncio.CancelledError:
# Server-side cancellation (e.g. shutdown, request timeout) —
# persist an incomplete snapshot so GET /v1/responses/{id} and
# previous_response_id chaining still work, then re-raise so the
# runtime's cancellation semantics are respected.
_persist_incomplete_if_needed()
agent = agent_ref[0] if agent_ref else None
if agent is not None:
try:
agent.interrupt("SSE task cancelled")
except Exception:
pass
if not agent_task.done():
agent_task.cancel()
logger.info("SSE task cancelled; persisted incomplete snapshot for %s", response_id)
raise
return response
@@ -1705,19 +1424,16 @@ class APIServerAdapter(BasePlatformAdapter):
# No error if conversation doesn't exist yet — it's a new conversation
# Normalize input to message list
input_messages: List[Dict[str, Any]] = []
input_messages: List[Dict[str, str]] = []
if isinstance(raw_input, str):
input_messages = [{"role": "user", "content": raw_input}]
elif isinstance(raw_input, list):
for idx, item in enumerate(raw_input):
for item in raw_input:
if isinstance(item, str):
input_messages.append({"role": "user", "content": item})
elif isinstance(item, dict):
role = item.get("role", "user")
try:
content = _normalize_multimodal_content(item.get("content", ""))
except ValueError as exc:
return _multimodal_validation_error(exc, param=f"input[{idx}].content")
content = _normalize_chat_content(item.get("content", ""))
input_messages.append({"role": role, "content": content})
else:
return web.json_response(_openai_error("'input' must be a string or array"), status=400)
@@ -1726,7 +1442,7 @@ class APIServerAdapter(BasePlatformAdapter):
# This lets stateless clients supply their own history instead of
# relying on server-side response chaining via previous_response_id.
# Precedence: explicit conversation_history > previous_response_id.
conversation_history: List[Dict[str, Any]] = []
conversation_history: List[Dict[str, str]] = []
raw_history = body.get("conversation_history")
if raw_history:
if not isinstance(raw_history, list):
@@ -1740,11 +1456,7 @@ class APIServerAdapter(BasePlatformAdapter):
_openai_error(f"conversation_history[{i}] must have 'role' and 'content' fields"),
status=400,
)
try:
entry_content = _normalize_multimodal_content(entry["content"])
except ValueError as exc:
return _multimodal_validation_error(exc, param=f"conversation_history[{i}].content")
conversation_history.append({"role": str(entry["role"]), "content": entry_content})
conversation_history.append({"role": str(entry["role"]), "content": str(entry["content"])})
if previous_response_id:
logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")
@@ -1764,8 +1476,8 @@ class APIServerAdapter(BasePlatformAdapter):
conversation_history.append(msg)
# Last input message is the user_message
user_message: Any = input_messages[-1].get("content", "") if input_messages else ""
if not _content_has_visible_payload(user_message):
user_message = input_messages[-1].get("content", "") if input_messages else ""
if not user_message:
return web.json_response(_openai_error("No user message found in input"), status=400)
# Truncation support
@@ -1970,16 +1682,44 @@ class APIServerAdapter(BasePlatformAdapter):
# Cron jobs API
# ------------------------------------------------------------------
# Check cron module availability once (not per-request)
_CRON_AVAILABLE = False
try:
from cron.jobs import (
list_jobs as _cron_list,
get_job as _cron_get,
create_job as _cron_create,
update_job as _cron_update,
remove_job as _cron_remove,
pause_job as _cron_pause,
resume_job as _cron_resume,
trigger_job as _cron_trigger,
)
# Wrap as staticmethod to prevent descriptor binding — these are plain
# module functions, not instance methods. Without this, self._cron_*()
# injects ``self`` as the first positional argument and every call
# raises TypeError.
_cron_list = staticmethod(_cron_list)
_cron_get = staticmethod(_cron_get)
_cron_create = staticmethod(_cron_create)
_cron_update = staticmethod(_cron_update)
_cron_remove = staticmethod(_cron_remove)
_cron_pause = staticmethod(_cron_pause)
_cron_resume = staticmethod(_cron_resume)
_cron_trigger = staticmethod(_cron_trigger)
_CRON_AVAILABLE = True
except ImportError:
pass
_JOB_ID_RE = __import__("re").compile(r"[a-f0-9]{12}")
# Allowed fields for update — prevents clients injecting arbitrary keys
_UPDATE_ALLOWED_FIELDS = {"name", "schedule", "prompt", "deliver", "skills", "skill", "repeat", "enabled"}
_MAX_NAME_LENGTH = 200
_MAX_PROMPT_LENGTH = 5000
@staticmethod
def _check_jobs_available() -> Optional["web.Response"]:
def _check_jobs_available(self) -> Optional["web.Response"]:
"""Return error response if cron module isn't available."""
if not _CRON_AVAILABLE:
if not self._CRON_AVAILABLE:
return web.json_response(
{"error": "Cron module not available"}, status=501,
)
@@ -2004,7 +1744,7 @@ class APIServerAdapter(BasePlatformAdapter):
return cron_err
try:
include_disabled = request.query.get("include_disabled", "").lower() in ("true", "1")
jobs = _cron_list(include_disabled=include_disabled)
jobs = self._cron_list(include_disabled=include_disabled)
return web.json_response({"jobs": jobs})
except Exception as e:
return web.json_response({"error": str(e)}, status=500)
@@ -2052,7 +1792,7 @@ class APIServerAdapter(BasePlatformAdapter):
if repeat is not None:
kwargs["repeat"] = repeat
job = _cron_create(**kwargs)
job = self._cron_create(**kwargs)
return web.json_response({"job": job})
except Exception as e:
return web.json_response({"error": str(e)}, status=500)
@@ -2069,7 +1809,7 @@ class APIServerAdapter(BasePlatformAdapter):
if id_err:
return id_err
try:
job = _cron_get(job_id)
job = self._cron_get(job_id)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"job": job})
@@ -2102,7 +1842,7 @@ class APIServerAdapter(BasePlatformAdapter):
return web.json_response(
{"error": f"Prompt must be ≤ {self._MAX_PROMPT_LENGTH} characters"}, status=400,
)
job = _cron_update(job_id, sanitized)
job = self._cron_update(job_id, sanitized)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"job": job})
@@ -2121,7 +1861,7 @@ class APIServerAdapter(BasePlatformAdapter):
if id_err:
return id_err
try:
success = _cron_remove(job_id)
success = self._cron_remove(job_id)
if not success:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"ok": True})
@@ -2140,7 +1880,7 @@ class APIServerAdapter(BasePlatformAdapter):
if id_err:
return id_err
try:
job = _cron_pause(job_id)
job = self._cron_pause(job_id)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"job": job})
@@ -2159,7 +1899,7 @@ class APIServerAdapter(BasePlatformAdapter):
if id_err:
return id_err
try:
job = _cron_resume(job_id)
job = self._cron_resume(job_id)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"job": job})
@@ -2178,7 +1918,7 @@ class APIServerAdapter(BasePlatformAdapter):
if id_err:
return id_err
try:
job = _cron_trigger(job_id)
job = self._cron_trigger(job_id)
if not job:
return web.json_response({"error": "Job not found"}, status=404)
return web.json_response({"job": job})
@@ -2445,7 +2185,6 @@ class APIServerAdapter(BasePlatformAdapter):
stream_delta_callback=_text_cb,
tool_progress_callback=event_cb,
)
self._active_run_agents[run_id] = agent
def _run_sync():
r = agent.run_conversation(
user_message=user_message,
@@ -2485,11 +2224,8 @@ class APIServerAdapter(BasePlatformAdapter):
q.put_nowait(None)
except Exception:
pass
self._active_run_agents.pop(run_id, None)
self._active_run_tasks.pop(run_id, None)
task = asyncio.create_task(_run_and_close())
self._active_run_tasks[run_id] = task
try:
self._background_tasks.add(task)
except TypeError:
@@ -2548,44 +2284,6 @@ class APIServerAdapter(BasePlatformAdapter):
return response
async def _handle_stop_run(self, request: "web.Request") -> "web.Response":
"""POST /v1/runs/{run_id}/stop — interrupt a running agent."""
auth_err = self._check_auth(request)
if auth_err:
return auth_err
run_id = request.match_info["run_id"]
agent = self._active_run_agents.get(run_id)
task = self._active_run_tasks.get(run_id)
if agent is None and task is None:
return web.json_response(_openai_error(f"Run not found: {run_id}", code="run_not_found"), status=404)
if agent is not None:
try:
agent.interrupt("Stop requested via API")
except Exception:
pass
if task is not None and not task.done():
task.cancel()
# Bounded wait: run_conversation() executes in the default
# executor thread which task.cancel() cannot preempt — we rely on
# agent.interrupt() above to break the loop. Cap the wait so a
# slow/unresponsive interrupt can't hang this handler.
try:
await asyncio.wait_for(asyncio.shield(task), timeout=5.0)
except asyncio.TimeoutError:
logger.warning(
"[api_server] stop for run %s timed out after 5s; "
"agent may still be finishing the current step",
run_id,
)
except (asyncio.CancelledError, Exception):
pass
return web.json_response({"run_id": run_id, "status": "stopping"})
async def _sweep_orphaned_runs(self) -> None:
"""Periodically clean up run streams that were never consumed."""
while True:
@@ -2600,8 +2298,6 @@ class APIServerAdapter(BasePlatformAdapter):
logger.debug("[api_server] sweeping orphaned run %s", run_id)
self._run_streams.pop(run_id, None)
self._run_streams_created.pop(run_id, None)
self._active_run_agents.pop(run_id, None)
self._active_run_tasks.pop(run_id, None)
# ------------------------------------------------------------------
# BasePlatformAdapter interface
@@ -2637,7 +2333,6 @@ class APIServerAdapter(BasePlatformAdapter):
# Structured event streaming
self._app.router.add_post("/v1/runs", self._handle_runs)
self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
# Start background sweep to clean up orphaned (unconsumed) run streams
sweep_task = asyncio.create_task(self._sweep_orphaned_runs())
try:
+60 -720
View File
File diff suppressed because it is too large Load Diff
+3 -20
View File
@@ -75,7 +75,7 @@ def _redact(text: str) -> str:
def check_bluebubbles_requirements() -> bool:
try:
import aiohttp # noqa: F401
import httpx # noqa: F401
import httpx as _httpx # noqa: F401
except ImportError:
return False
return True
@@ -99,7 +99,6 @@ def _normalize_server_url(raw: str) -> str:
class BlueBubblesAdapter(BasePlatformAdapter):
platform = Platform.BLUEBUBBLES
SUPPORTS_MESSAGE_EDITING = False
MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH
def __init__(self, config: PlatformConfig):
@@ -392,13 +391,6 @@ class BlueBubblesAdapter(BasePlatformAdapter):
# Text sending
# ------------------------------------------------------------------
@staticmethod
def truncate_message(content: str, max_length: int = MAX_TEXT_LENGTH) -> List[str]:
# Use the base splitter but skip pagination indicators — iMessage
# bubbles flow naturally without "(1/3)" suffixes.
chunks = BasePlatformAdapter.truncate_message(content, max_length)
return [re.sub(r"\s*\(\d+/\d+\)$", "", c) for c in chunks]
async def send(
self,
chat_id: str,
@@ -406,19 +398,10 @@ class BlueBubblesAdapter(BasePlatformAdapter):
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
text = self.format_message(content)
text = strip_markdown(content or "")
if not text:
return SendResult(success=False, error="BlueBubbles send requires text")
# Split on paragraph breaks first (double newlines) so each thought
# becomes its own iMessage bubble, then truncate any that are still
# too long.
paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()]
chunks: List[str] = []
for para in (paragraphs or [text]):
if len(para) <= self.MAX_MESSAGE_LENGTH:
chunks.append(para)
else:
chunks.extend(self.truncate_message(para, max_length=self.MAX_MESSAGE_LENGTH))
chunks = self.truncate_message(text, max_length=self.MAX_MESSAGE_LENGTH)
last = SendResult(success=True)
for chunk in chunks:
guid = await self._resolve_chat_guid(chat_id)
+115 -367
View File
@@ -23,7 +23,6 @@ from typing import Callable, Dict, Optional, Any
logger = logging.getLogger(__name__)
VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}
try:
import discord
@@ -499,7 +498,6 @@ class DiscordAdapter(BasePlatformAdapter):
self._allowed_role_ids: set = set() # For DISCORD_ALLOWED_ROLES filtering
# Voice channel state (per-guild)
self._voice_clients: Dict[int, Any] = {} # guild_id -> VoiceClient
self._voice_locks: Dict[int, asyncio.Lock] = {} # guild_id -> serialize join/leave
# Text batching: merge rapid successive messages (Telegram-style)
self._text_batch_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS", "0.6"))
self._text_batch_split_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0"))
@@ -528,7 +526,6 @@ class DiscordAdapter(BasePlatformAdapter):
# Reply threading mode: "off" (no replies), "first" (reply on first
# chunk only, default), "all" (reply-reference on every chunk).
self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
self._slash_commands: bool = self.config.extra.get("slash_commands", True)
async def connect(self) -> bool:
"""Connect to Discord and start receiving events."""
@@ -543,6 +540,7 @@ class DiscordAdapter(BasePlatformAdapter):
# ctypes.util.find_library fails on macOS with Homebrew-installed libs,
# so fall back to known Homebrew paths if needed.
if not opus_path:
import sys
_homebrew_paths = (
"/opt/homebrew/lib/libopus.dylib", # Apple Silicon
"/usr/local/lib/libopus.dylib", # Intel Mac
@@ -638,15 +636,6 @@ class DiscordAdapter(BasePlatformAdapter):
@self._client.event
async def on_message(message: DiscordMessage):
# Block until _resolve_allowed_usernames has swapped
# any raw usernames in DISCORD_ALLOWED_USERS for numeric
# IDs (otherwise on_message's author.id lookup can miss).
if not adapter_self._ready_event.is_set():
try:
await asyncio.wait_for(adapter_self._ready_event.wait(), timeout=30.0)
except asyncio.TimeoutError:
pass
# Dedup: Discord RESUME replays events after reconnects (#4777)
if adapter_self._dedup.is_duplicate(str(message.id)):
return
@@ -746,8 +735,7 @@ class DiscordAdapter(BasePlatformAdapter):
)
# Register slash commands
if self._slash_commands:
self._register_slash_commands()
self._register_slash_commands()
# Start the bot in background
self._bot_task = asyncio.create_task(self._client.start(self.config.token))
@@ -803,27 +791,8 @@ class DiscordAdapter(BasePlatformAdapter):
if not self._client:
return
try:
sync_policy = self._get_discord_command_sync_policy()
if sync_policy == "off":
logger.info("[%s] Skipping Discord slash command sync (policy=off)", self.name)
return
if sync_policy == "bulk":
synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
return
summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
logger.info(
"[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
self.name,
summary["total"],
summary["unchanged"],
summary["updated"],
summary["recreated"],
summary["created"],
summary["deleted"],
)
synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
logger.info("[%s] Synced %d slash command(s)", self.name, len(synced))
except asyncio.TimeoutError:
logger.warning("[%s] Slash command sync timed out after 30s", self.name)
except asyncio.CancelledError:
@@ -831,183 +800,6 @@ class DiscordAdapter(BasePlatformAdapter):
except Exception as e: # pragma: no cover - defensive logging
logger.warning("[%s] Slash command sync failed: %s", self.name, e, exc_info=True)
def _get_discord_command_sync_policy(self) -> str:
raw = str(os.getenv("DISCORD_COMMAND_SYNC_POLICY", "safe") or "").strip().lower()
if raw in _DISCORD_COMMAND_SYNC_POLICIES:
return raw
if raw:
logger.warning(
"[%s] Invalid DISCORD_COMMAND_SYNC_POLICY=%r; falling back to 'safe'",
self.name,
raw,
)
return "safe"
def _canonicalize_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
"""Reduce command payloads to the semantic fields Hermes manages."""
contexts = payload.get("contexts")
integration_types = payload.get("integration_types")
return {
"type": int(payload.get("type", 1) or 1),
"name": str(payload.get("name", "") or ""),
"description": str(payload.get("description", "") or ""),
"default_member_permissions": self._normalize_permissions(
payload.get("default_member_permissions")
),
"dm_permission": bool(payload.get("dm_permission", True)),
"nsfw": bool(payload.get("nsfw", False)),
"contexts": sorted(int(c) for c in contexts) if contexts else None,
"integration_types": (
sorted(int(i) for i in integration_types) if integration_types else None
),
"options": [
self._canonicalize_app_command_option(item)
for item in payload.get("options", []) or []
if isinstance(item, dict)
],
}
@staticmethod
def _normalize_permissions(value: Any) -> Optional[str]:
"""Discord emits default_member_permissions as str server-side but discord.py
sets it as int locally. Normalize to str-or-None so the comparison is stable."""
if value is None:
return None
return str(value)
def _existing_command_to_payload(self, command: Any) -> Dict[str, Any]:
"""Build a canonical-ready dict from an AppCommand.
discord.py's AppCommand.to_dict() does NOT include nsfw,
dm_permission, or default_member_permissions (they live only on the
attributes). Pull them from the attributes so the canonicalizer sees
the real server-side values instead of defaults otherwise any
command using non-default permissions would diff on every startup.
"""
payload = dict(command.to_dict())
nsfw = getattr(command, "nsfw", None)
if nsfw is not None:
payload["nsfw"] = bool(nsfw)
guild_only = getattr(command, "guild_only", None)
if guild_only is not None:
payload["dm_permission"] = not bool(guild_only)
default_permissions = getattr(command, "default_member_permissions", None)
if default_permissions is not None:
payload["default_member_permissions"] = getattr(
default_permissions, "value", default_permissions
)
return payload
def _canonicalize_app_command_option(self, payload: Dict[str, Any]) -> Dict[str, Any]:
return {
"type": int(payload.get("type", 0) or 0),
"name": str(payload.get("name", "") or ""),
"description": str(payload.get("description", "") or ""),
"required": bool(payload.get("required", False)),
"autocomplete": bool(payload.get("autocomplete", False)),
"choices": [
{
"name": str(choice.get("name", "") or ""),
"value": choice.get("value"),
}
for choice in payload.get("choices", []) or []
if isinstance(choice, dict)
],
"channel_types": list(payload.get("channel_types", []) or []),
"min_value": payload.get("min_value"),
"max_value": payload.get("max_value"),
"min_length": payload.get("min_length"),
"max_length": payload.get("max_length"),
"options": [
self._canonicalize_app_command_option(item)
for item in payload.get("options", []) or []
if isinstance(item, dict)
],
}
def _patchable_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
"""Fields supported by discord.py's edit_global_command route."""
canonical = self._canonicalize_app_command_payload(payload)
return {
"name": canonical["name"],
"description": canonical["description"],
"options": canonical["options"],
}
async def _safe_sync_slash_commands(self) -> Dict[str, int]:
"""Diff existing global commands and only mutate the commands that changed."""
if not self._client:
return {
"total": 0,
"unchanged": 0,
"updated": 0,
"recreated": 0,
"created": 0,
"deleted": 0,
}
tree = self._client.tree
app_id = getattr(self._client, "application_id", None) or getattr(getattr(self._client, "user", None), "id", None)
if not app_id:
raise RuntimeError("Discord application ID is unavailable for slash command sync")
desired_payloads = [command.to_dict(tree) for command in tree.get_commands()]
desired_by_key = {
(int(payload.get("type", 1) or 1), str(payload.get("name", "") or "").lower()): payload
for payload in desired_payloads
}
existing_commands = await tree.fetch_commands()
existing_by_key = {
(
int(getattr(getattr(command, "type", None), "value", getattr(command, "type", 1)) or 1),
str(command.name or "").lower(),
): command
for command in existing_commands
}
unchanged = 0
updated = 0
recreated = 0
created = 0
deleted = 0
http = self._client.http
for key, desired in desired_by_key.items():
current = existing_by_key.pop(key, None)
if current is None:
await http.upsert_global_command(app_id, desired)
created += 1
continue
current_existing_payload = self._existing_command_to_payload(current)
current_payload = self._canonicalize_app_command_payload(current_existing_payload)
desired_payload = self._canonicalize_app_command_payload(desired)
if current_payload == desired_payload:
unchanged += 1
continue
if self._patchable_app_command_payload(current_existing_payload) == self._patchable_app_command_payload(desired):
await http.delete_global_command(app_id, current.id)
await http.upsert_global_command(app_id, desired)
recreated += 1
continue
await http.edit_global_command(app_id, current.id, desired)
updated += 1
for current in existing_by_key.values():
await http.delete_global_command(app_id, current.id)
deleted += 1
return {
"total": len(desired_payloads),
"unchanged": unchanged,
"updated": updated,
"recreated": recreated,
"created": created,
"deleted": deleted,
}
async def _add_reaction(self, message: Any, emoji: str) -> bool:
"""Add an emoji reaction to a Discord message."""
if not message or not hasattr(message, "add_reaction"):
@@ -1279,8 +1071,6 @@ class DiscordAdapter(BasePlatformAdapter):
chat_id: str,
message_id: str,
content: str,
*,
finalize: bool = False,
) -> SendResult:
"""Edit a previously sent Discord message."""
if not self._client:
@@ -1447,53 +1237,51 @@ class DiscordAdapter(BasePlatformAdapter):
return False
guild_id = channel.guild.id
async with self._voice_locks.setdefault(guild_id, asyncio.Lock()):
# Already connected in this guild?
existing = self._voice_clients.get(guild_id)
if existing and existing.is_connected():
if existing.channel.id == channel.id:
self._reset_voice_timeout(guild_id)
return True
await existing.move_to(channel)
# Already connected in this guild?
existing = self._voice_clients.get(guild_id)
if existing and existing.is_connected():
if existing.channel.id == channel.id:
self._reset_voice_timeout(guild_id)
return True
vc = await channel.connect()
self._voice_clients[guild_id] = vc
await existing.move_to(channel)
self._reset_voice_timeout(guild_id)
# Start voice receiver (Phase 2: listen to users)
try:
receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids)
receiver.start()
self._voice_receivers[guild_id] = receiver
self._voice_listen_tasks[guild_id] = asyncio.ensure_future(
self._voice_listen_loop(guild_id)
)
except Exception as e:
logger.warning("Voice receiver failed to start: %s", e)
return True
vc = await channel.connect()
self._voice_clients[guild_id] = vc
self._reset_voice_timeout(guild_id)
# Start voice receiver (Phase 2: listen to users)
try:
receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids)
receiver.start()
self._voice_receivers[guild_id] = receiver
self._voice_listen_tasks[guild_id] = asyncio.ensure_future(
self._voice_listen_loop(guild_id)
)
except Exception as e:
logger.warning("Voice receiver failed to start: %s", e)
return True
async def leave_voice_channel(self, guild_id: int) -> None:
"""Disconnect from the voice channel in a guild."""
async with self._voice_locks.setdefault(guild_id, asyncio.Lock()):
# Stop voice receiver first
receiver = self._voice_receivers.pop(guild_id, None)
if receiver:
receiver.stop()
listen_task = self._voice_listen_tasks.pop(guild_id, None)
if listen_task:
listen_task.cancel()
# Stop voice receiver first
receiver = self._voice_receivers.pop(guild_id, None)
if receiver:
receiver.stop()
listen_task = self._voice_listen_tasks.pop(guild_id, None)
if listen_task:
listen_task.cancel()
vc = self._voice_clients.pop(guild_id, None)
if vc and vc.is_connected():
await vc.disconnect()
task = self._voice_timeout_tasks.pop(guild_id, None)
if task:
task.cancel()
self._voice_text_channels.pop(guild_id, None)
self._voice_sources.pop(guild_id, None)
vc = self._voice_clients.pop(guild_id, None)
if vc and vc.is_connected():
await vc.disconnect()
task = self._voice_timeout_tasks.pop(guild_id, None)
if task:
task.cancel()
self._voice_text_channels.pop(guild_id, None)
self._voice_sources.pop(guild_id, None)
# Maximum seconds to wait for voice playback before giving up
PLAYBACK_TIMEOUT = 120
@@ -1620,7 +1408,8 @@ class DiscordAdapter(BasePlatformAdapter):
speaking_user_ids: set = set()
receiver = self._voice_receivers.get(guild_id)
if receiver:
now = time.monotonic()
import time as _time
now = _time.monotonic()
with receiver._lock:
for ssrc, last_t in receiver._last_packet_time.items():
# Consider "speaking" if audio received within last 2 seconds
@@ -2246,6 +2035,10 @@ class DiscordAdapter(BasePlatformAdapter):
async def slash_usage(interaction: discord.Interaction):
await self._run_simple_slash(interaction, "/usage")
@tree.command(name="provider", description="Show available providers")
async def slash_provider(interaction: discord.Interaction):
await self._run_simple_slash(interaction, "/provider")
@tree.command(name="help", description="Show available commands")
async def slash_help(interaction: discord.Interaction):
await self._run_simple_slash(interaction, "/help")
@@ -2315,46 +2108,19 @@ class DiscordAdapter(BasePlatformAdapter):
async def slash_background(interaction: discord.Interaction, prompt: str):
await self._run_simple_slash(interaction, f"/background {prompt}", "Background task started~")
@tree.command(name="btw", description="Ephemeral side question using session context")
@discord.app_commands.describe(question="Your side question (no tools, not persisted)")
async def slash_btw(interaction: discord.Interaction, question: str):
await self._run_simple_slash(interaction, f"/btw {question}")
# ── Auto-register any gateway-available commands not yet on the tree ──
# This ensures new commands added to COMMAND_REGISTRY in
# hermes_cli/commands.py automatically appear as Discord slash
# commands without needing a manual entry here.
def _build_auto_slash_command(_name: str, _description: str, _args_hint: str = ""):
"""Build a discord.app_commands.Command that proxies to _run_simple_slash."""
discord_name = _name.lower()[:32]
desc = (_description or f"Run /{_name}")[:100]
has_args = bool(_args_hint)
if has_args:
def _make_args_handler(__name: str, __hint: str):
@discord.app_commands.describe(args=f"Arguments: {__hint}"[:100])
async def _handler(interaction: discord.Interaction, args: str = ""):
await self._run_simple_slash(
interaction, f"/{__name} {args}".strip()
)
_handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
return _handler
handler = _make_args_handler(_name, _args_hint)
else:
def _make_simple_handler(__name: str):
async def _handler(interaction: discord.Interaction):
await self._run_simple_slash(interaction, f"/{__name}")
_handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
return _handler
handler = _make_simple_handler(_name)
return discord.app_commands.Command(
name=discord_name,
description=desc,
callback=handler,
)
already_registered: set[str] = set()
try:
from hermes_cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates
already_registered = set()
try:
already_registered = {cmd.name for cmd in tree.get_commands()}
except Exception:
@@ -2369,10 +2135,38 @@ class DiscordAdapter(BasePlatformAdapter):
discord_name = cmd_def.name.lower()[:32]
if discord_name in already_registered:
continue
auto_cmd = _build_auto_slash_command(
cmd_def.name,
cmd_def.description,
cmd_def.args_hint,
# Skip aliases that overlap with already-registered names
# (aliases for explicitly registered commands are handled above).
desc = (cmd_def.description or f"Run /{cmd_def.name}")[:100]
has_args = bool(cmd_def.args_hint)
if has_args:
# Command takes optional arguments — create handler with
# an optional ``args`` string parameter.
def _make_args_handler(_name: str, _hint: str):
@discord.app_commands.describe(args=f"Arguments: {_hint}"[:100])
async def _handler(interaction: discord.Interaction, args: str = ""):
await self._run_simple_slash(
interaction, f"/{_name} {args}".strip()
)
_handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
return _handler
handler = _make_args_handler(cmd_def.name, cmd_def.args_hint)
else:
# Parameterless command.
def _make_simple_handler(_name: str):
async def _handler(interaction: discord.Interaction):
await self._run_simple_slash(interaction, f"/{_name}")
_handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
return _handler
handler = _make_simple_handler(cmd_def.name)
auto_cmd = discord.app_commands.Command(
name=discord_name,
description=desc,
callback=handler,
)
try:
tree.add_command(auto_cmd)
@@ -2389,35 +2183,6 @@ class DiscordAdapter(BasePlatformAdapter):
except Exception as e:
logger.warning("Discord auto-register from COMMAND_REGISTRY failed: %s", e)
# ── Plugin-registered slash commands ──
# Plugins register via PluginContext.register_command(); we mirror
# those into Discord's native slash picker so users get the same
# autocomplete UX as for built-in commands. No per-platform plugin
# API needed — plugin commands are platform-agnostic.
try:
from hermes_cli.commands import _iter_plugin_command_entries
for plugin_name, plugin_desc, plugin_args_hint in _iter_plugin_command_entries():
discord_name = plugin_name.lower()[:32]
if discord_name in already_registered:
continue
auto_cmd = _build_auto_slash_command(
plugin_name,
plugin_desc,
plugin_args_hint,
)
try:
tree.add_command(auto_cmd)
already_registered.add(discord_name)
except Exception:
# Silently skip commands that fail registration (e.g.
# name conflict with a subcommand group).
pass
except Exception as e:
logger.warning(
"Discord auto-register from plugin commands failed: %s", e
)
# Register skills under a single /skill command group with category
# subcommand groups. This uses 1 top-level slot instead of N,
# supporting up to 25 categories × 25 skills = 625 skills.
@@ -2679,8 +2444,21 @@ class DiscordAdapter(BasePlatformAdapter):
skills: ["skill-a", "skill-b"]
Also checks parent_id so forum threads inherit the forum's bindings.
"""
from gateway.platforms.base import resolve_channel_skills
return resolve_channel_skills(self.config.extra, channel_id, parent_id)
bindings = self.config.extra.get("channel_skill_bindings", [])
if not bindings:
return None
ids_to_check = {channel_id}
if parent_id:
ids_to_check.add(parent_id)
for entry in bindings:
entry_id = str(entry.get("id", ""))
if entry_id in ids_to_check:
skills = entry.get("skills") or entry.get("skill")
if isinstance(skills, str):
return [skills]
if isinstance(skills, list) and skills:
return list(dict.fromkeys(skills)) # dedup, preserve order
return None
def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
"""Resolve a Discord per-channel prompt, preferring the exact channel over its parent."""
@@ -2697,12 +2475,7 @@ class DiscordAdapter(BasePlatformAdapter):
return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no", "off")
def _discord_free_response_channels(self) -> set:
"""Return Discord channel IDs where no bot mention is required.
A single ``"*"`` entry (either from a list or a comma-separated
string) is preserved in the returned set so callers can short-circuit
on wildcard membership, consistent with ``allowed_channels``.
"""
"""Return Discord channel IDs where no bot mention is required."""
raw = self.config.extra.get("free_response_channels")
if raw is None:
raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "")
@@ -3175,17 +2948,6 @@ class DiscordAdapter(BasePlatformAdapter):
parent_channel_id = self._get_parent_channel_id(message.channel)
is_voice_linked_channel = False
# Save mention-stripped text before auto-threading since create_thread()
# can clobber message.content, breaking /command detection in channels.
raw_content = message.content.strip()
normalized_content = raw_content
mention_prefix = False
if self._client.user and self._client.user in message.mentions:
mention_prefix = True
normalized_content = normalized_content.replace(f"<@{self._client.user.id}>", "").strip()
normalized_content = normalized_content.replace(f"<@!{self._client.user.id}>", "").strip()
message.content = normalized_content
if not isinstance(message.channel, discord.DMChannel):
channel_ids = {str(message.channel.id)}
if parent_channel_id:
@@ -3195,14 +2957,14 @@ class DiscordAdapter(BasePlatformAdapter):
allowed_channels_raw = os.getenv("DISCORD_ALLOWED_CHANNELS", "")
if allowed_channels_raw:
allowed_channels = {ch.strip() for ch in allowed_channels_raw.split(",") if ch.strip()}
if "*" not in allowed_channels and not (channel_ids & allowed_channels):
if not (channel_ids & allowed_channels):
logger.debug("[%s] Ignoring message in non-allowed channel: %s", self.name, channel_ids)
return
# Check ignored channels - never respond even when mentioned
ignored_channels_raw = os.getenv("DISCORD_IGNORED_CHANNELS", "")
ignored_channels = {ch.strip() for ch in ignored_channels_raw.split(",") if ch.strip()}
if "*" in ignored_channels or (channel_ids & ignored_channels):
if channel_ids & ignored_channels:
logger.debug("[%s] Ignoring message in ignored channel: %s", self.name, channel_ids)
return
@@ -3216,19 +2978,20 @@ class DiscordAdapter(BasePlatformAdapter):
voice_linked_ids = {str(ch_id) for ch_id in self._voice_text_channels.values()}
current_channel_id = str(message.channel.id)
is_voice_linked_channel = current_channel_id in voice_linked_ids
is_free_channel = (
"*" in free_channels
or bool(channel_ids & free_channels)
or is_voice_linked_channel
)
is_free_channel = bool(channel_ids & free_channels) or is_voice_linked_channel
# Skip the mention check if the message is in a thread where
# the bot has previously participated (auto-created or replied in).
in_bot_thread = is_thread and thread_id in self._threads
if require_mention and not is_free_channel and not in_bot_thread:
if self._client.user not in message.mentions and not mention_prefix:
if self._client.user not in message.mentions:
return
if self._client.user and self._client.user in message.mentions:
message.content = message.content.replace(f"<@{self._client.user.id}>", "").strip()
message.content = message.content.replace(f"<@!{self._client.user.id}>", "").strip()
# Auto-thread: when enabled, automatically create a thread for every
# @mention in a text channel so each conversation is isolated (like Slack).
# Messages already inside threads or DMs are unaffected.
@@ -3243,7 +3006,6 @@ class DiscordAdapter(BasePlatformAdapter):
if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
thread = await self._auto_create_thread(message)
if thread:
parent_channel_id = str(message.channel.id)
is_thread = True
thread_id = str(thread.id)
auto_threaded_channel = thread
@@ -3251,7 +3013,7 @@ class DiscordAdapter(BasePlatformAdapter):
# Determine message type
msg_type = MessageType.TEXT
if normalized_content.startswith("/"):
if message.content.startswith("/"):
msg_type = MessageType.COMMAND
elif message.attachments:
# Check attachment types
@@ -3294,7 +3056,6 @@ class DiscordAdapter(BasePlatformAdapter):
chat_topic = self._get_effective_topic(message.channel, is_thread=is_thread)
# Build source
guild = getattr(message, "guild", None)
source = self.build_source(
chat_id=str(effective_channel.id),
chat_name=chat_name,
@@ -3304,9 +3065,6 @@ class DiscordAdapter(BasePlatformAdapter):
thread_id=thread_id,
chat_topic=chat_topic,
is_bot=getattr(message.author, "bot", False),
guild_id=str(guild.id) if guild else None,
parent_chat_id=parent_channel_id,
message_id=str(message.id),
)
# Build media URLs -- download image attachments to local cache so the
@@ -3395,9 +3153,7 @@ class DiscordAdapter(BasePlatformAdapter):
att.filename, e, exc_info=True,
)
# Use normalized_content (saved before auto-threading) instead of message.content,
# to detect /slash commands in channel messages.
event_text = normalized_content
event_text = message.content
if pending_text_injection:
event_text = f"{pending_text_injection}\n\n{event_text}" if event_text else pending_text_injection
@@ -3858,15 +3614,6 @@ if DISCORD_AVAILABLE:
self.resolved = True
model_id = interaction.data["values"][0]
self.clear_items()
await interaction.response.edit_message(
embed=discord.Embed(
title="⚙ Switching Model",
description=f"Switching to `{model_id}`...",
color=discord.Color.blue(),
),
view=None,
)
try:
result_text = await self.on_model_selected(
@@ -3877,13 +3624,14 @@ if DISCORD_AVAILABLE:
except Exception as exc:
result_text = f"Error switching model: {exc}"
await interaction.edit_original_response(
self.clear_items()
await interaction.response.edit_message(
embed=discord.Embed(
title="⚙ Model Switched",
description=result_text,
color=discord.Color.green(),
),
view=None,
view=self,
)
async def _on_back(self, interaction: discord.Interaction):
-4
View File
@@ -28,7 +28,6 @@ from email.header import decode_header
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email.utils import formatdate
from email import encoders
from pathlib import Path
from typing import Any, Dict, List, Optional
@@ -505,7 +504,6 @@ class EmailAdapter(BasePlatformAdapter):
msg["In-Reply-To"] = original_msg_id
msg["References"] = original_msg_id
msg["Date"] = formatdate(localtime=True)
msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
msg["Message-ID"] = msg_id
@@ -547,7 +545,6 @@ class EmailAdapter(BasePlatformAdapter):
caption: Optional[str] = None,
file_name: Optional[str] = None,
reply_to: Optional[str] = None,
**kwargs,
) -> SendResult:
"""Send a file as an email attachment."""
try:
@@ -588,7 +585,6 @@ class EmailAdapter(BasePlatformAdapter):
msg["In-Reply-To"] = original_msg_id
msg["References"] = original_msg_id
msg["Date"] = formatdate(localtime=True)
msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
msg["Message-ID"] = msg_id
File diff suppressed because it is too large Load Diff
-9
View File
@@ -57,15 +57,6 @@ class MessageDeduplicator:
if len(self._seen) > self._max_size:
cutoff = now - self._ttl
self._seen = {k: v for k, v in self._seen.items() if v > cutoff}
if len(self._seen) > self._max_size:
# TTL pruning alone does not cap the cache when every entry is
# still fresh. Keep the newest entries so the helper's
# max_size bound is enforced under sustained traffic.
newest = sorted(
self._seen.items(),
key=lambda item: item[1],
)[-self._max_size:]
self._seen = dict(newest)
return False
def clear(self):
+4 -88
View File
@@ -532,20 +532,6 @@ class MatrixAdapter(BasePlatformAdapter):
)
await crypto_store.open()
# Bind the store to the runtime device_id before any
# put_account() runs. PgCryptoStore defaults _device_id
# to "" and its crypto_account UPSERT never updates the
# device_id column on conflict — so once put_account
# writes blank, it stays blank forever. That breaks
# every downstream device-scoped olm operation: peer
# to-device ciphertext can't find our identity key and
# no megolm sessions ever land. Setting _device_id here
# (in-memory; the on-disk row may not exist yet) makes
# the first put_account write the correct value.
# DeviceID is a NewType(str) so plain str works at runtime.
if client.device_id:
await crypto_store.put_device_id(client.device_id)
crypto_state = _CryptoStateStore(state_store, self._joined_rooms)
olm = OlmMachine(client, crypto_store, crypto_state)
@@ -839,7 +825,7 @@ class MatrixAdapter(BasePlatformAdapter):
async def edit_message(
self, chat_id: str, message_id: str, content: str, *, finalize: bool = False
self, chat_id: str, message_id: str, content: str
) -> SendResult:
"""Edit an existing message (via m.replace)."""
@@ -1178,83 +1164,13 @@ class MatrixAdapter(BasePlatformAdapter):
# Event callbacks
# ------------------------------------------------------------------
def _is_self_sender(self, sender: str) -> bool:
"""Return True if the sender refers to the bot's own account.
Matrix user IDs are byte-compared after trimming whitespace and
lowercasing some homeservers normalize the localpart case
differently at different API surfaces, and the reply-loop tail
of the "hall of mirrors" bug (#15763) has been observed with the
bot's own account bypassing a case-sensitive equality check.
When ``self._user_id`` is empty (whoami hasn't resolved yet, or
login failed), we cannot prove a sender is NOT us, so we return
True defensively an unidentified bot dropping its own events
is always preferable to falling into an echo loop.
"""
own = (self._user_id or "").strip().lower()
if not own:
return True
return sender.strip().lower() == own
@staticmethod
def _is_system_or_bridge_sender(sender: str) -> bool:
"""Return True if the sender looks like a system / bridge / appservice
identity rather than a real user.
Appservice namespaces on Matrix conventionally prefix bot / puppet
user IDs with an underscore (e.g. ``@_telegram_12345:server``,
``@_discord_999:server``, ``@_slack_...:server``). Server-notices
bots and bridge-controller bots on many homeservers use the same
pattern.
We treat these as system identities for pairing purposes: they
should never be offered a pairing code, because an operator
approving the code would hand the bridge itself permanent
authorization and every outbound message relayed by the bridge
would then loop back into the agent as an "authorized user
message", which is the root of issue #15763.
Matches:
``@_something:server`` appservice namespace convention
``@:server`` malformed / empty localpart
``:server`` malformed, no leading ``@``
"""
s = (sender or "").strip()
if not s:
return True
# Localpart is everything between leading '@' and ':'
if s.startswith("@"):
s = s[1:]
if ":" in s:
localpart, _, _ = s.partition(":")
else:
localpart = s
if not localpart:
return True
return localpart.startswith("_")
async def _on_room_message(self, event: Any) -> None:
"""Handle incoming room message events (text, media)."""
room_id = str(getattr(event, "room_id", ""))
sender = str(getattr(event, "sender", ""))
# Ignore own messages (case-insensitive; also drops when our own
# user_id hasn't been resolved yet — see _is_self_sender docstring
# and issue #15763).
if self._is_self_sender(sender):
return
# Ignore appservice / bridge / system identities so they never
# trigger the pairing flow. Once a bridge user is paired, every
# outbound message it relays would loop back as an authorized
# user message (the "hall of mirrors" in #15763).
if self._is_system_or_bridge_sender(sender):
logger.debug(
"Matrix: ignoring system/bridge sender %s in %s",
sender,
room_id,
)
# Ignore own messages.
if sender == self._user_id:
return
# Deduplicate by event ID.
@@ -1724,7 +1640,7 @@ class MatrixAdapter(BasePlatformAdapter):
async def _on_reaction(self, event: Any) -> None:
"""Handle incoming reaction events."""
sender = str(getattr(event, "sender", ""))
if self._is_self_sender(sender):
if sender == self._user_id:
return
event_id = str(getattr(event, "event_id", ""))
if self._is_duplicate_event(event_id):
+2 -1
View File
@@ -304,7 +304,7 @@ class MattermostAdapter(BasePlatformAdapter):
)
async def edit_message(
self, chat_id: str, message_id: str, content: str, *, finalize: bool = False
self, chat_id: str, message_id: str, content: str
) -> SendResult:
"""Edit an existing post."""
formatted = self.format_message(content)
@@ -410,6 +410,7 @@ class MattermostAdapter(BasePlatformAdapter):
logger.warning("Mattermost: blocked unsafe URL (SSRF protection)")
return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to)
import asyncio
import aiohttp
last_exc = None
+4 -2
View File
@@ -26,8 +26,9 @@ from .adapter import ( # noqa: F401
# -- Onboard (QR-code scan-to-configure) -----------------------------------
from .onboard import ( # noqa: F401
BindStatus,
create_bind_task,
poll_bind_result,
build_connect_url,
qr_register,
)
from .crypto import decrypt_secret, generate_bind_key # noqa: F401
@@ -43,8 +44,9 @@ __all__ = [
"_ssrf_redirect_guard",
# onboard
"BindStatus",
"create_bind_task",
"poll_bind_result",
"build_connect_url",
"qr_register",
# crypto
"decrypt_secret",
"generate_bind_key",
+8 -6
View File
@@ -535,9 +535,6 @@ class QQAdapter(BasePlatformAdapter):
quick_disconnect_count = 0
else:
backoff_idx += 1
if backoff_idx >= MAX_RECONNECT_ATTEMPTS:
logger.error("[%s] Max reconnect attempts reached (QQCloseError)", self._log_tag)
return
except Exception as exc:
if not self._running:
@@ -1089,8 +1086,11 @@ class QQAdapter(BasePlatformAdapter):
return MessageType.VIDEO
if "image" in first_type or "photo" in first_type:
return MessageType.PHOTO
# Unknown content type with an attachment — don't assume PHOTO
# to prevent non-image files from being sent to vision analysis.
logger.debug(
"Unknown media content_type '%s', defaulting to TEXT",
"[%s] Unknown media content_type '%s', defaulting to TEXT",
self._log_tag,
first_type,
)
return MessageType.TEXT
@@ -1826,12 +1826,14 @@ class QQAdapter(BasePlatformAdapter):
body["file_name"] = file_name
# Retry transient upload failures
last_exc = None
for attempt in range(3):
try:
return await self._api_request(
"POST", path, body, timeout=FILE_UPLOAD_TIMEOUT
)
except RuntimeError as exc:
last_exc = exc
err_msg = str(exc)
if any(
kw in err_msg
@@ -1840,8 +1842,8 @@ class QQAdapter(BasePlatformAdapter):
raise
if attempt < 2:
await asyncio.sleep(1.5 * (attempt + 1))
else:
raise
raise last_exc # type: ignore[misc]
# Maximum time (seconds) to wait for reconnection before giving up on send.
_RECONNECT_WAIT_SECONDS = 15.0
+21 -117
View File
@@ -1,10 +1,6 @@
"""
QQBot scan-to-configure (QR code onboard) module.
Mirrors the Feishu onboarding pattern: synchronous HTTP + a single public
entry-point ``qr_register()`` that handles the full flow (create task
display QR code poll decrypt credentials).
Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to
generate a QR-code URL and poll for scan completion. On success the caller
receives the bot's *app_id*, *client_secret* (decrypted locally), and the
@@ -16,20 +12,18 @@ Reference: https://bot.q.qq.com/wiki/develop/api-v2/
from __future__ import annotations
import logging
import time
from enum import IntEnum
from typing import Optional, Tuple
from typing import Tuple
from urllib.parse import quote
from .constants import (
ONBOARD_API_TIMEOUT,
ONBOARD_CREATE_PATH,
ONBOARD_POLL_INTERVAL,
ONBOARD_POLL_PATH,
PORTAL_HOST,
QR_URL_TEMPLATE,
)
from .crypto import decrypt_secret, generate_bind_key
from .crypto import generate_bind_key
from .utils import get_api_headers
logger = logging.getLogger(__name__)
@@ -41,7 +35,7 @@ logger = logging.getLogger(__name__)
class BindStatus(IntEnum):
"""Status codes returned by ``_poll_bind_result``."""
"""Status codes returned by ``poll_bind_result``."""
NONE = 0
PENDING = 1
@@ -50,40 +44,18 @@ class BindStatus(IntEnum):
# ---------------------------------------------------------------------------
# QR rendering
# ---------------------------------------------------------------------------
try:
import qrcode as _qrcode_mod
except (ImportError, TypeError):
_qrcode_mod = None # type: ignore[assignment]
def _render_qr(url: str) -> bool:
"""Try to render a QR code in the terminal. Returns True if successful."""
if _qrcode_mod is None:
return False
try:
qr = _qrcode_mod.QRCode(
error_correction=_qrcode_mod.constants.ERROR_CORRECT_M,
border=2,
)
qr.add_data(url)
qr.make(fit=True)
qr.print_ascii(invert=True)
return True
except Exception:
return False
# ---------------------------------------------------------------------------
# Synchronous HTTP helpers (mirrors Feishu _post_registration pattern)
# Public API
# ---------------------------------------------------------------------------
def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
async def create_bind_task(
timeout: float = ONBOARD_API_TIMEOUT,
) -> Tuple[str, str]:
"""Create a bind task and return *(task_id, aes_key_base64)*.
The AES key is generated locally and sent to the server so it can
encrypt the bot credentials before returning them.
Raises:
RuntimeError: If the API returns a non-zero ``retcode``.
"""
@@ -92,8 +64,8 @@ def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}"
key = generate_bind_key()
with httpx.Client(timeout=timeout, follow_redirects=True) as client:
resp = client.post(url, json={"key": key}, headers=get_api_headers())
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
resp = await client.post(url, json={"key": key}, headers=get_api_headers())
resp.raise_for_status()
data = resp.json()
@@ -108,7 +80,7 @@ def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
return task_id, key
def _poll_bind_result(
async def poll_bind_result(
task_id: str,
timeout: float = ONBOARD_API_TIMEOUT,
) -> Tuple[BindStatus, str, str, str]:
@@ -117,6 +89,12 @@ def _poll_bind_result(
Returns:
A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``.
* ``bot_encrypt_secret`` is AES-256-GCM encrypted decrypt it with
:func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the
key from :func:`create_bind_task`.
* ``user_openid`` is the OpenID of the person who scanned the code
(available when ``status == COMPLETED``).
Raises:
RuntimeError: If the API returns a non-zero ``retcode``.
"""
@@ -124,8 +102,8 @@ def _poll_bind_result(
url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}"
with httpx.Client(timeout=timeout, follow_redirects=True) as client:
resp = client.post(url, json={"task_id": task_id}, headers=get_api_headers())
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers())
resp.raise_for_status()
data = resp.json()
@@ -144,77 +122,3 @@ def _poll_bind_result(
def build_connect_url(task_id: str) -> str:
"""Build the QR-code target URL for a given *task_id*."""
return QR_URL_TEMPLATE.format(task_id=quote(task_id))
# ---------------------------------------------------------------------------
# Public entry-point
# ---------------------------------------------------------------------------
_MAX_REFRESHES = 3
def qr_register(timeout_seconds: int = 600) -> Optional[dict]:
"""Run the QQBot scan-to-configure QR registration flow.
Mirrors ``feishu.qr_register()``: handles create display poll
decrypt in one call. Unexpected errors propagate to the caller.
:returns:
``{"app_id": ..., "client_secret": ..., "user_openid": ...}`` on
success, or ``None`` on failure / expiry / cancellation.
"""
deadline = time.monotonic() + timeout_seconds
for refresh_count in range(_MAX_REFRESHES + 1):
# ── Create bind task ──
try:
task_id, aes_key = _create_bind_task()
except Exception as exc:
logger.warning("[QQBot onboard] Failed to create bind task: %s", exc)
return None
url = build_connect_url(task_id)
# ── Display QR code + URL ──
print()
if _render_qr(url):
print(f" Scan the QR code above, or open this URL directly:\n {url}")
else:
print(f" Open this URL in QQ on your phone:\n {url}")
print(" Tip: pip install qrcode to display a scannable QR code here")
print()
# ── Poll loop ──
while time.monotonic() < deadline:
try:
status, app_id, encrypted_secret, user_openid = _poll_bind_result(task_id)
except Exception:
time.sleep(ONBOARD_POLL_INTERVAL)
continue
if status == BindStatus.COMPLETED:
client_secret = decrypt_secret(encrypted_secret, aes_key)
print()
print(f" QR scan complete! (App ID: {app_id})")
if user_openid:
print(f" Scanner's OpenID: {user_openid}")
return {
"app_id": app_id,
"client_secret": client_secret,
"user_openid": user_openid,
}
if status == BindStatus.EXPIRED:
if refresh_count >= _MAX_REFRESHES:
logger.warning("[QQBot onboard] QR code expired %d times — giving up", _MAX_REFRESHES)
return None
print(f"\n QR code expired, refreshing... ({refresh_count + 1}/{_MAX_REFRESHES})")
break # next for-loop iteration creates a new task
time.sleep(ONBOARD_POLL_INTERVAL)
else:
# deadline reached without completing
logger.warning("[QQBot onboard] Poll timed out after %ds", timeout_seconds)
return None
return None
+18 -114
View File
@@ -18,7 +18,6 @@ import logging
import os
import random
import time
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional, Any
@@ -128,27 +127,6 @@ def _render_mentions(text: str, mentions: list) -> str:
return text
def _is_signal_service_id(value: str) -> bool:
"""Return True if *value* already looks like a Signal service identifier."""
if not value:
return False
if value.startswith("PNI:") or value.startswith("u:"):
return True
try:
uuid.UUID(value)
return True
except (ValueError, AttributeError, TypeError):
return False
def _looks_like_e164_number(value: str) -> bool:
"""Return True for a plausible E.164 phone number."""
if not value or not value.startswith("+"):
return False
digits = value[1:]
return digits.isdigit() and 7 <= len(digits) <= 15
def check_signal_requirements() -> bool:
"""Check if Signal is configured (has URL and account)."""
return bool(os.getenv("SIGNAL_HTTP_URL") and os.getenv("SIGNAL_ACCOUNT"))
@@ -201,12 +179,6 @@ class SignalAdapter(BasePlatformAdapter):
# in Note to Self / self-chat mode (mirrors WhatsApp recentlySentIds)
self._recent_sent_timestamps: set = set()
self._max_recent_timestamps = 50
# Signal increasingly exposes ACI/PNI UUIDs as stable recipient IDs.
# Keep a best-effort mapping so outbound sends can upgrade from a
# phone number to the corresponding UUID when signal-cli prefers it.
self._recipient_uuid_by_number: Dict[str, str] = {}
self._recipient_number_by_uuid: Dict[str, str] = {}
self._recipient_cache_lock = asyncio.Lock()
logger.info("Signal adapter initialized: url=%s account=%s groups=%s",
self.http_url, redact_phone(self.account),
@@ -223,40 +195,31 @@ class SignalAdapter(BasePlatformAdapter):
return False
# Acquire scoped lock to prevent duplicate Signal listeners for the same phone
lock_acquired = False
try:
if not self._acquire_platform_lock('signal-phone', self.account, 'Signal account'):
return False
lock_acquired = True
except Exception as e:
logger.warning("Signal: Could not acquire phone lock (non-fatal): %s", e)
self.client = httpx.AsyncClient(timeout=30.0)
# Health check — verify signal-cli daemon is reachable
try:
# Health check — verify signal-cli daemon is reachable
try:
resp = await self.client.get(f"{self.http_url}/api/v1/check", timeout=10.0)
if resp.status_code != 200:
logger.error("Signal: health check failed (status %d)", resp.status_code)
return False
except Exception as e:
logger.error("Signal: cannot reach signal-cli at %s: %s", self.http_url, e)
resp = await self.client.get(f"{self.http_url}/api/v1/check", timeout=10.0)
if resp.status_code != 200:
logger.error("Signal: health check failed (status %d)", resp.status_code)
return False
except Exception as e:
logger.error("Signal: cannot reach signal-cli at %s: %s", self.http_url, e)
return False
self._running = True
self._last_sse_activity = time.time()
self._sse_task = asyncio.create_task(self._sse_listener())
self._health_monitor_task = asyncio.create_task(self._health_monitor())
self._running = True
self._last_sse_activity = time.time()
self._sse_task = asyncio.create_task(self._sse_listener())
self._health_monitor_task = asyncio.create_task(self._health_monitor())
logger.info("Signal: connected to %s", self.http_url)
return True
finally:
if not self._running:
if self.client:
await self.client.aclose()
self.client = None
if lock_acquired:
self._release_platform_lock()
logger.info("Signal: connected to %s", self.http_url)
return True
async def disconnect(self) -> None:
"""Stop SSE listener and clean up."""
@@ -437,7 +400,6 @@ class SignalAdapter(BasePlatformAdapter):
)
sender_name = envelope_data.get("sourceName", "")
sender_uuid = envelope_data.get("sourceUuid", "")
self._remember_recipient_identifiers(sender, sender_uuid)
if not sender:
logger.debug("Signal: ignoring envelope with no sender")
@@ -556,64 +518,6 @@ class SignalAdapter(BasePlatformAdapter):
await self.handle_message(event)
def _remember_recipient_identifiers(self, number: Optional[str], service_id: Optional[str]) -> None:
"""Cache any number↔UUID mapping observed from Signal envelopes."""
if not number or not service_id or not _is_signal_service_id(service_id):
return
self._recipient_uuid_by_number[number] = service_id
self._recipient_number_by_uuid[service_id] = number
def _extract_contact_uuid(self, contact: Any, phone_number: str) -> Optional[str]:
"""Best-effort extraction of a Signal service ID from listContacts output."""
if not isinstance(contact, dict):
return None
number = contact.get("number")
recipient = contact.get("recipient")
service_id = contact.get("uuid") or contact.get("serviceId")
if not service_id:
profile = contact.get("profile")
if isinstance(profile, dict):
service_id = profile.get("serviceId") or profile.get("uuid")
if service_id and _is_signal_service_id(service_id):
matches_number = number == phone_number or recipient == phone_number
if matches_number:
return service_id
return None
async def _resolve_recipient(self, chat_id: str) -> str:
"""Return the preferred Signal recipient identifier for a direct chat."""
if (
not chat_id
or chat_id.startswith("group:")
or _is_signal_service_id(chat_id)
or not _looks_like_e164_number(chat_id)
):
return chat_id
cached = self._recipient_uuid_by_number.get(chat_id)
if cached:
return cached
async with self._recipient_cache_lock:
cached = self._recipient_uuid_by_number.get(chat_id)
if cached:
return cached
contacts = await self._rpc("listContacts", {
"account": self.account,
"allRecipients": True,
})
if isinstance(contacts, list):
for contact in contacts:
number = contact.get("number") if isinstance(contact, dict) else None
service_id = self._extract_contact_uuid(contact, chat_id)
if number and service_id:
self._remember_recipient_identifiers(number, service_id)
return self._recipient_uuid_by_number.get(chat_id, chat_id)
# ------------------------------------------------------------------
# Attachment Handling
# ------------------------------------------------------------------
@@ -729,7 +633,7 @@ class SignalAdapter(BasePlatformAdapter):
if chat_id.startswith("group:"):
params["groupId"] = chat_id[6:]
else:
params["recipient"] = [await self._resolve_recipient(chat_id)]
params["recipient"] = [chat_id]
result = await self._rpc("send", params)
@@ -780,7 +684,7 @@ class SignalAdapter(BasePlatformAdapter):
if chat_id.startswith("group:"):
params["groupId"] = chat_id[6:]
else:
params["recipient"] = [await self._resolve_recipient(chat_id)]
params["recipient"] = [chat_id]
fails = self._typing_failures.get(chat_id, 0)
result = await self._rpc(
@@ -841,7 +745,7 @@ class SignalAdapter(BasePlatformAdapter):
if chat_id.startswith("group:"):
params["groupId"] = chat_id[6:]
else:
params["recipient"] = [await self._resolve_recipient(chat_id)]
params["recipient"] = [chat_id]
result = await self._rpc("send", params)
if result is not None:
@@ -880,7 +784,7 @@ class SignalAdapter(BasePlatformAdapter):
if chat_id.startswith("group:"):
params["groupId"] = chat_id[6:]
else:
params["recipient"] = [await self._resolve_recipient(chat_id)]
params["recipient"] = [chat_id]
result = await self._rpc("send", params)
if result is not None:
File diff suppressed because it is too large Load Diff
+21 -166
View File
@@ -11,7 +11,6 @@ import asyncio
import json
import logging
import os
import tempfile
import html as _html
import re
from typing import Dict, List, Optional, Any
@@ -71,10 +70,8 @@ from gateway.platforms.base import (
SendResult,
cache_image_from_bytes,
cache_audio_from_bytes,
cache_video_from_bytes,
cache_document_from_bytes,
resolve_proxy_url,
SUPPORTED_VIDEO_TYPES,
SUPPORTED_DOCUMENT_TYPES,
utf16_len,
_prefix_within_utf16_limit,
@@ -496,13 +493,6 @@ class TelegramAdapter(BasePlatformAdapter):
"[%s] DM topic '%s' already exists in chat %s (will be mapped from incoming messages)",
self.name, name, chat_id,
)
elif "not a forum" in error_text or "forums_disabled" in error_text:
logger.warning(
"[%s] Cannot create DM topic '%s' in chat %s: Topics mode is not enabled. "
"The user must open the DM with this bot in Telegram, tap the bot name "
"at the top, and enable 'Topics' in chat settings before topics can be created.",
self.name, name, chat_id,
)
else:
logger.warning(
"[%s] Failed to create DM topic '%s' in chat %s: %s",
@@ -544,23 +534,8 @@ class TelegramAdapter(BasePlatformAdapter):
break
if changed:
fd, tmp_path = tempfile.mkstemp(
dir=str(config_path.parent),
suffix=".tmp",
prefix=".config_",
)
try:
with os.fdopen(fd, "w", encoding="utf-8") as f:
_yaml.dump(config, f, default_flow_style=False, sort_keys=False)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, config_path)
except BaseException:
try:
os.unlink(tmp_path)
except OSError:
pass
raise
with open(config_path, "w") as f:
_yaml.dump(config, f, default_flow_style=False, sort_keys=False)
logger.info(
"[%s] Persisted thread_id=%s for topic '%s' in config.yaml",
self.name, thread_id, topic_name,
@@ -703,6 +678,7 @@ class TelegramAdapter(BasePlatformAdapter):
"write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
}
proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
fallback_ips = self._fallback_ips()
if not fallback_ips:
@@ -713,8 +689,6 @@ class TelegramAdapter(BasePlatformAdapter):
", ".join(fallback_ips),
)
proxy_targets = ["api.telegram.org", *fallback_ips]
proxy_url = resolve_proxy_url("TELEGRAM_PROXY", target_hosts=proxy_targets)
if fallback_ips and not proxy_url and not disable_fallback:
logger.info(
"[%s] Telegram fallback IPs active: %s",
@@ -795,28 +769,8 @@ class TelegramAdapter(BasePlatformAdapter):
# Telegram pushes updates to our HTTP endpoint. This
# enables cloud platforms (Fly.io, Railway) to auto-wake
# suspended machines on inbound HTTP traffic.
#
# SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it,
# python-telegram-bot passes secret_token=None and the
# webhook endpoint accepts any HTTP POST — attackers can
# inject forged updates as if from Telegram. Refuse to
# start rather than silently run in fail-open mode.
# See GHSA-3vpc-7q5r-276h.
webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443"))
webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip()
if not webhook_secret:
raise RuntimeError(
"TELEGRAM_WEBHOOK_SECRET is required when "
"TELEGRAM_WEBHOOK_URL is set. Without it, the "
"webhook endpoint accepts forged updates from "
"anyone who can reach it — see "
"https://github.com/NousResearch/hermes-agent/"
"security/advisories/GHSA-3vpc-7q5r-276h.\n\n"
"Generate a secret and set it in your .env:\n"
" export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n"
"Then register it with Telegram when setting the "
"webhook via setWebhook's secret_token parameter."
)
webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None
from urllib.parse import urlparse
webhook_path = urlparse(webhook_url).path or "/telegram"
@@ -1127,8 +1081,6 @@ class TelegramAdapter(BasePlatformAdapter):
chat_id: str,
message_id: str,
content: str,
*,
finalize: bool = False,
) -> SendResult:
"""Edit a previously sent Telegram message."""
if not self._bot:
@@ -1209,31 +1161,6 @@ class TelegramAdapter(BasePlatformAdapter):
)
return SendResult(success=False, error=str(e))
async def delete_message(self, chat_id: str, message_id: str) -> bool:
"""Delete a previously sent Telegram message.
Used by the stream consumer's fresh-final cleanup path (ported
from openclaw/openclaw#72038) to remove long-lived preview
messages after sending the completed reply as a fresh message.
Telegram's Bot API ``deleteMessage`` works for bot-posted
messages in the last 48 hours. Failures are non-fatal the
caller leaves the preview in place and logs at debug level.
"""
if not self._bot:
return False
try:
await self._bot.delete_message(
chat_id=int(chat_id),
message_id=int(message_id),
)
return True
except Exception as e:
logger.debug(
"[%s] Failed to delete Telegram message %s: %s",
self.name, message_id, e,
)
return False
async def send_update_prompt(
self, chat_id: str, prompt: str, default: str = "",
session_key: str = "",
@@ -1730,21 +1657,6 @@ class TelegramAdapter(BasePlatformAdapter):
except Exception as exc:
logger.error("Failed to write update response from callback: %s", exc)
def _missing_media_path_error(self, label: str, path: str) -> str:
"""Build an actionable file-not-found error for gateway MEDIA delivery.
Paths like /workspace/... or /output/... often only exist inside the
Docker sandbox, while the gateway process runs on the host.
"""
error = f"{label} file not found: {path}"
if path.startswith(("/workspace/", "/output/", "/outputs/")):
error += (
" (path may only exist inside the Docker sandbox. "
"Bind-mount a host directory and emit the host-visible "
"path in MEDIA: for gateway file delivery.)"
)
return error
async def send_voice(
self,
chat_id: str,
@@ -1759,8 +1671,9 @@ class TelegramAdapter(BasePlatformAdapter):
return SendResult(success=False, error="Not connected")
try:
import os
if not os.path.exists(audio_path):
return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
return SendResult(success=False, error=f"Audio file not found: {audio_path}")
with open(audio_path, "rb") as audio_file:
# .ogg files -> send as voice (round playable bubble)
@@ -1807,8 +1720,9 @@ class TelegramAdapter(BasePlatformAdapter):
return SendResult(success=False, error="Not connected")
try:
import os
if not os.path.exists(image_path):
return SendResult(success=False, error=self._missing_media_path_error("Image", image_path))
return SendResult(success=False, error=f"Image file not found: {image_path}")
_thread = self._metadata_thread_id(metadata)
with open(image_path, "rb") as image_file:
@@ -1845,7 +1759,7 @@ class TelegramAdapter(BasePlatformAdapter):
try:
if not os.path.exists(file_path):
return SendResult(success=False, error=self._missing_media_path_error("File", file_path))
return SendResult(success=False, error=f"File not found: {file_path}")
display_name = file_name or os.path.basename(file_path)
_thread = self._metadata_thread_id(metadata)
@@ -1879,7 +1793,7 @@ class TelegramAdapter(BasePlatformAdapter):
try:
if not os.path.exists(video_path):
return SendResult(success=False, error=self._missing_media_path_error("Video", video_path))
return SendResult(success=False, error=f"Video file not found: {video_path}")
_thread = self._metadata_thread_id(metadata)
with open(video_path, "rb") as f:
@@ -2119,7 +2033,7 @@ class TelegramAdapter(BasePlatformAdapter):
url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
return _ph(f'[{display}]({url})')
text = re.sub(r'\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)', _convert_link, text)
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
# 4) Convert markdown headers (## Title) → bold *Title*
def _convert_header(m):
@@ -2327,52 +2241,27 @@ class TelegramAdapter(BasePlatformAdapter):
bot_username = (getattr(self._bot, "username", None) or "").lstrip("@").lower()
bot_id = getattr(self._bot, "id", None)
expected = f"@{bot_username}" if bot_username else None
def _iter_sources():
yield getattr(message, "text", None) or "", getattr(message, "entities", None) or []
yield getattr(message, "caption", None) or "", getattr(message, "caption_entities", None) or []
# Telegram parses mentions server-side and emits MessageEntity objects
# (type=mention for @username, type=text_mention for @FirstName targeting
# a user without a public username). Only those entities are authoritative —
# raw substring matches like "foo@hermes_bot.example" are not mentions
# (bug #12545). Entities also correctly handle @handles inside URLs, code
# blocks, and quoted text, where a regex scan would over-match.
for source_text, entities in _iter_sources():
if bot_username and f"@{bot_username}" in source_text.lower():
return True
for entity in entities:
entity_type = str(getattr(entity, "type", "")).split(".")[-1].lower()
if entity_type == "mention" and expected:
if entity_type == "mention" and bot_username:
offset = int(getattr(entity, "offset", -1))
length = int(getattr(entity, "length", 0))
if offset < 0 or length <= 0:
continue
if source_text[offset:offset + length].strip().lower() == expected:
if source_text[offset:offset + length].strip().lower() == f"@{bot_username}":
return True
elif entity_type == "text_mention":
user = getattr(entity, "user", None)
if user and getattr(user, "id", None) == bot_id:
return True
elif entity_type == "bot_command" and expected:
# Telegram's official group-disambiguation form for slash
# commands (``/cmd@botname``) is emitted as a single
# ``bot_command`` entity covering the whole span — there
# is no accompanying ``mention`` entity. Treat it as a
# direct address to this bot when the ``@botname`` suffix
# matches. This is the form Telegram's own command menu
# autocomplete produces in groups, so dropping it at the
# mention gate would break /new, /reset, /help, ... for
# every group that has ``require_mention`` enabled (#15415).
offset = int(getattr(entity, "offset", -1))
length = int(getattr(entity, "length", 0))
if offset < 0 or length <= 0:
continue
command_text = source_text[offset:offset + length]
at_index = command_text.find("@")
if at_index < 0:
continue
if command_text[at_index:].strip().lower() == expected:
return True
return False
def _message_matches_mention_patterns(self, message: Message) -> bool:
@@ -2399,16 +2288,10 @@ class TelegramAdapter(BasePlatformAdapter):
DMs remain unrestricted. Group/supergroup messages are accepted when:
- the chat is explicitly allowlisted in ``free_response_chats``
- ``require_mention`` is disabled
- the message is a command
- the message replies to the bot
- the bot is @mentioned
- the text/caption matches a configured regex wake-word pattern
When ``require_mention`` is enabled, slash commands are not given
special treatment they must pass the same mention/reply checks
as any other group message. Users can still trigger commands via
the Telegram bot menu (``/command@botname``) or by explicitly
mentioning the bot (``@botname /command``), both of which are
recognised as mentions by :meth:`_message_mentions_bot`.
"""
if not self._is_group_chat(message):
return True
@@ -2423,6 +2306,8 @@ class TelegramAdapter(BasePlatformAdapter):
return True
if not self._telegram_require_mention():
return True
if is_command:
return True
if self._is_reply_to_bot(message):
return True
if self._message_mentions_bot(message):
@@ -2705,23 +2590,6 @@ class TelegramAdapter(BasePlatformAdapter):
except Exception as e:
logger.warning("[Telegram] Failed to cache audio: %s", e, exc_info=True)
elif msg.video:
try:
file_obj = await msg.video.get_file()
video_bytes = await file_obj.download_as_bytearray()
ext = ".mp4"
if getattr(file_obj, "file_path", None):
for candidate in SUPPORTED_VIDEO_TYPES:
if file_obj.file_path.lower().endswith(candidate):
ext = candidate
break
cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext)
event.media_urls = [cached_path]
event.media_types = [SUPPORTED_VIDEO_TYPES.get(ext, "video/mp4")]
logger.info("[Telegram] Cached user video at %s", cached_path)
except Exception as e:
logger.warning("[Telegram] Failed to cache video: %s", e, exc_info=True)
# Download document files to cache for agent processing
elif msg.document:
doc = msg.document
@@ -2738,21 +2606,6 @@ class TelegramAdapter(BasePlatformAdapter):
mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
ext = mime_to_ext.get(doc.mime_type, "")
if not ext and doc.mime_type:
video_mime_to_ext = {v: k for k, v in SUPPORTED_VIDEO_TYPES.items()}
ext = video_mime_to_ext.get(doc.mime_type, "")
if ext in SUPPORTED_VIDEO_TYPES:
file_obj = await doc.get_file()
video_bytes = await file_obj.download_as_bytearray()
cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext)
event.media_urls = [cached_path]
event.media_types = [SUPPORTED_VIDEO_TYPES[ext]]
event.message_type = MessageType.VIDEO
logger.info("[Telegram] Cached user video document at %s", cached_path)
await self.handle_message(event)
return
# Check if supported
if ext not in SUPPORTED_DOCUMENT_TYPES:
supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))
@@ -2891,11 +2744,13 @@ class TelegramAdapter(BasePlatformAdapter):
logger.info("[Telegram] Analyzing sticker at %s", cached_path)
from tools.vision_tools import vision_analyze_tool
import json as _json
result_json = await vision_analyze_tool(
image_url=cached_path,
user_prompt=STICKER_VISION_PROMPT,
)
result = json.loads(result_json)
result = _json.loads(result_json)
if result.get("success"):
description = result.get("analysis", "a sticker")
+3 -3
View File
@@ -43,10 +43,10 @@ _DOH_PROVIDERS: list[dict] = [
_SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]
def _resolve_proxy_url(target_hosts=None) -> str | None:
def _resolve_proxy_url() -> str | None:
# Delegate to shared implementation (env vars + macOS system proxy detection)
from gateway.platforms.base import resolve_proxy_url
return resolve_proxy_url("TELEGRAM_PROXY", target_hosts=target_hosts)
return resolve_proxy_url("TELEGRAM_PROXY")
class TelegramFallbackTransport(httpx.AsyncBaseTransport):
@@ -60,7 +60,7 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):
def __init__(self, fallback_ips: Iterable[str], **transport_kwargs):
self._fallback_ips = [ip for ip in dict.fromkeys(_normalize_fallback_ips(fallback_ips))]
proxy_url = _resolve_proxy_url(target_hosts=[_TELEGRAM_API_HOST, *self._fallback_ips])
proxy_url = _resolve_proxy_url()
if proxy_url and "proxy" not in transport_kwargs:
transport_kwargs["proxy"] = proxy_url
self._primary = httpx.AsyncHTTPTransport(**transport_kwargs)
+12 -115
View File
@@ -13,10 +13,6 @@ Each route defines:
- skills: optional list of skills to load for the agent
- deliver: where to send the response (github_comment, telegram, etc.)
- deliver_extra: additional delivery config (repo, pr_number, chat_id)
- deliver_only: if true, skip the agent the rendered prompt IS the
message that gets delivered. Use for external push notifications
(Supabase, monitoring alerts, inter-agent pings) where zero LLM cost
and sub-second delivery matter more than agent reasoning.
Security:
- HMAC secret is required per route (validated at startup)
@@ -126,19 +122,6 @@ class WebhookAdapter(BasePlatformAdapter):
f"For testing without auth, set secret to '{_INSECURE_NO_AUTH}'."
)
# deliver_only routes bypass the agent — the POST body becomes a
# direct push notification via the configured delivery target.
# Validate up-front so misconfiguration surfaces at startup rather
# than on the first webhook POST.
if route.get("deliver_only"):
deliver = route.get("deliver", "log")
if not deliver or deliver == "log":
raise ValueError(
f"[webhook] Route '{name}' has deliver_only=true but "
f"deliver is '{deliver}'. Direct delivery requires a "
f"real target (telegram, discord, slack, github_comment, etc.)."
)
app = web.Application()
app.router.add_get("/health", self._handle_health)
app.router.add_post("/webhooks/{route_name}", self._handle_webhook)
@@ -313,14 +296,24 @@ class WebhookAdapter(BasePlatformAdapter):
{"error": "Payload too large"}, status=413
)
# Read body (must be done before any validation)
# ── Rate limiting ────────────────────────────────────────
now = time.time()
window = self._rate_counts.setdefault(route_name, [])
window[:] = [t for t in window if now - t < 60]
if len(window) >= self._rate_limit:
return web.json_response(
{"error": "Rate limit exceeded"}, status=429
)
window.append(now)
# Read body
try:
raw_body = await request.read()
except Exception as e:
logger.error("[webhook] Failed to read body: %s", e)
return web.json_response({"error": "Bad request"}, status=400)
# Validate HMAC signature FIRST (skip for INSECURE_NO_AUTH testing mode)
# Validate HMAC signature (skip for INSECURE_NO_AUTH testing mode)
secret = route_config.get("secret", self._global_secret)
if secret and secret != _INSECURE_NO_AUTH:
if not self._validate_signature(request, raw_body, secret):
@@ -331,16 +324,6 @@ class WebhookAdapter(BasePlatformAdapter):
{"error": "Invalid signature"}, status=401
)
# ── Rate limiting (after auth) ───────────────────────────
now = time.time()
window = self._rate_counts.setdefault(route_name, [])
window[:] = [t for t in window if now - t < 60]
if len(window) >= self._rate_limit:
return web.json_response(
{"error": "Rate limit exceeded"}, status=429
)
window.append(now)
# Parse payload
try:
payload = json.loads(raw_body)
@@ -436,64 +419,6 @@ class WebhookAdapter(BasePlatformAdapter):
)
self._seen_deliveries[delivery_id] = now
# ── Direct delivery mode (deliver_only) ─────────────────
# Skip the agent entirely — the rendered prompt IS the message we
# deliver. Use case: external services (Supabase, monitoring,
# cron jobs, other agents) that need to push a plain notification
# to a user's chat with zero LLM cost. Reuses the same HMAC auth,
# rate limiting, idempotency, and template rendering as agent mode.
if route_config.get("deliver_only"):
delivery = {
"deliver": route_config.get("deliver", "log"),
"deliver_extra": self._render_delivery_extra(
route_config.get("deliver_extra", {}), payload
),
"payload": payload,
}
logger.info(
"[webhook] direct-deliver event=%s route=%s target=%s msg_len=%d delivery=%s",
event_type,
route_name,
delivery["deliver"],
len(prompt),
delivery_id,
)
try:
result = await self._direct_deliver(prompt, delivery)
except Exception:
logger.exception(
"[webhook] direct-deliver failed route=%s delivery=%s",
route_name,
delivery_id,
)
return web.json_response(
{"status": "error", "error": "Delivery failed", "delivery_id": delivery_id},
status=502,
)
if result.success:
return web.json_response(
{
"status": "delivered",
"route": route_name,
"target": delivery["deliver"],
"delivery_id": delivery_id,
},
status=200,
)
# Delivery attempted but target rejected it — surface as 502
# with a generic error (don't leak adapter-level detail).
logger.warning(
"[webhook] direct-deliver target rejected route=%s target=%s error=%s",
route_name,
delivery["deliver"],
result.error,
)
return web.json_response(
{"status": "error", "error": "Delivery failed", "delivery_id": delivery_id},
status=502,
)
# Use delivery_id in session key so concurrent webhooks on the
# same route get independent agent runs (not queued/interrupted).
session_chat_id = f"webhook:{route_name}:{delivery_id}"
@@ -647,34 +572,6 @@ class WebhookAdapter(BasePlatformAdapter):
# Response delivery
# ------------------------------------------------------------------
async def _direct_deliver(
self, content: str, delivery: dict
) -> SendResult:
"""Deliver *content* directly without invoking the agent.
Used by ``deliver_only`` routes: the rendered template becomes the
literal message body, and we dispatch to the same delivery helpers
that the agent-mode ``send()`` flow uses. All target types that
work in agent mode work here Telegram, Discord, Slack, GitHub
PR comments, etc.
"""
deliver_type = delivery.get("deliver", "log")
if deliver_type == "log":
# Shouldn't reach here — startup validation rejects deliver_only
# with deliver=log — but guard defensively.
logger.info("[webhook] direct-deliver log-only: %s", content[:200])
return SendResult(success=True)
if deliver_type == "github_comment":
return await self._deliver_github_comment(content, delivery)
# Fall through to the cross-platform dispatcher, which validates the
# target name and routes via the gateway runner.
return await self._deliver_cross_platform(
deliver_type, content, delivery
)
async def _deliver_github_comment(
self, content: str, delivery: dict
) -> SendResult:
+5 -146
View File
@@ -508,11 +508,6 @@ class WeComAdapter(BasePlatformAdapter):
self._remember_chat_req_id(chat_id, self._payload_req_id(payload))
text, reply_text = self._extract_text(body)
# Strip leading @mention in group chats so slash commands like
# "@BotName /approve" are correctly recognized as "/approve".
# Mirrors what the Telegram adapter does (re.sub @botname).
if is_group and text:
text = re.sub(r"^@\S+\s*", "", text).strip()
media_urls, media_types = await self._extract_media(body)
message_type = self._derive_message_type(body, text, media_types)
has_reply_context = bool(reply_text and (text or media_urls))
@@ -629,16 +624,13 @@ class WeComAdapter(BasePlatformAdapter):
msgtype = str(body.get("msgtype") or "").lower()
if msgtype == "mixed":
_raw_mixed = body.get("mixed")
mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
_raw_items = mixed.get("msg_item")
items = _raw_items if isinstance(_raw_items, list) else []
mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
for item in items:
if not isinstance(item, dict):
continue
if str(item.get("msgtype") or "").lower() == "text":
_raw_text = item.get("text")
text_block = _raw_text if isinstance(_raw_text, dict) else {}
text_block = item.get("text") if isinstance(item.get("text"), dict) else {}
content = str(text_block.get("content") or "").strip()
if content:
text_parts.append(content)
@@ -680,10 +672,8 @@ class WeComAdapter(BasePlatformAdapter):
msgtype = str(body.get("msgtype") or "").lower()
if msgtype == "mixed":
_raw_mixed = body.get("mixed")
mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
_raw_items = mixed.get("msg_item")
items = _raw_items if isinstance(_raw_items, list) else []
mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
for item in items:
if not isinstance(item, dict):
continue
@@ -1469,134 +1459,3 @@ class WeComAdapter(BasePlatformAdapter):
"name": chat_id,
"type": "group" if chat_id and chat_id.lower().startswith("group") else "dm",
}
# ------------------------------------------------------------------
# QR code scan flow for obtaining bot credentials
# ------------------------------------------------------------------
_QR_GENERATE_URL = "https://work.weixin.qq.com/ai/qc/generate"
_QR_QUERY_URL = "https://work.weixin.qq.com/ai/qc/query_result"
_QR_CODE_PAGE = "https://work.weixin.qq.com/ai/qc/gen?source=hermes&scode="
_QR_POLL_INTERVAL = 3 # seconds
_QR_POLL_TIMEOUT = 300 # 5 minutes
def qr_scan_for_bot_info(
*,
timeout_seconds: int = _QR_POLL_TIMEOUT,
) -> Optional[Dict[str, str]]:
"""Run the WeCom QR scan flow to obtain bot_id and secret.
Fetches a QR code from WeCom, renders it in the terminal, and polls
until the user scans it or the timeout expires.
Returns ``{"bot_id": ..., "secret": ...}`` on success, ``None`` on
failure or timeout.
Note: the ``work.weixin.qq.com/ai/qc/{generate,query_result}`` endpoints
used here are not part of WeCom's public developer API — they back the
admin-console web UI's bot-creation flow and may change without notice.
The same pattern is used by the feishu/dingtalk QR setup wizards.
"""
try:
import urllib.request
import urllib.parse
except ImportError: # pragma: no cover
logger.error("urllib is required for WeCom QR scan")
return None
generate_url = f"{_QR_GENERATE_URL}?source=hermes"
# ── Step 1: Fetch QR code ──
print(" Connecting to WeCom...", end="", flush=True)
try:
req = urllib.request.Request(generate_url, headers={"User-Agent": "HermesAgent/1.0"})
with urllib.request.urlopen(req, timeout=15) as resp:
raw = json.loads(resp.read().decode("utf-8"))
except Exception as exc:
logger.error("WeCom QR: failed to fetch QR code: %s", exc)
print(f" failed: {exc}")
return None
data = raw.get("data") or {}
scode = str(data.get("scode") or "").strip()
auth_url = str(data.get("auth_url") or "").strip()
if not scode or not auth_url:
logger.error("WeCom QR: unexpected response format: %s", raw)
print(" failed: unexpected response format")
return None
print(" done.")
# ── Step 2: Render QR code in terminal ──
print()
qr_rendered = False
try:
import qrcode as _qrcode
qr = _qrcode.QRCode()
qr.add_data(auth_url)
qr.make(fit=True)
qr.print_ascii(invert=True)
qr_rendered = True
except ImportError:
pass
except Exception:
pass
page_url = f"{_QR_CODE_PAGE}{urllib.parse.quote(scode)}"
if qr_rendered:
print(f"\n Scan the QR code above, or open this URL directly:\n {page_url}")
else:
print(f" Open this URL in WeCom on your phone:\n\n {page_url}\n")
print(" Tip: pip install qrcode to display a scannable QR code here next time")
print()
print(" Fetching configuration results...", end="", flush=True)
# ── Step 3: Poll for result ──
import time
deadline = time.time() + timeout_seconds
query_url = f"{_QR_QUERY_URL}?scode={urllib.parse.quote(scode)}"
poll_count = 0
while time.time() < deadline:
try:
req = urllib.request.Request(query_url, headers={"User-Agent": "HermesAgent/1.0"})
with urllib.request.urlopen(req, timeout=10) as resp:
result = json.loads(resp.read().decode("utf-8"))
except Exception as exc:
logger.debug("WeCom QR poll error: %s", exc)
time.sleep(_QR_POLL_INTERVAL)
continue
poll_count += 1
# Print a dot on every poll so progress is visible within 3s.
print(".", end="", flush=True)
result_data = result.get("data") or {}
status = str(result_data.get("status") or "").lower()
if status == "success":
print() # newline after "Fetching configuration results..." dots
bot_info = result_data.get("bot_info") or {}
bot_id = str(bot_info.get("botid") or bot_info.get("bot_id") or "").strip()
secret = str(bot_info.get("secret") or "").strip()
if bot_id and secret:
return {"bot_id": bot_id, "secret": secret}
logger.warning(
"WeCom QR: scan reported success but bot_info missing or incomplete: %s",
result_data,
)
print(
" QR scan reported success but no bot credentials were returned.\n"
" This usually means the bot was not actually created on the WeCom side.\n"
" Falling back to manual credential entry."
)
return None
time.sleep(_QR_POLL_INTERVAL)
print() # newline after dots
print(f" QR scan timed out ({timeout_seconds // 60} minutes). Please try again.")
return None
+34 -119
View File
@@ -66,37 +66,6 @@ def _kill_port_process(port: int) -> None:
except Exception:
pass
def _terminate_bridge_process(proc, *, force: bool = False) -> None:
"""Terminate the bridge process using process-tree semantics where possible."""
if _IS_WINDOWS:
cmd = ["taskkill", "/PID", str(proc.pid), "/T"]
if force:
cmd.append("/F")
try:
result = subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=10,
)
except FileNotFoundError:
if force:
proc.kill()
else:
proc.terminate()
return
if result.returncode != 0:
details = (result.stderr or result.stdout or "").strip()
raise OSError(details or f"taskkill failed for PID {proc.pid}")
return
import signal
sig = signal.SIGTERM if not force else signal.SIGKILL
os.killpg(os.getpgid(proc.pid), sig)
import sys
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
@@ -149,10 +118,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
- bridge_script: Path to the Node.js bridge script
- bridge_port: Port for HTTP communication (default: 3000)
- session_path: Path to store WhatsApp session data
- dm_policy: "open" | "allowlist" | "disabled" how DMs are handled (default: "open")
- allow_from: List of sender IDs allowed in DMs (when dm_policy="allowlist")
- group_policy: "open" | "allowlist" | "disabled" which groups are processed (default: "open")
- group_allow_from: List of group JIDs allowed (when group_policy="allowlist")
"""
# WhatsApp message limits — practical UX limit, not protocol max.
@@ -175,10 +140,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
get_hermes_dir("platforms/whatsapp/session", "whatsapp/session")
))
self._reply_prefix: Optional[str] = config.extra.get("reply_prefix")
self._dm_policy = str(config.extra.get("dm_policy") or os.getenv("WHATSAPP_DM_POLICY", "open")).strip().lower()
self._allow_from = self._coerce_allow_list(config.extra.get("allow_from") or config.extra.get("allowFrom"))
self._group_policy = str(config.extra.get("group_policy") or os.getenv("WHATSAPP_GROUP_POLICY", "open")).strip().lower()
self._group_allow_from = self._coerce_allow_list(config.extra.get("group_allow_from") or config.extra.get("groupAllowFrom"))
self._mention_patterns = self._compile_mention_patterns()
self._message_queue: asyncio.Queue = asyncio.Queue()
self._bridge_log_fh = None
@@ -202,33 +163,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
return {str(part).strip() for part in raw if str(part).strip()}
return {part.strip() for part in str(raw).split(",") if part.strip()}
@staticmethod
def _coerce_allow_list(raw) -> set[str]:
"""Parse allow_from / group_allow_from from config or env var."""
if raw is None:
return set()
if isinstance(raw, list):
return {str(part).strip() for part in raw if str(part).strip()}
return {part.strip() for part in str(raw).split(",") if part.strip()}
def _is_dm_allowed(self, sender_id: str) -> bool:
"""Check whether a DM from the given sender should be processed."""
if self._dm_policy == "disabled":
return False
if self._dm_policy == "allowlist":
return sender_id in self._allow_from
# "open" — all DMs allowed
return True
def _is_group_allowed(self, chat_id: str) -> bool:
"""Check whether a group chat should be processed."""
if self._group_policy == "disabled":
return False
if self._group_policy == "allowlist":
return chat_id in self._group_allow_from
# "open" — all groups allowed
return True
def _compile_mention_patterns(self):
patterns = self.config.extra.get("mention_patterns")
if patterns is None:
@@ -321,18 +255,8 @@ class WhatsAppAdapter(BasePlatformAdapter):
return cleaned.strip() or text
def _should_process_message(self, data: Dict[str, Any]) -> bool:
is_group = data.get("isGroup", False)
if is_group:
chat_id = str(data.get("chatId") or "")
if not self._is_group_allowed(chat_id):
return False
else:
sender_id = str(data.get("senderId") or data.get("from") or "")
if not self._is_dm_allowed(sender_id):
return False
# DMs that pass the policy gate are always processed
if not data.get("isGroup"):
return True
# Group messages: check mention / free-response settings
chat_id = str(data.get("chatId") or "")
if chat_id in self._whatsapp_free_response_chats():
return True
@@ -365,40 +289,39 @@ class WhatsAppAdapter(BasePlatformAdapter):
logger.info("[%s] Bridge found at %s", self.name, bridge_path)
# Acquire scoped lock to prevent duplicate sessions
lock_acquired = False
try:
if not self._acquire_platform_lock('whatsapp-session', str(self._session_path), 'WhatsApp session'):
return False
lock_acquired = True
except Exception as e:
logger.warning("[%s] Could not acquire session lock (non-fatal): %s", self.name, e)
try:
# Auto-install npm dependencies if node_modules doesn't exist
bridge_dir = bridge_path.parent
if not (bridge_dir / "node_modules").exists():
print(f"[{self.name}] Installing WhatsApp bridge dependencies...")
try:
install_result = subprocess.run(
["npm", "install", "--silent"],
cwd=str(bridge_dir),
capture_output=True,
text=True,
timeout=60,
)
if install_result.returncode != 0:
print(f"[{self.name}] npm install failed: {install_result.stderr}")
return False
print(f"[{self.name}] Dependencies installed")
except Exception as e:
print(f"[{self.name}] Failed to install dependencies: {e}")
# Auto-install npm dependencies if node_modules doesn't exist
bridge_dir = bridge_path.parent
if not (bridge_dir / "node_modules").exists():
print(f"[{self.name}] Installing WhatsApp bridge dependencies...")
try:
install_result = subprocess.run(
["npm", "install", "--silent"],
cwd=str(bridge_dir),
capture_output=True,
text=True,
timeout=60,
)
if install_result.returncode != 0:
print(f"[{self.name}] npm install failed: {install_result.stderr}")
return False
print(f"[{self.name}] Dependencies installed")
except Exception as e:
print(f"[{self.name}] Failed to install dependencies: {e}")
return False
try:
# Ensure session directory exists
self._session_path.mkdir(parents=True, exist_ok=True)
# Check if bridge is already running and connected
import aiohttp
import asyncio
try:
async with aiohttp.ClientSession() as session:
async with session.get(
@@ -529,13 +452,10 @@ class WhatsAppAdapter(BasePlatformAdapter):
return True
except Exception as e:
self._release_platform_lock()
logger.error("[%s] Failed to start bridge: %s", self.name, e, exc_info=True)
self._close_bridge_log()
return False
finally:
if not self._running:
if lock_acquired:
self._release_platform_lock()
self._close_bridge_log()
def _close_bridge_log(self) -> None:
"""Close the bridge log file handle if open."""
@@ -567,14 +487,22 @@ class WhatsAppAdapter(BasePlatformAdapter):
"""Stop the WhatsApp bridge and clean up any orphaned processes."""
if self._bridge_process:
try:
# Kill the entire process group so child node processes die too
import signal
try:
_terminate_bridge_process(self._bridge_process, force=False)
if _IS_WINDOWS:
self._bridge_process.terminate()
else:
os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGTERM)
except (ProcessLookupError, PermissionError):
self._bridge_process.terminate()
await asyncio.sleep(1)
if self._bridge_process.poll() is None:
try:
_terminate_bridge_process(self._bridge_process, force=True)
if _IS_WINDOWS:
self._bridge_process.kill()
else:
os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGKILL)
except (ProcessLookupError, PermissionError):
self._bridge_process.kill()
except Exception as e:
@@ -727,8 +655,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
chat_id: str,
message_id: str,
content: str,
*,
finalize: bool = False,
) -> SendResult:
"""Edit a previously sent message via the WhatsApp bridge."""
if not self._running or not self._http_session:
@@ -840,17 +766,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
"""Send a video natively via bridge — plays inline in WhatsApp."""
return await self._send_media_to_bridge(chat_id, video_path, "video", caption)
async def send_voice(
self,
chat_id: str,
audio_path: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
**kwargs,
) -> SendResult:
"""Send an audio file as a WhatsApp voice message via bridge."""
return await self._send_media_to_bridge(chat_id, audio_path, "audio", caption)
async def send_document(
self,
chat_id: str,
File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More