Compare commits

..

1 Commits

Author SHA1 Message Date
Shannon Sands bad9fe2452 add generic gateway startup readiness checks 2026-04-15 10:03:23 +10:00
1119 changed files with 20250 additions and 176065 deletions
-13
View File
@@ -24,15 +24,6 @@
# Optional base URL override (default: Google's OpenAI-compatible endpoint)
# GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai
# =============================================================================
# LLM PROVIDER (Ollama Cloud)
# =============================================================================
# Cloud-hosted open models via Ollama's OpenAI-compatible endpoint.
# Get your key at: https://ollama.com/settings
# OLLAMA_API_KEY=your_ollama_key_here
# Optional base URL override (default: https://ollama.com/v1)
# OLLAMA_BASE_URL=https://ollama.com/v1
# =============================================================================
# LLM PROVIDER (z.ai / GLM)
# =============================================================================
@@ -154,10 +145,6 @@
# Only override here if you need to force a backend without touching config.yaml:
# TERMINAL_ENV=local
# Override the container runtime binary (e.g. to use Podman instead of Docker).
# Useful on systems where Docker's storage driver is broken or unavailable.
# HERMES_DOCKER_BINARY=/usr/local/bin/podman
# Container images (for singularity/docker/modal backends)
# TERMINAL_DOCKER_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
# TERMINAL_SINGULARITY_IMAGE=docker://nikolaik/python-nodejs:python3.11-nodejs20
-4
View File
@@ -1,5 +1 @@
watch_file pyproject.toml uv.lock
watch_file ui-tui/package-lock.json ui-tui/package.json
watch_file flake.nix flake.lock nix/devShell.nix nix/tui.nix nix/package.nix nix/python.nix
use flake
-8
View File
@@ -1,8 +0,0 @@
name: 'Setup Nix'
description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
runs:
using: composite
steps:
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
+8 -10
View File
@@ -1,12 +1,11 @@
name: Deploy Site
on:
release:
types: [published]
push:
branches: [main]
paths:
- 'website/**'
- 'landingpage/**'
- 'skills/**'
- 'optional-skills/**'
- '.github/workflows/deploy-site.yml'
@@ -21,14 +20,8 @@ concurrency:
cancel-in-progress: false
jobs:
deploy-vercel:
if: github.event_name == 'release'
runs-on: ubuntu-latest
steps:
- name: Trigger Vercel Deploy
run: curl -X POST "${{ secrets.VERCEL_DEPLOY_HOOK }}"
deploy-docs:
build-and-deploy:
# Only run on the upstream repository, not on forks
if: github.repository == 'NousResearch/hermes-agent'
runs-on: ubuntu-latest
environment:
@@ -72,7 +65,12 @@ jobs:
- name: Stage deployment
run: |
mkdir -p _site/docs
# Landing page at root
cp -r landingpage/* _site/
# Docusaurus at /docs/
cp -r website/build/* _site/docs/
# CNAME so GitHub Pages keeps the custom domain between deploys
echo "hermes-agent.nousresearch.com" > _site/CNAME
- name: Upload artifact
uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3
+2 -15
View File
@@ -3,13 +3,8 @@ name: Docker Build and Publish
on:
push:
branches: [main]
paths:
- '**/*.py'
- 'pyproject.toml'
- 'uv.lock'
- 'Dockerfile'
- 'docker/**'
- '.github/workflows/docker-publish.yml'
pull_request:
branches: [main]
release:
types: [published]
@@ -54,14 +49,6 @@ jobs:
- name: Test image starts
run: |
# The image runs as the hermes user (UID 10000). GitHub Actions
# creates /tmp/hermes-test root-owned by default, which hermes
# can't write to — chown it to match the in-container UID before
# bind-mounting. Real users doing `docker run -v ~/.hermes:...`
# with their own UID hit the same issue and have their own
# remediations (HERMES_UID env var, or chown locally).
mkdir -p /tmp/hermes-test
sudo chown -R 10000:10000 /tmp/hermes-test
docker run --rm \
-v /tmp/hermes-test:/opt/data \
--entrypoint /opt/hermes/docker/entrypoint.sh \
-68
View File
@@ -1,68 +0,0 @@
name: Nix Lockfile Check
on:
pull_request:
workflow_dispatch:
permissions:
contents: read
pull-requests: write
concurrency:
group: nix-lockfile-check-${{ github.ref }}
cancel-in-progress: true
jobs:
check:
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: ./.github/actions/nix-setup
- name: Resolve head SHA
id: sha
shell: bash
run: |
FULL="${{ github.event.pull_request.head.sha || github.sha }}"
echo "full=$FULL" >> "$GITHUB_OUTPUT"
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
- name: Check lockfile hashes
id: check
continue-on-error: true
env:
LINK_SHA: ${{ steps.sha.outputs.full }}
run: nix run .#fix-lockfiles -- --check
- name: Post sticky PR comment (stale)
if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
message: |
### ⚠️ npm lockfile hash out of date
Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
${{ steps.check.outputs.report }}
#### Apply the fix
- [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
- Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
- name: Clear sticky PR comment (resolved)
if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
delete: true
- name: Fail if stale
if: steps.check.outputs.stale == 'true'
run: exit 1
-149
View File
@@ -1,149 +0,0 @@
name: Nix Lockfile Fix
on:
workflow_dispatch:
inputs:
pr_number:
description: 'PR number to fix (leave empty to run on the selected branch)'
required: false
type: string
issue_comment:
types: [edited]
permissions:
contents: write
pull-requests: write
concurrency:
group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
cancel-in-progress: false
jobs:
fix:
# Run on manual dispatch OR when a task-list checkbox in the sticky
# lockfile-check comment flips from `[ ]` to `[x]`.
if: |
github.event_name == 'workflow_dispatch' ||
(github.event_name == 'issue_comment'
&& github.event.issue.pull_request != null
&& contains(github.event.comment.body, '[x] **Apply lockfile fix**')
&& !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
runs-on: ubuntu-latest
timeout-minutes: 25
steps:
- name: Authorize & resolve PR
id: resolve
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
with:
script: |
// 1. Verify the actor has write access — applies to both checkbox
// clicks and manual dispatch.
const { data: perm } =
await github.rest.repos.getCollaboratorPermissionLevel({
owner: context.repo.owner,
repo: context.repo.repo,
username: context.actor,
});
if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
core.setFailed(
`${context.actor} lacks write access (has: ${perm.permission})`
);
return;
}
// 2. Resolve which ref to check out.
let prNumber = '';
if (context.eventName === 'issue_comment') {
prNumber = String(context.payload.issue.number);
} else if (context.eventName === 'workflow_dispatch') {
prNumber = context.payload.inputs.pr_number || '';
}
if (!prNumber) {
core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
core.setOutput('repo', context.repo.repo);
core.setOutput('owner', context.repo.owner);
core.setOutput('pr', '');
return;
}
const { data: pr } = await github.rest.pulls.get({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: Number(prNumber),
});
core.setOutput('ref', pr.head.ref);
core.setOutput('repo', pr.head.repo.name);
core.setOutput('owner', pr.head.repo.owner.login);
core.setOutput('pr', String(pr.number));
# Wipe the sticky lockfile-check comment to a "running" state as soon
# as the job is authorized, so the user sees their click was picked up
# before the ~minute of nix build work.
- name: Mark sticky as running
if: steps.resolve.outputs.pr != ''
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
number: ${{ steps.resolve.outputs.pr }}
message: |
### 🔄 Applying lockfile fix…
Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
with:
repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
ref: ${{ steps.resolve.outputs.ref }}
token: ${{ secrets.GITHUB_TOKEN }}
fetch-depth: 0
- uses: ./.github/actions/nix-setup
- name: Apply lockfile hashes
id: apply
run: nix run .#fix-lockfiles -- --apply
- name: Commit & push
if: steps.apply.outputs.changed == 'true'
shell: bash
run: |
set -euo pipefail
git config user.name 'github-actions[bot]'
git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
git add nix/tui.nix nix/web.nix
git commit -m "fix(nix): refresh npm lockfile hashes"
git push
- name: Update sticky (applied)
if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
number: ${{ steps.resolve.outputs.pr }}
message: |
### ✅ Lockfile fix applied
Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
- name: Update sticky (already current)
if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
number: ${{ steps.resolve.outputs.pr }}
message: |
### ✅ Lockfile hashes already current
Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
- name: Update sticky (failed)
if: failure() && steps.resolve.outputs.pr != ''
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
with:
header: nix-lockfile-check
number: ${{ steps.resolve.outputs.pr }}
message: |
### ❌ Lockfile fix failed
See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
+12 -2
View File
@@ -4,6 +4,15 @@ on:
push:
branches: [main]
pull_request:
paths:
- 'flake.nix'
- 'flake.lock'
- 'nix/**'
- 'pyproject.toml'
- 'uv.lock'
- 'hermes_cli/**'
- 'run_agent.py'
- 'acp_adapter/**'
permissions:
contents: read
@@ -20,8 +29,9 @@ jobs:
runs-on: ${{ matrix.os }}
timeout-minutes: 30
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: ./.github/actions/nix-setup
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
- name: Check flake
if: runner.os == 'Linux'
run: nix flake check --print-build-logs
+146 -37
View File
@@ -3,31 +3,14 @@ name: Supply Chain Audit
on:
pull_request:
types: [opened, synchronize, reopened]
paths:
- '**/*.py'
- '**/*.pth'
- '**/setup.py'
- '**/setup.cfg'
- '**/sitecustomize.py'
- '**/usercustomize.py'
- '**/__init__.pth'
permissions:
pull-requests: write
contents: read
# Narrow, high-signal scanner. Only fires on critical indicators of supply
# chain attacks (e.g. the litellm-style payloads). Low-signal heuristics
# (plain base64, plain exec/eval, dependency/Dockerfile/workflow edits,
# Actions version unpinning, outbound POST/PUT) were intentionally
# removed — they fired on nearly every PR and trained reviewers to ignore
# the scanner. Keep this file's checks ruthlessly narrow: if you find
# yourself adding WARNING-tier patterns here again, make a separate
# advisory-only workflow instead.
jobs:
scan:
name: Scan PR for critical supply chain risks
name: Scan PR for supply chain risks
runs-on: ubuntu-latest
steps:
- name: Checkout
@@ -35,7 +18,7 @@ jobs:
with:
fetch-depth: 0
- name: Scan diff for critical patterns
- name: Scan diff for suspicious patterns
id: scan
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -45,19 +28,19 @@ jobs:
BASE="${{ github.event.pull_request.base.sha }}"
HEAD="${{ github.event.pull_request.head.sha }}"
# Added lines only, excluding lockfiles.
# Get the full diff (added lines only)
DIFF=$(git diff "$BASE".."$HEAD" -- . ':!uv.lock' ':!*.lock' ':!package-lock.json' ':!yarn.lock' || true)
FINDINGS=""
CRITICAL=false
# --- .pth files (auto-execute on Python startup) ---
# The exact mechanism used in the litellm supply chain attack:
# https://github.com/BerriAI/litellm/issues/24512
PTH_FILES=$(git diff --name-only "$BASE".."$HEAD" | grep '\.pth$' || true)
if [ -n "$PTH_FILES" ]; then
CRITICAL=true
FINDINGS="${FINDINGS}
### 🚨 CRITICAL: .pth file added or modified
Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required.
Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required. This is the exact mechanism used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512).
**Files:**
\`\`\`
@@ -66,12 +49,13 @@ jobs:
"
fi
# --- base64 decode + exec/eval on the same line (the litellm attack pattern) ---
# --- base64 + exec/eval combo (the litellm attack pattern) ---
B64_EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|decodebytes|urlsafe_b64decode)' | grep -iE 'exec\(|eval\(' | head -10 || true)
if [ -n "$B64_EXEC_HITS" ]; then
CRITICAL=true
FINDINGS="${FINDINGS}
### 🚨 CRITICAL: base64 decode + exec/eval combo
Base64-decoded strings passed directly to exec/eval — the signature of hidden credential-stealing payloads.
This is the exact pattern used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512) — base64-decoded strings passed to exec/eval to hide credential-stealing payloads.
**Matches:**
\`\`\`
@@ -80,12 +64,41 @@ jobs:
"
fi
# --- subprocess with encoded/obfuscated command argument ---
PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|\\x[0-9a-f]{2}|chr\(' | head -10 || true)
# --- base64 decode/encode (alone — legitimate uses exist) ---
B64_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|b64encode|decodebytes|encodebytes|urlsafe_b64decode)|atob\(|btoa\(|Buffer\.from\(.*base64' | head -20 || true)
if [ -n "$B64_HITS" ]; then
FINDINGS="${FINDINGS}
### ⚠️ WARNING: base64 encoding/decoding detected
Base64 has legitimate uses (images, JWT, etc.) but is also commonly used to obfuscate malicious payloads. Verify the usage is appropriate.
**Matches (first 20):**
\`\`\`
${B64_HITS}
\`\`\`
"
fi
# --- exec/eval with string arguments ---
EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E '(exec|eval)\s*\(' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert\|# ' | head -20 || true)
if [ -n "$EXEC_HITS" ]; then
FINDINGS="${FINDINGS}
### ⚠️ WARNING: exec() or eval() usage
Dynamic code execution can hide malicious behavior, especially when combined with base64 or network fetches.
**Matches (first 20):**
\`\`\`
${EXEC_HITS}
\`\`\`
"
fi
# --- subprocess with encoded/obfuscated commands ---
PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|decode|encode|\\x|chr\(' | head -10 || true)
if [ -n "$PROC_HITS" ]; then
CRITICAL=true
FINDINGS="${FINDINGS}
### 🚨 CRITICAL: subprocess with encoded/obfuscated command
Subprocess calls whose command strings are base64- or hex-encoded are a strong indicator of payload execution.
Subprocess calls with encoded arguments are a strong indicator of payload execution.
**Matches:**
\`\`\`
@@ -94,12 +107,25 @@ jobs:
"
fi
# --- Install-hook files (setup.py/sitecustomize/usercustomize/__init__.pth) ---
# These execute during pip install or interpreter startup.
SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(^|/)(setup\.py|setup\.cfg|sitecustomize\.py|usercustomize\.py|__init__\.pth)$' || true)
# --- Network calls to non-standard domains ---
EXFIL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'requests\.(post|put)\(|httpx\.(post|put)\(|urllib\.request\.urlopen' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert' | head -10 || true)
if [ -n "$EXFIL_HITS" ]; then
FINDINGS="${FINDINGS}
### ⚠️ WARNING: Outbound network calls (POST/PUT)
Outbound POST/PUT requests in new code could be data exfiltration. Verify the destination URLs are legitimate.
**Matches (first 10):**
\`\`\`
${EXFIL_HITS}
\`\`\`
"
fi
# --- setup.py / setup.cfg install hooks ---
SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(setup\.py|setup\.cfg|__init__\.pth|sitecustomize\.py|usercustomize\.py)$' || true)
if [ -n "$SETUP_HITS" ]; then
FINDINGS="${FINDINGS}
### 🚨 CRITICAL: Install-hook file added or modified
### ⚠️ WARNING: Install hook files modified
These files can execute code during package installation or interpreter startup.
**Files:**
@@ -109,31 +135,114 @@ jobs:
"
fi
# --- Compile/marshal/pickle (code object injection) ---
MARSHAL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'marshal\.loads|pickle\.loads|compile\(' | grep -v '^\+\s*#' | grep -v 'test_\|re\.compile\|ast\.compile' | head -10 || true)
if [ -n "$MARSHAL_HITS" ]; then
FINDINGS="${FINDINGS}
### ⚠️ WARNING: marshal/pickle/compile usage
These can deserialize or construct executable code objects.
**Matches:**
\`\`\`
${MARSHAL_HITS}
\`\`\`
"
fi
# --- CI/CD workflow files modified ---
WORKFLOW_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '\.github/workflows/.*\.ya?ml$' || true)
if [ -n "$WORKFLOW_HITS" ]; then
FINDINGS="${FINDINGS}
### ⚠️ WARNING: CI/CD workflow files modified
Changes to workflow files can alter build pipelines, inject steps, or modify permissions. Verify no unauthorized actions or secrets access were added.
**Files:**
\`\`\`
${WORKFLOW_HITS}
\`\`\`
"
fi
# --- Dockerfile / container build files modified ---
DOCKER_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -iE '(Dockerfile|\.dockerignore|docker-compose)' || true)
if [ -n "$DOCKER_HITS" ]; then
FINDINGS="${FINDINGS}
### ⚠️ WARNING: Container build files modified
Changes to Dockerfiles or compose files can alter base images, add build steps, or expose ports. Verify base image pins and build commands.
**Files:**
\`\`\`
${DOCKER_HITS}
\`\`\`
"
fi
# --- Dependency manifest files modified ---
DEP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(pyproject\.toml|requirements.*\.txt|package\.json|Gemfile|go\.mod|Cargo\.toml)$' || true)
if [ -n "$DEP_HITS" ]; then
FINDINGS="${FINDINGS}
### ⚠️ WARNING: Dependency manifest files modified
Changes to dependency files can introduce new packages or change version pins. Verify all dependency changes are intentional and from trusted sources.
**Files:**
\`\`\`
${DEP_HITS}
\`\`\`
"
fi
# --- GitHub Actions version unpinning (mutable tags instead of SHAs) ---
ACTIONS_UNPIN=$(echo "$DIFF" | grep -n '^\+' | grep 'uses:' | grep -v '#' | grep -E '@v[0-9]' | head -10 || true)
if [ -n "$ACTIONS_UNPIN" ]; then
FINDINGS="${FINDINGS}
### ⚠️ WARNING: GitHub Actions with mutable version tags
Actions should be pinned to full commit SHAs (not \`@v4\`, \`@v5\`). Mutable tags can be retargeted silently if a maintainer account is compromised.
**Matches:**
\`\`\`
${ACTIONS_UNPIN}
\`\`\`
"
fi
# --- Output results ---
if [ -n "$FINDINGS" ]; then
echo "found=true" >> "$GITHUB_OUTPUT"
if [ "$CRITICAL" = true ]; then
echo "critical=true" >> "$GITHUB_OUTPUT"
else
echo "critical=false" >> "$GITHUB_OUTPUT"
fi
# Write findings to a file (multiline env vars are fragile)
echo "$FINDINGS" > /tmp/findings.md
else
echo "found=false" >> "$GITHUB_OUTPUT"
echo "critical=false" >> "$GITHUB_OUTPUT"
fi
- name: Post critical finding comment
- name: Post warning comment
if: steps.scan.outputs.found == 'true'
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
BODY="## 🚨 CRITICAL Supply Chain Risk Detected
SEVERITY="⚠️ Supply Chain Risk Detected"
if [ "${{ steps.scan.outputs.critical }}" = "true" ]; then
SEVERITY="🚨 CRITICAL Supply Chain Risk Detected"
fi
This PR contains a pattern that has been used in real supply chain attacks. A maintainer must review the flagged code carefully before merging.
BODY="## ${SEVERITY}
This PR contains patterns commonly associated with supply chain attacks. This does **not** mean the PR is malicious — but these patterns require careful human review before merging.
$(cat /tmp/findings.md)
---
*Scanner only fires on high-signal indicators: .pth files, base64+exec/eval combos, subprocess with encoded commands, or install-hook files. Low-signal warnings were removed intentionally — if you're seeing this comment, the finding is worth inspecting.*"
*Automated scan triggered by [supply-chain-audit](/.github/workflows/supply-chain-audit.yml). If this is a false positive, a maintainer can approve after manual review.*"
gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs — GITHUB_TOKEN is read-only)"
- name: Fail on critical findings
if: steps.scan.outputs.found == 'true'
if: steps.scan.outputs.critical == 'true'
run: |
echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment for details."
exit 1
+1 -7
View File
@@ -3,14 +3,8 @@ name: Tests
on:
push:
branches: [main]
paths-ignore:
- '**/*.md'
- 'docs/**'
pull_request:
branches: [main]
paths-ignore:
- '**/*.md'
- 'docs/**'
permissions:
contents: read
@@ -23,7 +17,7 @@ concurrency:
jobs:
test:
runs-on: ubuntu-latest
timeout-minutes: 20
timeout-minutes: 10
steps:
- name: Checkout code
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
-6
View File
@@ -54,17 +54,11 @@ environments/benchmarks/evals/
# Web UI build output
hermes_cli/web_dist/
# Web UI assets — synced from @nous-research/ui at build time via
# `npm run sync-assets` (see web/package.json).
web/public/fonts/
web/public/ds-assets/
# Release script temp files
.release_notes.md
mini-swe-agent/
# Nix
.direnv/
.nix-stamps/
result
website/static/api/skills-index.json
-1
View File
@@ -105,4 +105,3 @@ tesseracttars-creator <tesseracttars@gmail.com> <tesseracttars@gmail.com>
xinbenlv <zzn+pa@zzn.im> <zzn+pa@zzn.im>
SaulJWu <saul.jj.wu@gmail.com> <saul.jj.wu@gmail.com>
angelos <angelos@oikos.lan.home.malaiwah.com> <angelos@oikos.lan.home.malaiwah.com>
MestreY0d4-Uninter <241404605+MestreY0d4-Uninter@users.noreply.github.com> <MestreY0d4-Uninter@users.noreply.github.com>
+9 -156
View File
@@ -13,7 +13,7 @@ source venv/bin/activate # ALWAYS activate before running Python
```
hermes-agent/
├── run_agent.py # AIAgent class — core conversation loop
├── model_tools.py # Tool orchestration, discover_builtin_tools(), handle_function_call()
├── model_tools.py # Tool orchestration, _discover_tools(), handle_function_call()
├── toolsets.py # Toolset definitions, _HERMES_CORE_TOOLS list
├── cli.py # HermesCLI class — interactive CLI orchestrator
├── hermes_state.py # SessionDB — SQLite session store (FTS5 search)
@@ -56,19 +56,6 @@ hermes-agent/
│ ├── run.py # Main loop, slash commands, message dispatch
│ ├── session.py # SessionStore — conversation persistence
│ └── platforms/ # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
├── ui-tui/ # Ink (React) terminal UI — `hermes --tui`
│ ├── src/entry.tsx # TTY gate + render()
│ ├── src/app.tsx # Main state machine and UI
│ ├── src/gatewayClient.ts # Child process + JSON-RPC bridge
│ ├── src/app/ # Decomposed app logic (event handler, slash handler, stores, hooks)
│ ├── src/components/ # Ink components (branding, markdown, prompts, pickers, etc.)
│ ├── src/hooks/ # useCompletion, useInputHistory, useQueue, useVirtualHistory
│ └── src/lib/ # Pure helpers (history, osc52, text, rpc, messages)
├── tui_gateway/ # Python JSON-RPC backend for the TUI
│ ├── entry.py # stdio entrypoint
│ ├── server.py # RPC handlers and session logic
│ ├── render.py # Optional rich/ANSI bridge
│ └── slash_worker.py # Persistent HermesCLI subprocess for slash commands
├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration)
├── cron/ # Scheduler (jobs.py, scheduler.py)
├── environments/ # RL training environments (Atropos)
@@ -192,62 +179,9 @@ if canonical == "mycommand":
---
## TUI Architecture (ui-tui + tui_gateway)
The TUI is a full replacement for the classic (prompt_toolkit) CLI, activated via `hermes --tui` or `HERMES_TUI=1`.
### Process Model
```
hermes --tui
└─ Node (Ink) ──stdio JSON-RPC── Python (tui_gateway)
│ └─ AIAgent + tools + sessions
└─ renders transcript, composer, prompts, activity
```
TypeScript owns the screen. Python owns sessions, tools, model calls, and slash command logic.
### Transport
Newline-delimited JSON-RPC over stdio. Requests from Ink, events from Python. See `tui_gateway/server.py` for the full method/event catalog.
### Key Surfaces
| Surface | Ink component | Gateway method |
|---------|---------------|----------------|
| Chat streaming | `app.tsx` + `messageLine.tsx` | `prompt.submit``message.delta/complete` |
| Tool activity | `thinking.tsx` | `tool.start/progress/complete` |
| Approvals | `prompts.tsx` | `approval.respond``approval.request` |
| Clarify/sudo/secret | `prompts.tsx`, `maskedPrompt.tsx` | `clarify/sudo/secret.respond` |
| Session picker | `sessionPicker.tsx` | `session.list/resume` |
| Slash commands | Local handler + fallthrough | `slash.exec``_SlashWorker`, `command.dispatch` |
| Completions | `useCompletion` hook | `complete.slash`, `complete.path` |
| Theming | `theme.ts` + `branding.tsx` | `gateway.ready` with skin data |
### Slash Command Flow
1. Built-in client commands (`/help`, `/quit`, `/clear`, `/resume`, `/copy`, `/paste`, etc.) handled locally in `app.tsx`
2. Everything else → `slash.exec` (runs in persistent `_SlashWorker` subprocess) → `command.dispatch` fallback
### Dev Commands
```bash
cd ui-tui
npm install # first time
npm run dev # watch mode (rebuilds hermes-ink + tsx --watch)
npm start # production
npm run build # full build (hermes-ink + tsc)
npm run type-check # typecheck only (tsc --noEmit)
npm run lint # eslint
npm run fmt # prettier
npm test # vitest
```
---
## Adding New Tools
Requires changes in **2 files**:
Requires changes in **3 files**:
**1. Create `tools/your_tool.py`:**
```python
@@ -270,9 +204,9 @@ registry.register(
)
```
**2. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.
**2. Add import** in `model_tools.py` `_discover_tools()` list.
Auto-discovery: any `tools/*.py` file with a top-level `registry.register()` call is imported automatically — no manual import list to maintain.
**3. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset.
The registry handles schema collection, dispatch, availability checking, and error wrapping. All handlers MUST return a JSON string.
@@ -524,94 +458,13 @@ def profile_env(tmp_path, monkeypatch):
## Testing
**ALWAYS use `scripts/run_tests.sh`** — do not call `pytest` directly. The script enforces
hermetic environment parity with CI (unset credential vars, TZ=UTC, LANG=C.UTF-8,
4 xdist workers matching GHA ubuntu-latest). Direct `pytest` on a 16+ core
developer machine with API keys set diverges from CI in ways that have caused
multiple "works locally, fails in CI" incidents (and the reverse).
```bash
scripts/run_tests.sh # full suite, CI-parity
scripts/run_tests.sh tests/gateway/ # one directory
scripts/run_tests.sh tests/agent/test_foo.py::test_x # one test
scripts/run_tests.sh -v --tb=long # pass-through pytest flags
```
### Why the wrapper (and why the old "just call pytest" doesn't work)
Five real sources of local-vs-CI drift the script closes:
| | Without wrapper | With wrapper |
|---|---|---|
| Provider API keys | Whatever is in your env (auto-detects pool) | All `*_API_KEY`/`*_TOKEN`/etc. unset |
| HOME / `~/.hermes/` | Your real config+auth.json | Temp dir per test |
| Timezone | Local TZ (PDT etc.) | UTC |
| Locale | Whatever is set | C.UTF-8 |
| xdist workers | `-n auto` = all cores (20+ on a workstation) | `-n 4` matching CI |
`tests/conftest.py` also enforces points 1-4 as an autouse fixture so ANY pytest
invocation (including IDE integrations) gets hermetic behavior — but the wrapper
is belt-and-suspenders.
### Running without the wrapper (only if you must)
If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
pytest directly), at minimum activate the venv and pass `-n 4`:
```bash
source venv/bin/activate
python -m pytest tests/ -q -n 4
python -m pytest tests/ -q # Full suite (~3000 tests, ~3 min)
python -m pytest tests/test_model_tools.py -q # Toolset resolution
python -m pytest tests/test_cli_init.py -q # CLI config loading
python -m pytest tests/gateway/ -q # Gateway tests
python -m pytest tests/tools/ -q # Tool-level tests
```
Worker count above 4 will surface test-ordering flakes that CI never sees.
Always run the full suite before pushing changes.
### Don't write change-detector tests
A test is a **change-detector** if it fails whenever data that is **expected
to change** gets updated — model catalogs, config version numbers,
enumeration counts, hardcoded lists of provider models. These tests add no
behavioral coverage; they just guarantee that routine source updates break
CI and cost engineering time to "fix."
**Do not write:**
```python
# catalog snapshot — breaks every model release
assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"]
assert "MiniMax-M2.7" in models
# config version literal — breaks every schema bump
assert DEFAULT_CONFIG["_config_version"] == 21
# enumeration count — breaks every time a skill/provider is added
assert len(_PROVIDER_MODELS["huggingface"]) == 8
```
**Do write:**
```python
# behavior: does the catalog plumbing work at all?
assert "gemini" in _PROVIDER_MODELS
assert len(_PROVIDER_MODELS["gemini"]) >= 1
# behavior: does migration bump the user's version to current latest?
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
# invariant: no plan-only model leaks into the legacy list
assert not (set(moonshot_models) & coding_plan_only_models)
# invariant: every model in the catalog has a context-length entry
for m in _PROVIDER_MODELS["huggingface"]:
assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER
```
The rule: if the test reads like a snapshot of current data, delete it. If
it reads like a contract about how two pieces of data must relate, keep it.
When a PR adds a new provider/model and you want a test, make the test
assert the relationship (e.g. "catalog entries all have context lengths"),
not the specific names.
Reviewers should reject new change-detector tests; authors should convert
them into invariants before re-requesting review.
+10 -18
View File
@@ -21,34 +21,26 @@ RUN useradd -u 10000 -m -d /opt/data hermes
COPY --chmod=0755 --from=gosu_source /gosu /usr/local/bin/
COPY --chmod=0755 --from=uv_source /usr/local/bin/uv /usr/local/bin/uvx /usr/local/bin/
COPY . /opt/hermes
WORKDIR /opt/hermes
# ---------- Layer-cached dependency install ----------
# Copy only package manifests first so npm install + Playwright are cached
# unless the lockfiles themselves change.
COPY package.json package-lock.json ./
COPY web/package.json web/package-lock.json web/
# Install Node dependencies and Playwright as root (--with-deps needs apt)
RUN npm install --prefer-offline --no-audit && \
npx playwright install --with-deps chromium --only-shell && \
(cd web && npm install --prefer-offline --no-audit) && \
cd /opt/hermes/scripts/whatsapp-bridge && \
npm install --prefer-offline --no-audit && \
npm cache clean --force
# ---------- Source code ----------
# .dockerignore excludes node_modules, so the installs above survive.
COPY --chown=hermes:hermes . .
# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
RUN cd web && npm run build
# ---------- Python virtualenv ----------
RUN chown hermes:hermes /opt/hermes
# Hand ownership to hermes user, then install Python deps in a virtualenv
RUN chown -R hermes:hermes /opt/hermes
USER hermes
RUN uv venv && \
uv pip install --no-cache-dir -e ".[all]"
# ---------- Runtime ----------
ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
USER root
RUN chmod +x /opt/hermes/docker/entrypoint.sh
ENV HERMES_HOME=/opt/data
VOLUME [ "/opt/data" ]
ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
+2 -9
View File
@@ -13,7 +13,7 @@
**The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
<table>
<tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
@@ -141,18 +141,11 @@ See `hermes claw migrate --help` for all options, or use the `openclaw-migration
We welcome contributions! See the [Contributing Guide](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) for development setup, code style, and PR process.
Quick start for contributors — clone and go with `setup-hermes.sh`:
Quick start for contributors:
```bash
git clone https://github.com/NousResearch/hermes-agent.git
cd hermes-agent
./setup-hermes.sh # installs uv, creates venv, installs .[all], symlinks ~/.local/bin/hermes
./hermes # auto-detects the venv, no need to `source` first
```
Manual path (equivalent to the above):
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
uv venv venv --python 3.11
source venv/bin/activate
-27
View File
@@ -1,27 +0,0 @@
# Hermes Agent v0.10.0 (v2026.4.16)
**Release Date:** April 16, 2026
> The Tool Gateway release — paid Nous Portal subscribers can now use web search, image generation, text-to-speech, and browser automation through their existing subscription with zero additional API keys.
---
## ✨ Highlights
- **Nous Tool Gateway** — Paid [Nous Portal](https://portal.nousresearch.com) subscribers now get automatic access to **web search** (Firecrawl), **image generation** (FAL / FLUX 2 Pro), **text-to-speech** (OpenAI TTS), and **browser automation** (Browser Use) through their existing subscription. No separate API keys needed — just run `hermes model`, select Nous Portal, and pick which tools to enable. Per-tool opt-in via `use_gateway` config, full integration with `hermes tools` and `hermes status`, and the runtime correctly prefers the gateway even when direct API keys exist. Replaces the old hidden `HERMES_ENABLE_NOUS_MANAGED_TOOLS` env var with clean subscription-based detection. ([#11206](https://github.com/NousResearch/hermes-agent/pull/11206), based on work by @jquesnelle; docs: [#11208](https://github.com/NousResearch/hermes-agent/pull/11208))
---
## 🐛 Bug Fixes & Improvements
This release includes 180+ commits with numerous bug fixes, platform improvements, and reliability enhancements across the agent core, gateway, CLI, and tool system. Full details will be published in the v0.11.0 changelog.
---
## 👥 Contributors
- **@jquesnelle** (emozilla) — Original Tool Gateway implementation ([#10799](https://github.com/NousResearch/hermes-agent/pull/10799)), salvaged and shipped in this release
---
**Full Changelog**: [v2026.4.13...v2026.4.16](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.16)
-84
View File
@@ -1,84 +0,0 @@
# Hermes Agent Security Policy
This document outlines the security protocols, trust model, and deployment hardening guidelines for the **Hermes Agent** project.
## 1. Vulnerability Reporting
Hermes Agent does **not** operate a bug bounty program. Security issues should be reported via [GitHub Security Advisories (GHSA)](https://github.com/NousResearch/hermes-agent/security/advisories/new) or by emailing **security@nousresearch.com**. Do not open public issues for security vulnerabilities.
### Required Submission Details
- **Title & Severity:** Concise description and CVSS score/rating.
- **Affected Component:** Exact file path and line range (e.g., `tools/approval.py:120-145`).
- **Environment:** Output of `hermes version`, commit SHA, OS, and Python version.
- **Reproduction:** Step-by-step Proof-of-Concept (PoC) against `main` or the latest release.
- **Impact:** Explanation of what trust boundary was crossed.
---
## 2. Trust Model
The core assumption is that Hermes is a **personal agent** with one trusted operator.
### Operator & Session Trust
- **Single Tenant:** The system protects the operator from LLM actions, not from malicious co-tenants. Multi-user isolation must happen at the OS/host level.
- **Gateway Security:** Authorized callers (Telegram, Discord, Slack, etc.) receive equal trust. Session keys are used for routing, not as authorization boundaries.
- **Execution:** Defaults to `terminal.backend: local` (direct host execution). Container isolation (Docker, Modal, Daytona) is opt-in for sandboxing.
### Dangerous Command Approval
The approval system (`tools/approval.py`) is a core security boundary. Terminal commands, file operations, and other potentially destructive actions are gated behind explicit user confirmation before execution. The approval mode is configurable via `approvals.mode` in `config.yaml`:
- `"on"` (default) — prompts the user to approve dangerous commands.
- `"auto"` — auto-approves after a configurable delay.
- `"off"` — disables the gate entirely (break-glass; see Section 3).
### Output Redaction
`agent/redact.py` strips secret-like patterns (API keys, tokens, credentials) from all display output before it reaches the terminal or gateway platform. This prevents accidental credential leakage in chat logs, tool previews, and response text. Redaction operates on the display layer only — underlying values remain intact for internal agent operations.
### Skills vs. MCP Servers
- **Installed Skills:** High trust. Equivalent to local host code; skills can read environment variables and run arbitrary commands.
- **MCP Servers:** Lower trust. MCP subprocesses receive a filtered environment (`_build_safe_env()` in `tools/mcp_tool.py`) — only safe baseline variables (`PATH`, `HOME`, `XDG_*`) plus variables explicitly declared in the server's `env` config block are passed through. Host credentials are stripped by default. Additionally, packages invoked via `npx`/`uvx` are checked against the OSV malware database before spawning.
### Code Execution Sandbox
The `execute_code` tool (`tools/code_execution_tool.py`) runs LLM-generated Python scripts in a child process with API keys and tokens stripped from the environment to prevent credential exfiltration. Only environment variables explicitly declared by loaded skills (via `env_passthrough`) or by the user in `config.yaml` (`terminal.env_passthrough`) are passed through. The child accesses Hermes tools via RPC, not direct API calls.
### Subagents
- **No recursive delegation:** The `delegate_task` tool is disabled for child agents.
- **Depth limit:** `MAX_DEPTH = 2` — parent (depth 0) can spawn a child (depth 1); grandchildren are rejected.
- **Memory isolation:** Subagents run with `skip_memory=True` and do not have access to the parent's persistent memory provider. The parent receives only the task prompt and final response as an observation.
---
## 3. Out of Scope (Non-Vulnerabilities)
The following scenarios are **not** considered security breaches:
- **Prompt Injection:** Unless it results in a concrete bypass of the approval system, toolset restrictions, or container sandbox.
- **Public Exposure:** Deploying the gateway to the public internet without external authentication or network protection.
- **Trusted State Access:** Reports that require pre-existing write access to `~/.hermes/`, `.env`, or `config.yaml` (these are operator-owned files).
- **Default Behavior:** Host-level command execution when `terminal.backend` is set to `local` — this is the documented default, not a vulnerability.
- **Configuration Trade-offs:** Intentional break-glass settings such as `approvals.mode: "off"` or `terminal.backend: local` in production.
- **Tool-level read/access restrictions:** The agent has unrestricted shell access via the `terminal` tool by design. Reports that a specific tool (e.g., `read_file`) can access a resource are not vulnerabilities if the same access is available through `terminal`. Tool-level deny lists only constitute a meaningful security boundary when paired with equivalent restrictions on the terminal side (as with write operations, where `WRITE_DENIED_PATHS` is paired with the dangerous command approval system).
---
## 4. Deployment Hardening & Best Practices
### Filesystem & Network
- **Production sandboxing:** Use container backends (`docker`, `modal`, `daytona`) instead of `local` for untrusted workloads.
- **File permissions:** Run as non-root (the Docker image uses UID 10000); protect credentials with `chmod 600 ~/.hermes/.env` on local installs.
- **Network exposure:** Do not expose the gateway or API server to the public internet without VPN, Tailscale, or firewall protection. SSRF protection is enabled by default across all gateway platform adapters (Telegram, Discord, Slack, Matrix, Mattermost, etc.) with redirect validation. Note: the local terminal backend does not apply SSRF filtering, as it operates within the trusted operator's environment.
### Skills & Supply Chain
- **Skill installation:** Review Skills Guard reports (`tools/skills_guard.py`) before installing third-party skills. The audit log at `~/.hermes/skills/.hub/audit.log` tracks every install and removal.
- **MCP safety:** OSV malware checking runs automatically for `npx`/`uvx` packages before MCP server processes are spawned.
- **CI/CD:** GitHub Actions are pinned to full commit SHAs. The `supply-chain-audit.yml` workflow blocks PRs containing `.pth` files or suspicious `base64`+`exec` patterns.
### Credential Storage
- API keys and tokens belong exclusively in `~/.hermes/.env` — never in `config.yaml` or checked into version control.
- The credential pool system (`agent/credential_pool.py`) handles key rotation and fallback. Credentials are resolved from environment variables, not stored in plaintext databases.
---
## 5. Disclosure Process
- **Coordinated Disclosure:** 90-day window or until a fix is released, whichever comes first.
- **Communication:** All updates occur via the GHSA thread or email correspondence with security@nousresearch.com.
- **Credits:** Reporters are credited in release notes unless anonymity is requested.
-41
View File
@@ -20,46 +20,6 @@ from pathlib import Path
from hermes_constants import get_hermes_home
# Methods clients send as periodic liveness probes. They are not part of the
# ACP schema, so the acp router correctly returns JSON-RPC -32601 to the
# caller — but the supervisor task that dispatches the request then surfaces
# the raised RequestError via ``logging.exception("Background task failed")``,
# which dumps a traceback to stderr every probe interval. Clients like
# acp-bridge already treat the -32601 response as "agent alive", so the
# traceback is pure noise. We keep the protocol response intact and only
# silence the stderr noise for this specific benign case.
_BENIGN_PROBE_METHODS = frozenset({"ping", "health", "healthcheck"})
class _BenignProbeMethodFilter(logging.Filter):
"""Suppress acp 'Background task failed' tracebacks caused by unknown
liveness-probe methods (e.g. ``ping``) while leaving every other
background-task error — including method_not_found for any non-probe
method — visible in stderr.
"""
def filter(self, record: logging.LogRecord) -> bool:
if record.getMessage() != "Background task failed":
return True
exc_info = record.exc_info
if not exc_info:
return True
exc = exc_info[1]
# Imported lazily so this module stays importable when the optional
# ``agent-client-protocol`` dependency is not installed.
try:
from acp.exceptions import RequestError
except ImportError:
return True
if not isinstance(exc, RequestError):
return True
if getattr(exc, "code", None) != -32601:
return True
data = getattr(exc, "data", None)
method = data.get("method") if isinstance(data, dict) else None
return method not in _BENIGN_PROBE_METHODS
def _setup_logging() -> None:
"""Route all logging to stderr so stdout stays clean for ACP stdio."""
handler = logging.StreamHandler(sys.stderr)
@@ -69,7 +29,6 @@ def _setup_logging() -> None:
datefmt="%Y-%m-%d %H:%M:%S",
)
)
handler.addFilter(_BenignProbeMethodFilter())
root = logging.getLogger()
root.handlers.clear()
root.addHandler(handler)
+1 -20
View File
@@ -49,7 +49,6 @@ def make_tool_progress_cb(
session_id: str,
loop: asyncio.AbstractEventLoop,
tool_call_ids: Dict[str, Deque[str]],
tool_call_meta: Dict[str, Dict[str, Any]],
) -> Callable:
"""Create a ``tool_progress_callback`` for AIAgent.
@@ -85,16 +84,6 @@ def make_tool_progress_cb(
tool_call_ids[name] = queue
queue.append(tc_id)
snapshot = None
if name in {"write_file", "patch", "skill_manage"}:
try:
from agent.display import capture_local_edit_snapshot
snapshot = capture_local_edit_snapshot(name, args)
except Exception:
logger.debug("Failed to capture ACP edit snapshot for %s", name, exc_info=True)
tool_call_meta[tc_id] = {"args": args, "snapshot": snapshot}
update = build_tool_start(tc_id, name, args)
_send_update(conn, session_id, loop, update)
@@ -130,7 +119,6 @@ def make_step_cb(
session_id: str,
loop: asyncio.AbstractEventLoop,
tool_call_ids: Dict[str, Deque[str]],
tool_call_meta: Dict[str, Dict[str, Any]],
) -> Callable:
"""Create a ``step_callback`` for AIAgent.
@@ -144,12 +132,10 @@ def make_step_cb(
for tool_info in prev_tools:
tool_name = None
result = None
function_args = None
if isinstance(tool_info, dict):
tool_name = tool_info.get("name") or tool_info.get("function_name")
result = tool_info.get("result") or tool_info.get("output")
function_args = tool_info.get("arguments") or tool_info.get("args")
elif isinstance(tool_info, str):
tool_name = tool_info
@@ -159,13 +145,8 @@ def make_step_cb(
tool_call_ids[tool_name] = queue
if tool_name and queue:
tc_id = queue.popleft()
meta = tool_call_meta.pop(tc_id, {})
update = build_tool_complete(
tc_id,
tool_name,
result=str(result) if result is not None else None,
function_args=function_args or meta.get("args"),
snapshot=meta.get("snapshot"),
tc_id, tool_name, result=str(result) if result is not None else None
)
_send_update(conn, session_id, loop, update)
if not queue:
-3
View File
@@ -63,9 +63,6 @@ def make_approval_callback(
logger.warning("Permission request timed out or failed: %s", exc)
return "deny"
if response is None:
return "deny"
outcome = response.outcome
if isinstance(outcome, AllowedOutcome):
option_id = outcome.option_id
+44 -222
View File
@@ -4,7 +4,6 @@ from __future__ import annotations
import asyncio
import logging
import os
from collections import defaultdict, deque
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Deque, Optional
@@ -27,7 +26,6 @@ from acp.schema import (
McpServerHttp,
McpServerSse,
McpServerStdio,
ModelInfo,
NewSessionResponse,
PromptResponse,
ResumeSessionResponse,
@@ -38,7 +36,6 @@ from acp.schema import (
SessionCapabilities,
SessionForkCapabilities,
SessionListCapabilities,
SessionModelState,
SessionResumeCapabilities,
SessionInfo,
TextContentBlock,
@@ -52,7 +49,7 @@ try:
except ImportError:
from acp.schema import AuthMethod as AuthMethodAgent # type: ignore[attr-defined]
from acp_adapter.auth import detect_provider
from acp_adapter.auth import detect_provider, has_provider
from acp_adapter.events import (
make_message_cb,
make_step_cb,
@@ -72,11 +69,6 @@ except Exception:
# Thread pool for running AIAgent (synchronous) in parallel.
_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent")
# Server-side page size for list_sessions. The ACP ListSessionsRequest schema
# does not expose a client-side limit, so this is a fixed cap that clients
# paginate against using `cursor` / `next_cursor`.
_LIST_SESSIONS_PAGE_SIZE = 50
def _extract_text(
prompt: list[
@@ -155,98 +147,6 @@ class HermesACPAgent(acp.Agent):
self._conn = conn
logger.info("ACP client connected")
@staticmethod
def _encode_model_choice(provider: str | None, model: str | None) -> str:
"""Encode a model selection so ACP clients can keep provider context."""
raw_model = str(model or "").strip()
if not raw_model:
return ""
raw_provider = str(provider or "").strip().lower()
if not raw_provider:
return raw_model
return f"{raw_provider}:{raw_model}"
def _build_model_state(self, state: SessionState) -> SessionModelState | None:
"""Return the ACP model selector payload for editors like Zed."""
model = str(state.model or getattr(state.agent, "model", "") or "").strip()
provider = getattr(state.agent, "provider", None) or detect_provider() or "openrouter"
try:
from hermes_cli.models import curated_models_for_provider, normalize_provider, provider_label
normalized_provider = normalize_provider(provider)
provider_name = provider_label(normalized_provider)
available_models: list[ModelInfo] = []
seen_ids: set[str] = set()
for model_id, description in curated_models_for_provider(normalized_provider):
rendered_model = str(model_id or "").strip()
if not rendered_model:
continue
choice_id = self._encode_model_choice(normalized_provider, rendered_model)
if choice_id in seen_ids:
continue
desc_parts = [f"Provider: {provider_name}"]
if description:
desc_parts.append(str(description).strip())
if rendered_model == model:
desc_parts.append("current")
available_models.append(
ModelInfo(
model_id=choice_id,
name=rendered_model,
description="".join(part for part in desc_parts if part),
)
)
seen_ids.add(choice_id)
current_model_id = self._encode_model_choice(normalized_provider, model)
if current_model_id and current_model_id not in seen_ids:
available_models.insert(
0,
ModelInfo(
model_id=current_model_id,
name=model,
description=f"Provider: {provider_name} • current",
),
)
if available_models:
return SessionModelState(
available_models=available_models,
current_model_id=current_model_id or available_models[0].model_id,
)
except Exception:
logger.debug("Could not build ACP model state", exc_info=True)
if not model:
return None
fallback_choice = self._encode_model_choice(provider, model)
return SessionModelState(
available_models=[ModelInfo(model_id=fallback_choice, name=model)],
current_model_id=fallback_choice,
)
@staticmethod
def _resolve_model_selection(raw_model: str, current_provider: str) -> tuple[str, str]:
"""Resolve ``provider:model`` input into the provider and normalized model id."""
target_provider = current_provider
new_model = raw_model.strip()
try:
from hermes_cli.models import detect_provider_for_model, parse_model_input
target_provider, new_model = parse_model_input(new_model, current_provider)
if target_provider == current_provider:
detected = detect_provider_for_model(new_model, current_provider)
if detected:
target_provider, new_model = detected
except Exception:
logger.debug("Provider detection failed, using model as-is", exc_info=True)
return target_provider, new_model
async def _register_session_mcp_servers(
self,
state: SessionState,
@@ -357,18 +257,9 @@ class HermesACPAgent(acp.Agent):
)
async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None:
# Only accept authenticate() calls whose method_id matches the
# provider we advertised in initialize(). Without this check,
# authenticate() would acknowledge any method_id as long as the
# server has provider credentials configured — harmless under
# Hermes' threat model (ACP is stdio-only, local-trust), but poor
# API hygiene and confusing if ACP ever grows multi-method auth.
provider = detect_provider()
if not provider:
return None
if not isinstance(method_id, str) or method_id.strip().lower() != provider:
return None
return AuthenticateResponse()
if has_provider():
return AuthenticateResponse()
return None
# ---- Session management -------------------------------------------------
@@ -382,10 +273,7 @@ class HermesACPAgent(acp.Agent):
await self._register_session_mcp_servers(state, mcp_servers)
logger.info("New session %s (cwd=%s)", state.session_id, cwd)
self._schedule_available_commands_update(state.session_id)
return NewSessionResponse(
session_id=state.session_id,
models=self._build_model_state(state),
)
return NewSessionResponse(session_id=state.session_id)
async def load_session(
self,
@@ -401,7 +289,7 @@ class HermesACPAgent(acp.Agent):
await self._register_session_mcp_servers(state, mcp_servers)
logger.info("Loaded session %s", session_id)
self._schedule_available_commands_update(session_id)
return LoadSessionResponse(models=self._build_model_state(state))
return LoadSessionResponse()
async def resume_session(
self,
@@ -417,7 +305,7 @@ class HermesACPAgent(acp.Agent):
await self._register_session_mcp_servers(state, mcp_servers)
logger.info("Resumed session %s", state.session_id)
self._schedule_available_commands_update(state.session_id)
return ResumeSessionResponse(models=self._build_model_state(state))
return ResumeSessionResponse()
async def cancel(self, session_id: str, **kwargs: Any) -> None:
state = self.session_manager.get_session(session_id)
@@ -452,44 +340,12 @@ class HermesACPAgent(acp.Agent):
cwd: str | None = None,
**kwargs: Any,
) -> ListSessionsResponse:
"""List ACP sessions with optional ``cwd`` filtering and cursor pagination.
``cwd`` is passed through to ``SessionManager.list_sessions`` which already
normalizes and filters by working directory. ``cursor`` is a ``session_id``
previously returned as ``next_cursor``; results resume after that entry.
Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more
results remain, ``next_cursor`` is set to the last returned ``session_id``.
"""
infos = self.session_manager.list_sessions(cwd=cwd)
if cursor:
for idx, s in enumerate(infos):
if s["session_id"] == cursor:
infos = infos[idx + 1:]
break
else:
# Unknown cursor -> empty page (do not fall back to full list).
infos = []
has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE
infos = infos[:_LIST_SESSIONS_PAGE_SIZE]
sessions = []
for s in infos:
updated_at = s.get("updated_at")
if updated_at is not None and not isinstance(updated_at, str):
updated_at = str(updated_at)
sessions.append(
SessionInfo(
session_id=s["session_id"],
cwd=s["cwd"],
title=s.get("title"),
updated_at=updated_at,
)
)
next_cursor = sessions[-1].session_id if has_more and sessions else None
return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor)
infos = self.session_manager.list_sessions()
sessions = [
SessionInfo(session_id=s["session_id"], cwd=s["cwd"])
for s in infos
]
return ListSessionsResponse(sessions=sessions)
# ---- Prompt (core) ------------------------------------------------------
@@ -533,13 +389,12 @@ class HermesACPAgent(acp.Agent):
state.cancel_event.clear()
tool_call_ids: dict[str, Deque[str]] = defaultdict(deque)
tool_call_meta: dict[str, dict[str, Any]] = {}
previous_approval_cb = None
if conn:
tool_progress_cb = make_tool_progress_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
tool_progress_cb = make_tool_progress_cb(conn, session_id, loop, tool_call_ids)
thinking_cb = make_thinking_cb(conn, session_id, loop)
step_cb = make_step_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
step_cb = make_step_cb(conn, session_id, loop, tool_call_ids)
message_cb = make_message_cb(conn, session_id, loop)
approval_cb = make_approval_callback(conn.request_permission, loop, session_id)
else:
@@ -555,32 +410,15 @@ class HermesACPAgent(acp.Agent):
agent.step_callback = step_cb
agent.message_callback = message_cb
# Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
# Set it INSIDE _run_agent so the TLS write happens in the executor
# thread — setting it here would write to the event-loop thread's TLS,
# not the executor's. Also set HERMES_INTERACTIVE so approval.py
# takes the CLI-interactive path (which calls the registered
# callback via prompt_dangerous_approval) instead of the
# non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff).
# ACP's conn.request_permission maps cleanly to the interactive
# callback shape — not the gateway-queue HERMES_EXEC_ASK path,
# which requires a notify_cb registered in _gateway_notify_cbs.
previous_approval_cb = None
previous_interactive = None
if approval_cb:
try:
from tools import terminal_tool as _terminal_tool
previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None)
_terminal_tool.set_approval_callback(approval_cb)
except Exception:
logger.debug("Could not set ACP approval callback", exc_info=True)
def _run_agent() -> dict:
nonlocal previous_approval_cb, previous_interactive
if approval_cb:
try:
from tools import terminal_tool as _terminal_tool
previous_approval_cb = _terminal_tool._get_approval_callback()
_terminal_tool.set_approval_callback(approval_cb)
except Exception:
logger.debug("Could not set ACP approval callback", exc_info=True)
# Signal to tools.approval that we have an interactive callback
# and the non-interactive auto-approve path must not fire.
previous_interactive = os.environ.get("HERMES_INTERACTIVE")
os.environ["HERMES_INTERACTIVE"] = "1"
try:
result = agent.run_conversation(
user_message=user_text,
@@ -592,11 +430,6 @@ class HermesACPAgent(acp.Agent):
logger.exception("Agent error in session %s", session_id)
return {"final_response": f"Error: {e}", "messages": state.history}
finally:
# Restore HERMES_INTERACTIVE.
if previous_interactive is None:
os.environ.pop("HERMES_INTERACTIVE", None)
else:
os.environ["HERMES_INTERACTIVE"] = previous_interactive
if approval_cb:
try:
from tools import terminal_tool as _terminal_tool
@@ -616,19 +449,6 @@ class HermesACPAgent(acp.Agent):
self.session_manager.save_session(session_id)
final_response = result.get("final_response", "")
if final_response:
try:
from agent.title_generator import maybe_auto_title
maybe_auto_title(
self.session_manager._get_db(),
session_id,
user_text,
final_response,
state.history,
)
except Exception:
logger.debug("Failed to auto-title ACP session %s", session_id, exc_info=True)
if final_response and conn:
update = acp.update_agent_message_text(final_response)
await conn.session_update(session_id, update)
@@ -673,8 +493,8 @@ class HermesACPAgent(acp.Agent):
await self._conn.session_update(
session_id=session_id,
update=AvailableCommandsUpdate(
session_update="available_commands_update",
available_commands=self._available_commands(),
sessionUpdate="available_commands_update",
availableCommands=self._available_commands(),
),
)
except Exception:
@@ -736,15 +556,27 @@ class HermesACPAgent(acp.Agent):
provider = getattr(state.agent, "provider", None) or "auto"
return f"Current model: {model}\nProvider: {provider}"
new_model = args.strip()
target_provider = None
current_provider = getattr(state.agent, "provider", None) or "openrouter"
target_provider, new_model = self._resolve_model_selection(args, current_provider)
# Auto-detect provider for the requested model
try:
from hermes_cli.models import parse_model_input, detect_provider_for_model
target_provider, new_model = parse_model_input(new_model, current_provider)
if target_provider == current_provider:
detected = detect_provider_for_model(new_model, current_provider)
if detected:
target_provider, new_model = detected
except Exception:
logger.debug("Provider detection failed, using model as-is", exc_info=True)
state.model = new_model
state.agent = self.session_manager._make_agent(
session_id=state.session_id,
cwd=state.cwd,
model=new_model,
requested_provider=target_provider,
requested_provider=target_provider or current_provider,
)
self.session_manager.save_session(state.session_id)
provider_label = getattr(state.agent, "provider", None) or target_provider or current_provider
@@ -846,30 +678,20 @@ class HermesACPAgent(acp.Agent):
"""Switch the model for a session (called by ACP protocol)."""
state = self.session_manager.get_session(session_id)
if state:
state.model = model_id
current_provider = getattr(state.agent, "provider", None)
requested_provider, resolved_model = self._resolve_model_selection(
model_id,
current_provider or "openrouter",
)
state.model = resolved_model
provider_changed = bool(current_provider and requested_provider != current_provider)
current_base_url = None if provider_changed else getattr(state.agent, "base_url", None)
current_api_mode = None if provider_changed else getattr(state.agent, "api_mode", None)
current_base_url = getattr(state.agent, "base_url", None)
current_api_mode = getattr(state.agent, "api_mode", None)
state.agent = self.session_manager._make_agent(
session_id=session_id,
cwd=state.cwd,
model=resolved_model,
requested_provider=requested_provider,
model=model_id,
requested_provider=current_provider,
base_url=current_base_url,
api_mode=current_api_mode,
)
self.session_manager.save_session(session_id)
logger.info(
"Session %s: model switched to %s via provider %s",
session_id,
resolved_model,
requested_provider,
)
logger.info("Session %s: model switched to %s", session_id, model_id)
return SetSessionModelResponse()
logger.warning("Session %s: model switch requested for missing session", session_id)
return None
+34 -127
View File
@@ -13,12 +13,8 @@ from hermes_constants import get_hermes_home
import copy
import json
import logging
import os
import re
import sys
import time
import uuid
from datetime import datetime, timezone
from dataclasses import dataclass, field
from threading import Lock
from typing import Any, Dict, List, Optional
@@ -26,64 +22,6 @@ from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
def _normalize_cwd_for_compare(cwd: str | None) -> str:
raw = str(cwd or ".").strip()
if not raw:
raw = "."
expanded = os.path.expanduser(raw)
# Normalize Windows drive paths into the equivalent WSL mount form so
# ACP history filters match the same workspace across Windows and WSL.
match = re.match(r"^([A-Za-z]):[\\/](.*)$", expanded)
if match:
drive = match.group(1).lower()
tail = match.group(2).replace("\\", "/")
expanded = f"/mnt/{drive}/{tail}"
elif re.match(r"^/mnt/[A-Za-z]/", expanded):
expanded = f"/mnt/{expanded[5].lower()}/{expanded[7:]}"
return os.path.normpath(expanded)
def _build_session_title(title: Any, preview: Any, cwd: str | None) -> str:
explicit = str(title or "").strip()
if explicit:
return explicit
preview_text = str(preview or "").strip()
if preview_text:
return preview_text
leaf = os.path.basename(str(cwd or "").rstrip("/\\"))
return leaf or "New thread"
def _format_updated_at(value: Any) -> str | None:
if value is None:
return None
if isinstance(value, str) and value.strip():
return value
try:
return datetime.fromtimestamp(float(value), tz=timezone.utc).isoformat()
except Exception:
return None
def _updated_at_sort_key(value: Any) -> float:
if value is None:
return float("-inf")
if isinstance(value, (int, float)):
return float(value)
raw = str(value).strip()
if not raw:
return float("-inf")
try:
return datetime.fromisoformat(raw.replace("Z", "+00:00")).timestamp()
except Exception:
try:
return float(raw)
except Exception:
return float("-inf")
def _acp_stderr_print(*args, **kwargs) -> None:
"""Best-effort human-readable output sink for ACP stdio sessions.
@@ -224,78 +162,47 @@ class SessionManager:
logger.info("Forked ACP session %s -> %s", session_id, new_id)
return state
def list_sessions(self, cwd: str | None = None) -> List[Dict[str, Any]]:
def list_sessions(self) -> List[Dict[str, Any]]:
"""Return lightweight info dicts for all sessions (memory + database)."""
normalized_cwd = _normalize_cwd_for_compare(cwd) if cwd else None
db = self._get_db()
persisted_rows: dict[str, dict[str, Any]] = {}
if db is not None:
try:
for row in db.list_sessions_rich(source="acp", limit=1000):
persisted_rows[str(row["id"])] = dict(row)
except Exception:
logger.debug("Failed to load ACP sessions from DB", exc_info=True)
# Collect in-memory sessions first.
with self._lock:
seen_ids = set(self._sessions.keys())
results = []
for s in self._sessions.values():
history_len = len(s.history)
if history_len <= 0:
continue
if normalized_cwd and _normalize_cwd_for_compare(s.cwd) != normalized_cwd:
continue
persisted = persisted_rows.get(s.session_id, {})
preview = next(
(
str(msg.get("content") or "").strip()
for msg in s.history
if msg.get("role") == "user" and str(msg.get("content") or "").strip()
),
persisted.get("preview") or "",
)
results.append(
{
"session_id": s.session_id,
"cwd": s.cwd,
"model": s.model,
"history_len": history_len,
"title": _build_session_title(persisted.get("title"), preview, s.cwd),
"updated_at": _format_updated_at(
persisted.get("last_active") or persisted.get("started_at") or time.time()
),
}
)
results = [
{
"session_id": s.session_id,
"cwd": s.cwd,
"model": s.model,
"history_len": len(s.history),
}
for s in self._sessions.values()
]
# Merge any persisted sessions not currently in memory.
for sid, row in persisted_rows.items():
if sid in seen_ids:
continue
message_count = int(row.get("message_count") or 0)
if message_count <= 0:
continue
# Extract cwd from model_config JSON.
session_cwd = "."
mc = row.get("model_config")
if mc:
try:
session_cwd = json.loads(mc).get("cwd", ".")
except (json.JSONDecodeError, TypeError):
pass
if normalized_cwd and _normalize_cwd_for_compare(session_cwd) != normalized_cwd:
continue
results.append({
"session_id": sid,
"cwd": session_cwd,
"model": row.get("model") or "",
"history_len": message_count,
"title": _build_session_title(row.get("title"), row.get("preview"), session_cwd),
"updated_at": _format_updated_at(row.get("last_active") or row.get("started_at")),
})
db = self._get_db()
if db is not None:
try:
rows = db.search_sessions(source="acp", limit=1000)
for row in rows:
sid = row["id"]
if sid in seen_ids:
continue
# Extract cwd from model_config JSON.
cwd = "."
mc = row.get("model_config")
if mc:
try:
cwd = json.loads(mc).get("cwd", ".")
except (json.JSONDecodeError, TypeError):
pass
results.append({
"session_id": sid,
"cwd": cwd,
"model": row.get("model") or "",
"history_len": row.get("message_count") or 0,
})
except Exception:
logger.debug("Failed to list ACP sessions from DB", exc_info=True)
results.sort(key=lambda item: _updated_at_sort_key(item.get("updated_at")), reverse=True)
return results
def update_cwd(self, session_id: str, cwd: str) -> Optional[SessionState]:
+9 -174
View File
@@ -2,7 +2,6 @@
from __future__ import annotations
import json
import uuid
from typing import Any, Dict, List, Optional
@@ -97,170 +96,6 @@ def build_tool_title(tool_name: str, args: Dict[str, Any]) -> str:
return tool_name
def _build_patch_mode_content(patch_text: str) -> List[Any]:
"""Parse V4A patch mode input into ACP diff blocks when possible."""
if not patch_text:
return [acp.tool_content(acp.text_block(""))]
try:
from tools.patch_parser import OperationType, parse_v4a_patch
operations, error = parse_v4a_patch(patch_text)
if error or not operations:
return [acp.tool_content(acp.text_block(patch_text))]
content: List[Any] = []
for op in operations:
if op.operation == OperationType.UPDATE:
old_chunks: list[str] = []
new_chunks: list[str] = []
for hunk in op.hunks:
old_lines = [line.content for line in hunk.lines if line.prefix in (" ", "-")]
new_lines = [line.content for line in hunk.lines if line.prefix in (" ", "+")]
if old_lines or new_lines:
old_chunks.append("\n".join(old_lines))
new_chunks.append("\n".join(new_lines))
old_text = "\n...\n".join(chunk for chunk in old_chunks if chunk)
new_text = "\n...\n".join(chunk for chunk in new_chunks if chunk)
if old_text or new_text:
content.append(
acp.tool_diff_content(
path=op.file_path,
old_text=old_text or None,
new_text=new_text or "",
)
)
continue
if op.operation == OperationType.ADD:
added_lines = [line.content for hunk in op.hunks for line in hunk.lines if line.prefix == "+"]
content.append(
acp.tool_diff_content(
path=op.file_path,
new_text="\n".join(added_lines),
)
)
continue
if op.operation == OperationType.DELETE:
content.append(
acp.tool_diff_content(
path=op.file_path,
old_text=f"Delete file: {op.file_path}",
new_text="",
)
)
continue
if op.operation == OperationType.MOVE:
content.append(
acp.tool_content(acp.text_block(f"Move file: {op.file_path} -> {op.new_path}"))
)
return content or [acp.tool_content(acp.text_block(patch_text))]
except Exception:
return [acp.tool_content(acp.text_block(patch_text))]
def _strip_diff_prefix(path: str) -> str:
raw = str(path or "").strip()
if raw.startswith(("a/", "b/")):
return raw[2:]
return raw
def _parse_unified_diff_content(diff_text: str) -> List[Any]:
"""Convert unified diff text into ACP diff content blocks."""
if not diff_text:
return []
content: List[Any] = []
current_old_path: Optional[str] = None
current_new_path: Optional[str] = None
old_lines: list[str] = []
new_lines: list[str] = []
def _flush() -> None:
nonlocal current_old_path, current_new_path, old_lines, new_lines
if current_old_path is None and current_new_path is None:
return
path = current_new_path if current_new_path and current_new_path != "/dev/null" else current_old_path
if not path or path == "/dev/null":
current_old_path = None
current_new_path = None
old_lines = []
new_lines = []
return
content.append(
acp.tool_diff_content(
path=_strip_diff_prefix(path),
old_text="\n".join(old_lines) if old_lines else None,
new_text="\n".join(new_lines),
)
)
current_old_path = None
current_new_path = None
old_lines = []
new_lines = []
for line in diff_text.splitlines():
if line.startswith("--- "):
_flush()
current_old_path = line[4:].strip()
continue
if line.startswith("+++ "):
current_new_path = line[4:].strip()
continue
if line.startswith("@@"):
continue
if current_old_path is None and current_new_path is None:
continue
if line.startswith("+"):
new_lines.append(line[1:])
elif line.startswith("-"):
old_lines.append(line[1:])
elif line.startswith(" "):
shared = line[1:]
old_lines.append(shared)
new_lines.append(shared)
_flush()
return content
def _build_tool_complete_content(
tool_name: str,
result: Optional[str],
*,
function_args: Optional[Dict[str, Any]] = None,
snapshot: Any = None,
) -> List[Any]:
"""Build structured ACP completion content, falling back to plain text."""
display_result = result or ""
if len(display_result) > 5000:
display_result = display_result[:4900] + f"\n... ({len(result)} chars total, truncated)"
if tool_name in {"write_file", "patch", "skill_manage"}:
try:
from agent.display import extract_edit_diff
diff_text = extract_edit_diff(
tool_name,
result,
function_args=function_args,
snapshot=snapshot,
)
if isinstance(diff_text, str) and diff_text.strip():
diff_content = _parse_unified_diff_content(diff_text)
if diff_content:
return diff_content
except Exception:
pass
return [acp.tool_content(acp.text_block(display_result))]
# ---------------------------------------------------------------------------
# Build ACP content objects for tool-call events
# ---------------------------------------------------------------------------
@@ -284,8 +119,9 @@ def build_tool_start(
new = arguments.get("new_string", "")
content = [acp.tool_diff_content(path=path, new_text=new, old_text=old)]
else:
# Patch mode — show the patch content as text
patch_text = arguments.get("patch", "")
content = _build_patch_mode_content(patch_text)
content = [acp.tool_content(acp.text_block(patch_text))]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
raw_input=arguments,
@@ -342,17 +178,16 @@ def build_tool_complete(
tool_call_id: str,
tool_name: str,
result: Optional[str] = None,
function_args: Optional[Dict[str, Any]] = None,
snapshot: Any = None,
) -> ToolCallProgress:
"""Create a ToolCallUpdate (progress) event for a completed tool call."""
kind = get_tool_kind(tool_name)
content = _build_tool_complete_content(
tool_name,
result,
function_args=function_args,
snapshot=snapshot,
)
# Truncate very large results for the UI
display_result = result or ""
if len(display_result) > 5000:
display_result = display_result[:4900] + f"\n... ({len(result)} chars total, truncated)"
content = [acp.tool_content(acp.text_block(display_result))]
return acp.update_tool_call(
tool_call_id,
kind=kind,
-326
View File
@@ -1,326 +0,0 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Optional
import httpx
from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token
from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials
from hermes_cli.runtime_provider import resolve_runtime_provider
def _utc_now() -> datetime:
return datetime.now(timezone.utc)
@dataclass(frozen=True)
class AccountUsageWindow:
label: str
used_percent: Optional[float] = None
reset_at: Optional[datetime] = None
detail: Optional[str] = None
@dataclass(frozen=True)
class AccountUsageSnapshot:
provider: str
source: str
fetched_at: datetime
title: str = "Account limits"
plan: Optional[str] = None
windows: tuple[AccountUsageWindow, ...] = ()
details: tuple[str, ...] = ()
unavailable_reason: Optional[str] = None
@property
def available(self) -> bool:
return bool(self.windows or self.details) and not self.unavailable_reason
def _title_case_slug(value: Optional[str]) -> Optional[str]:
cleaned = str(value or "").strip()
if not cleaned:
return None
return cleaned.replace("_", " ").replace("-", " ").title()
def _parse_dt(value: Any) -> Optional[datetime]:
if value in (None, ""):
return None
if isinstance(value, (int, float)):
return datetime.fromtimestamp(float(value), tz=timezone.utc)
if isinstance(value, str):
text = value.strip()
if not text:
return None
if text.endswith("Z"):
text = text[:-1] + "+00:00"
try:
dt = datetime.fromisoformat(text)
return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
except ValueError:
return None
return None
def _format_reset(dt: Optional[datetime]) -> str:
if not dt:
return "unknown"
local_dt = dt.astimezone()
delta = dt - _utc_now()
total_seconds = int(delta.total_seconds())
if total_seconds <= 0:
return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
hours, rem = divmod(total_seconds, 3600)
minutes = rem // 60
if hours >= 24:
days, hours = divmod(hours, 24)
rel = f"in {days}d {hours}h"
elif hours > 0:
rel = f"in {hours}h {minutes}m"
else:
rel = f"in {minutes}m"
return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]:
if not snapshot:
return []
header = f"📈 {'**' if markdown else ''}{snapshot.title}{'**' if markdown else ''}"
lines = [header]
if snapshot.plan:
lines.append(f"Provider: {snapshot.provider} ({snapshot.plan})")
else:
lines.append(f"Provider: {snapshot.provider}")
for window in snapshot.windows:
if window.used_percent is None:
base = f"{window.label}: unavailable"
else:
remaining = max(0, round(100 - float(window.used_percent)))
used = max(0, round(float(window.used_percent)))
base = f"{window.label}: {remaining}% remaining ({used}% used)"
if window.reset_at:
base += f" • resets {_format_reset(window.reset_at)}"
elif window.detail:
base += f"{window.detail}"
lines.append(base)
for detail in snapshot.details:
lines.append(detail)
if snapshot.unavailable_reason:
lines.append(f"Unavailable: {snapshot.unavailable_reason}")
return lines
def _resolve_codex_usage_url(base_url: str) -> str:
normalized = (base_url or "").strip().rstrip("/")
if not normalized:
normalized = "https://chatgpt.com/backend-api/codex"
if normalized.endswith("/codex"):
normalized = normalized[: -len("/codex")]
if "/backend-api" in normalized:
return normalized + "/wham/usage"
return normalized + "/api/codex/usage"
def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]:
creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
token_data = _read_codex_tokens()
tokens = token_data.get("tokens") or {}
account_id = str(tokens.get("account_id", "") or "").strip() or None
headers = {
"Authorization": f"Bearer {creds['api_key']}",
"Accept": "application/json",
"User-Agent": "codex-cli",
}
if account_id:
headers["ChatGPT-Account-Id"] = account_id
with httpx.Client(timeout=15.0) as client:
response = client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers)
response.raise_for_status()
payload = response.json() or {}
rate_limit = payload.get("rate_limit") or {}
windows: list[AccountUsageWindow] = []
for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")):
window = rate_limit.get(key) or {}
used = window.get("used_percent")
if used is None:
continue
windows.append(
AccountUsageWindow(
label=label,
used_percent=float(used),
reset_at=_parse_dt(window.get("reset_at")),
)
)
details: list[str] = []
credits = payload.get("credits") or {}
if credits.get("has_credits"):
balance = credits.get("balance")
if isinstance(balance, (int, float)):
details.append(f"Credits balance: ${float(balance):.2f}")
elif credits.get("unlimited"):
details.append("Credits balance: unlimited")
return AccountUsageSnapshot(
provider="openai-codex",
source="usage_api",
fetched_at=_utc_now(),
plan=_title_case_slug(payload.get("plan_type")),
windows=tuple(windows),
details=tuple(details),
)
def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]:
token = (resolve_anthropic_token() or "").strip()
if not token:
return None
if not _is_oauth_token(token):
return AccountUsageSnapshot(
provider="anthropic",
source="oauth_usage_api",
fetched_at=_utc_now(),
unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.",
)
headers = {
"Authorization": f"Bearer {token}",
"Accept": "application/json",
"Content-Type": "application/json",
"anthropic-beta": "oauth-2025-04-20",
"User-Agent": "claude-code/2.1.0",
}
with httpx.Client(timeout=15.0) as client:
response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers)
response.raise_for_status()
payload = response.json() or {}
windows: list[AccountUsageWindow] = []
mapping = (
("five_hour", "Current session"),
("seven_day", "Current week"),
("seven_day_opus", "Opus week"),
("seven_day_sonnet", "Sonnet week"),
)
for key, label in mapping:
window = payload.get(key) or {}
util = window.get("utilization")
if util is None:
continue
used = float(util) * 100 if float(util) <= 1 else float(util)
windows.append(
AccountUsageWindow(
label=label,
used_percent=used,
reset_at=_parse_dt(window.get("resets_at")),
)
)
details: list[str] = []
extra = payload.get("extra_usage") or {}
if extra.get("is_enabled"):
used_credits = extra.get("used_credits")
monthly_limit = extra.get("monthly_limit")
currency = extra.get("currency") or "USD"
if isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float)):
details.append(
f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}"
)
return AccountUsageSnapshot(
provider="anthropic",
source="oauth_usage_api",
fetched_at=_utc_now(),
windows=tuple(windows),
details=tuple(details),
)
def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]:
runtime = resolve_runtime_provider(
requested="openrouter",
explicit_base_url=base_url,
explicit_api_key=api_key,
)
token = str(runtime.get("api_key", "") or "").strip()
if not token:
return None
normalized = str(runtime.get("base_url", "") or "").rstrip("/")
credits_url = f"{normalized}/credits"
key_url = f"{normalized}/key"
headers = {
"Authorization": f"Bearer {token}",
"Accept": "application/json",
}
with httpx.Client(timeout=10.0) as client:
credits_resp = client.get(credits_url, headers=headers)
credits_resp.raise_for_status()
credits = (credits_resp.json() or {}).get("data") or {}
try:
key_resp = client.get(key_url, headers=headers)
key_resp.raise_for_status()
key_data = (key_resp.json() or {}).get("data") or {}
except Exception:
key_data = {}
total_credits = float(credits.get("total_credits") or 0.0)
total_usage = float(credits.get("total_usage") or 0.0)
details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"]
windows: list[AccountUsageWindow] = []
limit = key_data.get("limit")
limit_remaining = key_data.get("limit_remaining")
limit_reset = str(key_data.get("limit_reset") or "").strip()
usage = key_data.get("usage")
if (
isinstance(limit, (int, float))
and float(limit) > 0
and isinstance(limit_remaining, (int, float))
and 0 <= float(limit_remaining) <= float(limit)
):
limit_value = float(limit)
remaining_value = float(limit_remaining)
used_percent = ((limit_value - remaining_value) / limit_value) * 100
detail_parts = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"]
if limit_reset:
detail_parts.append(f"resets {limit_reset}")
windows.append(
AccountUsageWindow(
label="API key quota",
used_percent=used_percent,
detail="".join(detail_parts),
)
)
if isinstance(usage, (int, float)):
usage_parts = [f"API key usage: ${float(usage):.2f} total"]
for value, label in (
(key_data.get("usage_daily"), "today"),
(key_data.get("usage_weekly"), "this week"),
(key_data.get("usage_monthly"), "this month"),
):
if isinstance(value, (int, float)) and float(value) > 0:
usage_parts.append(f"${float(value):.2f} {label}")
details.append("".join(usage_parts))
return AccountUsageSnapshot(
provider="openrouter",
source="credits_api",
fetched_at=_utc_now(),
windows=tuple(windows),
details=tuple(details),
)
def fetch_account_usage(
provider: Optional[str],
*,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
) -> Optional[AccountUsageSnapshot]:
normalized = str(provider or "").strip().lower()
if normalized in {"", "auto", "custom"}:
return None
try:
if normalized == "openai-codex":
return _fetch_codex_account_usage()
if normalized == "anthropic":
return _fetch_anthropic_account_usage()
if normalized == "openrouter":
return _fetch_openrouter_account_usage(base_url, api_key)
except Exception:
return None
return None
+13 -172
View File
@@ -19,7 +19,6 @@ from pathlib import Path
from hermes_constants import get_hermes_home
from types import SimpleNamespace
from typing import Any, Dict, List, Optional, Tuple
from utils import normalize_proxy_env_vars
try:
import anthropic as _anthropic_sdk
@@ -29,45 +28,19 @@ except ImportError:
logger = logging.getLogger(__name__)
THINKING_BUDGET = {"xhigh": 32000, "high": 16000, "medium": 8000, "low": 4000}
# Hermes effort → Anthropic adaptive-thinking effort (output_config.effort).
# Anthropic exposes 5 levels on 4.7+: low, medium, high, xhigh, max.
# Opus/Sonnet 4.6 only expose 4 levels: low, medium, high, max — no xhigh.
# We preserve xhigh as xhigh on 4.7+ (the recommended default for coding/
# agentic work) and downgrade it to max on pre-4.7 adaptive models (which
# is the strongest level they accept). "minimal" is a legacy alias that
# maps to low on every model. See:
# https://platform.claude.com/docs/en/about-claude/models/migration-guide
ADAPTIVE_EFFORT_MAP = {
"max": "max",
"xhigh": "xhigh",
"high": "high",
"medium": "medium",
"low": "low",
"xhigh": "max",
"high": "high",
"medium": "medium",
"low": "low",
"minimal": "low",
}
# Models that accept the "xhigh" output_config.effort level. Opus 4.7 added
# xhigh as a distinct level between high and max; older adaptive-thinking
# models (4.6) reject it with a 400. Keep this substring list in sync with
# the Anthropic migration guide as new model families ship.
_XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7")
# Models where extended thinking is deprecated/removed (4.6+ behavior: adaptive
# is the only supported mode; 4.7 additionally forbids manual thinking entirely
# and drops temperature/top_p/top_k).
_ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7")
# Models where temperature/top_p/top_k return 400 if set to non-default values.
# This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7")
# ── Max output token limits per Anthropic model ───────────────────────
# Source: Anthropic docs + Cline model catalog. Anthropic's API requires
# max_tokens as a mandatory field. Previously we hardcoded 16384, which
# starves thinking-enabled models (thinking tokens count toward the limit).
_ANTHROPIC_OUTPUT_LIMITS = {
# Claude 4.7
"claude-opus-4-7": 128_000,
# Claude 4.6
"claude-opus-4-6": 128_000,
"claude-sonnet-4-6": 64_000,
@@ -118,37 +91,11 @@ def _get_anthropic_max_output(model: str) -> int:
def _supports_adaptive_thinking(model: str) -> bool:
"""Return True for Claude 4.6+ models that support adaptive thinking."""
return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
"""Return True for Claude 4.6 models that support adaptive thinking."""
return any(v in model for v in ("4-6", "4.6"))
def _supports_xhigh_effort(model: str) -> bool:
"""Return True for models that accept the 'xhigh' adaptive effort level.
Opus 4.7 introduced xhigh as a distinct level between high and max.
Pre-4.7 adaptive models (Opus/Sonnet 4.6) only accept low/medium/high/max
and reject xhigh with an HTTP 400. Callers should downgrade xhigh→max
when this returns False.
"""
return any(v in model for v in _XHIGH_EFFORT_SUBSTRINGS)
def _forbids_sampling_params(model: str) -> bool:
"""Return True for models that 400 on any non-default temperature/top_p/top_k.
Opus 4.7 explicitly rejects sampling parameters; later Claude releases are
expected to follow suit. Callers should omit these fields entirely rather
than passing zero/default values (the API rejects anything non-null).
"""
return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS)
# Beta headers for enhanced features (sent with ALL auth types).
# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the
# beta headers are still accepted (harmless no-op) but not required. Kept
# here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
# that still gate on the headers continue to get the enhanced features.
# Migration guide: remove these if you no longer support ≤4.5 models.
# Beta headers for enhanced features (sent with ALL auth types)
_COMMON_BETAS = [
"interleaved-thinking-2025-05-14",
"fine-grained-tool-streaming-2025-05-14",
@@ -293,15 +240,9 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
return _COMMON_BETAS
def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
def build_anthropic_client(api_key: str, base_url: str = None):
"""Create an Anthropic client, auto-detecting setup-tokens vs API keys.
If *timeout* is provided it overrides the default 900s read timeout. The
connect timeout stays at 10s. Callers pass this from the per-provider /
per-model ``request_timeout_seconds`` config so Anthropic-native and
Anthropic-compatible providers respect the same knob as OpenAI-wire
providers.
Returns an anthropic.Anthropic instance.
"""
if _anthropic_sdk is None:
@@ -309,15 +250,11 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
"The 'anthropic' package is required for the Anthropic provider. "
"Install it with: pip install 'anthropic>=0.39.0'"
)
normalize_proxy_env_vars()
from httpx import Timeout
normalized_base_url = _normalize_base_url_text(base_url)
_read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
kwargs = {
"timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
"timeout": Timeout(timeout=900.0, connect=10.0),
}
if normalized_base_url:
kwargs["base_url"] = normalized_base_url
@@ -361,33 +298,6 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
return _anthropic_sdk.Anthropic(**kwargs)
def build_anthropic_bedrock_client(region: str):
"""Create an AnthropicBedrock client for Bedrock Claude models.
Uses the Anthropic SDK's native Bedrock adapter, which provides full
Claude feature parity: prompt caching, thinking budgets, adaptive
thinking, fast mode — features not available via the Converse API.
Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
"""
if _anthropic_sdk is None:
raise ImportError(
"The 'anthropic' package is required for the Bedrock provider. "
"Install it with: pip install 'anthropic>=0.39.0'"
)
if not hasattr(_anthropic_sdk, "AnthropicBedrock"):
raise ImportError(
"anthropic.AnthropicBedrock not available. "
"Upgrade with: pip install 'anthropic>=0.39.0'"
)
from httpx import Timeout
return _anthropic_sdk.AnthropicBedrock(
aws_region=region,
timeout=Timeout(timeout=900.0, connect=10.0),
)
def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
"""Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.
@@ -1404,31 +1314,18 @@ def build_anthropic_kwargs(
kwargs["tool_choice"] = {"type": "tool", "name": tool_choice}
# Map reasoning_config to Anthropic's thinking parameter.
# Claude 4.6+ models use adaptive thinking + output_config.effort.
# Claude 4.6 models use adaptive thinking + output_config.effort.
# Older models use manual thinking with budget_tokens.
# MiniMax Anthropic-compat endpoints support thinking (manual mode only,
# not adaptive). Haiku does NOT support extended thinking — skip entirely.
#
# On 4.7+ the `thinking.display` field defaults to "omitted", which
# silently hides reasoning text that Hermes surfaces in its CLI. We
# request "summarized" so the reasoning blocks stay populated — matching
# 4.6 behavior and preserving the activity-feed UX during long tool runs.
if reasoning_config and isinstance(reasoning_config, dict):
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
effort = str(reasoning_config.get("effort", "medium")).lower()
budget = THINKING_BUDGET.get(effort, 8000)
if _supports_adaptive_thinking(model):
kwargs["thinking"] = {
"type": "adaptive",
"display": "summarized",
}
adaptive_effort = ADAPTIVE_EFFORT_MAP.get(effort, "medium")
# Downgrade xhigh→max on models that don't list xhigh as a
# supported level (Opus/Sonnet 4.6). Opus 4.7+ keeps xhigh.
if adaptive_effort == "xhigh" and not _supports_xhigh_effort(model):
adaptive_effort = "max"
kwargs["thinking"] = {"type": "adaptive"}
kwargs["output_config"] = {
"effort": adaptive_effort,
"effort": ADAPTIVE_EFFORT_MAP.get(effort, "medium")
}
else:
kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
@@ -1436,15 +1333,6 @@ def build_anthropic_kwargs(
kwargs["temperature"] = 1
kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)
# ── Strip sampling params on 4.7+ ─────────────────────────────────
# Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
# Callers (auxiliary_client, flush_memories, etc.) may set these for
# older models; drop them here as a safety net so upstream 4.6 → 4.7
# migrations don't require coordinated edits everywhere.
if _forbids_sampling_params(model):
for _sampling_key in ("temperature", "top_p", "top_k"):
kwargs.pop(_sampling_key, None)
# ── Fast mode (Opus 4.6 only) ────────────────────────────────────
# Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x
# output speed. Only for native Anthropic endpoints — third-party
@@ -1502,20 +1390,12 @@ def normalize_anthropic_response(
)
)
# Map Anthropic stop_reason to OpenAI finish_reason.
# Newer stop reasons added in Claude 4.5+ / 4.7:
# - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
# - model_context_window_exceeded: hit context limit (not max_tokens)
# Both need distinct handling upstream — a refusal should surface to the
# user with a clear message, and a context-window overflow should trigger
# compression/truncation rather than be treated as normal end-of-turn.
# Map Anthropic stop_reason to OpenAI finish_reason
stop_reason_map = {
"end_turn": "stop",
"tool_use": "tool_calls",
"max_tokens": "length",
"stop_sequence": "stop",
"refusal": "content_filter",
"model_context_window_exceeded": "length",
}
finish_reason = stop_reason_map.get(response.stop_reason, "stop")
@@ -1529,42 +1409,3 @@ def normalize_anthropic_response(
),
finish_reason,
)
def normalize_anthropic_response_v2(
response,
strip_tool_prefix: bool = False,
) -> "NormalizedResponse":
"""Normalize Anthropic response to NormalizedResponse.
Wraps the existing normalize_anthropic_response() and maps its output
to the shared transport types. This allows incremental migration —
one call site at a time — without changing the original function.
"""
from agent.transports.types import NormalizedResponse, build_tool_call
assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)
tool_calls = None
if assistant_msg.tool_calls:
tool_calls = [
build_tool_call(
id=tc.id,
name=tc.function.name,
arguments=tc.function.arguments,
)
for tc in assistant_msg.tool_calls
]
provider_data = {}
if getattr(assistant_msg, "reasoning_details", None):
provider_data["reasoning_details"] = assistant_msg.reasoning_details
return NormalizedResponse(
content=assistant_msg.content,
tool_calls=tool_calls,
finish_reason=finish_reason,
reasoning=getattr(assistant_msg, "reasoning", None),
usage=None, # Anthropic usage is on the raw response, not the normaliser
provider_data=provider_data or None,
)
+106 -371
View File
@@ -48,7 +48,6 @@ from openai import OpenAI
from agent.credential_pool import load_pool
from hermes_cli.config import get_hermes_home
from hermes_constants import OPENROUTER_BASE_URL
from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars
logger = logging.getLogger(__name__)
@@ -59,9 +58,6 @@ _PROVIDER_ALIASES = {
"google": "gemini",
"google-gemini": "gemini",
"google-ai-studio": "gemini",
"x-ai": "xai",
"x.ai": "xai",
"grok": "xai",
"glm": "zai",
"z-ai": "zai",
"z.ai": "zai",
@@ -95,40 +91,6 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
return "custom"
return _PROVIDER_ALIASES.get(normalized, normalized)
# Sentinel: when returned by _fixed_temperature_for_model(), callers must
# strip the ``temperature`` key from API kwargs entirely so the provider's
# server-side default applies. Kimi/Moonshot models manage temperature
# internally — sending *any* value (even the "correct" one) can conflict
# with gateway-side mode selection (thinking → 1.0, non-thinking → 0.6).
OMIT_TEMPERATURE: object = object()
def _is_kimi_model(model: Optional[str]) -> bool:
"""True for any Kimi / Moonshot model that manages temperature server-side."""
bare = (model or "").strip().lower().rsplit("/", 1)[-1]
return bare.startswith("kimi-") or bare == "kimi"
def _fixed_temperature_for_model(
model: Optional[str],
base_url: Optional[str] = None,
) -> "Optional[float] | object":
"""Return a temperature directive for models with strict contracts.
Returns:
``OMIT_TEMPERATURE`` — caller must remove the ``temperature`` key so the
provider chooses its own default. Used for all Kimi / Moonshot
models whose gateway selects temperature server-side.
``float`` — a specific value the caller must use (reserved for future
models with fixed-temperature contracts).
``None`` — no override; caller should use its own default.
"""
if _is_kimi_model(model):
logger.debug("Omitting temperature for Kimi model %r (server-managed)", model)
return OMIT_TEMPERATURE
return None
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
"gemini": "gemini-3-flash-preview",
@@ -142,7 +104,6 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
"opencode-zen": "gemini-3-flash",
"opencode-go": "glm-5",
"kilocode": "google/gemini-3-flash-preview",
"ollama-cloud": "nemotron-3-nano:30b",
}
# Vision-specific model overrides for direct providers.
@@ -161,16 +122,6 @@ _OR_HEADERS = {
"X-OpenRouter-Categories": "productivity,cli-agent",
}
# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
# referrerUrl and X-Title maps to appName in the gateway's analytics.
from hermes_cli import __version__ as _HERMES_VERSION
_AI_GATEWAY_HEADERS = {
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
"X-Title": "Hermes Agent",
"User-Agent": f"HermesAgent/{_HERMES_VERSION}",
}
# Nous Portal extra_body for product attribution.
# Callers should pass this as extra_body in chat.completions.create()
# when the auxiliary client is backed by Nous Portal.
@@ -197,45 +148,6 @@ _CODEX_AUX_MODEL = "gpt-5.2-codex"
_CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"
def _codex_cloudflare_headers(access_token: str) -> Dict[str, str]:
"""Headers required to avoid Cloudflare 403s on chatgpt.com/backend-api/codex.
The Cloudflare layer in front of the Codex endpoint whitelists a small set of
first-party originators (``codex_cli_rs``, ``codex_vscode``, ``codex_sdk_ts``,
anything starting with ``Codex``). Requests from non-residential IPs (VPS,
server-hosted agents) that don't advertise an allowed originator are served
a 403 with ``cf-mitigated: challenge`` regardless of auth correctness.
We pin ``originator: codex_cli_rs`` to match the upstream codex-rs CLI, set
``User-Agent`` to a codex_cli_rs-shaped string (beats SDK fingerprinting),
and extract ``ChatGPT-Account-ID`` (canonical casing, from codex-rs
``auth.rs``) out of the OAuth JWT's ``chatgpt_account_id`` claim.
Malformed tokens are tolerated — we drop the account-ID header rather than
raise, so a bad token still surfaces as an auth error (401) instead of a
crash at client construction.
"""
headers = {
"User-Agent": "codex_cli_rs/0.0.0 (Hermes Agent)",
"originator": "codex_cli_rs",
}
if not isinstance(access_token, str) or not access_token.strip():
return headers
try:
import base64
parts = access_token.split(".")
if len(parts) < 2:
return headers
payload_b64 = parts[1] + "=" * (-len(parts[1]) % 4)
claims = json.loads(base64.urlsafe_b64decode(payload_b64))
acct_id = claims.get("https://api.openai.com/auth", {}).get("chatgpt_account_id")
if isinstance(acct_id, str) and acct_id:
headers["ChatGPT-Account-ID"] = acct_id
except Exception:
pass
return headers
def _to_openai_base_url(base_url: str) -> str:
"""Normalize an Anthropic-style base URL to OpenAI-compatible format.
@@ -602,13 +514,8 @@ class _AnthropicCompletionsAdapter:
tool_choice=normalized_tool_choice,
is_oauth=self._is_oauth,
)
# Opus 4.7+ rejects any non-default temperature/top_p/top_k; only set
# temperature for models that still accept it. build_anthropic_kwargs
# additionally strips these keys as a safety net — keep both layers.
if temperature is not None:
from agent.anthropic_adapter import _forbids_sampling_params
if not _forbids_sampling_params(model):
anthropic_kwargs["temperature"] = temperature
anthropic_kwargs["temperature"] = temperature
response = self._client.messages.create(**anthropic_kwargs)
assistant_message, finish_reason = normalize_anthropic_response(response)
@@ -811,15 +718,10 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
if model is None:
continue # skip provider if we don't know a valid aux model
logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
if provider_id == "gemini":
from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url
if is_native_gemini_base_url(base_url):
return GeminiNativeClient(api_key=api_key, base_url=base_url), model
extra = {}
if base_url_host_matches(base_url, "api.kimi.com"):
if "api.kimi.com" in base_url.lower():
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
elif "api.githubcopilot.com" in base_url.lower():
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
@@ -837,15 +739,10 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
if model is None:
continue # skip provider if we don't know a valid aux model
logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
if provider_id == "gemini":
from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url
if is_native_gemini_base_url(base_url):
return GeminiNativeClient(api_key=api_key, base_url=base_url), model
extra = {}
if base_url_host_matches(base_url, "api.kimi.com"):
if "api.kimi.com" in base_url.lower():
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
elif "api.githubcopilot.com" in base_url.lower():
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
@@ -878,21 +775,6 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
# Check cross-session rate limit guard before attempting Nous —
# if another session already recorded a 429, skip Nous entirely
# to avoid piling more requests onto the tapped RPH bucket.
try:
from agent.nous_rate_guard import nous_rate_limit_remaining
_remaining = nous_rate_limit_remaining()
if _remaining is not None and _remaining > 0:
logger.debug(
"Auxiliary: skipping Nous Portal (rate-limited, resets in %.0fs)",
_remaining,
)
return None, None
except Exception:
pass
nous = _read_nous_auth()
if not nous:
return None, None
@@ -994,7 +876,7 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[st
return None, None, None
custom_base = custom_base.strip().rstrip("/")
if base_url_host_matches(custom_base, "openrouter.ai"):
if "openrouter.ai" in custom_base.lower():
# requested='custom' falls back to OpenRouter when no custom endpoint is
# configured. Treat that as "no custom endpoint" for auxiliary routing.
return None, None, None
@@ -1017,54 +899,7 @@ def _current_custom_base_url() -> str:
return custom_base or ""
def _validate_proxy_env_urls() -> None:
"""Fail fast with a clear error when proxy env vars have malformed URLs.
Common cause: shell config (e.g. .zshrc) with a typo like
``export HTTP_PROXY=http://127.0.0.1:6153export NEXT_VAR=...``
which concatenates 'export' into the port number. Without this
check the OpenAI/httpx client raises a cryptic ``Invalid port``
error that doesn't name the offending env var.
"""
from urllib.parse import urlparse
normalize_proxy_env_vars()
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
"https_proxy", "http_proxy", "all_proxy"):
value = str(os.environ.get(key) or "").strip()
if not value:
continue
try:
parsed = urlparse(value)
if parsed.scheme:
_ = parsed.port # raises ValueError for e.g. '6153export'
except ValueError as exc:
raise RuntimeError(
f"Malformed proxy environment variable {key}={value!r}. "
"Fix or unset your proxy settings and try again."
) from exc
def _validate_base_url(base_url: str) -> None:
"""Reject obviously broken custom endpoint URLs before they reach httpx."""
from urllib.parse import urlparse
candidate = str(base_url or "").strip()
if not candidate or candidate.startswith("acp://"):
return
try:
parsed = urlparse(candidate)
if parsed.scheme in {"http", "https"}:
_ = parsed.port # raises ValueError for malformed ports
except ValueError as exc:
raise RuntimeError(
f"Malformed custom endpoint URL: {candidate!r}. "
"Run `hermes setup` or `hermes model` and enter a valid http(s) base URL."
) from exc
def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
runtime = _resolve_custom_runtime()
if len(runtime) == 2:
custom_base, custom_key = runtime
@@ -1080,23 +915,6 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
if custom_mode == "codex_responses":
real_client = OpenAI(api_key=custom_key, base_url=custom_base)
return CodexAuxiliaryClient(real_client, model), model
if custom_mode == "anthropic_messages":
# Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
# LiteLLM proxies, etc.). Must NEVER be treated as OAuth —
# Anthropic OAuth claims only apply to api.anthropic.com.
try:
from agent.anthropic_adapter import build_anthropic_client
real_client = build_anthropic_client(custom_key, custom_base)
except ImportError:
logger.warning(
"Custom endpoint declares api_mode=anthropic_messages but the "
"anthropic SDK is not installed — falling back to OpenAI-wire."
)
return OpenAI(api_key=custom_key, base_url=custom_base), model
return (
AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
model,
)
return OpenAI(api_key=custom_key, base_url=custom_base), model
@@ -1117,11 +935,7 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
return None, None
base_url = _CODEX_AUX_BASE_URL
logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
real_client = OpenAI(
api_key=codex_token,
base_url=base_url,
default_headers=_codex_cloudflare_headers(codex_token),
)
real_client = OpenAI(api_key=codex_token, base_url=base_url)
return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
@@ -1181,6 +995,8 @@ _AUTO_PROVIDER_LABELS = {
"_resolve_api_key_provider": "api-key",
}
_AGGREGATOR_PROVIDERS = frozenset({"openrouter", "nous"})
_MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode")
@@ -1311,15 +1127,11 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
"""Full auto-detection chain.
Priority:
1. User's main provider + main model, regardless of provider type.
This means auxiliary tasks (compression, vision, web extraction,
session search, etc.) use the same model the user configured for
chat. Users on OpenRouter/Nous get their chosen chat model; users
on DeepSeek/ZAI/Alibaba get theirs; etc. Running aux tasks on the
user's picked model keeps behavior predictable — no surprise
switches to a cheap fallback model for side tasks.
2. OpenRouter → Nous → custom → Codex → API-key providers (fallback
chain, only used when the main provider has no working client).
1. If the user's main provider is NOT an aggregator (OpenRouter / Nous),
use their main provider + main model directly. This ensures users on
Alibaba, DeepSeek, ZAI, etc. get auxiliary tasks handled by the same
provider they already have credentials for — no OpenRouter key needed.
2. OpenRouter → Nous → custom → Codex → API-key providers (original chain).
"""
global auxiliary_is_nous, _stale_base_url_warned
auxiliary_is_nous = False # Reset — _try_nous() will set True if it wins
@@ -1349,16 +1161,11 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
)
_stale_base_url_warned = True
# ── Step 1: main provider + main model → use them directly ──
#
# This is the primary aux backend for every user. "auto" means
# "use my main chat model for side tasks as well" — including users
# on aggregators (OpenRouter, Nous) who previously got routed to a
# cheap provider-side default. Explicit per-task overrides set via
# config.yaml (auxiliary.<task>.provider) still win over this.
# ── Step 1: non-aggregator main provider → use main model directly ──
main_provider = runtime_provider or _read_main_provider()
main_model = runtime_model or _read_main_model()
if (main_provider and main_model
and main_provider not in _AGGREGATOR_PROVIDERS
and main_provider not in ("auto", "")):
resolved_provider = main_provider
explicit_base_url = None
@@ -1417,13 +1224,6 @@ def _to_async_client(sync_client, model: str):
return AsyncCodexAuxiliaryClient(sync_client), model
if isinstance(sync_client, AnthropicAuxiliaryClient):
return AsyncAnthropicAuxiliaryClient(sync_client), model
try:
from agent.gemini_native_adapter import GeminiNativeClient, AsyncGeminiNativeClient
if isinstance(sync_client, GeminiNativeClient):
return AsyncGeminiNativeClient(sync_client), model
except ImportError:
pass
try:
from agent.copilot_acp_client import CopilotACPClient
if isinstance(sync_client, CopilotACPClient):
@@ -1435,14 +1235,14 @@ def _to_async_client(sync_client, model: str):
"api_key": sync_client.api_key,
"base_url": str(sync_client.base_url),
}
sync_base_url = str(sync_client.base_url)
if base_url_host_matches(sync_base_url, "openrouter.ai"):
base_lower = str(sync_client.base_url).lower()
if "openrouter" in base_lower:
async_kwargs["default_headers"] = dict(_OR_HEADERS)
elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
elif "api.githubcopilot.com" in base_lower:
from hermes_cli.models import copilot_default_headers
async_kwargs["default_headers"] = copilot_default_headers()
elif base_url_host_matches(sync_base_url, "api.kimi.com"):
elif "api.kimi.com" in base_lower:
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
return AsyncOpenAI(**async_kwargs), model
@@ -1499,7 +1299,6 @@ def resolve_provider_client(
Returns:
(client, resolved_model) or (None, None) if auth is unavailable.
"""
_validate_proxy_env_urls()
# Normalise aliases
provider = _normalize_aux_provider(provider)
@@ -1519,7 +1318,8 @@ def resolve_provider_client(
# Auto-detect: api.openai.com + codex model name pattern
if api_mode and api_mode != "codex_responses":
return False # explicit non-codex mode
if base_url_hostname(base_url_str) == "api.openai.com":
normalized_base = (base_url_str or "").strip().lower()
if "api.openai.com" in normalized_base and "openrouter" not in normalized_base:
model_lower = (model_str or "").lower()
if "codex" in model_lower:
return True
@@ -1587,11 +1387,7 @@ def resolve_provider_client(
"but no Codex OAuth token found (run: hermes model)")
return None, None
final_model = _normalize_resolved_model(model or _CODEX_AUX_MODEL, provider)
raw_client = OpenAI(
api_key=codex_token,
base_url=_CODEX_AUX_BASE_URL,
default_headers=_codex_cloudflare_headers(codex_token),
)
raw_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
return (raw_client, final_model)
# Standard path: wrap in CodexAuxiliaryClient adapter
client, default = _try_codex()
@@ -1623,9 +1419,9 @@ def resolve_provider_client(
provider,
)
extra = {}
if base_url_host_matches(custom_base, "api.kimi.com"):
if "api.kimi.com" in custom_base.lower():
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
elif "api.githubcopilot.com" in custom_base.lower():
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
@@ -1719,23 +1515,15 @@ def resolve_provider_client(
default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
final_model = _normalize_resolved_model(model or default_model, provider)
if provider == "gemini":
from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url
if is_native_gemini_base_url(base_url):
client = GeminiNativeClient(api_key=api_key, base_url=base_url)
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# Provider-specific headers
headers = {}
if base_url_host_matches(base_url, "api.kimi.com"):
if "api.kimi.com" in base_url.lower():
headers["User-Agent"] = "KimiCLI/1.30.0"
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
elif "api.githubcopilot.com" in base_url.lower():
from hermes_cli.models import copilot_default_headers
headers.update(copilot_default_headers())
client = OpenAI(api_key=api_key, base_url=base_url,
**({"default_headers": headers} if headers else {}))
@@ -1959,31 +1747,34 @@ def resolve_vision_provider_client(
if requested == "auto":
# Vision auto-detection order:
# 1. User's main provider + main model (including aggregators).
# _PROVIDER_VISION_MODELS provides per-provider vision model
# overrides when the provider has a dedicated multimodal model
# that differs from the chat model (e.g. xiaomi → mimo-v2-omni,
# zai → glm-5v-turbo).
# 2. OpenRouter (vision-capable aggregator fallback)
# 3. Nous Portal (vision-capable aggregator fallback)
# 1. Active provider + model (user's main chat config)
# 2. OpenRouter (known vision-capable default model)
# 3. Nous Portal (known vision-capable default model)
# 4. Stop
main_provider = _read_main_provider()
main_model = _read_main_model()
if main_provider and main_provider not in ("auto", ""):
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
main_provider, rpc_model or vision_model,
)
return _finalize(
main_provider, rpc_client, rpc_model or vision_model)
if main_provider in _VISION_AUTO_PROVIDER_ORDER:
# Known strict backend — use its defaults.
sync_client, default_model = _resolve_strict_vision_backend(main_provider)
if sync_client is not None:
return _finalize(main_provider, sync_client, default_model)
else:
# Exotic provider (DeepSeek, Alibaba, Xiaomi, named custom, etc.)
# Use provider-specific vision model if available, otherwise main model.
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using active provider %s (%s)",
main_provider, rpc_model or vision_model,
)
return _finalize(
main_provider, rpc_client, rpc_model or vision_model)
# Fall back through aggregators (uses their dedicated vision model,
# not the user's main model) when main provider has no client.
# Fall back through aggregators.
for candidate in _VISION_AUTO_PROVIDER_ORDER:
if candidate == main_provider:
continue # already tried above
@@ -2027,7 +1818,7 @@ def auxiliary_max_tokens_param(value: int) -> dict:
# Only use max_completion_tokens for direct OpenAI custom endpoints
if (not or_key
and _read_nous_auth() is None
and base_url_hostname(custom_base) == "api.openai.com"):
and "api.openai.com" in custom_base.lower()):
return {"max_completion_tokens": value}
return {"max_tokens": value}
@@ -2044,15 +1835,9 @@ def auxiliary_max_tokens_param(value: int) -> dict:
# Every auxiliary LLM consumer should use these instead of manually
# constructing clients and calling .chat.completions.create().
# Client cache: (provider, async_mode, base_url, api_key, api_mode, runtime_key) -> (client, default_model, loop)
# NOTE: loop identity is NOT part of the key. On async cache hits we check
# whether the cached loop is the *current* loop; if not, the stale entry is
# replaced in-place. This bounds cache growth to one entry per unique
# provider config rather than one per (config × event-loop), which previously
# caused unbounded fd accumulation in long-running gateway processes (#10200).
# Client cache: (provider, async_mode, base_url, api_key) -> (client, default_model)
_client_cache: Dict[tuple, tuple] = {}
_client_cache_lock = threading.Lock()
_CLIENT_CACHE_MAX_SIZE = 64 # safety belt — evict oldest when exceeded
def neuter_async_httpx_del() -> None:
@@ -2156,7 +1941,7 @@ def cleanup_stale_async_clients() -> None:
def _is_openrouter_client(client: Any) -> bool:
for obj in (client, getattr(client, "_client", None), getattr(client, "client", None)):
if obj and base_url_host_matches(str(getattr(obj, "base_url", "") or ""), "openrouter.ai"):
if obj and "openrouter" in str(getattr(obj, "base_url", "") or "").lower():
return True
return False
@@ -2185,49 +1970,39 @@ def _get_cached_client(
Async clients (AsyncOpenAI) use httpx.AsyncClient internally, which
binds to the event loop that was current when the client was created.
Using such a client on a *different* loop causes deadlocks or
RuntimeError. To prevent cross-loop issues, the cache validates on
every async hit that the cached loop is the *current, open* loop.
If the loop changed (e.g. a new gateway worker-thread loop), the stale
entry is replaced in-place rather than creating an additional entry.
This keeps cache size bounded to one entry per unique provider config,
preventing the fd-exhaustion that previously occurred in long-running
gateways where recycled worker threads created unbounded entries (#10200).
RuntimeError. To prevent cross-loop issues (especially in gateway
mode where _run_async() may spawn fresh loops in worker threads), the
cache key for async clients includes the current event loop's identity
so each loop gets its own client instance.
"""
# Resolve the current event loop for async clients so we can validate
# cached entries. Loop identity is NOT in the cache key — instead we
# check at hit time whether the cached loop is still current and open.
# This prevents unbounded cache growth from recycled worker-thread loops
# while still guaranteeing we never reuse a client on the wrong loop
# (which causes deadlocks, see #2681).
# Include loop identity for async clients to prevent cross-loop reuse.
# httpx.AsyncClient (inside AsyncOpenAI) is bound to the loop where it
# was created — reusing it on a different loop causes deadlocks (#2681).
loop_id = 0
current_loop = None
if async_mode:
try:
import asyncio as _aio
current_loop = _aio.get_event_loop()
loop_id = id(current_loop)
except RuntimeError:
pass
runtime = _normalize_main_runtime(main_runtime)
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", loop_id, runtime_key)
with _client_cache_lock:
if cache_key in _client_cache:
cached_client, cached_default, cached_loop = _client_cache[cache_key]
if async_mode:
# Validate: the cached client must be bound to the CURRENT,
# OPEN loop. If the loop changed or was closed, the httpx
# transport inside is dead — force-close and replace.
loop_ok = (
cached_loop is not None
and cached_loop is current_loop
and not cached_loop.is_closed()
)
if loop_ok:
# A cached async client whose loop has been closed will raise
# "Event loop is closed" when httpx tries to clean up its
# transport. Discard the stale client and create a fresh one.
if cached_loop is not None and cached_loop.is_closed():
_force_close_async_httpx(cached_client)
del _client_cache[cache_key]
else:
effective = _compat_model(cached_client, model, cached_default)
return cached_client, effective
# Stale — evict and fall through to create a new client.
_force_close_async_httpx(cached_client)
del _client_cache[cache_key]
else:
effective = _compat_model(cached_client, model, cached_default)
return cached_client, effective
@@ -2247,12 +2022,6 @@ def _get_cached_client(
bound_loop = current_loop
with _client_cache_lock:
if cache_key not in _client_cache:
# Safety belt: if the cache has grown beyond the max, evict
# the oldest entries (FIFO — dict preserves insertion order).
while len(_client_cache) >= _CLIENT_CACHE_MAX_SIZE:
evict_key, evict_entry = next(iter(_client_cache.items()))
_force_close_async_httpx(evict_entry[0])
del _client_cache[evict_key]
_client_cache[cache_key] = (client, default_model, bound_loop)
else:
client, default_model, _ = _client_cache[cache_key]
@@ -2278,6 +2047,7 @@ def _resolve_task_provider_model(
to "custom" and the task uses that direct endpoint. api_mode is one of
"chat_completions", "codex_responses", or None (auto-detect).
"""
config = {}
cfg_provider = None
cfg_model = None
cfg_base_url = None
@@ -2285,7 +2055,16 @@ def _resolve_task_provider_model(
cfg_api_mode = None
if task:
task_config = _get_auxiliary_task_config(task)
try:
from hermes_cli.config import load_config
config = load_config()
except ImportError:
config = {}
aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
if not isinstance(task_config, dict):
task_config = {}
cfg_provider = str(task_config.get("provider", "")).strip() or None
cfg_model = str(task_config.get("model", "")).strip() or None
cfg_base_url = str(task_config.get("base_url", "")).strip() or None
@@ -2315,25 +2094,17 @@ def _resolve_task_provider_model(
_DEFAULT_AUX_TIMEOUT = 30.0
def _get_auxiliary_task_config(task: str) -> Dict[str, Any]:
"""Return the config dict for auxiliary.<task>, or {} when unavailable."""
if not task:
return {}
try:
from hermes_cli.config import load_config
config = load_config()
except ImportError:
return {}
aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
return task_config if isinstance(task_config, dict) else {}
def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float:
"""Read timeout from auxiliary.{task}.timeout in config, falling back to *default*."""
if not task:
return default
task_config = _get_auxiliary_task_config(task)
try:
from hermes_cli.config import load_config
config = load_config()
except ImportError:
return default
aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
raw = task_config.get("timeout")
if raw is not None:
try:
@@ -2343,15 +2114,6 @@ def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float
return default
def _get_task_extra_body(task: str) -> Dict[str, Any]:
"""Read auxiliary.<task>.extra_body and return a shallow copy when valid."""
task_config = _get_auxiliary_task_config(task)
raw = task_config.get("extra_body")
if isinstance(raw, dict):
return dict(raw)
return {}
# ---------------------------------------------------------------------------
# Anthropic-compatible endpoint detection + image block conversion
# ---------------------------------------------------------------------------
@@ -2439,21 +2201,6 @@ def _build_call_kwargs(
"timeout": timeout,
}
fixed_temperature = _fixed_temperature_for_model(model, base_url)
if fixed_temperature is OMIT_TEMPERATURE:
temperature = None # strip — let server choose
elif fixed_temperature is not None:
temperature = fixed_temperature
# Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
# drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
# flush_memories, 0 on structured-JSON extraction) don't 400 the moment
# the aux model is flipped to 4.7.
if temperature is not None:
from agent.anthropic_adapter import _forbids_sampling_params
if _forbids_sampling_params(model):
temperature = None
if temperature is not None:
kwargs["temperature"] = temperature
@@ -2462,7 +2209,7 @@ def _build_call_kwargs(
# Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
if provider == "custom":
custom_base = base_url or _current_custom_base_url()
if base_url_hostname(custom_base) == "api.openai.com":
if "api.openai.com" in custom_base.lower():
kwargs["max_completion_tokens"] = max_tokens
else:
kwargs["max_tokens"] = max_tokens
@@ -2554,15 +2301,13 @@ def call_llm(
"""
resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
task, provider, model, base_url, api_key)
effective_extra_body = _get_task_extra_body(task)
effective_extra_body.update(extra_body or {})
if task == "vision":
effective_provider, client, final_model = resolve_vision_provider_client(
provider=resolved_provider if resolved_provider != "auto" else provider,
model=resolved_model or model,
base_url=resolved_base_url or base_url,
api_key=resolved_api_key or api_key,
provider=provider,
model=model,
base_url=base_url,
api_key=api_key,
async_mode=False,
)
if client is None and resolved_provider != "auto" and not resolved_base_url:
@@ -2624,14 +2369,11 @@ def call_llm(
task, resolved_provider or "auto", final_model or "default",
f" at {_base_info}" if _base_info and "openrouter" not in _base_info else "")
# Pass the client's actual base_url (not just resolved_base_url) so
# endpoint-specific temperature overrides can distinguish
# api.moonshot.ai vs api.kimi.com/coding even on auto-detected routes.
kwargs = _build_call_kwargs(
resolved_provider, final_model, messages,
temperature=temperature, max_tokens=max_tokens,
tools=tools, timeout=effective_timeout, extra_body=effective_extra_body,
base_url=_base_info or resolved_base_url)
tools=tools, timeout=effective_timeout, extra_body=extra_body,
base_url=resolved_base_url)
# Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
_client_base = str(getattr(client, "base_url", "") or "")
@@ -2685,8 +2427,7 @@ def call_llm(
fb_label, fb_model, messages,
temperature=temperature, max_tokens=max_tokens,
tools=tools, timeout=effective_timeout,
extra_body=effective_extra_body,
base_url=str(getattr(fb_client, "base_url", "") or ""))
extra_body=extra_body)
return _validate_llm_response(
fb_client.chat.completions.create(**fb_kwargs), task)
raise
@@ -2768,15 +2509,13 @@ async def async_call_llm(
"""
resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
task, provider, model, base_url, api_key)
effective_extra_body = _get_task_extra_body(task)
effective_extra_body.update(extra_body or {})
if task == "vision":
effective_provider, client, final_model = resolve_vision_provider_client(
provider=resolved_provider if resolved_provider != "auto" else provider,
model=resolved_model or model,
base_url=resolved_base_url or base_url,
api_key=resolved_api_key or api_key,
provider=provider,
model=model,
base_url=base_url,
api_key=api_key,
async_mode=True,
)
if client is None and resolved_provider != "auto" and not resolved_base_url:
@@ -2823,17 +2562,14 @@ async def async_call_llm(
effective_timeout = timeout if timeout is not None else _get_task_timeout(task)
# Pass the client's actual base_url (not just resolved_base_url) so
# endpoint-specific temperature overrides can distinguish
# api.moonshot.ai vs api.kimi.com/coding even on auto-detected routes.
_client_base = str(getattr(client, "base_url", "") or "")
kwargs = _build_call_kwargs(
resolved_provider, final_model, messages,
temperature=temperature, max_tokens=max_tokens,
tools=tools, timeout=effective_timeout, extra_body=effective_extra_body,
base_url=_client_base or resolved_base_url)
tools=tools, timeout=effective_timeout, extra_body=extra_body,
base_url=resolved_base_url)
# Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
_client_base = str(getattr(client, "base_url", "") or "")
if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
@@ -2869,8 +2605,7 @@ async def async_call_llm(
fb_label, fb_model, messages,
temperature=temperature, max_tokens=max_tokens,
tools=tools, timeout=effective_timeout,
extra_body=effective_extra_body,
base_url=str(getattr(fb_client, "base_url", "") or ""))
extra_body=extra_body)
# Convert sync fallback client to async
async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "")
if async_fb_model and async_fb_model != fb_kwargs.get("model"):
File diff suppressed because it is too large Load Diff
-813
View File
@@ -1,813 +0,0 @@
"""Codex Responses API adapter.
Pure format-conversion and normalization logic for the OpenAI Responses API
(used by OpenAI Codex, xAI, GitHub Models, and other Responses-compatible endpoints).
Extracted from run_agent.py to isolate Responses API-specific logic from the
core agent loop. All functions are stateless — they operate on the data passed
in and return transformed results.
"""
from __future__ import annotations
import hashlib
import json
import logging
import re
import uuid
from types import SimpleNamespace
from typing import Any, Dict, List, Optional
from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Multimodal content helpers
# ---------------------------------------------------------------------------
def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
"""Convert chat-style multimodal content to Responses API input parts.
Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
Returns an empty list when ``content`` is not a list or contains no
recognized parts — callers fall back to the string path.
"""
if not isinstance(content, list):
return []
converted: List[Dict[str, Any]] = []
for part in content:
if isinstance(part, str):
if part:
converted.append({"type": "input_text", "text": part})
continue
if not isinstance(part, dict):
continue
ptype = str(part.get("type") or "").strip().lower()
if ptype in {"text", "input_text", "output_text"}:
text = part.get("text")
if isinstance(text, str) and text:
converted.append({"type": "input_text", "text": text})
continue
if ptype in {"image_url", "input_image"}:
image_ref = part.get("image_url")
detail = part.get("detail")
if isinstance(image_ref, dict):
url = image_ref.get("url")
detail = image_ref.get("detail", detail)
else:
url = image_ref
if not isinstance(url, str) or not url:
continue
image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
if isinstance(detail, str) and detail.strip():
image_part["detail"] = detail.strip()
converted.append(image_part)
return converted
def _summarize_user_message_for_log(content: Any) -> str:
"""Return a short text summary of a user message for logging/trajectory.
Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}``
parts from the API server. Logging, spinner previews, and trajectory
files all want a plain string — this helper extracts the first chunk of
text and notes any attached images. Returns an empty string for empty
lists and ``str(content)`` for unexpected scalar types.
"""
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
text_bits: List[str] = []
image_count = 0
for part in content:
if isinstance(part, str):
if part:
text_bits.append(part)
continue
if not isinstance(part, dict):
continue
ptype = str(part.get("type") or "").strip().lower()
if ptype in {"text", "input_text", "output_text"}:
text = part.get("text")
if isinstance(text, str) and text:
text_bits.append(text)
elif ptype in {"image_url", "input_image"}:
image_count += 1
summary = " ".join(text_bits).strip()
if image_count:
note = f"[{image_count} image{'s' if image_count != 1 else ''}]"
summary = f"{note} {summary}" if summary else note
return summary
try:
return str(content)
except Exception:
return ""
# ---------------------------------------------------------------------------
# ID helpers
# ---------------------------------------------------------------------------
def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str:
"""Generate a deterministic call_id from tool call content.
Used as a fallback when the API doesn't provide a call_id.
Deterministic IDs prevent cache invalidation — random UUIDs would
make every API call's prefix unique, breaking OpenAI's prompt cache.
"""
seed = f"{fn_name}:{arguments}:{index}"
digest = hashlib.sha256(seed.encode("utf-8", errors="replace")).hexdigest()[:12]
return f"call_{digest}"
def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]:
"""Split a stored tool id into (call_id, response_item_id)."""
if not isinstance(raw_id, str):
return None, None
value = raw_id.strip()
if not value:
return None, None
if "|" in value:
call_id, response_item_id = value.split("|", 1)
call_id = call_id.strip() or None
response_item_id = response_item_id.strip() or None
return call_id, response_item_id
if value.startswith("fc_"):
return None, value
return value, None
def _derive_responses_function_call_id(
call_id: str,
response_item_id: Optional[str] = None,
) -> str:
"""Build a valid Responses `function_call.id` (must start with `fc_`)."""
if isinstance(response_item_id, str):
candidate = response_item_id.strip()
if candidate.startswith("fc_"):
return candidate
source = (call_id or "").strip()
if source.startswith("fc_"):
return source
if source.startswith("call_") and len(source) > len("call_"):
return f"fc_{source[len('call_'):]}"
sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source)
if sanitized.startswith("fc_"):
return sanitized
if sanitized.startswith("call_") and len(sanitized) > len("call_"):
return f"fc_{sanitized[len('call_'):]}"
if sanitized:
return f"fc_{sanitized[:48]}"
seed = source or str(response_item_id or "") or uuid.uuid4().hex
digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24]
return f"fc_{digest}"
# ---------------------------------------------------------------------------
# Schema conversion
# ---------------------------------------------------------------------------
def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
"""Convert chat-completions tool schemas to Responses function-tool schemas."""
if not tools:
return None
converted: List[Dict[str, Any]] = []
for item in tools:
fn = item.get("function", {}) if isinstance(item, dict) else {}
name = fn.get("name")
if not isinstance(name, str) or not name.strip():
continue
converted.append({
"type": "function",
"name": name,
"description": fn.get("description", ""),
"strict": False,
"parameters": fn.get("parameters", {"type": "object", "properties": {}}),
})
return converted or None
# ---------------------------------------------------------------------------
# Message format conversion
# ---------------------------------------------------------------------------
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Convert internal chat-style messages to Responses input items."""
items: List[Dict[str, Any]] = []
seen_item_ids: set = set()
for msg in messages:
if not isinstance(msg, dict):
continue
role = msg.get("role")
if role == "system":
continue
if role in {"user", "assistant"}:
content = msg.get("content", "")
if isinstance(content, list):
content_parts = _chat_content_to_responses_parts(content)
content_text = "".join(
p.get("text", "") for p in content_parts if p.get("type") == "input_text"
)
else:
content_parts = []
content_text = str(content) if content is not None else ""
if role == "assistant":
# Replay encrypted reasoning items from previous turns
# so the API can maintain coherent reasoning chains.
codex_reasoning = msg.get("codex_reasoning_items")
has_codex_reasoning = False
if isinstance(codex_reasoning, list):
for ri in codex_reasoning:
if isinstance(ri, dict) and ri.get("encrypted_content"):
item_id = ri.get("id")
if item_id and item_id in seen_item_ids:
continue
# Strip the "id" field — with store=False the
# Responses API cannot look up items by ID and
# returns 404. The encrypted_content blob is
# self-contained for reasoning chain continuity.
replay_item = {k: v for k, v in ri.items() if k != "id"}
items.append(replay_item)
if item_id:
seen_item_ids.add(item_id)
has_codex_reasoning = True
if content_parts:
items.append({"role": "assistant", "content": content_parts})
elif content_text.strip():
items.append({"role": "assistant", "content": content_text})
elif has_codex_reasoning:
# The Responses API requires a following item after each
# reasoning item (otherwise: missing_following_item error).
# When the assistant produced only reasoning with no visible
# content, emit an empty assistant message as the required
# following item.
items.append({"role": "assistant", "content": ""})
tool_calls = msg.get("tool_calls")
if isinstance(tool_calls, list):
for tc in tool_calls:
if not isinstance(tc, dict):
continue
fn = tc.get("function", {})
fn_name = fn.get("name")
if not isinstance(fn_name, str) or not fn_name.strip():
continue
embedded_call_id, embedded_response_item_id = _split_responses_tool_id(
tc.get("id")
)
call_id = tc.get("call_id")
if not isinstance(call_id, str) or not call_id.strip():
call_id = embedded_call_id
if not isinstance(call_id, str) or not call_id.strip():
if (
isinstance(embedded_response_item_id, str)
and embedded_response_item_id.startswith("fc_")
and len(embedded_response_item_id) > len("fc_")
):
call_id = f"call_{embedded_response_item_id[len('fc_'):]}"
else:
_raw_args = str(fn.get("arguments", "{}"))
call_id = _deterministic_call_id(fn_name, _raw_args, len(items))
call_id = call_id.strip()
arguments = fn.get("arguments", "{}")
if isinstance(arguments, dict):
arguments = json.dumps(arguments, ensure_ascii=False)
elif not isinstance(arguments, str):
arguments = str(arguments)
arguments = arguments.strip() or "{}"
items.append({
"type": "function_call",
"call_id": call_id,
"name": fn_name,
"arguments": arguments,
})
continue
# Non-assistant (user) role: emit multimodal parts when present,
# otherwise fall back to the text payload.
if content_parts:
items.append({"role": role, "content": content_parts})
else:
items.append({"role": role, "content": content_text})
continue
if role == "tool":
raw_tool_call_id = msg.get("tool_call_id")
call_id, _ = _split_responses_tool_id(raw_tool_call_id)
if not isinstance(call_id, str) or not call_id.strip():
if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip():
call_id = raw_tool_call_id.strip()
if not isinstance(call_id, str) or not call_id.strip():
continue
items.append({
"type": "function_call_output",
"call_id": call_id,
"output": str(msg.get("content", "") or ""),
})
return items
# ---------------------------------------------------------------------------
# Input preflight / validation
# ---------------------------------------------------------------------------
def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
if not isinstance(raw_items, list):
raise ValueError("Codex Responses input must be a list of input items.")
normalized: List[Dict[str, Any]] = []
seen_ids: set = set()
for idx, item in enumerate(raw_items):
if not isinstance(item, dict):
raise ValueError(f"Codex Responses input[{idx}] must be an object.")
item_type = item.get("type")
if item_type == "function_call":
call_id = item.get("call_id")
name = item.get("name")
if not isinstance(call_id, str) or not call_id.strip():
raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.")
if not isinstance(name, str) or not name.strip():
raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.")
arguments = item.get("arguments", "{}")
if isinstance(arguments, dict):
arguments = json.dumps(arguments, ensure_ascii=False)
elif not isinstance(arguments, str):
arguments = str(arguments)
arguments = arguments.strip() or "{}"
normalized.append(
{
"type": "function_call",
"call_id": call_id.strip(),
"name": name.strip(),
"arguments": arguments,
}
)
continue
if item_type == "function_call_output":
call_id = item.get("call_id")
if not isinstance(call_id, str) or not call_id.strip():
raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.")
output = item.get("output", "")
if output is None:
output = ""
if not isinstance(output, str):
output = str(output)
normalized.append(
{
"type": "function_call_output",
"call_id": call_id.strip(),
"output": output,
}
)
continue
if item_type == "reasoning":
encrypted = item.get("encrypted_content")
if isinstance(encrypted, str) and encrypted:
item_id = item.get("id")
if isinstance(item_id, str) and item_id:
if item_id in seen_ids:
continue
seen_ids.add(item_id)
reasoning_item = {"type": "reasoning", "encrypted_content": encrypted}
# Do NOT include the "id" in the outgoing item — with
# store=False (our default) the API tries to resolve the
# id server-side and returns 404. The id is still used
# above for local deduplication via seen_ids.
summary = item.get("summary")
if isinstance(summary, list):
reasoning_item["summary"] = summary
else:
reasoning_item["summary"] = []
normalized.append(reasoning_item)
continue
role = item.get("role")
if role in {"user", "assistant"}:
content = item.get("content", "")
if content is None:
content = ""
if isinstance(content, list):
# Multimodal content from ``_chat_messages_to_responses_input``
# is already in Responses format (``input_text`` / ``input_image``).
# Validate each part and pass through.
validated: List[Dict[str, Any]] = []
for part_idx, part in enumerate(content):
if isinstance(part, str):
if part:
validated.append({"type": "input_text", "text": part})
continue
if not isinstance(part, dict):
raise ValueError(
f"Codex Responses input[{idx}].content[{part_idx}] must be an object or string."
)
ptype = str(part.get("type") or "").strip().lower()
if ptype in {"input_text", "text", "output_text"}:
text = part.get("text", "")
if not isinstance(text, str):
text = str(text or "")
validated.append({"type": "input_text", "text": text})
elif ptype in {"input_image", "image_url"}:
image_ref = part.get("image_url", "")
detail = part.get("detail")
if isinstance(image_ref, dict):
url = image_ref.get("url", "")
detail = image_ref.get("detail", detail)
else:
url = image_ref
if not isinstance(url, str):
url = str(url or "")
image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
if isinstance(detail, str) and detail.strip():
image_part["detail"] = detail.strip()
validated.append(image_part)
else:
raise ValueError(
f"Codex Responses input[{idx}].content[{part_idx}] has unsupported type {part.get('type')!r}."
)
normalized.append({"role": role, "content": validated})
continue
if not isinstance(content, str):
content = str(content)
normalized.append({"role": role, "content": content})
continue
raise ValueError(
f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})."
)
return normalized
def _preflight_codex_api_kwargs(
api_kwargs: Any,
*,
allow_stream: bool = False,
) -> Dict[str, Any]:
if not isinstance(api_kwargs, dict):
raise ValueError("Codex Responses request must be a dict.")
required = {"model", "instructions", "input"}
missing = [key for key in required if key not in api_kwargs]
if missing:
raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.")
model = api_kwargs.get("model")
if not isinstance(model, str) or not model.strip():
raise ValueError("Codex Responses request 'model' must be a non-empty string.")
model = model.strip()
instructions = api_kwargs.get("instructions")
if instructions is None:
instructions = ""
if not isinstance(instructions, str):
instructions = str(instructions)
instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY
normalized_input = _preflight_codex_input_items(api_kwargs.get("input"))
tools = api_kwargs.get("tools")
normalized_tools = None
if tools is not None:
if not isinstance(tools, list):
raise ValueError("Codex Responses request 'tools' must be a list when provided.")
normalized_tools = []
for idx, tool in enumerate(tools):
if not isinstance(tool, dict):
raise ValueError(f"Codex Responses tools[{idx}] must be an object.")
if tool.get("type") != "function":
raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.")
name = tool.get("name")
parameters = tool.get("parameters")
if not isinstance(name, str) or not name.strip():
raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.")
if not isinstance(parameters, dict):
raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.")
description = tool.get("description", "")
if description is None:
description = ""
if not isinstance(description, str):
description = str(description)
strict = tool.get("strict", False)
if not isinstance(strict, bool):
strict = bool(strict)
normalized_tools.append(
{
"type": "function",
"name": name.strip(),
"description": description,
"strict": strict,
"parameters": parameters,
}
)
store = api_kwargs.get("store", False)
if store is not False:
raise ValueError("Codex Responses contract requires 'store' to be false.")
allowed_keys = {
"model", "instructions", "input", "tools", "store",
"reasoning", "include", "max_output_tokens", "temperature",
"tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
"extra_headers",
}
normalized: Dict[str, Any] = {
"model": model,
"instructions": instructions,
"input": normalized_input,
"store": False,
}
if normalized_tools is not None:
normalized["tools"] = normalized_tools
# Pass through reasoning config
reasoning = api_kwargs.get("reasoning")
if isinstance(reasoning, dict):
normalized["reasoning"] = reasoning
include = api_kwargs.get("include")
if isinstance(include, list):
normalized["include"] = include
service_tier = api_kwargs.get("service_tier")
if isinstance(service_tier, str) and service_tier.strip():
normalized["service_tier"] = service_tier.strip()
# Pass through max_output_tokens and temperature
max_output_tokens = api_kwargs.get("max_output_tokens")
if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0:
normalized["max_output_tokens"] = int(max_output_tokens)
temperature = api_kwargs.get("temperature")
if isinstance(temperature, (int, float)):
normalized["temperature"] = float(temperature)
# Pass through tool_choice, parallel_tool_calls, prompt_cache_key
for passthrough_key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"):
val = api_kwargs.get(passthrough_key)
if val is not None:
normalized[passthrough_key] = val
extra_headers = api_kwargs.get("extra_headers")
if extra_headers is not None:
if not isinstance(extra_headers, dict):
raise ValueError("Codex Responses request 'extra_headers' must be an object.")
normalized_headers: Dict[str, str] = {}
for key, value in extra_headers.items():
if not isinstance(key, str) or not key.strip():
raise ValueError("Codex Responses request 'extra_headers' keys must be non-empty strings.")
if value is None:
continue
normalized_headers[key.strip()] = str(value)
if normalized_headers:
normalized["extra_headers"] = normalized_headers
if allow_stream:
stream = api_kwargs.get("stream")
if stream is not None and stream is not True:
raise ValueError("Codex Responses 'stream' must be true when set.")
if stream is True:
normalized["stream"] = True
allowed_keys.add("stream")
elif "stream" in api_kwargs:
raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.")
unexpected = sorted(key for key in api_kwargs if key not in allowed_keys)
if unexpected:
raise ValueError(
f"Codex Responses request has unsupported field(s): {', '.join(unexpected)}."
)
return normalized
# ---------------------------------------------------------------------------
# Response extraction helpers
# ---------------------------------------------------------------------------
def _extract_responses_message_text(item: Any) -> str:
"""Extract assistant text from a Responses message output item."""
content = getattr(item, "content", None)
if not isinstance(content, list):
return ""
chunks: List[str] = []
for part in content:
ptype = getattr(part, "type", None)
if ptype not in {"output_text", "text"}:
continue
text = getattr(part, "text", None)
if isinstance(text, str) and text:
chunks.append(text)
return "".join(chunks).strip()
def _extract_responses_reasoning_text(item: Any) -> str:
"""Extract a compact reasoning text from a Responses reasoning item."""
summary = getattr(item, "summary", None)
if isinstance(summary, list):
chunks: List[str] = []
for part in summary:
text = getattr(part, "text", None)
if isinstance(text, str) and text:
chunks.append(text)
if chunks:
return "\n".join(chunks).strip()
text = getattr(item, "text", None)
if isinstance(text, str) and text:
return text.strip()
return ""
# ---------------------------------------------------------------------------
# Full response normalization
# ---------------------------------------------------------------------------
def _normalize_codex_response(response: Any) -> tuple[Any, str]:
"""Normalize a Responses API object to an assistant_message-like object."""
output = getattr(response, "output", None)
if not isinstance(output, list) or not output:
# The Codex backend can return empty output when the answer was
# delivered entirely via stream events. Check output_text as a
# last-resort fallback before raising.
out_text = getattr(response, "output_text", None)
if isinstance(out_text, str) and out_text.strip():
logger.debug(
"Codex response has empty output but output_text is present (%d chars); "
"synthesizing output item.", len(out_text.strip()),
)
output = [SimpleNamespace(
type="message", role="assistant", status="completed",
content=[SimpleNamespace(type="output_text", text=out_text.strip())],
)]
response.output = output
else:
raise RuntimeError("Responses API returned no output items")
response_status = getattr(response, "status", None)
if isinstance(response_status, str):
response_status = response_status.strip().lower()
else:
response_status = None
if response_status in {"failed", "cancelled"}:
error_obj = getattr(response, "error", None)
if isinstance(error_obj, dict):
error_msg = error_obj.get("message") or str(error_obj)
else:
error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'"
raise RuntimeError(error_msg)
content_parts: List[str] = []
reasoning_parts: List[str] = []
reasoning_items_raw: List[Dict[str, Any]] = []
tool_calls: List[Any] = []
has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
saw_commentary_phase = False
saw_final_answer_phase = False
for item in output:
item_type = getattr(item, "type", None)
item_status = getattr(item, "status", None)
if isinstance(item_status, str):
item_status = item_status.strip().lower()
else:
item_status = None
if item_status in {"queued", "in_progress", "incomplete"}:
has_incomplete_items = True
if item_type == "message":
item_phase = getattr(item, "phase", None)
if isinstance(item_phase, str):
normalized_phase = item_phase.strip().lower()
if normalized_phase in {"commentary", "analysis"}:
saw_commentary_phase = True
elif normalized_phase in {"final_answer", "final"}:
saw_final_answer_phase = True
message_text = _extract_responses_message_text(item)
if message_text:
content_parts.append(message_text)
elif item_type == "reasoning":
reasoning_text = _extract_responses_reasoning_text(item)
if reasoning_text:
reasoning_parts.append(reasoning_text)
# Capture the full reasoning item for multi-turn continuity.
# encrypted_content is an opaque blob the API needs back on
# subsequent turns to maintain coherent reasoning chains.
encrypted = getattr(item, "encrypted_content", None)
if isinstance(encrypted, str) and encrypted:
raw_item = {"type": "reasoning", "encrypted_content": encrypted}
item_id = getattr(item, "id", None)
if isinstance(item_id, str) and item_id:
raw_item["id"] = item_id
# Capture summary — required by the API when replaying reasoning items
summary = getattr(item, "summary", None)
if isinstance(summary, list):
raw_summary = []
for part in summary:
text = getattr(part, "text", None)
if isinstance(text, str):
raw_summary.append({"type": "summary_text", "text": text})
raw_item["summary"] = raw_summary
reasoning_items_raw.append(raw_item)
elif item_type == "function_call":
if item_status in {"queued", "in_progress", "incomplete"}:
continue
fn_name = getattr(item, "name", "") or ""
arguments = getattr(item, "arguments", "{}")
if not isinstance(arguments, str):
arguments = json.dumps(arguments, ensure_ascii=False)
raw_call_id = getattr(item, "call_id", None)
raw_item_id = getattr(item, "id", None)
embedded_call_id, _ = _split_responses_tool_id(raw_item_id)
call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
if not isinstance(call_id, str) or not call_id.strip():
call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls))
call_id = call_id.strip()
response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
response_item_id = _derive_responses_function_call_id(call_id, response_item_id)
tool_calls.append(SimpleNamespace(
id=call_id,
call_id=call_id,
response_item_id=response_item_id,
type="function",
function=SimpleNamespace(name=fn_name, arguments=arguments),
))
elif item_type == "custom_tool_call":
fn_name = getattr(item, "name", "") or ""
arguments = getattr(item, "input", "{}")
if not isinstance(arguments, str):
arguments = json.dumps(arguments, ensure_ascii=False)
raw_call_id = getattr(item, "call_id", None)
raw_item_id = getattr(item, "id", None)
embedded_call_id, _ = _split_responses_tool_id(raw_item_id)
call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
if not isinstance(call_id, str) or not call_id.strip():
call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls))
call_id = call_id.strip()
response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
response_item_id = _derive_responses_function_call_id(call_id, response_item_id)
tool_calls.append(SimpleNamespace(
id=call_id,
call_id=call_id,
response_item_id=response_item_id,
type="function",
function=SimpleNamespace(name=fn_name, arguments=arguments),
))
final_text = "\n".join([p for p in content_parts if p]).strip()
if not final_text and hasattr(response, "output_text"):
out_text = getattr(response, "output_text", "")
if isinstance(out_text, str):
final_text = out_text.strip()
assistant_message = SimpleNamespace(
content=final_text,
tool_calls=tool_calls,
reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None,
reasoning_content=None,
reasoning_details=None,
codex_reasoning_items=reasoning_items_raw or None,
)
if tool_calls:
finish_reason = "tool_calls"
elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
finish_reason = "incomplete"
elif reasoning_items_raw and not final_text:
# Response contains only reasoning (encrypted thinking state) with
# no visible content or tool calls. The model is still thinking and
# needs another turn to produce the actual answer. Marking this as
# "stop" would send it into the empty-content retry loop which burns
# 3 retries then fails — treat it as incomplete instead so the Codex
# continuation path handles it correctly.
finish_reason = "incomplete"
else:
finish_reason = "stop"
return assistant_message, finish_reason
+46 -455
View File
@@ -17,10 +17,7 @@ Improvements over v2:
- Richer tool call/result detail in summarizer input
"""
import hashlib
import json
import logging
import re
import time
from typing import Any, Dict, List, Optional
@@ -31,7 +28,6 @@ from agent.model_metadata import (
get_model_context_length,
estimate_messages_tokens_rough,
)
from agent.redact import redact_sensitive_text
logger = logging.getLogger(__name__)
@@ -40,10 +36,7 @@ SUMMARY_PREFIX = (
"into the summary below. This is a handoff from a previous context "
"window — treat it as background reference, NOT as active instructions. "
"Do NOT answer questions or fulfill requests mentioned in this summary; "
"they were already addressed. "
"Your current task is identified in the '## Active Task' section of the "
"summary — resume exactly from there. "
"Respond ONLY to the latest user message "
"they were already addressed. Respond ONLY to the latest user message "
"that appears AFTER this summary. The current session state (files, "
"config, etc.) may reflect work described here — avoid repeating it:"
)
@@ -64,174 +57,6 @@ _CHARS_PER_TOKEN = 4
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
"""Shrink long string values inside a tool-call arguments JSON blob while
preserving JSON validity.
The ``function.arguments`` field on a tool call is a JSON-encoded string
passed through to the LLM provider; downstream providers strictly
validate it and return a non-retryable 400 when it is not well-formed.
An earlier implementation sliced the raw JSON at a fixed byte offset and
appended ``...[truncated]`` — which routinely produced strings like::
{"path": "/foo/bar", "content": "# long markdown
...[truncated]
i.e. an unterminated string and a missing closing brace. MiniMax, for
example, rejects this with ``invalid function arguments json string``
and the session gets stuck re-sending the same broken history on every
turn. See issue #11762 for the observed loop.
This helper parses the arguments, shrinks long string leaves inside the
parsed structure, and re-serialises. Non-string values (paths, ints,
booleans) are preserved intact. If the arguments are not valid JSON
to begin with — some model backends use non-JSON tool arguments — the
original string is returned unchanged rather than replaced with
something neither we nor the backend can parse.
"""
try:
parsed = json.loads(args)
except (ValueError, TypeError):
return args
def _shrink(obj: Any) -> Any:
if isinstance(obj, str):
if len(obj) > head_chars:
return obj[:head_chars] + "...[truncated]"
return obj
if isinstance(obj, dict):
return {k: _shrink(v) for k, v in obj.items()}
if isinstance(obj, list):
return [_shrink(v) for v in obj]
return obj
shrunken = _shrink(parsed)
# ensure_ascii=False preserves CJK/emoji instead of bloating with \uXXXX
return json.dumps(shrunken, ensure_ascii=False)
def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
"""Create an informative 1-line summary of a tool call + result.
Used during the pre-compression pruning pass to replace large tool
outputs with a short but useful description of what the tool did,
rather than a generic placeholder that carries zero information.
Returns strings like::
[terminal] ran `npm test` -> exit 0, 47 lines output
[read_file] read config.py from line 1 (1,200 chars)
[search_files] content search for 'compress' in agent/ -> 12 matches
"""
try:
args = json.loads(tool_args) if tool_args else {}
except (json.JSONDecodeError, TypeError):
args = {}
content = tool_content or ""
content_len = len(content)
line_count = content.count("\n") + 1 if content.strip() else 0
if tool_name == "terminal":
cmd = args.get("command", "")
if len(cmd) > 80:
cmd = cmd[:77] + "..."
exit_match = re.search(r'"exit_code"\s*:\s*(-?\d+)', content)
exit_code = exit_match.group(1) if exit_match else "?"
return f"[terminal] ran `{cmd}` -> exit {exit_code}, {line_count} lines output"
if tool_name == "read_file":
path = args.get("path", "?")
offset = args.get("offset", 1)
return f"[read_file] read {path} from line {offset} ({content_len:,} chars)"
if tool_name == "write_file":
path = args.get("path", "?")
written_lines = args.get("content", "").count("\n") + 1 if args.get("content") else "?"
return f"[write_file] wrote to {path} ({written_lines} lines)"
if tool_name == "search_files":
pattern = args.get("pattern", "?")
path = args.get("path", ".")
target = args.get("target", "content")
match_count = re.search(r'"total_count"\s*:\s*(\d+)', content)
count = match_count.group(1) if match_count else "?"
return f"[search_files] {target} search for '{pattern}' in {path} -> {count} matches"
if tool_name == "patch":
path = args.get("path", "?")
mode = args.get("mode", "replace")
return f"[patch] {mode} in {path} ({content_len:,} chars result)"
if tool_name in ("browser_navigate", "browser_click", "browser_snapshot",
"browser_type", "browser_scroll", "browser_vision"):
url = args.get("url", "")
ref = args.get("ref", "")
detail = f" {url}" if url else (f" ref={ref}" if ref else "")
return f"[{tool_name}]{detail} ({content_len:,} chars)"
if tool_name == "web_search":
query = args.get("query", "?")
return f"[web_search] query='{query}' ({content_len:,} chars result)"
if tool_name == "web_extract":
urls = args.get("urls", [])
url_desc = urls[0] if isinstance(urls, list) and urls else "?"
if isinstance(urls, list) and len(urls) > 1:
url_desc += f" (+{len(urls) - 1} more)"
return f"[web_extract] {url_desc} ({content_len:,} chars)"
if tool_name == "delegate_task":
goal = args.get("goal", "")
if len(goal) > 60:
goal = goal[:57] + "..."
return f"[delegate_task] '{goal}' ({content_len:,} chars result)"
if tool_name == "execute_code":
code_preview = (args.get("code") or "")[:60].replace("\n", " ")
if len(args.get("code", "")) > 60:
code_preview += "..."
return f"[execute_code] `{code_preview}` ({line_count} lines output)"
if tool_name in ("skill_view", "skills_list", "skill_manage"):
name = args.get("name", "?")
return f"[{tool_name}] name={name} ({content_len:,} chars)"
if tool_name == "vision_analyze":
question = args.get("question", "")[:50]
return f"[vision_analyze] '{question}' ({content_len:,} chars)"
if tool_name == "memory":
action = args.get("action", "?")
target = args.get("target", "?")
return f"[memory] {action} on {target}"
if tool_name == "todo":
return "[todo] updated task list"
if tool_name == "clarify":
return "[clarify] asked user a question"
if tool_name == "text_to_speech":
return f"[text_to_speech] generated audio ({content_len:,} chars)"
if tool_name == "cronjob":
action = args.get("action", "?")
return f"[cronjob] {action}"
if tool_name == "process":
action = args.get("action", "?")
sid = args.get("session_id", "?")
return f"[process] {action} session={sid}"
# Generic fallback
first_arg = ""
for k, v in list(args.items())[:2]:
sv = str(v)[:40]
first_arg += f" {k}={sv}"
return f"[{tool_name}]{first_arg} ({content_len:,} chars result)"
class ContextCompressor(ContextEngine):
"""Default context engine — compresses conversation context via lossy summarization.
@@ -253,8 +78,6 @@ class ContextCompressor(ContextEngine):
self._context_probed = False
self._context_probe_persistable = False
self._previous_summary = None
self._last_compression_savings_pct = 100.0
self._ineffective_compression_count = 0
def update_model(
self,
@@ -344,9 +167,6 @@ class ContextCompressor(ContextEngine):
# Stores the previous compaction summary for iterative updates
self._previous_summary: Optional[str] = None
# Anti-thrashing: track whether last compression was effective
self._last_compression_savings_pct: float = 100.0
self._ineffective_compression_count: int = 0
self._summary_failure_cooldown_until: float = 0.0
def update_from_response(self, usage: Dict[str, Any]):
@@ -355,26 +175,9 @@ class ContextCompressor(ContextEngine):
self.last_completion_tokens = usage.get("completion_tokens", 0)
def should_compress(self, prompt_tokens: int = None) -> bool:
"""Check if context exceeds the compression threshold.
Includes anti-thrashing protection: if the last two compressions
each saved less than 10%, skip compression to avoid infinite loops
where each pass removes only 1-2 messages.
"""
"""Check if context exceeds the compression threshold."""
tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
if tokens < self.threshold_tokens:
return False
# Anti-thrashing: back off if recent compressions were ineffective
if self._ineffective_compression_count >= 2:
if not self.quiet_mode:
logger.warning(
"Compression skipped — last %d compressions saved <10%% each. "
"Consider /new to start a fresh session, or /compress <topic> "
"for focused compression.",
self._ineffective_compression_count,
)
return False
return True
return tokens >= self.threshold_tokens
# ------------------------------------------------------------------
# Tool output pruning (cheap pre-pass, no LLM call)
@@ -384,16 +187,7 @@ class ContextCompressor(ContextEngine):
self, messages: List[Dict[str, Any]], protect_tail_count: int,
protect_tail_tokens: int | None = None,
) -> tuple[List[Dict[str, Any]], int]:
"""Replace old tool result contents with informative 1-line summaries.
Instead of a generic placeholder, generates a summary like::
[terminal] ran `npm test` -> exit 0, 47 lines output
[read_file] read config.py from line 1 (3,400 chars)
Also deduplicates identical tool results (e.g. reading the same file
5x keeps only the newest full copy) and truncates large tool_call
arguments in assistant messages outside the protected tail.
"""Replace old tool result contents with a short placeholder.
Walks backward from the end, protecting the most recent messages that
fall within ``protect_tail_tokens`` (when provided) OR the last
@@ -409,22 +203,6 @@ class ContextCompressor(ContextEngine):
result = [m.copy() for m in messages]
pruned = 0
# Build index: tool_call_id -> (tool_name, arguments_json)
call_id_to_tool: Dict[str, tuple] = {}
for msg in result:
if msg.get("role") == "assistant":
for tc in msg.get("tool_calls") or []:
if isinstance(tc, dict):
cid = tc.get("id", "")
fn = tc.get("function", {})
call_id_to_tool[cid] = (fn.get("name", "unknown"), fn.get("arguments", ""))
else:
cid = getattr(tc, "id", "") or ""
fn = getattr(tc, "function", None)
name = getattr(fn, "name", "unknown") if fn else "unknown"
args_str = getattr(fn, "arguments", "") if fn else ""
call_id_to_tool[cid] = (name, args_str)
# Determine the prune boundary
if protect_tail_tokens is not None and protect_tail_tokens > 0:
# Token-budget approach: walk backward accumulating tokens
@@ -433,8 +211,7 @@ class ContextCompressor(ContextEngine):
min_protect = min(protect_tail_count, len(result) - 1)
for i in range(len(result) - 1, -1, -1):
msg = result[i]
raw_content = msg.get("content") or ""
content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
content_len = len(msg.get("content") or "")
msg_tokens = content_len // _CHARS_PER_TOKEN + 10
for tc in msg.get("tool_calls") or []:
if isinstance(tc, dict):
@@ -449,76 +226,18 @@ class ContextCompressor(ContextEngine):
else:
prune_boundary = len(result) - protect_tail_count
# Pass 1: Deduplicate identical tool results.
# When the same file is read multiple times, keep only the most recent
# full copy and replace older duplicates with a back-reference.
content_hashes: dict = {} # hash -> (index, tool_call_id)
for i in range(len(result) - 1, -1, -1):
msg = result[i]
if msg.get("role") != "tool":
continue
content = msg.get("content") or ""
# Skip multimodal content (list of content blocks)
if isinstance(content, list):
continue
if len(content) < 200:
continue
h = hashlib.md5(content.encode("utf-8", errors="replace")).hexdigest()[:12]
if h in content_hashes:
# This is an older duplicate — replace with back-reference
result[i] = {**msg, "content": "[Duplicate tool output — same content as a more recent call]"}
pruned += 1
else:
content_hashes[h] = (i, msg.get("tool_call_id", "?"))
# Pass 2: Replace old tool results with informative summaries
for i in range(prune_boundary):
msg = result[i]
if msg.get("role") != "tool":
continue
content = msg.get("content", "")
# Skip multimodal content (list of content blocks)
if isinstance(content, list):
continue
if not content or content == _PRUNED_TOOL_PLACEHOLDER:
continue
# Skip already-deduplicated or previously-summarized results
if content.startswith("[Duplicate tool output"):
continue
# Only prune if the content is substantial (>200 chars)
if len(content) > 200:
call_id = msg.get("tool_call_id", "")
tool_name, tool_args = call_id_to_tool.get(call_id, ("unknown", ""))
summary = _summarize_tool_result(tool_name, tool_args, content)
result[i] = {**msg, "content": summary}
result[i] = {**msg, "content": _PRUNED_TOOL_PLACEHOLDER}
pruned += 1
# Pass 3: Truncate large tool_call arguments in assistant messages
# outside the protected tail. write_file with 50KB content, for
# example, survives pruning entirely without this.
#
# The shrinking is done inside the parsed JSON structure so the
# result remains valid JSON — otherwise downstream providers 400
# on every subsequent turn until the broken call falls out of
# the window. See ``_truncate_tool_call_args_json`` docstring.
for i in range(prune_boundary):
msg = result[i]
if msg.get("role") != "assistant" or not msg.get("tool_calls"):
continue
new_tcs = []
modified = False
for tc in msg["tool_calls"]:
if isinstance(tc, dict):
args = tc.get("function", {}).get("arguments", "")
if len(args) > 500:
new_args = _truncate_tool_call_args_json(args)
if new_args != args:
tc = {**tc, "function": {**tc["function"], "arguments": new_args}}
modified = True
new_tcs.append(tc)
if modified:
result[i] = {**msg, "tool_calls": new_tcs}
return result, pruned
# ------------------------------------------------------------------
@@ -551,15 +270,11 @@ class ContextCompressor(ContextEngine):
Includes tool call arguments and result content (up to
``_CONTENT_MAX`` chars per message) so the summarizer can preserve
specific details like file paths, commands, and outputs.
All content is redacted before serialization to prevent secrets
(API keys, tokens, passwords) from leaking into the summary that
gets sent to the auxiliary model and persisted across compactions.
"""
parts = []
for msg in turns:
role = msg.get("role", "unknown")
content = redact_sensitive_text(msg.get("content") or "")
content = msg.get("content") or ""
# Tool results: keep enough content for the summarizer
if role == "tool":
@@ -580,7 +295,7 @@ class ContextCompressor(ContextEngine):
if isinstance(tc, dict):
fn = tc.get("function", {})
name = fn.get("name", "?")
args = redact_sensitive_text(fn.get("arguments", ""))
args = fn.get("arguments", "")
# Truncate long arguments but keep enough for context
if len(args) > self._TOOL_ARGS_MAX:
args = args[:self._TOOL_ARGS_HEAD] + "..."
@@ -638,55 +353,33 @@ class ContextCompressor(ContextEngine):
"assistant that continues the conversation. "
"Do NOT respond to any questions or requests in the conversation — "
"only output the structured summary. "
"Do NOT include any preamble, greeting, or prefix. "
"Write the summary in the same language the user was using in the "
"conversation — do not translate or switch to English. "
"NEVER include API keys, tokens, passwords, secrets, credentials, "
"or connection strings in the summary — replace any that appear "
"with [REDACTED]. Note that the user had credentials present, but "
"do not preserve their values."
"Do NOT include any preamble, greeting, or prefix."
)
# Shared structured template (used by both paths).
_template_sections = f"""## Active Task
[THE SINGLE MOST IMPORTANT FIELD. Copy the user's most recent request or
task assignment verbatim — the exact words they used. If multiple tasks
were requested and only some are done, list only the ones NOT yet completed.
The next assistant must pick up exactly here. Example:
"User asked: 'Now refactor the auth module to use JWT instead of sessions'"
If no outstanding task exists, write "None."]
## Goal
[What the user is trying to accomplish overall]
# Key changes vs v1:
# - "Pending User Asks" section (from Claude Code) explicitly tracks
# unanswered questions so the model knows what's resolved vs open
# - "Remaining Work" replaces "Next Steps" to avoid reading as active
# instructions
# - "Resolved Questions" makes it clear which questions were already
# answered (prevents model from re-answering them)
_template_sections = f"""## Goal
[What the user is trying to accomplish]
## Constraints & Preferences
[User preferences, coding style, constraints, important decisions]
## Completed Actions
[Numbered list of concrete actions taken — include tool used, target, and outcome.
Format each as: N. ACTION target — outcome [tool: name]
Example:
1. READ config.py:45 — found `==` should be `!=` [tool: read_file]
2. PATCH config.py:45 — changed `==` to `!=` [tool: patch]
3. TEST `pytest tests/` — 3/50 failed: test_parse, test_validate, test_edge [tool: terminal]
Be specific with file paths, commands, line numbers, and results.]
## Active State
[Current working state — include:
- Working directory and branch (if applicable)
- Modified/created files with brief note on each
- Test status (X/Y passing)
- Any running processes or servers
- Environment details that matter]
## In Progress
[Work currently underway — what was being done when compaction fired]
## Blocked
[Any blockers, errors, or issues not yet resolved. Include exact error messages.]
## Progress
### Done
[Completed work — include specific file paths, commands run, results obtained]
### In Progress
[Work currently underway]
### Blocked
[Any blockers or issues encountered]
## Key Decisions
[Important technical decisions and WHY they were made]
[Important technical decisions and why they were made]
## Resolved Questions
[Questions the user asked that were ALREADY answered — include the answer so the next assistant does not re-answer them]
@@ -701,9 +394,12 @@ Be specific with file paths, commands, line numbers, and results.]
[What remains to be done — framed as context, not instructions]
## Critical Context
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation. NEVER include API keys, tokens, passwords, or credentials — write [REDACTED] instead.]
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.
## Tools & Patterns
[Which tools were used, how they were used effectively, and any tool-specific discoveries]
Target ~{summary_budget} tokens. Be specific — include file paths, command outputs, error messages, and concrete values rather than vague descriptions.
Write only the summary body. Do not include any preamble or prefix."""
@@ -719,7 +415,7 @@ PREVIOUS SUMMARY:
NEW TURNS TO INCORPORATE:
{content_to_summarize}
Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete. CRITICAL: Update "## Active Task" to reflect the user's most recent unfulfilled request — this is the most important field for task continuity.
Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new progress. Move items from "In Progress" to "Done" when completed. Move answered questions to "Resolved Questions". Remove information only if it is clearly obsolete.
{_template_sections}"""
else:
@@ -741,7 +437,7 @@ Use this exact structure:
prompt += f"""
FOCUS TOPIC: "{focus_topic}"
The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED]."""
The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget."""
try:
call_kwargs = {
@@ -754,7 +450,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
"api_mode": self.api_mode,
},
"messages": [{"role": "user", "content": prompt}],
"max_tokens": int(summary_budget * 1.3),
"max_tokens": summary_budget * 2,
# timeout resolved from auxiliary.compression.timeout config by call_llm
}
if self.summary_model:
@@ -764,16 +460,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Handle cases where content is not a string (e.g., dict from llama.cpp)
if not isinstance(content, str):
content = str(content) if content else ""
# Redact the summary output as well — the summarizer LLM may
# ignore prompt instructions and echo back secrets verbatim.
summary = redact_sensitive_text(content.strip())
summary = content.strip()
# Store for iterative updates on next compaction
self._previous_summary = summary
self._summary_failure_cooldown_until = 0.0
self._summary_model_fallen_back = False
return self._with_summary_prefix(summary)
except RuntimeError:
# No provider configured — long cooldown, unlikely to self-resolve
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
logging.warning("Context compression: no provider available for "
"summary. Middle turns will be dropped without summary "
@@ -781,42 +473,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
_SUMMARY_FAILURE_COOLDOWN_SECONDS)
return None
except Exception as e:
# If the summary model is different from the main model and the
# error looks permanent (model not found, 503, 404), fall back to
# using the main model instead of entering cooldown that leaves
# context growing unbounded. (#8620 sub-issue 4)
_status = getattr(e, "status_code", None) or getattr(getattr(e, "response", None), "status_code", None)
_err_str = str(e).lower()
_is_model_not_found = (
_status in (404, 503)
or "model_not_found" in _err_str
or "does not exist" in _err_str
or "no available channel" in _err_str
)
if (
_is_model_not_found
and self.summary_model
and self.summary_model != self.model
and not getattr(self, "_summary_model_fallen_back", False)
):
self._summary_model_fallen_back = True
logging.warning(
"Summary model '%s' not available (%s). "
"Falling back to main model '%s' for compression.",
self.summary_model, e, self.model,
)
self.summary_model = "" # empty = use main model
self._summary_failure_cooldown_until = 0.0 # no cooldown
return self._generate_summary(turns_to_summarize) # retry immediately
# Transient errors (timeout, rate limit, network) — shorter cooldown
_transient_cooldown = 60
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
logging.warning(
"Failed to generate context summary: %s. "
"Further summary attempts paused for %d seconds.",
e,
_transient_cooldown,
_SUMMARY_FAILURE_COOLDOWN_SECONDS,
)
return None
@@ -939,62 +601,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Tail protection by token budget
# ------------------------------------------------------------------
def _find_last_user_message_idx(
self, messages: List[Dict[str, Any]], head_end: int
) -> int:
"""Return the index of the last user-role message at or after *head_end*, or -1."""
for i in range(len(messages) - 1, head_end - 1, -1):
if messages[i].get("role") == "user":
return i
return -1
def _ensure_last_user_message_in_tail(
self,
messages: List[Dict[str, Any]],
cut_idx: int,
head_end: int,
) -> int:
"""Guarantee the most recent user message is in the protected tail.
Context compressor bug (#10896): ``_align_boundary_backward`` can pull
``cut_idx`` past a user message when it tries to keep tool_call/result
groups together. If the last user message ends up in the *compressed*
middle region the LLM summariser writes it into "Pending User Asks",
but ``SUMMARY_PREFIX`` tells the next model to respond only to user
messages *after* the summary — so the task effectively disappears from
the active context, causing the agent to stall, repeat completed work,
or silently drop the user's latest request.
Fix: if the last user-role message is not already in the tail
(``messages[cut_idx:]``), walk ``cut_idx`` back to include it. We
then re-align backward one more time to avoid splitting any
tool_call/result group that immediately precedes the user message.
"""
last_user_idx = self._find_last_user_message_idx(messages, head_end)
if last_user_idx < 0:
# No user message found beyond head — nothing to anchor.
return cut_idx
if last_user_idx >= cut_idx:
# Already in the tail; nothing to do.
return cut_idx
# The last user message is in the middle (compressed) region.
# Pull cut_idx back to it directly — a user message is already a
# clean boundary (no tool_call/result splitting risk), so there is no
# need to call _align_boundary_backward here; doing so would
# unnecessarily pull the cut further back into the preceding
# assistant + tool_calls group.
if not self.quiet_mode:
logger.debug(
"Anchoring tail cut to last user message at index %d "
"(was %d) to prevent active-task loss after compression",
last_user_idx,
cut_idx,
)
# Safety: never go back into the head region.
return max(last_user_idx, head_end + 1)
def _find_tail_cut_by_tokens(
self, messages: List[Dict[str, Any]], head_end: int,
token_budget: int | None = None,
@@ -1012,8 +618,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
read, etc.). If even the minimum 3 messages exceed 1.5x the budget
the cut is placed right after the head so compression still runs.
Never cuts inside a tool_call/result group. Always ensures the most
recent user message is in the tail (see ``_ensure_last_user_message_in_tail``).
Never cuts inside a tool_call/result group.
"""
if token_budget is None:
token_budget = self.tail_token_budget
@@ -1052,10 +657,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Align to avoid splitting tool groups
cut_idx = self._align_boundary_backward(messages, cut_idx)
# Ensure the most recent user message is always in the tail so the
# active task is never lost to compression (fixes #10896).
cut_idx = self._ensure_last_user_message_in_tail(messages, cut_idx, head_end)
return max(cut_idx, head_end + 1)
# ------------------------------------------------------------------
@@ -1143,11 +744,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
compressed = []
for i in range(compress_start):
msg = messages[i].copy()
if i == 0 and msg.get("role") == "system":
existing = msg.get("content") or ""
_compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
if _compression_note not in existing:
msg["content"] = existing + "\n\n" + _compression_note
if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
msg["content"] = (
(msg.get("content") or "")
+ "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
)
compressed.append(msg)
# If LLM summary failed, insert a static fallback so the model
@@ -1205,24 +806,14 @@ The user has requested that this compaction PRIORITISE preserving all informatio
compressed = self._sanitize_tool_pairs(compressed)
new_estimate = estimate_messages_tokens_rough(compressed)
saved_estimate = display_tokens - new_estimate
# Anti-thrashing: track compression effectiveness
savings_pct = (saved_estimate / display_tokens * 100) if display_tokens > 0 else 0
self._last_compression_savings_pct = savings_pct
if savings_pct < 10:
self._ineffective_compression_count += 1
else:
self._ineffective_compression_count = 0
if not self.quiet_mode:
new_estimate = estimate_messages_tokens_rough(compressed)
saved_estimate = display_tokens - new_estimate
logger.info(
"Compressed: %d -> %d messages (~%d tokens saved, %.0f%%)",
"Compressed: %d -> %d messages (~%d tokens saved)",
n_messages,
len(compressed),
saved_estimate,
savings_pct,
)
logger.info("Compression #%d complete", self.compression_count)
+3 -1
View File
@@ -483,7 +483,9 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
text=True,
timeout=10,
)
except (FileNotFoundError, OSError, subprocess.TimeoutExpired):
except FileNotFoundError:
return None
except subprocess.TimeoutExpired:
return None
if result.returncode != 0:
return None
+10 -44
View File
@@ -21,9 +21,6 @@ from pathlib import Path
from types import SimpleNamespace
from typing import Any
from agent.file_safety import get_read_block_error, is_write_denied
from agent.redact import redact_sensitive_text
ACP_MARKER_BASE_URL = "acp://copilot"
_DEFAULT_TIMEOUT_SECONDS = 900.0
@@ -57,18 +54,6 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
}
def _permission_denied(message_id: Any) -> dict[str, Any]:
return {
"jsonrpc": "2.0",
"id": message_id,
"result": {
"outcome": {
"outcome": "cancelled",
}
},
}
def _format_messages_as_prompt(
messages: list[dict[str, Any]],
model: str | None = None,
@@ -328,25 +313,9 @@ class CopilotACPClient:
tools=tools,
tool_choice=tool_choice,
)
# Normalise timeout: run_agent.py may pass an httpx.Timeout object
# (used natively by the OpenAI SDK) rather than a plain float.
if timeout is None:
_effective_timeout = _DEFAULT_TIMEOUT_SECONDS
elif isinstance(timeout, (int, float)):
_effective_timeout = float(timeout)
else:
# httpx.Timeout or similar — pick the largest component so the
# subprocess has enough wall-clock time for the full response.
_candidates = [
getattr(timeout, attr, None)
for attr in ("read", "write", "connect", "pool", "timeout")
]
_numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
_effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
response_text, reasoning_text = self._run_prompt(
prompt_text,
timeout_seconds=_effective_timeout,
timeout_seconds=float(timeout or _DEFAULT_TIMEOUT_SECONDS),
)
tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)
@@ -401,8 +370,6 @@ class CopilotACPClient:
stderr_tail: deque[str] = deque(maxlen=40)
def _stdout_reader() -> None:
if proc.stdout is None:
return
for line in proc.stdout:
try:
inbox.put(json.loads(line))
@@ -550,13 +517,18 @@ class CopilotACPClient:
params = msg.get("params") or {}
if method == "session/request_permission":
response = _permission_denied(message_id)
response = {
"jsonrpc": "2.0",
"id": message_id,
"result": {
"outcome": {
"outcome": "allow_once",
}
},
}
elif method == "fs/read_text_file":
try:
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
block_error = get_read_block_error(str(path))
if block_error:
raise PermissionError(block_error)
content = path.read_text() if path.exists() else ""
line = params.get("line")
limit = params.get("limit")
@@ -565,8 +537,6 @@ class CopilotACPClient:
start = line - 1
end = start + limit if isinstance(limit, int) and limit > 0 else None
content = "".join(lines[start:end])
if content:
content = redact_sensitive_text(content)
response = {
"jsonrpc": "2.0",
"id": message_id,
@@ -579,10 +549,6 @@ class CopilotACPClient:
elif method == "fs/write_text_file":
try:
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
if is_write_denied(str(path)):
raise PermissionError(
f"Write denied: '{path}' is a protected system/credential file."
)
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(str(params.get("content") or ""))
response = {
+180 -112
View File
@@ -22,6 +22,8 @@ from hermes_cli.auth import (
_auth_store_lock,
_codex_access_token_is_expiring,
_decode_jwt_claims,
_import_codex_cli_tokens,
_write_codex_cli_tokens,
_load_auth_store,
_load_provider_state,
_resolve_kimi_base_url,
@@ -455,6 +457,39 @@ class CredentialPool:
logger.debug("Failed to sync from credentials file: %s", exc)
return entry
def _sync_codex_entry_from_cli(self, entry: PooledCredential) -> PooledCredential:
"""Sync an openai-codex pool entry from ~/.codex/auth.json if tokens differ.
OpenAI OAuth refresh tokens are single-use and rotate on every refresh.
When the Codex CLI (or another Hermes profile) refreshes its token,
the pool entry's refresh_token becomes stale. This method detects that
by comparing against ~/.codex/auth.json and syncing the fresh pair.
"""
if self.provider != "openai-codex":
return entry
try:
cli_tokens = _import_codex_cli_tokens()
if not cli_tokens:
return entry
cli_refresh = cli_tokens.get("refresh_token", "")
cli_access = cli_tokens.get("access_token", "")
if cli_refresh and cli_refresh != entry.refresh_token:
logger.debug("Pool entry %s: syncing tokens from ~/.codex/auth.json (refresh token changed)", entry.id)
updated = replace(
entry,
access_token=cli_access,
refresh_token=cli_refresh,
last_status=None,
last_status_at=None,
last_error_code=None,
)
self._replace_entry(entry, updated)
self._persist()
return updated
except Exception as exc:
logger.debug("Failed to sync from ~/.codex/auth.json: %s", exc)
return entry
def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None:
"""Write refreshed pool entry tokens back to auth.json providers.
@@ -550,6 +585,13 @@ class CredentialPool:
except Exception as wexc:
logger.debug("Failed to write refreshed token to credentials file: %s", wexc)
elif self.provider == "openai-codex":
# Proactively sync from ~/.codex/auth.json before refresh.
# The Codex CLI (or another Hermes profile) may have already
# consumed our refresh_token. Syncing first avoids a
# "refresh_token_reused" error when the CLI has a newer pair.
synced = self._sync_codex_entry_from_cli(entry)
if synced is not entry:
entry = synced
refreshed = auth_mod.refresh_codex_oauth_pure(
entry.access_token,
entry.refresh_token,
@@ -635,6 +677,45 @@ class CredentialPool:
# Credentials file had a valid (non-expired) token — use it directly
logger.debug("Credentials file has valid token, using without refresh")
return synced
# For openai-codex: the refresh_token may have been consumed by
# the Codex CLI between our proactive sync and the refresh call.
# Re-sync and retry once.
if self.provider == "openai-codex":
synced = self._sync_codex_entry_from_cli(entry)
if synced.refresh_token != entry.refresh_token:
logger.debug("Retrying Codex refresh with synced token from ~/.codex/auth.json")
try:
refreshed = auth_mod.refresh_codex_oauth_pure(
synced.access_token,
synced.refresh_token,
)
updated = replace(
synced,
access_token=refreshed["access_token"],
refresh_token=refreshed["refresh_token"],
last_refresh=refreshed.get("last_refresh"),
last_status=STATUS_OK,
last_status_at=None,
last_error_code=None,
)
self._replace_entry(synced, updated)
self._persist()
self._sync_device_code_entry_to_auth_store(updated)
try:
_write_codex_cli_tokens(
updated.access_token,
updated.refresh_token,
last_refresh=updated.last_refresh,
)
except Exception as wexc:
logger.debug("Failed to write refreshed Codex tokens to CLI file (retry): %s", wexc)
return updated
except Exception as retry_exc:
logger.debug("Codex retry refresh also failed: %s", retry_exc)
elif not self._entry_needs_refresh(synced):
logger.debug("Codex CLI has valid token, using without refresh")
self._sync_device_code_entry_to_auth_store(synced)
return synced
self._mark_exhausted(entry, None)
return None
@@ -653,6 +734,17 @@ class CredentialPool:
# _seed_from_singletons() on the next load_pool() sees fresh state
# instead of re-seeding stale/consumed tokens.
self._sync_device_code_entry_to_auth_store(updated)
# Write refreshed tokens back to ~/.codex/auth.json so Codex CLI
# and VS Code don't hit "refresh_token_reused" on their next refresh.
if self.provider == "openai-codex":
try:
_write_codex_cli_tokens(
updated.access_token,
updated.refresh_token,
last_refresh=updated.last_refresh,
)
except Exception as wexc:
logger.debug("Failed to write refreshed Codex tokens to CLI file: %s", wexc)
return updated
def _entry_needs_refresh(self, entry: PooledCredential) -> bool:
@@ -698,6 +790,16 @@ class CredentialPool:
if synced is not entry:
entry = synced
cleared_any = True
# For openai-codex entries, sync from ~/.codex/auth.json before
# any status/refresh checks. This picks up tokens refreshed by
# the Codex CLI or another Hermes profile.
if (self.provider == "openai-codex"
and entry.last_status == STATUS_EXHAUSTED
and entry.refresh_token):
synced = self._sync_codex_entry_from_cli(entry)
if synced is not entry:
entry = synced
cleared_any = True
if entry.last_status == STATUS_EXHAUSTED:
exhausted_until = _exhausted_until(entry)
if exhausted_until is not None and now < exhausted_until:
@@ -983,14 +1085,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
active_sources: Set[str] = set()
auth_store = _load_auth_store()
# Shared suppression gate — used at every upsert site so
# `hermes auth remove <provider> <N>` is stable across all source types.
try:
from hermes_cli.auth import is_source_suppressed as _is_suppressed
except ImportError:
def _is_suppressed(_p, _s): # type: ignore[misc]
return False
if provider == "anthropic":
# Only auto-discover external credentials (Claude Code, Hermes PKCE)
# when the user has explicitly configured anthropic as their provider.
@@ -1010,8 +1104,13 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
("claude_code", read_claude_code_credentials()),
):
if creds and creds.get("accessToken"):
if _is_suppressed(provider, source_name):
continue
# Check if user explicitly removed this source
try:
from hermes_cli.auth import is_source_suppressed
if is_source_suppressed(provider, source_name):
continue
except ImportError:
pass
active_sources.add(source_name)
changed |= _upsert_entry(
entries,
@@ -1029,16 +1128,8 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
elif provider == "nous":
state = _load_provider_state(auth_store, "nous")
if state and not _is_suppressed(provider, "device_code"):
if state:
active_sources.add("device_code")
# Prefer a user-supplied label embedded in the singleton state
# (set by persist_nous_credentials(label=...) when the user ran
# `hermes auth add nous --label <name>`). Fall back to the
# auto-derived token fingerprint for logins that didn't supply one.
custom_label = str(state.get("label") or "").strip()
seeded_label = custom_label or label_from_token(
state.get("access_token", ""), "device_code"
)
changed |= _upsert_entry(
entries,
provider,
@@ -1057,7 +1148,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
"agent_key": state.get("agent_key"),
"agent_key_expires_at": state.get("agent_key_expires_at"),
"tls": state.get("tls") if isinstance(state.get("tls"), dict) else None,
"label": seeded_label,
"label": label_from_token(state.get("access_token", ""), "device_code"),
},
)
@@ -1070,21 +1161,18 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
token, source = resolve_copilot_token()
if token:
source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
if not _is_suppressed(provider, source_name):
active_sources.add(source_name)
pconfig = PROVIDER_REGISTRY.get(provider)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": token,
"base_url": pconfig.inference_base_url if pconfig else "",
"label": source,
},
)
active_sources.add(source_name)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": token,
"label": source,
},
)
except Exception as exc:
logger.debug("Copilot token seed failed: %s", exc)
@@ -1100,40 +1188,43 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
token = creds.get("api_key", "")
if token:
source_name = creds.get("source", "qwen-cli")
if not _is_suppressed(provider, source_name):
active_sources.add(source_name)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_OAUTH,
"access_token": token,
"expires_at_ms": creds.get("expires_at_ms"),
"base_url": creds.get("base_url", ""),
"label": creds.get("auth_file", source_name),
},
)
active_sources.add(source_name)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_OAUTH,
"access_token": token,
"expires_at_ms": creds.get("expires_at_ms"),
"base_url": creds.get("base_url", ""),
"label": creds.get("auth_file", source_name),
},
)
except Exception as exc:
logger.debug("Qwen OAuth token seed failed: %s", exc)
elif provider == "openai-codex":
# Respect user suppression — `hermes auth remove openai-codex` marks
# the device_code source as suppressed so it won't be re-seeded from
# the Hermes auth store. Without this gate the removal is instantly
# undone on the next load_pool() call.
if _is_suppressed(provider, "device_code"):
return changed, active_sources
state = _load_provider_state(auth_store, "openai-codex")
tokens = state.get("tokens") if isinstance(state, dict) else None
# Hermes owns its own Codex auth state — we do NOT auto-import from
# ~/.codex/auth.json at pool-load time. OAuth refresh tokens are
# single-use, so sharing them with Codex CLI / VS Code causes
# refresh_token_reused race failures. Users who want to adopt
# existing Codex CLI credentials get a one-time, explicit prompt
# via `hermes auth openai-codex`.
# Fallback: import from Codex CLI (~/.codex/auth.json) if Hermes auth
# store has no tokens. This mirrors resolve_codex_runtime_credentials()
# so that load_pool() and list_authenticated_providers() detect tokens
# that only exist in the Codex CLI shared file.
if not (isinstance(tokens, dict) and tokens.get("access_token")):
try:
from hermes_cli.auth import _import_codex_cli_tokens, _save_codex_tokens
cli_tokens = _import_codex_cli_tokens()
if cli_tokens:
logger.info("Importing Codex CLI tokens into Hermes auth store.")
_save_codex_tokens(cli_tokens)
# Re-read state after import
auth_store = _load_auth_store()
state = _load_provider_state(auth_store, "openai-codex")
tokens = state.get("tokens") if isinstance(state, dict) else None
except Exception as exc:
logger.debug("Codex CLI token import failed: %s", exc)
if isinstance(tokens, dict) and tokens.get("access_token"):
active_sources.add("device_code")
changed |= _upsert_entry(
@@ -1157,22 +1248,10 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
changed = False
active_sources: Set[str] = set()
# Honour user suppression — `hermes auth remove <provider> <N>` for an
# env-seeded credential marks the env:<VAR> source as suppressed so it
# won't be re-seeded from the user's shell environment or ~/.hermes/.env.
# Without this gate the removal is silently undone on the next
# load_pool() call whenever the var is still exported by the shell.
try:
from hermes_cli.auth import is_source_suppressed as _is_source_suppressed
except ImportError:
def _is_source_suppressed(_p, _s): # type: ignore[misc]
return False
if provider == "openrouter":
token = os.getenv("OPENROUTER_API_KEY", "").strip()
if token:
source = "env:OPENROUTER_API_KEY"
if _is_source_suppressed(provider, source):
return changed, active_sources
active_sources.add(source)
changed |= _upsert_entry(
entries,
@@ -1209,8 +1288,6 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
if not token:
continue
source = f"env:{env_var}"
if _is_source_suppressed(provider, source):
continue
active_sources.add(source)
auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
base_url = env_url or pconfig.inference_base_url
@@ -1255,13 +1332,6 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
changed = False
active_sources: Set[str] = set()
# Shared suppression gate — same pattern as _seed_from_env/_seed_from_singletons.
try:
from hermes_cli.auth import is_source_suppressed as _is_suppressed
except ImportError:
def _is_suppressed(_p, _s): # type: ignore[misc]
return False
# Seed from the custom_providers config entry's api_key field
cp_config = _get_custom_provider_config(pool_key)
if cp_config:
@@ -1270,20 +1340,19 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
name = str(cp_config.get("name") or "").strip()
if api_key:
source = f"config:{name}"
if not _is_suppressed(pool_key, source):
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": api_key,
"base_url": base_url,
"label": name or source,
},
)
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": api_key,
"base_url": base_url,
"label": name or source,
},
)
# Seed from model.api_key if model.provider=='custom' and model.base_url matches
try:
@@ -1303,20 +1372,19 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
matched_key = get_custom_provider_pool_key(model_base_url)
if matched_key == pool_key:
source = "model_config"
if not _is_suppressed(pool_key, source):
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": model_api_key,
"base_url": model_base_url,
"label": "model_config",
},
)
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": model_api_key,
"base_url": model_base_url,
"label": "model_config",
},
)
except Exception:
pass
-401
View File
@@ -1,401 +0,0 @@
"""Unified removal contract for every credential source Hermes reads from.
Hermes seeds its credential pool from many places:
env:<VAR> os.environ / ~/.hermes/.env
claude_code ~/.claude/.credentials.json
hermes_pkce ~/.hermes/.anthropic_oauth.json
device_code auth.json providers.<provider> (nous, openai-codex, ...)
qwen-cli ~/.qwen/oauth_creds.json
gh_cli gh auth token
config:<name> custom_providers config entry
model_config model.api_key when model.provider == "custom"
manual user ran `hermes auth add`
Each source has its own reader inside ``agent.credential_pool._seed_from_*``
(which keep their existing shape we haven't restructured them). What we
unify here is **removal**:
``hermes auth remove <provider> <N>`` must make the pool entry stay gone.
Before this module, every source had an ad-hoc removal branch in
``auth_remove_command``, and several sources had no branch at all so
``auth remove`` silently reverted on the next ``load_pool()`` call for
qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and
custom-config sources.
Now every source registers a ``RemovalStep`` that does exactly three things
in the same shape:
1. Clean up whatever externally-readable state the source reads from
(.env line, auth.json block, OAuth file, etc.)
2. Suppress the ``(provider, source_id)`` in auth.json so the
corresponding ``_seed_from_*`` branch skips the upsert on re-load
3. Return ``RemovalResult`` describing what was cleaned and any
diagnostic hints the user should see (shell-exported env vars,
external credential files we deliberately don't delete, etc.)
Adding a new credential source is:
- wire up a reader branch in ``_seed_from_*`` (existing pattern)
- gate that reader behind ``is_source_suppressed(provider, source_id)``
- register a ``RemovalStep`` here
No more per-source if/elif chain in ``auth_remove_command``.
"""
from __future__ import annotations
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable, List, Optional
@dataclass
class RemovalResult:
"""Outcome of removing a credential source.
Attributes:
cleaned: Short strings describing external state that was actually
mutated (``"Cleared XAI_API_KEY from .env"``,
``"Cleared openai-codex OAuth tokens from auth store"``).
Printed as plain lines to the user.
hints: Diagnostic lines ABOUT state the user may need to clean up
themselves or is deliberately left intact (shell-exported env
var, Claude Code credential file we don't delete, etc.).
Printed as plain lines to the user. Always non-destructive.
suppress: Whether to call ``suppress_credential_source`` after
cleanup so future ``load_pool`` calls skip this source.
Default True almost every source needs this to stay sticky.
The only legitimate False is ``manual`` entries, which aren't
seeded from anywhere external.
"""
cleaned: List[str] = field(default_factory=list)
hints: List[str] = field(default_factory=list)
suppress: bool = True
@dataclass
class RemovalStep:
"""How to remove one specific credential source cleanly.
Attributes:
provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...).
Special value ``"*"`` means "matches any provider" used for
sources like ``manual`` that aren't provider-specific.
source_id: Source identifier as it appears in
``PooledCredential.source``. May be a literal (``"claude_code"``)
or a prefix pattern matched via ``match_fn``.
match_fn: Optional predicate overriding literal ``source_id``
matching. Gets the removed entry's source string. Used for
``env:*`` (any env-seeded key), ``config:*`` (any custom
pool), and ``manual:*`` (any manual-source variant).
remove_fn: ``(provider, removed_entry) -> RemovalResult``. Does the
actual cleanup and returns what happened for the user.
description: One-line human-readable description for docs / tests.
"""
provider: str
source_id: str
remove_fn: Callable[..., RemovalResult]
match_fn: Optional[Callable[[str], bool]] = None
description: str = ""
def matches(self, provider: str, source: str) -> bool:
if self.provider != "*" and self.provider != provider:
return False
if self.match_fn is not None:
return self.match_fn(source)
return source == self.source_id
_REGISTRY: List[RemovalStep] = []
def register(step: RemovalStep) -> RemovalStep:
_REGISTRY.append(step)
return step
def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]:
"""Return the first matching RemovalStep, or None if unregistered.
Unregistered sources fall through to the default remove path in
``auth_remove_command``: the pool entry is already gone (that happens
before dispatch), no external cleanup, no suppression. This is the
correct behaviour for ``manual`` entries they were only ever stored
in the pool, nothing external to clean up.
"""
for step in _REGISTRY:
if step.matches(provider, source):
return step
return None
# ---------------------------------------------------------------------------
# Individual RemovalStep implementations — one per source.
# ---------------------------------------------------------------------------
# Each remove_fn is intentionally small and single-purpose. Adding a new
# credential source means adding ONE entry here — no other changes to
# auth_remove_command.
def _remove_env_source(provider: str, removed) -> RemovalResult:
"""env:<VAR> — the most common case.
Handles three user situations:
1. Var lives only in ~/.hermes/.env clear it
2. Var lives only in the user's shell (shell profile, systemd
EnvironmentFile, launchd plist) hint them where to unset it
3. Var lives in both clear from .env, hint about shell
"""
from hermes_cli.config import get_env_path, remove_env_value
result = RemovalResult()
env_var = removed.source[len("env:"):]
if not env_var:
return result
# Detect shell vs .env BEFORE remove_env_value pops os.environ.
env_in_process = bool(os.getenv(env_var))
env_in_dotenv = False
try:
env_path = get_env_path()
if env_path.exists():
env_in_dotenv = any(
line.strip().startswith(f"{env_var}=")
for line in env_path.read_text(errors="replace").splitlines()
)
except OSError:
pass
shell_exported = env_in_process and not env_in_dotenv
cleared = remove_env_value(env_var)
if cleared:
result.cleaned.append(f"Cleared {env_var} from .env")
if shell_exported:
result.hints.extend([
f"Note: {env_var} is still set in your shell environment "
f"(not in ~/.hermes/.env).",
" Unset it there (shell profile, systemd EnvironmentFile, "
"launchd plist, etc.) or it will keep being visible to Hermes.",
f" The pool entry is now suppressed — Hermes will ignore "
f"{env_var} until you run `hermes auth add {provider}`.",
])
else:
result.hints.append(
f"Suppressed env:{env_var} — it will not be re-seeded even "
f"if the variable is re-exported later."
)
return result
def _remove_claude_code(provider: str, removed) -> RemovalResult:
"""~/.claude/.credentials.json is owned by Claude Code itself.
We don't delete it — the user's Claude Code install still needs to
work. We just suppress it so Hermes stops reading it.
"""
return RemovalResult(hints=[
"Suppressed claude_code credential — it will not be re-seeded.",
"Note: Claude Code credentials still live in ~/.claude/.credentials.json",
"Run `hermes auth add anthropic` to re-enable if needed.",
])
def _remove_hermes_pkce(provider: str, removed) -> RemovalResult:
"""~/.hermes/.anthropic_oauth.json is ours — delete it outright."""
from hermes_constants import get_hermes_home
result = RemovalResult()
oauth_file = get_hermes_home() / ".anthropic_oauth.json"
if oauth_file.exists():
try:
oauth_file.unlink()
result.cleaned.append("Cleared Hermes Anthropic OAuth credentials")
except OSError as exc:
result.hints.append(f"Could not delete {oauth_file}: {exc}")
return result
def _clear_auth_store_provider(provider: str) -> bool:
"""Delete auth_store.providers[provider]. Returns True if deleted."""
from hermes_cli.auth import (
_auth_store_lock,
_load_auth_store,
_save_auth_store,
)
with _auth_store_lock():
auth_store = _load_auth_store()
providers_dict = auth_store.get("providers")
if isinstance(providers_dict, dict) and provider in providers_dict:
del providers_dict[provider]
_save_auth_store(auth_store)
return True
return False
def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
"""Nous OAuth lives in auth.json providers.nous — clear it and suppress.
We suppress in addition to clearing because nothing else stops the
user's next `hermes login` run from writing providers.nous again
before they decide to. Suppression forces them to go through
`hermes auth add nous` to re-engage, which is the documented re-add
path and clears the suppression atomically.
"""
result = RemovalResult()
if _clear_auth_store_provider(provider):
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
return result
def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
"""Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
refresh_codex_oauth_pure() writes both every time, so clearing only
the Hermes auth store is not enough _seed_from_singletons() would
re-import from ~/.codex/auth.json on the next load_pool() call and
the removal would be instantly undone. We suppress instead of
deleting Codex CLI's file, so the Codex CLI itself keeps working.
The canonical source name in ``_seed_from_singletons`` is
``"device_code"`` (no prefix). Entries may show up in the pool as
either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added
via ``hermes auth add openai-codex``), but in both cases the re-seed
gate lives at the ``"device_code"`` suppression key. We suppress
that canonical key here; the central dispatcher also suppresses
``removed.source`` which is fine belt-and-suspenders, idempotent.
"""
from hermes_cli.auth import suppress_credential_source
result = RemovalResult()
if _clear_auth_store_provider(provider):
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
# Suppress the canonical re-seed source, not just whatever source the
# removed entry had. Otherwise `manual:device_code` removals wouldn't
# block the `device_code` re-seed path.
suppress_credential_source(provider, "device_code")
result.hints.extend([
"Suppressed openai-codex device_code source — it will not be re-seeded.",
"Note: Codex CLI credentials still live in ~/.codex/auth.json",
"Run `hermes auth add openai-codex` to re-enable if needed.",
])
return result
def _remove_qwen_cli(provider: str, removed) -> RemovalResult:
"""~/.qwen/oauth_creds.json is owned by the Qwen CLI.
Same pattern as claude_code suppress, don't delete. The user's
Qwen CLI install still reads from that file.
"""
return RemovalResult(hints=[
"Suppressed qwen-cli credential — it will not be re-seeded.",
"Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json",
"Run `hermes auth add qwen-oauth` to re-enable if needed.",
])
def _remove_copilot_gh(provider: str, removed) -> RemovalResult:
"""Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN.
Copilot is special: the same token can be seeded as multiple source
entries (gh_cli from ``_seed_from_singletons`` plus env:<VAR> from
``_seed_from_env``), so removing one entry without suppressing the
others lets the duplicates resurrect. We suppress ALL known copilot
sources here so removal is stable regardless of which entry the
user clicked.
We don't touch the user's gh CLI or shell state just suppress so
Hermes stops picking the token up.
"""
# Suppress ALL copilot source variants up-front so no path resurrects
# the pool entry. The central dispatcher in auth_remove_command will
# ALSO suppress removed.source, but it's idempotent so double-calling
# is harmless.
from hermes_cli.auth import suppress_credential_source
suppress_credential_source(provider, "gh_cli")
for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"):
suppress_credential_source(provider, f"env:{env_var}")
return RemovalResult(hints=[
"Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.",
"Note: Your gh CLI / shell environment is unchanged.",
"Run `hermes auth add copilot` to re-enable if needed.",
])
def _remove_custom_config(provider: str, removed) -> RemovalResult:
"""Custom provider pools are seeded from custom_providers config or
model.api_key. Both are in config.yaml modifying that from here
is more invasive than suppression. We suppress; the user can edit
config.yaml if they want to remove the key from disk entirely.
"""
source_label = removed.source
return RemovalResult(hints=[
f"Suppressed {source_label} — it will not be re-seeded.",
"Note: The underlying value in config.yaml is unchanged. Edit it "
"directly if you want to remove the credential from disk.",
])
def _register_all_sources() -> None:
"""Called once on module import.
ORDER MATTERS ``find_removal_step`` returns the first match. Put
provider-specific steps before the generic ``env:*`` step so that e.g.
copilot's ``env:GH_TOKEN`` goes through the copilot removal (which
doesn't touch the user's shell), not the generic env-var removal
(which would try to clear .env).
"""
register(RemovalStep(
provider="copilot", source_id="gh_cli",
match_fn=lambda src: src == "gh_cli" or src.startswith("env:"),
remove_fn=_remove_copilot_gh,
description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
))
register(RemovalStep(
provider="*", source_id="env:",
match_fn=lambda src: src.startswith("env:"),
remove_fn=_remove_env_source,
description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
))
register(RemovalStep(
provider="anthropic", source_id="claude_code",
remove_fn=_remove_claude_code,
description="~/.claude/.credentials.json",
))
register(RemovalStep(
provider="anthropic", source_id="hermes_pkce",
remove_fn=_remove_hermes_pkce,
description="~/.hermes/.anthropic_oauth.json",
))
register(RemovalStep(
provider="nous", source_id="device_code",
remove_fn=_remove_nous_device_code,
description="auth.json providers.nous",
))
register(RemovalStep(
provider="openai-codex", source_id="device_code",
match_fn=lambda src: src == "device_code" or src.endswith(":device_code"),
remove_fn=_remove_codex_device_code,
description="auth.json providers.openai-codex + ~/.codex/auth.json",
))
register(RemovalStep(
provider="qwen-oauth", source_id="qwen-cli",
remove_fn=_remove_qwen_cli,
description="~/.qwen/oauth_creds.json",
))
register(RemovalStep(
provider="*", source_id="config:",
match_fn=lambda src: src.startswith("config:") or src == "model_config",
remove_fn=_remove_custom_config,
description="Custom provider config.yaml api_key field",
))
_register_all_sources()
+84 -49
View File
@@ -225,11 +225,9 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
content = _oneline(args.get("content", ""))
return f"+{target}: \"{content[:25]}{'...' if len(content) > 25 else ''}\""
elif action == "replace":
old = _oneline(args.get("old_text") or "") or "<missing old_text>"
return f"~{target}: \"{old[:20]}\""
return f"~{target}: \"{_oneline(args.get('old_text', '')[:20])}\""
elif action == "remove":
old = _oneline(args.get("old_text") or "") or "<missing old_text>"
return f"-{target}: \"{old[:20]}\""
return f"-{target}: \"{_oneline(args.get('old_text', '')[:20])}\""
return action
if tool_name == "send_message":
@@ -602,45 +600,6 @@ class KawaiiSpinner:
"analyzing", "computing", "synthesizing", "formulating", "brainstorming",
]
@classmethod
def get_waiting_faces(cls) -> list:
"""Return waiting faces from the active skin, falling back to KAWAII_WAITING."""
try:
skin = _get_skin()
if skin:
faces = skin.spinner.get("waiting_faces", [])
if faces:
return faces
except Exception:
pass
return cls.KAWAII_WAITING
@classmethod
def get_thinking_faces(cls) -> list:
"""Return thinking faces from the active skin, falling back to KAWAII_THINKING."""
try:
skin = _get_skin()
if skin:
faces = skin.spinner.get("thinking_faces", [])
if faces:
return faces
except Exception:
pass
return cls.KAWAII_THINKING
@classmethod
def get_thinking_verbs(cls) -> list:
"""Return thinking verbs from the active skin, falling back to THINKING_VERBS."""
try:
skin = _get_skin()
if skin:
verbs = skin.spinner.get("thinking_verbs", [])
if verbs:
return verbs
except Exception:
pass
return cls.THINKING_VERBS
def __init__(self, message: str = "", spinner_type: str = 'dots', print_fn=None):
self.message = message
self.spinner_frames = self.SPINNERS.get(spinner_type, self.SPINNERS['dots'])
@@ -941,13 +900,9 @@ def get_cute_tool_message(
if action == "add":
return _wrap(f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}")
elif action == "replace":
old = args.get("old_text") or ""
old = old if old else "<missing old_text>"
return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(old, 20)}\" {dur}")
return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
elif action == "remove":
old = args.get("old_text") or ""
old = old if old else "<missing old_text>"
return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(old, 20)}\" {dur}")
return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
return _wrap(f"┊ 🧠 memory {action} {dur}")
if tool_name == "skills_list":
return _wrap(f"┊ 📚 skills list {args.get('category', 'all')} {dur}")
@@ -999,4 +954,84 @@ def get_cute_tool_message(
# Honcho session line (one-liner with clickable OSC 8 hyperlink)
# =========================================================================
_DIM = "\033[2m"
_SKY_BLUE = "\033[38;5;117m"
_ANSI_RESET = "\033[0m"
# =========================================================================
# Context pressure display (CLI user-facing warnings)
# =========================================================================
# ANSI color codes for context pressure tiers
_CYAN = "\033[36m"
_YELLOW = "\033[33m"
_BOLD = "\033[1m"
_DIM_ANSI = "\033[2m"
# Bar characters
_BAR_FILLED = ""
_BAR_EMPTY = ""
_BAR_WIDTH = 20
def format_context_pressure(
compaction_progress: float,
threshold_tokens: int,
threshold_percent: float,
compression_enabled: bool = True,
) -> str:
"""Build a formatted context pressure line for CLI display.
The bar and percentage show progress toward the compaction threshold,
NOT the raw context window. 100% = compaction fires.
Args:
compaction_progress: How close to compaction (0.01.0, 1.0 = fires).
threshold_tokens: Compaction threshold in tokens.
threshold_percent: Compaction threshold as a fraction of context window.
compression_enabled: Whether auto-compression is active.
"""
pct_int = min(int(compaction_progress * 100), 100)
filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
threshold_k = f"{threshold_tokens // 1000}k" if threshold_tokens >= 1000 else str(threshold_tokens)
threshold_pct_int = int(threshold_percent * 100)
color = f"{_BOLD}{_YELLOW}"
icon = ""
if compression_enabled:
hint = "compaction approaching"
else:
hint = "no auto-compaction"
return (
f" {color}{icon} context {bar} {pct_int}% to compaction{_ANSI_RESET}"
f" {_DIM_ANSI}{threshold_k} threshold ({threshold_pct_int}%) · {hint}{_ANSI_RESET}"
)
def format_context_pressure_gateway(
compaction_progress: float,
threshold_percent: float,
compression_enabled: bool = True,
) -> str:
"""Build a plain-text context pressure notification for messaging platforms.
No ANSI just Unicode and plain text suitable for Telegram/Discord/etc.
The percentage shows progress toward the compaction threshold.
"""
pct_int = min(int(compaction_progress * 100), 100)
filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
threshold_pct_int = int(threshold_percent * 100)
icon = "⚠️"
if compression_enabled:
hint = f"Context compaction approaching (threshold: {threshold_pct_int}% of window)."
else:
hint = "Auto-compaction is disabled — context may be truncated."
return f"{icon} Context: {bar} {pct_int}% to compaction\n{hint}"
+5 -14
View File
@@ -112,10 +112,6 @@ _RATE_LIMIT_PATTERNS = [
"please retry after",
"resource_exhausted",
"rate increased too quickly", # Alibaba/DashScope throttling
# AWS Bedrock throttling
"throttlingexception",
"too many concurrent requests",
"servicequotaexceededexception",
]
# Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
@@ -175,11 +171,6 @@ _CONTEXT_OVERFLOW_PATTERNS = [
# Chinese error messages (some providers return these)
"超过最大长度",
"上下文长度",
# AWS Bedrock Converse API error patterns
"input is too long",
"max input token",
"input token",
"exceeds the maximum number of input tokens",
]
# Model not found patterns
@@ -290,7 +281,7 @@ def classify_api_error(
if isinstance(body, dict):
_err_obj = body.get("error", {})
if isinstance(_err_obj, dict):
_body_msg = str(_err_obj.get("message") or "").lower()
_body_msg = (_err_obj.get("message") or "").lower()
# Parse metadata.raw for wrapped provider errors
_metadata = _err_obj.get("metadata", {})
if isinstance(_metadata, dict):
@@ -302,11 +293,11 @@ def classify_api_error(
if isinstance(_inner, dict):
_inner_err = _inner.get("error", {})
if isinstance(_inner_err, dict):
_metadata_msg = str(_inner_err.get("message") or "").lower()
_metadata_msg = (_inner_err.get("message") or "").lower()
except (json.JSONDecodeError, TypeError):
pass
if not _body_msg:
_body_msg = str(body.get("message") or "").lower()
_body_msg = (body.get("message") or "").lower()
# Combine all message sources for pattern matching
parts = [_raw_msg]
if _body_msg and _body_msg not in _raw_msg:
@@ -606,10 +597,10 @@ def _classify_400(
if isinstance(body, dict):
err_obj = body.get("error", {})
if isinstance(err_obj, dict):
err_body_msg = str(err_obj.get("message") or "").strip().lower()
err_body_msg = (err_obj.get("message") or "").strip().lower()
# Responses API (and some providers) use flat body: {"message": "..."}
if not err_body_msg:
err_body_msg = str(body.get("message") or "").strip().lower()
err_body_msg = (body.get("message") or "").strip().lower()
is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "")
is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80
-111
View File
@@ -1,111 +0,0 @@
"""Shared file safety rules used by both tools and ACP shims."""
from __future__ import annotations
import os
from pathlib import Path
from typing import Optional
def _hermes_home_path() -> Path:
"""Resolve the active HERMES_HOME (profile-aware) without circular imports."""
try:
from hermes_constants import get_hermes_home # local import to avoid cycles
return get_hermes_home()
except Exception:
return Path(os.path.expanduser("~/.hermes"))
def build_write_denied_paths(home: str) -> set[str]:
"""Return exact sensitive paths that must never be written."""
hermes_home = _hermes_home_path()
return {
os.path.realpath(p)
for p in [
os.path.join(home, ".ssh", "authorized_keys"),
os.path.join(home, ".ssh", "id_rsa"),
os.path.join(home, ".ssh", "id_ed25519"),
os.path.join(home, ".ssh", "config"),
str(hermes_home / ".env"),
os.path.join(home, ".bashrc"),
os.path.join(home, ".zshrc"),
os.path.join(home, ".profile"),
os.path.join(home, ".bash_profile"),
os.path.join(home, ".zprofile"),
os.path.join(home, ".netrc"),
os.path.join(home, ".pgpass"),
os.path.join(home, ".npmrc"),
os.path.join(home, ".pypirc"),
"/etc/sudoers",
"/etc/passwd",
"/etc/shadow",
]
}
def build_write_denied_prefixes(home: str) -> list[str]:
"""Return sensitive directory prefixes that must never be written."""
return [
os.path.realpath(p) + os.sep
for p in [
os.path.join(home, ".ssh"),
os.path.join(home, ".aws"),
os.path.join(home, ".gnupg"),
os.path.join(home, ".kube"),
"/etc/sudoers.d",
"/etc/systemd",
os.path.join(home, ".docker"),
os.path.join(home, ".azure"),
os.path.join(home, ".config", "gh"),
]
]
def get_safe_write_root() -> Optional[str]:
"""Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset."""
root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
if not root:
return None
try:
return os.path.realpath(os.path.expanduser(root))
except Exception:
return None
def is_write_denied(path: str) -> bool:
"""Return True if path is blocked by the write denylist or safe root."""
home = os.path.realpath(os.path.expanduser("~"))
resolved = os.path.realpath(os.path.expanduser(str(path)))
if resolved in build_write_denied_paths(home):
return True
for prefix in build_write_denied_prefixes(home):
if resolved.startswith(prefix):
return True
safe_root = get_safe_write_root()
if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
return True
return False
def get_read_block_error(path: str) -> Optional[str]:
"""Return an error message when a read targets internal Hermes cache files."""
resolved = Path(path).expanduser().resolve()
hermes_home = _hermes_home_path().resolve()
blocked_dirs = [
hermes_home / "skills" / ".hub" / "index-cache",
hermes_home / "skills" / ".hub",
]
for blocked in blocked_dirs:
try:
resolved.relative_to(blocked)
except ValueError:
continue
return (
f"Access denied: {path} is an internal Hermes cache file "
"and cannot be read directly to prevent prompt injection. "
"Use the skills_list or skill_view tools instead."
)
return None
-905
View File
@@ -1,905 +0,0 @@
"""OpenAI-compatible facade that talks to Google's Cloud Code Assist backend.
This adapter lets Hermes use the ``google-gemini-cli`` provider as if it were
a standard OpenAI-shaped chat completion endpoint, while the underlying HTTP
traffic goes to ``cloudcode-pa.googleapis.com/v1internal:{generateContent,
streamGenerateContent}`` with a Bearer access token obtained via OAuth PKCE.
Architecture
------------
- ``GeminiCloudCodeClient`` exposes ``.chat.completions.create(**kwargs)``
mirroring the subset of the OpenAI SDK that ``run_agent.py`` uses.
- Incoming OpenAI ``messages[]`` / ``tools[]`` / ``tool_choice`` are translated
to Gemini's native ``contents[]`` / ``tools[].functionDeclarations`` /
``toolConfig`` / ``systemInstruction`` shape.
- The request body is wrapped ``{project, model, user_prompt_id, request}``
per Code Assist API expectations.
- Responses (``candidates[].content.parts[]``) are converted back to
OpenAI ``choices[0].message`` shape with ``content`` + ``tool_calls``.
- Streaming uses SSE (``?alt=sse``) and yields OpenAI-shaped delta chunks.
Attribution
-----------
Translation semantics follow jenslys/opencode-gemini-auth (MIT) and the public
Gemini API docs. Request envelope shape
(``{project, model, user_prompt_id, request}``) is documented nowhere; it is
reverse-engineered from the opencode-gemini-auth and clawdbot implementations.
"""
from __future__ import annotations
import json
import logging
import os
import time
import uuid
from types import SimpleNamespace
from typing import Any, Dict, Iterator, List, Optional
import httpx
from agent import google_oauth
from agent.gemini_schema import sanitize_gemini_tool_parameters
from agent.google_code_assist import (
CODE_ASSIST_ENDPOINT,
FREE_TIER_ID,
CodeAssistError,
ProjectContext,
resolve_project_context,
)
logger = logging.getLogger(__name__)
# =============================================================================
# Request translation: OpenAI → Gemini
# =============================================================================
_ROLE_MAP_OPENAI_TO_GEMINI = {
"user": "user",
"assistant": "model",
"system": "user", # handled separately via systemInstruction
"tool": "user", # functionResponse is wrapped in a user-role turn
"function": "user",
}
def _coerce_content_to_text(content: Any) -> str:
"""OpenAI content may be str or a list of parts; reduce to plain text."""
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
pieces: List[str] = []
for p in content:
if isinstance(p, str):
pieces.append(p)
elif isinstance(p, dict):
if p.get("type") == "text" and isinstance(p.get("text"), str):
pieces.append(p["text"])
# Multimodal (image_url, etc.) — stub for now; log and skip
elif p.get("type") in ("image_url", "input_audio"):
logger.debug("Dropping multimodal part (not yet supported): %s", p.get("type"))
return "\n".join(pieces)
return str(content)
def _translate_tool_call_to_gemini(tool_call: Dict[str, Any]) -> Dict[str, Any]:
"""OpenAI tool_call -> Gemini functionCall part."""
fn = tool_call.get("function") or {}
args_raw = fn.get("arguments", "")
try:
args = json.loads(args_raw) if isinstance(args_raw, str) and args_raw else {}
except json.JSONDecodeError:
args = {"_raw": args_raw}
if not isinstance(args, dict):
args = {"_value": args}
return {
"functionCall": {
"name": fn.get("name") or "",
"args": args,
},
# Sentinel signature — matches opencode-gemini-auth's approach.
# Without this, Code Assist rejects function calls that originated
# outside its own chain.
"thoughtSignature": "skip_thought_signature_validator",
}
def _translate_tool_result_to_gemini(message: Dict[str, Any]) -> Dict[str, Any]:
"""OpenAI tool-role message -> Gemini functionResponse part.
The function name isn't in the OpenAI tool message directly; it must be
passed via the assistant message that issued the call. For simplicity we
look up ``name`` on the message (OpenAI SDK copies it there) or on the
``tool_call_id`` cross-reference.
"""
name = str(message.get("name") or message.get("tool_call_id") or "tool")
content = _coerce_content_to_text(message.get("content"))
# Gemini expects the response as a dict under `response`. We wrap plain
# text in {"output": "..."}.
try:
parsed = json.loads(content) if content.strip().startswith(("{", "[")) else None
except json.JSONDecodeError:
parsed = None
response = parsed if isinstance(parsed, dict) else {"output": content}
return {
"functionResponse": {
"name": name,
"response": response,
},
}
def _build_gemini_contents(
messages: List[Dict[str, Any]],
) -> tuple[List[Dict[str, Any]], Optional[Dict[str, Any]]]:
"""Convert OpenAI messages[] to Gemini contents[] + systemInstruction."""
system_text_parts: List[str] = []
contents: List[Dict[str, Any]] = []
for msg in messages:
if not isinstance(msg, dict):
continue
role = str(msg.get("role") or "user")
if role == "system":
system_text_parts.append(_coerce_content_to_text(msg.get("content")))
continue
# Tool result message — emit a user-role turn with functionResponse
if role == "tool" or role == "function":
contents.append({
"role": "user",
"parts": [_translate_tool_result_to_gemini(msg)],
})
continue
gemini_role = _ROLE_MAP_OPENAI_TO_GEMINI.get(role, "user")
parts: List[Dict[str, Any]] = []
text = _coerce_content_to_text(msg.get("content"))
if text:
parts.append({"text": text})
# Assistant messages can carry tool_calls
tool_calls = msg.get("tool_calls") or []
if isinstance(tool_calls, list):
for tc in tool_calls:
if isinstance(tc, dict):
parts.append(_translate_tool_call_to_gemini(tc))
if not parts:
# Gemini rejects empty parts; skip the turn entirely
continue
contents.append({"role": gemini_role, "parts": parts})
system_instruction: Optional[Dict[str, Any]] = None
joined_system = "\n".join(p for p in system_text_parts if p).strip()
if joined_system:
system_instruction = {
"role": "system",
"parts": [{"text": joined_system}],
}
return contents, system_instruction
def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]:
"""OpenAI tools[] -> Gemini tools[].functionDeclarations[]."""
if not isinstance(tools, list) or not tools:
return []
declarations: List[Dict[str, Any]] = []
for t in tools:
if not isinstance(t, dict):
continue
fn = t.get("function") or {}
if not isinstance(fn, dict):
continue
name = fn.get("name")
if not name:
continue
decl = {"name": str(name)}
if fn.get("description"):
decl["description"] = str(fn["description"])
params = fn.get("parameters")
if isinstance(params, dict):
decl["parameters"] = sanitize_gemini_tool_parameters(params)
declarations.append(decl)
if not declarations:
return []
return [{"functionDeclarations": declarations}]
def _translate_tool_choice_to_gemini(tool_choice: Any) -> Optional[Dict[str, Any]]:
"""OpenAI tool_choice -> Gemini toolConfig.functionCallingConfig."""
if tool_choice is None:
return None
if isinstance(tool_choice, str):
if tool_choice == "auto":
return {"functionCallingConfig": {"mode": "AUTO"}}
if tool_choice == "required":
return {"functionCallingConfig": {"mode": "ANY"}}
if tool_choice == "none":
return {"functionCallingConfig": {"mode": "NONE"}}
if isinstance(tool_choice, dict):
fn = tool_choice.get("function") or {}
name = fn.get("name")
if name:
return {
"functionCallingConfig": {
"mode": "ANY",
"allowedFunctionNames": [str(name)],
},
}
return None
def _normalize_thinking_config(config: Any) -> Optional[Dict[str, Any]]:
"""Accept thinkingBudget / thinkingLevel / includeThoughts (+ snake_case)."""
if not isinstance(config, dict) or not config:
return None
budget = config.get("thinkingBudget", config.get("thinking_budget"))
level = config.get("thinkingLevel", config.get("thinking_level"))
include = config.get("includeThoughts", config.get("include_thoughts"))
normalized: Dict[str, Any] = {}
if isinstance(budget, (int, float)):
normalized["thinkingBudget"] = int(budget)
if isinstance(level, str) and level.strip():
normalized["thinkingLevel"] = level.strip().lower()
if isinstance(include, bool):
normalized["includeThoughts"] = include
return normalized or None
def build_gemini_request(
*,
messages: List[Dict[str, Any]],
tools: Any = None,
tool_choice: Any = None,
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
top_p: Optional[float] = None,
stop: Any = None,
thinking_config: Any = None,
) -> Dict[str, Any]:
"""Build the inner Gemini request body (goes inside ``request`` wrapper)."""
contents, system_instruction = _build_gemini_contents(messages)
body: Dict[str, Any] = {"contents": contents}
if system_instruction is not None:
body["systemInstruction"] = system_instruction
gemini_tools = _translate_tools_to_gemini(tools)
if gemini_tools:
body["tools"] = gemini_tools
tool_cfg = _translate_tool_choice_to_gemini(tool_choice)
if tool_cfg is not None:
body["toolConfig"] = tool_cfg
generation_config: Dict[str, Any] = {}
if isinstance(temperature, (int, float)):
generation_config["temperature"] = float(temperature)
if isinstance(max_tokens, int) and max_tokens > 0:
generation_config["maxOutputTokens"] = max_tokens
if isinstance(top_p, (int, float)):
generation_config["topP"] = float(top_p)
if isinstance(stop, str) and stop:
generation_config["stopSequences"] = [stop]
elif isinstance(stop, list) and stop:
generation_config["stopSequences"] = [str(s) for s in stop if s]
normalized_thinking = _normalize_thinking_config(thinking_config)
if normalized_thinking:
generation_config["thinkingConfig"] = normalized_thinking
if generation_config:
body["generationConfig"] = generation_config
return body
def wrap_code_assist_request(
*,
project_id: str,
model: str,
inner_request: Dict[str, Any],
user_prompt_id: Optional[str] = None,
) -> Dict[str, Any]:
"""Wrap the inner Gemini request in the Code Assist envelope."""
return {
"project": project_id,
"model": model,
"user_prompt_id": user_prompt_id or str(uuid.uuid4()),
"request": inner_request,
}
# =============================================================================
# Response translation: Gemini → OpenAI
# =============================================================================
def _translate_gemini_response(
resp: Dict[str, Any],
model: str,
) -> SimpleNamespace:
"""Non-streaming Gemini response -> OpenAI-shaped SimpleNamespace.
Code Assist wraps the actual Gemini response inside ``response``, so we
unwrap it first if present.
"""
inner = resp.get("response") if isinstance(resp.get("response"), dict) else resp
candidates = inner.get("candidates") or []
if not isinstance(candidates, list) or not candidates:
return _empty_response(model)
cand = candidates[0]
content_obj = cand.get("content") if isinstance(cand, dict) else {}
parts = content_obj.get("parts") if isinstance(content_obj, dict) else []
text_pieces: List[str] = []
reasoning_pieces: List[str] = []
tool_calls: List[SimpleNamespace] = []
for i, part in enumerate(parts or []):
if not isinstance(part, dict):
continue
# Thought parts are model's internal reasoning — surface as reasoning,
# don't mix into content.
if part.get("thought") is True:
if isinstance(part.get("text"), str):
reasoning_pieces.append(part["text"])
continue
if isinstance(part.get("text"), str):
text_pieces.append(part["text"])
continue
fc = part.get("functionCall")
if isinstance(fc, dict) and fc.get("name"):
try:
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
except (TypeError, ValueError):
args_str = "{}"
tool_calls.append(SimpleNamespace(
id=f"call_{uuid.uuid4().hex[:12]}",
type="function",
index=i,
function=SimpleNamespace(name=str(fc["name"]), arguments=args_str),
))
finish_reason = "tool_calls" if tool_calls else _map_gemini_finish_reason(
str(cand.get("finishReason") or "")
)
usage_meta = inner.get("usageMetadata") or {}
usage = SimpleNamespace(
prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
total_tokens=int(usage_meta.get("totalTokenCount") or 0),
prompt_tokens_details=SimpleNamespace(
cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
),
)
message = SimpleNamespace(
role="assistant",
content="".join(text_pieces) if text_pieces else None,
tool_calls=tool_calls or None,
reasoning="".join(reasoning_pieces) or None,
reasoning_content="".join(reasoning_pieces) or None,
reasoning_details=None,
)
choice = SimpleNamespace(
index=0,
message=message,
finish_reason=finish_reason,
)
return SimpleNamespace(
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
object="chat.completion",
created=int(time.time()),
model=model,
choices=[choice],
usage=usage,
)
def _empty_response(model: str) -> SimpleNamespace:
message = SimpleNamespace(
role="assistant", content="", tool_calls=None,
reasoning=None, reasoning_content=None, reasoning_details=None,
)
choice = SimpleNamespace(index=0, message=message, finish_reason="stop")
usage = SimpleNamespace(
prompt_tokens=0, completion_tokens=0, total_tokens=0,
prompt_tokens_details=SimpleNamespace(cached_tokens=0),
)
return SimpleNamespace(
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
object="chat.completion",
created=int(time.time()),
model=model,
choices=[choice],
usage=usage,
)
def _map_gemini_finish_reason(reason: str) -> str:
mapping = {
"STOP": "stop",
"MAX_TOKENS": "length",
"SAFETY": "content_filter",
"RECITATION": "content_filter",
"OTHER": "stop",
}
return mapping.get(reason.upper(), "stop")
# =============================================================================
# Streaming SSE iterator
# =============================================================================
class _GeminiStreamChunk(SimpleNamespace):
"""Mimics an OpenAI ChatCompletionChunk with .choices[0].delta."""
pass
def _make_stream_chunk(
*,
model: str,
content: str = "",
tool_call_delta: Optional[Dict[str, Any]] = None,
finish_reason: Optional[str] = None,
reasoning: str = "",
) -> _GeminiStreamChunk:
delta_kwargs: Dict[str, Any] = {"role": "assistant"}
if content:
delta_kwargs["content"] = content
if tool_call_delta is not None:
delta_kwargs["tool_calls"] = [SimpleNamespace(
index=tool_call_delta.get("index", 0),
id=tool_call_delta.get("id") or f"call_{uuid.uuid4().hex[:12]}",
type="function",
function=SimpleNamespace(
name=tool_call_delta.get("name") or "",
arguments=tool_call_delta.get("arguments") or "",
),
)]
if reasoning:
delta_kwargs["reasoning"] = reasoning
delta_kwargs["reasoning_content"] = reasoning
delta = SimpleNamespace(**delta_kwargs)
choice = SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason)
return _GeminiStreamChunk(
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
object="chat.completion.chunk",
created=int(time.time()),
model=model,
choices=[choice],
usage=None,
)
def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]:
"""Parse Server-Sent Events from an httpx streaming response."""
buffer = ""
for chunk in response.iter_text():
if not chunk:
continue
buffer += chunk
while "\n" in buffer:
line, buffer = buffer.split("\n", 1)
line = line.rstrip("\r")
if not line:
continue
if line.startswith("data: "):
data = line[6:]
if data == "[DONE]":
return
try:
yield json.loads(data)
except json.JSONDecodeError:
logger.debug("Non-JSON SSE line: %s", data[:200])
def _translate_stream_event(
event: Dict[str, Any],
model: str,
tool_call_counter: List[int],
) -> List[_GeminiStreamChunk]:
"""Unwrap Code Assist envelope and emit OpenAI-shaped chunk(s).
``tool_call_counter`` is a single-element list used as a mutable counter
across events in the same stream. Each ``functionCall`` part gets a
fresh, unique OpenAI ``index`` keying by function name would collide
whenever the model issues parallel calls to the same tool (e.g. reading
three files in one turn).
"""
inner = event.get("response") if isinstance(event.get("response"), dict) else event
candidates = inner.get("candidates") or []
if not candidates:
return []
cand = candidates[0]
if not isinstance(cand, dict):
return []
chunks: List[_GeminiStreamChunk] = []
content = cand.get("content") or {}
parts = content.get("parts") if isinstance(content, dict) else []
for part in parts or []:
if not isinstance(part, dict):
continue
if part.get("thought") is True and isinstance(part.get("text"), str):
chunks.append(_make_stream_chunk(
model=model, reasoning=part["text"],
))
continue
if isinstance(part.get("text"), str) and part["text"]:
chunks.append(_make_stream_chunk(model=model, content=part["text"]))
fc = part.get("functionCall")
if isinstance(fc, dict) and fc.get("name"):
name = str(fc["name"])
idx = tool_call_counter[0]
tool_call_counter[0] += 1
try:
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
except (TypeError, ValueError):
args_str = "{}"
chunks.append(_make_stream_chunk(
model=model,
tool_call_delta={
"index": idx,
"name": name,
"arguments": args_str,
},
))
finish_reason_raw = str(cand.get("finishReason") or "")
if finish_reason_raw:
mapped = _map_gemini_finish_reason(finish_reason_raw)
if tool_call_counter[0] > 0:
mapped = "tool_calls"
chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
return chunks
# =============================================================================
# GeminiCloudCodeClient — OpenAI-compatible facade
# =============================================================================
MARKER_BASE_URL = "cloudcode-pa://google"
class _GeminiChatCompletions:
def __init__(self, client: "GeminiCloudCodeClient"):
self._client = client
def create(self, **kwargs: Any) -> Any:
return self._client._create_chat_completion(**kwargs)
class _GeminiChatNamespace:
def __init__(self, client: "GeminiCloudCodeClient"):
self.completions = _GeminiChatCompletions(client)
class GeminiCloudCodeClient:
"""Minimal OpenAI-SDK-compatible facade over Code Assist v1internal."""
def __init__(
self,
*,
api_key: Optional[str] = None,
base_url: Optional[str] = None,
default_headers: Optional[Dict[str, str]] = None,
project_id: str = "",
**_: Any,
):
# `api_key` here is a dummy — real auth is the OAuth access token
# fetched on every call via agent.google_oauth.get_valid_access_token().
# We accept the kwarg for openai.OpenAI interface parity.
self.api_key = api_key or "google-oauth"
self.base_url = base_url or MARKER_BASE_URL
self._default_headers = dict(default_headers or {})
self._configured_project_id = project_id
self._project_context: Optional[ProjectContext] = None
self._project_context_lock = False # simple single-thread guard
self.chat = _GeminiChatNamespace(self)
self.is_closed = False
self._http = httpx.Client(timeout=httpx.Timeout(connect=15.0, read=600.0, write=30.0, pool=30.0))
def close(self) -> None:
self.is_closed = True
try:
self._http.close()
except Exception:
pass
# Implement the OpenAI SDK's context-manager-ish closure check
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
def _ensure_project_context(self, access_token: str, model: str) -> ProjectContext:
"""Lazily resolve and cache the project context for this client."""
if self._project_context is not None:
return self._project_context
env_project = google_oauth.resolve_project_id_from_env()
creds = google_oauth.load_credentials()
stored_project = creds.project_id if creds else ""
# Prefer what's already baked into the creds
if stored_project:
self._project_context = ProjectContext(
project_id=stored_project,
managed_project_id=creds.managed_project_id if creds else "",
tier_id="",
source="stored",
)
return self._project_context
ctx = resolve_project_context(
access_token,
configured_project_id=self._configured_project_id,
env_project_id=env_project,
user_agent_model=model,
)
# Persist discovered project back to the creds file so the next
# session doesn't re-run the discovery.
if ctx.project_id or ctx.managed_project_id:
google_oauth.update_project_ids(
project_id=ctx.project_id,
managed_project_id=ctx.managed_project_id,
)
self._project_context = ctx
return ctx
def _create_chat_completion(
self,
*,
model: str = "gemini-2.5-flash",
messages: Optional[List[Dict[str, Any]]] = None,
stream: bool = False,
tools: Any = None,
tool_choice: Any = None,
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
top_p: Optional[float] = None,
stop: Any = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Any = None,
**_: Any,
) -> Any:
access_token = google_oauth.get_valid_access_token()
ctx = self._ensure_project_context(access_token, model)
thinking_config = None
if isinstance(extra_body, dict):
thinking_config = extra_body.get("thinking_config") or extra_body.get("thinkingConfig")
inner = build_gemini_request(
messages=messages or [],
tools=tools,
tool_choice=tool_choice,
temperature=temperature,
max_tokens=max_tokens,
top_p=top_p,
stop=stop,
thinking_config=thinking_config,
)
wrapped = wrap_code_assist_request(
project_id=ctx.project_id,
model=model,
inner_request=inner,
)
headers = {
"Content-Type": "application/json",
"Accept": "application/json",
"Authorization": f"Bearer {access_token}",
"User-Agent": "hermes-agent (gemini-cli-compat)",
"X-Goog-Api-Client": "gl-python/hermes",
"x-activity-request-id": str(uuid.uuid4()),
}
headers.update(self._default_headers)
if stream:
return self._stream_completion(model=model, wrapped=wrapped, headers=headers)
url = f"{CODE_ASSIST_ENDPOINT}/v1internal:generateContent"
response = self._http.post(url, json=wrapped, headers=headers)
if response.status_code != 200:
raise _gemini_http_error(response)
try:
payload = response.json()
except ValueError as exc:
raise CodeAssistError(
f"Invalid JSON from Code Assist: {exc}",
code="code_assist_invalid_json",
) from exc
return _translate_gemini_response(payload, model=model)
def _stream_completion(
self,
*,
model: str,
wrapped: Dict[str, Any],
headers: Dict[str, str],
) -> Iterator[_GeminiStreamChunk]:
"""Generator that yields OpenAI-shaped streaming chunks."""
url = f"{CODE_ASSIST_ENDPOINT}/v1internal:streamGenerateContent?alt=sse"
stream_headers = dict(headers)
stream_headers["Accept"] = "text/event-stream"
def _generator() -> Iterator[_GeminiStreamChunk]:
try:
with self._http.stream("POST", url, json=wrapped, headers=stream_headers) as response:
if response.status_code != 200:
# Materialize error body for better diagnostics
response.read()
raise _gemini_http_error(response)
tool_call_counter: List[int] = [0]
for event in _iter_sse_events(response):
for chunk in _translate_stream_event(event, model, tool_call_counter):
yield chunk
except httpx.HTTPError as exc:
raise CodeAssistError(
f"Streaming request failed: {exc}",
code="code_assist_stream_error",
) from exc
return _generator()
def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
"""Translate an httpx response into a CodeAssistError with rich metadata.
Parses Google's error envelope (``{"error": {"code", "message", "status",
"details": [...]}}``) so the agent's error classifier can reason about
the failure ``status_code`` enables the rate_limit / auth classification
paths, and ``response`` lets the main loop honor ``Retry-After`` just
like it does for OpenAI SDK exceptions.
Also lifts a few recognizable Google conditions into human-readable
messages so the user sees something better than a 500-char JSON dump:
MODEL_CAPACITY_EXHAUSTED "Gemini model capacity exhausted for
<model>. This is a Google-side throttle..."
RESOURCE_EXHAUSTED w/o reason quota-style message
404 "Model <name> not found at cloudcode-pa..."
"""
status = response.status_code
# Parse the body once, surviving any weird encodings.
body_text = ""
body_json: Dict[str, Any] = {}
try:
body_text = response.text
except Exception:
body_text = ""
if body_text:
try:
parsed = json.loads(body_text)
if isinstance(parsed, dict):
body_json = parsed
except (ValueError, TypeError):
body_json = {}
# Dig into Google's error envelope. Shape is:
# {"error": {"code": 429, "message": "...", "status": "RESOURCE_EXHAUSTED",
# "details": [{"@type": ".../ErrorInfo", "reason": "MODEL_CAPACITY_EXHAUSTED",
# "metadata": {...}},
# {"@type": ".../RetryInfo", "retryDelay": "30s"}]}}
err_obj = body_json.get("error") if isinstance(body_json, dict) else None
if not isinstance(err_obj, dict):
err_obj = {}
err_status = str(err_obj.get("status") or "").strip()
err_message = str(err_obj.get("message") or "").strip()
_raw_details = err_obj.get("details")
err_details_list = _raw_details if isinstance(_raw_details, list) else []
# Extract google.rpc.ErrorInfo reason + metadata. There may be more
# than one ErrorInfo (rare), so we pick the first one with a reason.
error_reason = ""
error_metadata: Dict[str, Any] = {}
retry_delay_seconds: Optional[float] = None
for detail in err_details_list:
if not isinstance(detail, dict):
continue
type_url = str(detail.get("@type") or "")
if not error_reason and type_url.endswith("/google.rpc.ErrorInfo"):
reason = detail.get("reason")
if isinstance(reason, str) and reason:
error_reason = reason
md = detail.get("metadata")
if isinstance(md, dict):
error_metadata = md
elif retry_delay_seconds is None and type_url.endswith("/google.rpc.RetryInfo"):
# retryDelay is a google.protobuf.Duration string like "30s" or "1.5s".
delay_raw = detail.get("retryDelay")
if isinstance(delay_raw, str) and delay_raw.endswith("s"):
try:
retry_delay_seconds = float(delay_raw[:-1])
except ValueError:
pass
elif isinstance(delay_raw, (int, float)):
retry_delay_seconds = float(delay_raw)
# Fall back to the Retry-After header if the body didn't include RetryInfo.
if retry_delay_seconds is None:
try:
header_val = response.headers.get("Retry-After") or response.headers.get("retry-after")
except Exception:
header_val = None
if header_val:
try:
retry_delay_seconds = float(header_val)
except (TypeError, ValueError):
retry_delay_seconds = None
# Classify the error code. ``code_assist_rate_limited`` stays the default
# for 429s; a more specific reason tag helps downstream callers (e.g. tests,
# logs) without changing the rate_limit classification path.
code = f"code_assist_http_{status}"
if status == 401:
code = "code_assist_unauthorized"
elif status == 429:
code = "code_assist_rate_limited"
if error_reason == "MODEL_CAPACITY_EXHAUSTED":
code = "code_assist_capacity_exhausted"
# Build a human-readable message. Keep the status + a raw-body tail for
# debugging, but lead with a friendlier summary when we recognize the
# Google signal.
model_hint = ""
if isinstance(error_metadata, dict):
model_hint = str(error_metadata.get("model") or error_metadata.get("modelId") or "").strip()
if status == 429 and error_reason == "MODEL_CAPACITY_EXHAUSTED":
target = model_hint or "this Gemini model"
message = (
f"Gemini capacity exhausted for {target} (Google-side throttle, "
f"not a Hermes issue). Try a different Gemini model or set a "
f"fallback_providers entry to a non-Gemini provider."
)
if retry_delay_seconds is not None:
message += f" Google suggests retrying in {retry_delay_seconds:g}s."
elif status == 429 and err_status == "RESOURCE_EXHAUSTED":
message = (
f"Gemini quota exhausted ({err_message or 'RESOURCE_EXHAUSTED'}). "
f"Check /gquota for remaining daily requests."
)
if retry_delay_seconds is not None:
message += f" Retry suggested in {retry_delay_seconds:g}s."
elif status == 404:
# Google returns 404 when a model has been retired or renamed.
target = model_hint or (err_message or "model")
message = (
f"Code Assist 404: {target} is not available at "
f"cloudcode-pa.googleapis.com. It may have been renamed or "
f"retired. Check hermes_cli/models.py for the current list."
)
elif err_message:
# Generic fallback with the parsed message.
message = f"Code Assist HTTP {status} ({err_status or 'error'}): {err_message}"
else:
# Last-ditch fallback — raw body snippet.
message = f"Code Assist returned HTTP {status}: {body_text[:500]}"
return CodeAssistError(
message,
code=code,
status_code=status,
response=response,
retry_after=retry_delay_seconds,
details={
"status": err_status,
"reason": error_reason,
"metadata": error_metadata,
"message": err_message,
},
)
-847
View File
@@ -1,847 +0,0 @@
"""OpenAI-compatible facade over Google AI Studio's native Gemini API.
Hermes keeps ``api_mode='chat_completions'`` for the ``gemini`` provider so the
main agent loop can keep using its existing OpenAI-shaped message flow.
This adapter is the transport shim that converts those OpenAI-style
``messages[]`` / ``tools[]`` requests into Gemini's native
``models/{model}:generateContent`` schema and converts the responses back.
Why this exists
---------------
Google's OpenAI-compatible endpoint has been brittle for Hermes's multi-turn
agent/tool loop (auth churn, tool-call replay quirks, thought-signature
requirements). The native Gemini API is the canonical path and avoids the
OpenAI-compat layer entirely.
"""
from __future__ import annotations
import asyncio
import base64
import json
import logging
import time
import uuid
from types import SimpleNamespace
from typing import Any, Dict, Iterator, List, Optional
import httpx
from agent.gemini_schema import sanitize_gemini_tool_parameters
logger = logging.getLogger(__name__)
DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
def is_native_gemini_base_url(base_url: str) -> bool:
"""Return True when the endpoint speaks Gemini's native REST API."""
normalized = str(base_url or "").strip().rstrip("/").lower()
if not normalized:
return False
if "generativelanguage.googleapis.com" not in normalized:
return False
return not normalized.endswith("/openai")
class GeminiAPIError(Exception):
"""Error shape compatible with Hermes retry/error classification."""
def __init__(
self,
message: str,
*,
code: str = "gemini_api_error",
status_code: Optional[int] = None,
response: Optional[httpx.Response] = None,
retry_after: Optional[float] = None,
details: Optional[Dict[str, Any]] = None,
) -> None:
super().__init__(message)
self.code = code
self.status_code = status_code
self.response = response
self.retry_after = retry_after
self.details = details or {}
def _coerce_content_to_text(content: Any) -> str:
if content is None:
return ""
if isinstance(content, str):
return content
if isinstance(content, list):
pieces: List[str] = []
for part in content:
if isinstance(part, str):
pieces.append(part)
elif isinstance(part, dict) and part.get("type") == "text":
text = part.get("text")
if isinstance(text, str):
pieces.append(text)
return "\n".join(pieces)
return str(content)
def _extract_multimodal_parts(content: Any) -> List[Dict[str, Any]]:
if not isinstance(content, list):
text = _coerce_content_to_text(content)
return [{"text": text}] if text else []
parts: List[Dict[str, Any]] = []
for item in content:
if isinstance(item, str):
parts.append({"text": item})
continue
if not isinstance(item, dict):
continue
ptype = item.get("type")
if ptype == "text":
text = item.get("text")
if isinstance(text, str) and text:
parts.append({"text": text})
elif ptype == "image_url":
url = ((item.get("image_url") or {}).get("url") or "")
if not isinstance(url, str) or not url.startswith("data:"):
continue
try:
header, encoded = url.split(",", 1)
mime = header.split(":", 1)[1].split(";", 1)[0]
raw = base64.b64decode(encoded)
except Exception:
continue
parts.append(
{
"inlineData": {
"mimeType": mime,
"data": base64.b64encode(raw).decode("ascii"),
}
}
)
return parts
def _tool_call_extra_signature(tool_call: Dict[str, Any]) -> Optional[str]:
extra = tool_call.get("extra_content") or {}
if not isinstance(extra, dict):
return None
google = extra.get("google") or extra.get("thought_signature")
if isinstance(google, dict):
sig = google.get("thought_signature") or google.get("thoughtSignature")
return str(sig) if isinstance(sig, str) and sig else None
if isinstance(google, str) and google:
return google
return None
def _translate_tool_call_to_gemini(tool_call: Dict[str, Any]) -> Dict[str, Any]:
fn = tool_call.get("function") or {}
args_raw = fn.get("arguments", "")
try:
args = json.loads(args_raw) if isinstance(args_raw, str) and args_raw else {}
except json.JSONDecodeError:
args = {"_raw": args_raw}
if not isinstance(args, dict):
args = {"_value": args}
part: Dict[str, Any] = {
"functionCall": {
"name": str(fn.get("name") or ""),
"args": args,
}
}
thought_signature = _tool_call_extra_signature(tool_call)
if thought_signature:
part["thoughtSignature"] = thought_signature
return part
def _translate_tool_result_to_gemini(
message: Dict[str, Any],
tool_name_by_call_id: Optional[Dict[str, str]] = None,
) -> Dict[str, Any]:
tool_name_by_call_id = tool_name_by_call_id or {}
tool_call_id = str(message.get("tool_call_id") or "")
name = str(
message.get("name")
or tool_name_by_call_id.get(tool_call_id)
or tool_call_id
or "tool"
)
content = _coerce_content_to_text(message.get("content"))
try:
parsed = json.loads(content) if content.strip().startswith(("{", "[")) else None
except json.JSONDecodeError:
parsed = None
response = parsed if isinstance(parsed, dict) else {"output": content}
return {
"functionResponse": {
"name": name,
"response": response,
}
}
def _build_gemini_contents(messages: List[Dict[str, Any]]) -> tuple[List[Dict[str, Any]], Optional[Dict[str, Any]]]:
system_text_parts: List[str] = []
contents: List[Dict[str, Any]] = []
tool_name_by_call_id: Dict[str, str] = {}
for msg in messages:
if not isinstance(msg, dict):
continue
role = str(msg.get("role") or "user")
if role == "system":
system_text_parts.append(_coerce_content_to_text(msg.get("content")))
continue
if role in {"tool", "function"}:
contents.append(
{
"role": "user",
"parts": [
_translate_tool_result_to_gemini(
msg,
tool_name_by_call_id=tool_name_by_call_id,
)
],
}
)
continue
gemini_role = "model" if role == "assistant" else "user"
parts: List[Dict[str, Any]] = []
content_parts = _extract_multimodal_parts(msg.get("content"))
parts.extend(content_parts)
tool_calls = msg.get("tool_calls") or []
if isinstance(tool_calls, list):
for tool_call in tool_calls:
if isinstance(tool_call, dict):
tool_call_id = str(tool_call.get("id") or tool_call.get("call_id") or "")
tool_name = str(((tool_call.get("function") or {}).get("name") or ""))
if tool_call_id and tool_name:
tool_name_by_call_id[tool_call_id] = tool_name
parts.append(_translate_tool_call_to_gemini(tool_call))
if parts:
contents.append({"role": gemini_role, "parts": parts})
system_instruction = None
joined_system = "\n".join(part for part in system_text_parts if part).strip()
if joined_system:
system_instruction = {"parts": [{"text": joined_system}]}
return contents, system_instruction
def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]:
if not isinstance(tools, list):
return []
declarations: List[Dict[str, Any]] = []
for tool in tools:
if not isinstance(tool, dict):
continue
fn = tool.get("function") or {}
if not isinstance(fn, dict):
continue
name = fn.get("name")
if not isinstance(name, str) or not name:
continue
decl: Dict[str, Any] = {"name": name}
description = fn.get("description")
if isinstance(description, str) and description:
decl["description"] = description
parameters = fn.get("parameters")
if isinstance(parameters, dict):
decl["parameters"] = sanitize_gemini_tool_parameters(parameters)
declarations.append(decl)
return [{"functionDeclarations": declarations}] if declarations else []
def _translate_tool_choice_to_gemini(tool_choice: Any) -> Optional[Dict[str, Any]]:
if tool_choice is None:
return None
if isinstance(tool_choice, str):
if tool_choice == "auto":
return {"functionCallingConfig": {"mode": "AUTO"}}
if tool_choice == "required":
return {"functionCallingConfig": {"mode": "ANY"}}
if tool_choice == "none":
return {"functionCallingConfig": {"mode": "NONE"}}
if isinstance(tool_choice, dict):
fn = tool_choice.get("function") or {}
name = fn.get("name")
if isinstance(name, str) and name:
return {"functionCallingConfig": {"mode": "ANY", "allowedFunctionNames": [name]}}
return None
def _normalize_thinking_config(config: Any) -> Optional[Dict[str, Any]]:
if not isinstance(config, dict) or not config:
return None
budget = config.get("thinkingBudget", config.get("thinking_budget"))
include = config.get("includeThoughts", config.get("include_thoughts"))
level = config.get("thinkingLevel", config.get("thinking_level"))
normalized: Dict[str, Any] = {}
if isinstance(budget, (int, float)):
normalized["thinkingBudget"] = int(budget)
if isinstance(include, bool):
normalized["includeThoughts"] = include
if isinstance(level, str) and level.strip():
normalized["thinkingLevel"] = level.strip().lower()
return normalized or None
def build_gemini_request(
*,
messages: List[Dict[str, Any]],
tools: Any = None,
tool_choice: Any = None,
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
top_p: Optional[float] = None,
stop: Any = None,
thinking_config: Any = None,
) -> Dict[str, Any]:
contents, system_instruction = _build_gemini_contents(messages)
request: Dict[str, Any] = {"contents": contents}
if system_instruction:
request["systemInstruction"] = system_instruction
gemini_tools = _translate_tools_to_gemini(tools)
if gemini_tools:
request["tools"] = gemini_tools
tool_config = _translate_tool_choice_to_gemini(tool_choice)
if tool_config:
request["toolConfig"] = tool_config
generation_config: Dict[str, Any] = {}
if temperature is not None:
generation_config["temperature"] = temperature
if max_tokens is not None:
generation_config["maxOutputTokens"] = max_tokens
if top_p is not None:
generation_config["topP"] = top_p
if stop:
generation_config["stopSequences"] = stop if isinstance(stop, list) else [str(stop)]
normalized_thinking = _normalize_thinking_config(thinking_config)
if normalized_thinking:
generation_config["thinkingConfig"] = normalized_thinking
if generation_config:
request["generationConfig"] = generation_config
return request
def _map_gemini_finish_reason(reason: str) -> str:
mapping = {
"STOP": "stop",
"MAX_TOKENS": "length",
"SAFETY": "content_filter",
"RECITATION": "content_filter",
"OTHER": "stop",
}
return mapping.get(str(reason or "").upper(), "stop")
def _tool_call_extra_from_part(part: Dict[str, Any]) -> Optional[Dict[str, Any]]:
sig = part.get("thoughtSignature")
if isinstance(sig, str) and sig:
return {"google": {"thought_signature": sig}}
return None
def _empty_response(model: str) -> SimpleNamespace:
message = SimpleNamespace(
role="assistant",
content="",
tool_calls=None,
reasoning=None,
reasoning_content=None,
reasoning_details=None,
)
choice = SimpleNamespace(index=0, message=message, finish_reason="stop")
usage = SimpleNamespace(
prompt_tokens=0,
completion_tokens=0,
total_tokens=0,
prompt_tokens_details=SimpleNamespace(cached_tokens=0),
)
return SimpleNamespace(
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
object="chat.completion",
created=int(time.time()),
model=model,
choices=[choice],
usage=usage,
)
def translate_gemini_response(resp: Dict[str, Any], model: str) -> SimpleNamespace:
candidates = resp.get("candidates") or []
if not isinstance(candidates, list) or not candidates:
return _empty_response(model)
cand = candidates[0] if isinstance(candidates[0], dict) else {}
content_obj = cand.get("content") if isinstance(cand, dict) else {}
parts = content_obj.get("parts") if isinstance(content_obj, dict) else []
text_pieces: List[str] = []
reasoning_pieces: List[str] = []
tool_calls: List[SimpleNamespace] = []
for index, part in enumerate(parts or []):
if not isinstance(part, dict):
continue
if part.get("thought") is True and isinstance(part.get("text"), str):
reasoning_pieces.append(part["text"])
continue
if isinstance(part.get("text"), str):
text_pieces.append(part["text"])
continue
fc = part.get("functionCall")
if isinstance(fc, dict) and fc.get("name"):
try:
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
except (TypeError, ValueError):
args_str = "{}"
tool_call = SimpleNamespace(
id=f"call_{uuid.uuid4().hex[:12]}",
type="function",
index=index,
function=SimpleNamespace(name=str(fc["name"]), arguments=args_str),
)
extra_content = _tool_call_extra_from_part(part)
if extra_content:
tool_call.extra_content = extra_content
tool_calls.append(tool_call)
finish_reason = "tool_calls" if tool_calls else _map_gemini_finish_reason(str(cand.get("finishReason") or ""))
usage_meta = resp.get("usageMetadata") or {}
usage = SimpleNamespace(
prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
total_tokens=int(usage_meta.get("totalTokenCount") or 0),
prompt_tokens_details=SimpleNamespace(
cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
),
)
reasoning = "".join(reasoning_pieces) or None
message = SimpleNamespace(
role="assistant",
content="".join(text_pieces) if text_pieces else None,
tool_calls=tool_calls or None,
reasoning=reasoning,
reasoning_content=reasoning,
reasoning_details=None,
)
choice = SimpleNamespace(index=0, message=message, finish_reason=finish_reason)
return SimpleNamespace(
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
object="chat.completion",
created=int(time.time()),
model=model,
choices=[choice],
usage=usage,
)
class _GeminiStreamChunk(SimpleNamespace):
pass
def _make_stream_chunk(
*,
model: str,
content: str = "",
tool_call_delta: Optional[Dict[str, Any]] = None,
finish_reason: Optional[str] = None,
reasoning: str = "",
) -> _GeminiStreamChunk:
delta_kwargs: Dict[str, Any] = {
"role": "assistant",
"content": None,
"tool_calls": None,
"reasoning": None,
"reasoning_content": None,
}
if content:
delta_kwargs["content"] = content
if tool_call_delta is not None:
tool_delta = SimpleNamespace(
index=tool_call_delta.get("index", 0),
id=tool_call_delta.get("id") or f"call_{uuid.uuid4().hex[:12]}",
type="function",
function=SimpleNamespace(
name=tool_call_delta.get("name") or "",
arguments=tool_call_delta.get("arguments") or "",
),
)
extra_content = tool_call_delta.get("extra_content")
if isinstance(extra_content, dict):
tool_delta.extra_content = extra_content
delta_kwargs["tool_calls"] = [tool_delta]
if reasoning:
delta_kwargs["reasoning"] = reasoning
delta_kwargs["reasoning_content"] = reasoning
delta = SimpleNamespace(**delta_kwargs)
choice = SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason)
return _GeminiStreamChunk(
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
object="chat.completion.chunk",
created=int(time.time()),
model=model,
choices=[choice],
usage=None,
)
def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]:
buffer = ""
for chunk in response.iter_text():
if not chunk:
continue
buffer += chunk
while "\n" in buffer:
line, buffer = buffer.split("\n", 1)
line = line.rstrip("\r")
if not line:
continue
if not line.startswith("data: "):
continue
data = line[6:]
if data == "[DONE]":
return
try:
payload = json.loads(data)
except json.JSONDecodeError:
logger.debug("Non-JSON Gemini SSE line: %s", data[:200])
continue
if isinstance(payload, dict):
yield payload
def translate_stream_event(event: Dict[str, Any], model: str, tool_call_indices: Dict[str, Dict[str, Any]]) -> List[_GeminiStreamChunk]:
candidates = event.get("candidates") or []
if not candidates:
return []
cand = candidates[0] if isinstance(candidates[0], dict) else {}
parts = ((cand.get("content") or {}).get("parts") or []) if isinstance(cand, dict) else []
chunks: List[_GeminiStreamChunk] = []
for part_index, part in enumerate(parts):
if not isinstance(part, dict):
continue
if part.get("thought") is True and isinstance(part.get("text"), str):
chunks.append(_make_stream_chunk(model=model, reasoning=part["text"]))
continue
if isinstance(part.get("text"), str) and part["text"]:
chunks.append(_make_stream_chunk(model=model, content=part["text"]))
fc = part.get("functionCall")
if isinstance(fc, dict) and fc.get("name"):
name = str(fc["name"])
try:
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False, sort_keys=True)
except (TypeError, ValueError):
args_str = "{}"
thought_signature = part.get("thoughtSignature") if isinstance(part.get("thoughtSignature"), str) else ""
call_key = json.dumps(
{
"part_index": part_index,
"name": name,
"thought_signature": thought_signature,
},
sort_keys=True,
)
slot = tool_call_indices.get(call_key)
if slot is None:
slot = {
"index": len(tool_call_indices),
"id": f"call_{uuid.uuid4().hex[:12]}",
"last_arguments": "",
}
tool_call_indices[call_key] = slot
emitted_arguments = args_str
last_arguments = str(slot.get("last_arguments") or "")
if last_arguments:
if args_str == last_arguments:
emitted_arguments = ""
elif args_str.startswith(last_arguments):
emitted_arguments = args_str[len(last_arguments):]
slot["last_arguments"] = args_str
chunks.append(
_make_stream_chunk(
model=model,
tool_call_delta={
"index": slot["index"],
"id": slot["id"],
"name": name,
"arguments": emitted_arguments,
"extra_content": _tool_call_extra_from_part(part),
},
)
)
finish_reason_raw = str(cand.get("finishReason") or "")
if finish_reason_raw:
mapped = "tool_calls" if tool_call_indices else _map_gemini_finish_reason(finish_reason_raw)
chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
return chunks
def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
status = response.status_code
body_text = ""
body_json: Dict[str, Any] = {}
try:
body_text = response.text
except Exception:
body_text = ""
if body_text:
try:
parsed = json.loads(body_text)
if isinstance(parsed, dict):
body_json = parsed
except (ValueError, TypeError):
body_json = {}
err_obj = body_json.get("error") if isinstance(body_json, dict) else None
if not isinstance(err_obj, dict):
err_obj = {}
err_status = str(err_obj.get("status") or "").strip()
err_message = str(err_obj.get("message") or "").strip()
_raw_details = err_obj.get("details")
details_list = _raw_details if isinstance(_raw_details, list) else []
reason = ""
retry_after: Optional[float] = None
metadata: Dict[str, Any] = {}
for detail in details_list:
if not isinstance(detail, dict):
continue
type_url = str(detail.get("@type") or "")
if not reason and type_url.endswith("/google.rpc.ErrorInfo"):
reason_value = detail.get("reason")
if isinstance(reason_value, str):
reason = reason_value
md = detail.get("metadata")
if isinstance(md, dict):
metadata = md
header_retry = response.headers.get("Retry-After") or response.headers.get("retry-after")
if header_retry:
try:
retry_after = float(header_retry)
except (TypeError, ValueError):
retry_after = None
code = f"gemini_http_{status}"
if status == 401:
code = "gemini_unauthorized"
elif status == 429:
code = "gemini_rate_limited"
elif status == 404:
code = "gemini_model_not_found"
if err_message:
message = f"Gemini HTTP {status} ({err_status or 'error'}): {err_message}"
else:
message = f"Gemini returned HTTP {status}: {body_text[:500]}"
return GeminiAPIError(
message,
code=code,
status_code=status,
response=response,
retry_after=retry_after,
details={
"status": err_status,
"reason": reason,
"metadata": metadata,
"message": err_message,
},
)
class _GeminiChatCompletions:
def __init__(self, client: "GeminiNativeClient"):
self._client = client
def create(self, **kwargs: Any) -> Any:
return self._client._create_chat_completion(**kwargs)
class _AsyncGeminiChatCompletions:
def __init__(self, client: "AsyncGeminiNativeClient"):
self._client = client
async def create(self, **kwargs: Any) -> Any:
return await self._client._create_chat_completion(**kwargs)
class _GeminiChatNamespace:
def __init__(self, client: "GeminiNativeClient"):
self.completions = _GeminiChatCompletions(client)
class _AsyncGeminiChatNamespace:
def __init__(self, client: "AsyncGeminiNativeClient"):
self.completions = _AsyncGeminiChatCompletions(client)
class GeminiNativeClient:
"""Minimal OpenAI-SDK-compatible facade over Gemini's native REST API."""
def __init__(
self,
*,
api_key: str,
base_url: Optional[str] = None,
default_headers: Optional[Dict[str, str]] = None,
timeout: Any = None,
http_client: Optional[httpx.Client] = None,
**_: Any,
) -> None:
self.api_key = api_key
normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/")
if normalized_base.endswith("/openai"):
normalized_base = normalized_base[: -len("/openai")]
self.base_url = normalized_base
self._default_headers = dict(default_headers or {})
self.chat = _GeminiChatNamespace(self)
self.is_closed = False
self._http = http_client or httpx.Client(
timeout=timeout or httpx.Timeout(connect=15.0, read=600.0, write=30.0, pool=30.0)
)
def close(self) -> None:
self.is_closed = True
try:
self._http.close()
except Exception:
pass
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
def _headers(self) -> Dict[str, str]:
headers = {
"Content-Type": "application/json",
"Accept": "application/json",
"x-goog-api-key": self.api_key,
"User-Agent": "hermes-agent (gemini-native)",
}
headers.update(self._default_headers)
return headers
@staticmethod
def _advance_stream_iterator(iterator: Iterator[_GeminiStreamChunk]) -> tuple[bool, Optional[_GeminiStreamChunk]]:
try:
return False, next(iterator)
except StopIteration:
return True, None
def _create_chat_completion(
self,
*,
model: str = "gemini-2.5-flash",
messages: Optional[List[Dict[str, Any]]] = None,
stream: bool = False,
tools: Any = None,
tool_choice: Any = None,
temperature: Optional[float] = None,
max_tokens: Optional[int] = None,
top_p: Optional[float] = None,
stop: Any = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Any = None,
**_: Any,
) -> Any:
thinking_config = None
if isinstance(extra_body, dict):
thinking_config = extra_body.get("thinking_config") or extra_body.get("thinkingConfig")
request = build_gemini_request(
messages=messages or [],
tools=tools,
tool_choice=tool_choice,
temperature=temperature,
max_tokens=max_tokens,
top_p=top_p,
stop=stop,
thinking_config=thinking_config,
)
if stream:
return self._stream_completion(model=model, request=request, timeout=timeout)
url = f"{self.base_url}/models/{model}:generateContent"
response = self._http.post(url, json=request, headers=self._headers(), timeout=timeout)
if response.status_code != 200:
raise gemini_http_error(response)
try:
payload = response.json()
except ValueError as exc:
raise GeminiAPIError(
f"Invalid JSON from Gemini native API: {exc}",
code="gemini_invalid_json",
status_code=response.status_code,
response=response,
) from exc
return translate_gemini_response(payload, model=model)
def _stream_completion(self, *, model: str, request: Dict[str, Any], timeout: Any = None) -> Iterator[_GeminiStreamChunk]:
url = f"{self.base_url}/models/{model}:streamGenerateContent?alt=sse"
stream_headers = dict(self._headers())
stream_headers["Accept"] = "text/event-stream"
def _generator() -> Iterator[_GeminiStreamChunk]:
try:
with self._http.stream("POST", url, json=request, headers=stream_headers, timeout=timeout) as response:
if response.status_code != 200:
response.read()
raise gemini_http_error(response)
tool_call_indices: Dict[str, Dict[str, Any]] = {}
for event in _iter_sse_events(response):
for chunk in translate_stream_event(event, model, tool_call_indices):
yield chunk
except httpx.HTTPError as exc:
raise GeminiAPIError(
f"Gemini streaming request failed: {exc}",
code="gemini_stream_error",
) from exc
return _generator()
class AsyncGeminiNativeClient:
"""Async wrapper used by auxiliary_client for native Gemini calls."""
def __init__(self, sync_client: GeminiNativeClient):
self._sync = sync_client
self.api_key = sync_client.api_key
self.base_url = sync_client.base_url
self.chat = _AsyncGeminiChatNamespace(self)
async def _create_chat_completion(self, **kwargs: Any) -> Any:
stream = bool(kwargs.get("stream"))
result = await asyncio.to_thread(self._sync.chat.completions.create, **kwargs)
if not stream:
return result
async def _async_stream() -> Any:
while True:
done, chunk = await asyncio.to_thread(self._sync._advance_stream_iterator, result)
if done:
break
yield chunk
return _async_stream()
async def close(self) -> None:
await asyncio.to_thread(self._sync.close)
-85
View File
@@ -1,85 +0,0 @@
"""Helpers for translating OpenAI-style tool schemas to Gemini's schema subset."""
from __future__ import annotations
from typing import Any, Dict, List
# Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema``
# object, which is only a subset of OpenAPI 3.0 / JSON Schema. Strip fields
# outside that subset before sending Hermes tool schemas to Google.
_GEMINI_SCHEMA_ALLOWED_KEYS = {
"type",
"format",
"title",
"description",
"nullable",
"enum",
"maxItems",
"minItems",
"properties",
"required",
"minProperties",
"maxProperties",
"minLength",
"maxLength",
"pattern",
"example",
"anyOf",
"propertyOrdering",
"default",
"items",
"minimum",
"maximum",
}
def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]:
"""Return a Gemini-compatible copy of a tool parameter schema.
Hermes tool schemas are OpenAI-flavored JSON Schema and may contain keys
such as ``$schema`` or ``additionalProperties`` that Google's Gemini
``Schema`` object rejects. This helper preserves the documented Gemini
subset and recursively sanitizes nested ``properties`` / ``items`` /
``anyOf`` definitions.
"""
if not isinstance(schema, dict):
return {}
cleaned: Dict[str, Any] = {}
for key, value in schema.items():
if key not in _GEMINI_SCHEMA_ALLOWED_KEYS:
continue
if key == "properties":
if not isinstance(value, dict):
continue
props: Dict[str, Any] = {}
for prop_name, prop_schema in value.items():
if not isinstance(prop_name, str):
continue
props[prop_name] = sanitize_gemini_schema(prop_schema)
cleaned[key] = props
continue
if key == "items":
cleaned[key] = sanitize_gemini_schema(value)
continue
if key == "anyOf":
if not isinstance(value, list):
continue
cleaned[key] = [
sanitize_gemini_schema(item)
for item in value
if isinstance(item, dict)
]
continue
cleaned[key] = value
return cleaned
def sanitize_gemini_tool_parameters(parameters: Any) -> Dict[str, Any]:
"""Normalize tool parameters to a valid Gemini object schema."""
cleaned = sanitize_gemini_schema(parameters)
if not cleaned:
return {"type": "object", "properties": {}}
return cleaned
-453
View File
@@ -1,453 +0,0 @@
"""Google Code Assist API client — project discovery, onboarding, quota.
The Code Assist API powers Google's official gemini-cli. It sits at
``cloudcode-pa.googleapis.com`` and provides:
- Free tier access (generous daily quota) for personal Google accounts
- Paid tier access via GCP projects with billing / Workspace / Standard / Enterprise
This module handles the control-plane dance needed before inference:
1. ``load_code_assist()`` probe the user's account to learn what tier they're on
and whether a ``cloudaicompanionProject`` is already assigned.
2. ``onboard_user()`` if the user hasn't been onboarded yet (new account, fresh
free tier, etc.), call this with the chosen tier + project id. Supports LRO
polling for slow provisioning.
3. ``retrieve_user_quota()`` fetch the ``buckets[]`` array showing remaining
quota per model, used by the ``/gquota`` slash command.
VPC-SC handling: enterprise accounts under a VPC Service Controls perimeter
will get ``SECURITY_POLICY_VIOLATED`` on ``load_code_assist``. We catch this
and force the account to ``standard-tier`` so the call chain still succeeds.
Derived from opencode-gemini-auth (MIT) and clawdbot/extensions/google. The
request/response shapes are specific to Google's internal Code Assist API,
documented nowhere public we copy them from the reference implementations.
"""
from __future__ import annotations
import json
import logging
import os
import time
import urllib.error
import urllib.parse
import urllib.request
import uuid
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
# =============================================================================
# Constants
# =============================================================================
CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com"
# Fallback endpoints tried when prod returns an error during project discovery
FALLBACK_ENDPOINTS = [
"https://daily-cloudcode-pa.sandbox.googleapis.com",
"https://autopush-cloudcode-pa.sandbox.googleapis.com",
]
# Tier identifiers that Google's API uses
FREE_TIER_ID = "free-tier"
LEGACY_TIER_ID = "legacy-tier"
STANDARD_TIER_ID = "standard-tier"
# Default HTTP headers matching gemini-cli's fingerprint.
# Google may reject unrecognized User-Agents on these internal endpoints.
_GEMINI_CLI_USER_AGENT = "google-api-nodejs-client/9.15.1 (gzip)"
_X_GOOG_API_CLIENT = "gl-node/24.0.0"
_DEFAULT_REQUEST_TIMEOUT = 30.0
_ONBOARDING_POLL_ATTEMPTS = 12
_ONBOARDING_POLL_INTERVAL_SECONDS = 5.0
class CodeAssistError(RuntimeError):
"""Exception raised by the Code Assist (``cloudcode-pa``) integration.
Carries HTTP status / response / retry-after metadata so the agent's
``error_classifier._extract_status_code`` and the main loop's Retry-After
handling (which walks ``error.response.headers``) pick up the right
signals. Without these, 429s from the OAuth path look like opaque
``RuntimeError`` and skip the rate-limit path.
"""
def __init__(
self,
message: str,
*,
code: str = "code_assist_error",
status_code: Optional[int] = None,
response: Any = None,
retry_after: Optional[float] = None,
details: Optional[Dict[str, Any]] = None,
) -> None:
super().__init__(message)
self.code = code
# ``status_code`` is picked up by ``agent.error_classifier._extract_status_code``
# so a 429 from Code Assist classifies as FailoverReason.rate_limit and
# triggers the main loop's fallback_providers chain the same way SDK
# errors do.
self.status_code = status_code
# ``response`` is the underlying ``httpx.Response`` (or a shim with a
# ``.headers`` mapping and ``.json()`` method). The main loop reads
# ``error.response.headers["Retry-After"]`` to honor Google's retry
# hints when the backend throttles us.
self.response = response
# Parsed ``Retry-After`` seconds (kept separately for convenience —
# Google returns retry hints in both the header and the error body's
# ``google.rpc.RetryInfo`` details, and we pick whichever we found).
self.retry_after = retry_after
# Parsed structured error details from the Google error envelope
# (e.g. ``{"reason": "MODEL_CAPACITY_EXHAUSTED", "status": "RESOURCE_EXHAUSTED"}``).
# Useful for logging and for tests that want to assert on specifics.
self.details = details or {}
class ProjectIdRequiredError(CodeAssistError):
def __init__(self, message: str = "GCP project id required for this tier") -> None:
super().__init__(message, code="code_assist_project_id_required")
# =============================================================================
# HTTP primitive (auth via Bearer token passed per-call)
# =============================================================================
def _build_headers(access_token: str, *, user_agent_model: str = "") -> Dict[str, str]:
ua = _GEMINI_CLI_USER_AGENT
if user_agent_model:
ua = f"{ua} model/{user_agent_model}"
return {
"Content-Type": "application/json",
"Accept": "application/json",
"Authorization": f"Bearer {access_token}",
"User-Agent": ua,
"X-Goog-Api-Client": _X_GOOG_API_CLIENT,
"x-activity-request-id": str(uuid.uuid4()),
}
def _client_metadata() -> Dict[str, str]:
"""Match Google's gemini-cli exactly — unrecognized metadata may be rejected."""
return {
"ideType": "IDE_UNSPECIFIED",
"platform": "PLATFORM_UNSPECIFIED",
"pluginType": "GEMINI",
}
def _post_json(
url: str,
body: Dict[str, Any],
access_token: str,
*,
timeout: float = _DEFAULT_REQUEST_TIMEOUT,
user_agent_model: str = "",
) -> Dict[str, Any]:
data = json.dumps(body).encode("utf-8")
request = urllib.request.Request(
url, data=data, method="POST",
headers=_build_headers(access_token, user_agent_model=user_agent_model),
)
try:
with urllib.request.urlopen(request, timeout=timeout) as response:
raw = response.read().decode("utf-8", errors="replace")
return json.loads(raw) if raw else {}
except urllib.error.HTTPError as exc:
detail = ""
try:
detail = exc.read().decode("utf-8", errors="replace")
except Exception:
pass
# Special case: VPC-SC violation should be distinguishable
if _is_vpc_sc_violation(detail):
raise CodeAssistError(
f"VPC-SC policy violation: {detail}",
code="code_assist_vpc_sc",
) from exc
raise CodeAssistError(
f"Code Assist HTTP {exc.code}: {detail or exc.reason}",
code=f"code_assist_http_{exc.code}",
) from exc
except urllib.error.URLError as exc:
raise CodeAssistError(
f"Code Assist request failed: {exc}",
code="code_assist_network_error",
) from exc
def _is_vpc_sc_violation(body: str) -> bool:
"""Detect a VPC Service Controls violation from a response body."""
if not body:
return False
try:
parsed = json.loads(body)
except (json.JSONDecodeError, ValueError):
return "SECURITY_POLICY_VIOLATED" in body
# Walk the nested error structure Google uses
error = parsed.get("error") if isinstance(parsed, dict) else None
if not isinstance(error, dict):
return False
details = error.get("details") or []
if isinstance(details, list):
for item in details:
if isinstance(item, dict):
reason = item.get("reason") or ""
if reason == "SECURITY_POLICY_VIOLATED":
return True
msg = str(error.get("message", ""))
return "SECURITY_POLICY_VIOLATED" in msg
# =============================================================================
# load_code_assist — discovers current tier + assigned project
# =============================================================================
@dataclass
class CodeAssistProjectInfo:
"""Result from ``load_code_assist``."""
current_tier_id: str = ""
cloudaicompanion_project: str = "" # Google-managed project (free tier)
allowed_tiers: List[str] = field(default_factory=list)
raw: Dict[str, Any] = field(default_factory=dict)
def load_code_assist(
access_token: str,
*,
project_id: str = "",
user_agent_model: str = "",
) -> CodeAssistProjectInfo:
"""Call ``POST /v1internal:loadCodeAssist`` with prod → sandbox fallback.
Returns whatever tier + project info Google reports. On VPC-SC violations,
returns a synthetic ``standard-tier`` result so the chain can continue.
"""
body: Dict[str, Any] = {
"metadata": {
"duetProject": project_id,
**_client_metadata(),
},
}
if project_id:
body["cloudaicompanionProject"] = project_id
endpoints = [CODE_ASSIST_ENDPOINT] + FALLBACK_ENDPOINTS
last_err: Optional[Exception] = None
for endpoint in endpoints:
url = f"{endpoint}/v1internal:loadCodeAssist"
try:
resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
return _parse_load_response(resp)
except CodeAssistError as exc:
if exc.code == "code_assist_vpc_sc":
logger.info("VPC-SC violation on %s — defaulting to standard-tier", endpoint)
return CodeAssistProjectInfo(
current_tier_id=STANDARD_TIER_ID,
cloudaicompanion_project=project_id,
)
last_err = exc
logger.warning("loadCodeAssist failed on %s: %s", endpoint, exc)
continue
if last_err:
raise last_err
return CodeAssistProjectInfo()
def _parse_load_response(resp: Dict[str, Any]) -> CodeAssistProjectInfo:
current_tier = resp.get("currentTier") or {}
tier_id = str(current_tier.get("id") or "") if isinstance(current_tier, dict) else ""
project = str(resp.get("cloudaicompanionProject") or "")
allowed = resp.get("allowedTiers") or []
allowed_ids: List[str] = []
if isinstance(allowed, list):
for t in allowed:
if isinstance(t, dict):
tid = str(t.get("id") or "")
if tid:
allowed_ids.append(tid)
return CodeAssistProjectInfo(
current_tier_id=tier_id,
cloudaicompanion_project=project,
allowed_tiers=allowed_ids,
raw=resp,
)
# =============================================================================
# onboard_user — provisions a new user on a tier (with LRO polling)
# =============================================================================
def onboard_user(
access_token: str,
*,
tier_id: str,
project_id: str = "",
user_agent_model: str = "",
) -> Dict[str, Any]:
"""Call ``POST /v1internal:onboardUser`` to provision the user.
For paid tiers, ``project_id`` is REQUIRED (raises ProjectIdRequiredError).
For free tiers, ``project_id`` is optional Google will assign one.
Returns the final operation response. Polls ``/v1internal/<name>`` for up
to ``_ONBOARDING_POLL_ATTEMPTS`` × ``_ONBOARDING_POLL_INTERVAL_SECONDS``
(default: 12 × 5s = 1 min).
"""
if tier_id != FREE_TIER_ID and tier_id != LEGACY_TIER_ID and not project_id:
raise ProjectIdRequiredError(
f"Tier {tier_id!r} requires a GCP project id. "
"Set HERMES_GEMINI_PROJECT_ID or GOOGLE_CLOUD_PROJECT."
)
body: Dict[str, Any] = {
"tierId": tier_id,
"metadata": _client_metadata(),
}
if project_id:
body["cloudaicompanionProject"] = project_id
endpoint = CODE_ASSIST_ENDPOINT
url = f"{endpoint}/v1internal:onboardUser"
resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
# Poll if LRO (long-running operation)
if not resp.get("done"):
op_name = resp.get("name", "")
if not op_name:
return resp
for attempt in range(_ONBOARDING_POLL_ATTEMPTS):
time.sleep(_ONBOARDING_POLL_INTERVAL_SECONDS)
poll_url = f"{endpoint}/v1internal/{op_name}"
try:
poll_resp = _post_json(poll_url, {}, access_token, user_agent_model=user_agent_model)
except CodeAssistError as exc:
logger.warning("Onboarding poll attempt %d failed: %s", attempt + 1, exc)
continue
if poll_resp.get("done"):
return poll_resp
logger.warning("Onboarding did not complete within %d attempts", _ONBOARDING_POLL_ATTEMPTS)
return resp
# =============================================================================
# retrieve_user_quota — for /gquota
# =============================================================================
@dataclass
class QuotaBucket:
model_id: str
token_type: str = ""
remaining_fraction: float = 0.0
reset_time_iso: str = ""
raw: Dict[str, Any] = field(default_factory=dict)
def retrieve_user_quota(
access_token: str,
*,
project_id: str = "",
user_agent_model: str = "",
) -> List[QuotaBucket]:
"""Call ``POST /v1internal:retrieveUserQuota`` and parse ``buckets[]``."""
body: Dict[str, Any] = {}
if project_id:
body["project"] = project_id
url = f"{CODE_ASSIST_ENDPOINT}/v1internal:retrieveUserQuota"
resp = _post_json(url, body, access_token, user_agent_model=user_agent_model)
raw_buckets = resp.get("buckets") or []
buckets: List[QuotaBucket] = []
if not isinstance(raw_buckets, list):
return buckets
for b in raw_buckets:
if not isinstance(b, dict):
continue
buckets.append(QuotaBucket(
model_id=str(b.get("modelId") or ""),
token_type=str(b.get("tokenType") or ""),
remaining_fraction=float(b.get("remainingFraction") or 0.0),
reset_time_iso=str(b.get("resetTime") or ""),
raw=b,
))
return buckets
# =============================================================================
# Project context resolution
# =============================================================================
@dataclass
class ProjectContext:
"""Resolved state for a given OAuth session."""
project_id: str = "" # effective project id sent on requests
managed_project_id: str = "" # Google-assigned project (free tier)
tier_id: str = ""
source: str = "" # "env", "config", "discovered", "onboarded"
def resolve_project_context(
access_token: str,
*,
configured_project_id: str = "",
env_project_id: str = "",
user_agent_model: str = "",
) -> ProjectContext:
"""Figure out what project id + tier to use for requests.
Priority:
1. If configured_project_id or env_project_id is set, use that directly
and short-circuit (no discovery needed).
2. Otherwise call loadCodeAssist to see what Google says.
3. If no tier assigned yet, onboard the user (free tier default).
"""
# Short-circuit: caller provided a project id
if configured_project_id:
return ProjectContext(
project_id=configured_project_id,
tier_id=STANDARD_TIER_ID, # assume paid since they specified one
source="config",
)
if env_project_id:
return ProjectContext(
project_id=env_project_id,
tier_id=STANDARD_TIER_ID,
source="env",
)
# Discover via loadCodeAssist
info = load_code_assist(access_token, user_agent_model=user_agent_model)
effective_project = info.cloudaicompanion_project
tier = info.current_tier_id
if not tier:
# User hasn't been onboarded — provision them on free tier
onboard_resp = onboard_user(
access_token,
tier_id=FREE_TIER_ID,
project_id="",
user_agent_model=user_agent_model,
)
# Re-parse from the onboard response
response_body = onboard_resp.get("response") or {}
if isinstance(response_body, dict):
effective_project = (
effective_project
or str(response_body.get("cloudaicompanionProject") or "")
)
tier = FREE_TIER_ID
source = "onboarded"
else:
source = "discovered"
return ProjectContext(
project_id=effective_project,
managed_project_id=effective_project if tier == FREE_TIER_ID else "",
tier_id=tier,
source=source,
)
File diff suppressed because it is too large Load Diff
+26 -167
View File
@@ -124,7 +124,6 @@ class InsightsEngine:
# Gather raw data
sessions = self._get_sessions(cutoff, source)
tool_usage = self._get_tool_usage(cutoff, source)
skill_usage = self._get_skill_usage(cutoff, source)
message_stats = self._get_message_stats(cutoff, source)
if not sessions:
@@ -136,15 +135,6 @@ class InsightsEngine:
"models": [],
"platforms": [],
"tools": [],
"skills": {
"summary": {
"total_skill_loads": 0,
"total_skill_edits": 0,
"total_skill_actions": 0,
"distinct_skills_used": 0,
},
"top_skills": [],
},
"activity": {},
"top_sessions": [],
}
@@ -154,7 +144,6 @@ class InsightsEngine:
models = self._compute_model_breakdown(sessions)
platforms = self._compute_platform_breakdown(sessions)
tools = self._compute_tool_breakdown(tool_usage)
skills = self._compute_skill_breakdown(skill_usage)
activity = self._compute_activity_patterns(sessions)
top_sessions = self._compute_top_sessions(sessions)
@@ -167,7 +156,6 @@ class InsightsEngine:
"models": models,
"platforms": platforms,
"tools": tools,
"skills": skills,
"activity": activity,
"top_sessions": top_sessions,
}
@@ -296,82 +284,6 @@ class InsightsEngine:
for name, count in tool_counts.most_common()
]
def _get_skill_usage(self, cutoff: float, source: str = None) -> List[Dict]:
"""Extract per-skill usage from assistant tool calls."""
skill_counts: Dict[str, Dict[str, Any]] = {}
if source:
cursor = self._conn.execute(
"""SELECT m.tool_calls, m.timestamp
FROM messages m
JOIN sessions s ON s.id = m.session_id
WHERE s.started_at >= ? AND s.source = ?
AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
(cutoff, source),
)
else:
cursor = self._conn.execute(
"""SELECT m.tool_calls, m.timestamp
FROM messages m
JOIN sessions s ON s.id = m.session_id
WHERE s.started_at >= ?
AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
(cutoff,),
)
for row in cursor.fetchall():
try:
calls = row["tool_calls"]
if isinstance(calls, str):
calls = json.loads(calls)
if not isinstance(calls, list):
continue
except (json.JSONDecodeError, TypeError):
continue
timestamp = row["timestamp"]
for call in calls:
if not isinstance(call, dict):
continue
func = call.get("function", {})
tool_name = func.get("name")
if tool_name not in {"skill_view", "skill_manage"}:
continue
args = func.get("arguments")
if isinstance(args, str):
try:
args = json.loads(args)
except (json.JSONDecodeError, TypeError):
continue
if not isinstance(args, dict):
continue
skill_name = args.get("name")
if not isinstance(skill_name, str) or not skill_name.strip():
continue
entry = skill_counts.setdefault(
skill_name,
{
"skill": skill_name,
"view_count": 0,
"manage_count": 0,
"last_used_at": None,
},
)
if tool_name == "skill_view":
entry["view_count"] += 1
else:
entry["manage_count"] += 1
if timestamp is not None and (
entry["last_used_at"] is None or timestamp > entry["last_used_at"]
):
entry["last_used_at"] = timestamp
return list(skill_counts.values())
def _get_message_stats(self, cutoff: float, source: str = None) -> Dict:
"""Get aggregate message statistics."""
if source:
@@ -563,46 +475,6 @@ class InsightsEngine:
})
return result
def _compute_skill_breakdown(self, skill_usage: List[Dict]) -> Dict[str, Any]:
"""Process per-skill usage into summary + ranked list."""
total_skill_loads = sum(s["view_count"] for s in skill_usage) if skill_usage else 0
total_skill_edits = sum(s["manage_count"] for s in skill_usage) if skill_usage else 0
total_skill_actions = total_skill_loads + total_skill_edits
top_skills = []
for skill in skill_usage:
total_count = skill["view_count"] + skill["manage_count"]
percentage = (total_count / total_skill_actions * 100) if total_skill_actions else 0
top_skills.append({
"skill": skill["skill"],
"view_count": skill["view_count"],
"manage_count": skill["manage_count"],
"total_count": total_count,
"percentage": percentage,
"last_used_at": skill.get("last_used_at"),
})
top_skills.sort(
key=lambda s: (
s["total_count"],
s["view_count"],
s["manage_count"],
s["last_used_at"] or 0,
s["skill"],
),
reverse=True,
)
return {
"summary": {
"total_skill_loads": total_skill_loads,
"total_skill_edits": total_skill_edits,
"total_skill_actions": total_skill_actions,
"distinct_skills_used": len(skill_usage),
},
"top_skills": top_skills,
}
def _compute_activity_patterns(self, sessions: List[Dict]) -> Dict:
"""Analyze activity patterns by day of week and hour."""
day_counts = Counter() # 0=Monday ... 6=Sunday
@@ -762,7 +634,13 @@ class InsightsEngine:
lines.append(f" Sessions: {o['total_sessions']:<12} Messages: {o['total_messages']:,}")
lines.append(f" Tool calls: {o['total_tool_calls']:<12,} User messages: {o['user_messages']:,}")
lines.append(f" Input tokens: {o['total_input_tokens']:<12,} Output tokens: {o['total_output_tokens']:,}")
lines.append(f" Total tokens: {o['total_tokens']:,}")
cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
if cache_total > 0:
lines.append(f" Cache read: {o['total_cache_read_tokens']:<12,} Cache write: {o['total_cache_write_tokens']:,}")
cost_str = f"${o['estimated_cost']:.2f}"
if o.get("models_without_pricing"):
cost_str += " *"
lines.append(f" Total tokens: {o['total_tokens']:<12,} Est. cost: {cost_str}")
if o["total_hours"] > 0:
lines.append(f" Active time: ~{_format_duration(o['total_hours'] * 3600):<11} Avg session: ~{_format_duration(o['avg_session_duration'])}")
lines.append(f" Avg msgs/session: {o['avg_messages_per_session']:.1f}")
@@ -772,10 +650,16 @@ class InsightsEngine:
if report["models"]:
lines.append(" 🤖 Models Used")
lines.append(" " + "" * 56)
lines.append(f" {'Model':<30} {'Sessions':>8} {'Tokens':>12}")
lines.append(f" {'Model':<30} {'Sessions':>8} {'Tokens':>12} {'Cost':>8}")
for m in report["models"]:
model_name = m["model"][:28]
lines.append(f" {model_name:<30} {m['sessions']:>8} {m['total_tokens']:>12,}")
if m.get("has_pricing"):
cost_cell = f"${m['cost']:>6.2f}"
else:
cost_cell = " N/A"
lines.append(f" {model_name:<30} {m['sessions']:>8} {m['total_tokens']:>12,} {cost_cell}")
if o.get("models_without_pricing"):
lines.append(" * Cost N/A for custom/self-hosted models")
lines.append("")
# Platform breakdown
@@ -798,28 +682,6 @@ class InsightsEngine:
lines.append(f" ... and {len(report['tools']) - 15} more tools")
lines.append("")
# Skill usage
skills = report.get("skills", {})
top_skills = skills.get("top_skills", [])
if top_skills:
lines.append(" 🧠 Top Skills")
lines.append(" " + "" * 56)
lines.append(f" {'Skill':<28} {'Loads':>7} {'Edits':>7} {'Last used':>11}")
for skill in top_skills[:10]:
last_used = ""
if skill.get("last_used_at"):
last_used = datetime.fromtimestamp(skill["last_used_at"]).strftime("%b %d")
lines.append(
f" {skill['skill'][:28]:<28} {skill['view_count']:>7,} {skill['manage_count']:>7,} {last_used:>11}"
)
summary = skills.get("summary", {})
lines.append(
f" Distinct skills: {summary.get('distinct_skills_used', 0)} "
f"Loads: {summary.get('total_skill_loads', 0):,} "
f"Edits: {summary.get('total_skill_edits', 0):,}"
)
lines.append("")
# Activity patterns
act = report.get("activity", {})
if act.get("by_day"):
@@ -877,7 +739,15 @@ class InsightsEngine:
# Overview
lines.append(f"**Sessions:** {o['total_sessions']} | **Messages:** {o['total_messages']:,} | **Tool calls:** {o['total_tool_calls']:,}")
lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
if cache_total > 0:
lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,} / cache: {cache_total:,})")
else:
lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
cost_note = ""
if o.get("models_without_pricing"):
cost_note = " _(excludes custom/self-hosted models)_"
lines.append(f"**Est. cost:** ${o['estimated_cost']:.2f}{cost_note}")
if o["total_hours"] > 0:
lines.append(f"**Active time:** ~{_format_duration(o['total_hours'] * 3600)} | **Avg session:** ~{_format_duration(o['avg_session_duration'])}")
lines.append("")
@@ -886,7 +756,8 @@ class InsightsEngine:
if report["models"]:
lines.append("**🤖 Models:**")
for m in report["models"][:5]:
lines.append(f" {m['model'][:25]}{m['sessions']} sessions, {m['total_tokens']:,} tokens")
cost_str = f"${m['cost']:.2f}" if m.get("has_pricing") else "N/A"
lines.append(f" {m['model'][:25]}{m['sessions']} sessions, {m['total_tokens']:,} tokens, {cost_str}")
lines.append("")
# Platforms (if multi-platform)
@@ -903,18 +774,6 @@ class InsightsEngine:
lines.append(f" {t['tool']}{t['count']:,} calls ({t['percentage']:.1f}%)")
lines.append("")
skills = report.get("skills", {})
if skills.get("top_skills"):
lines.append("**🧠 Top Skills:**")
for skill in skills["top_skills"][:5]:
suffix = ""
if skill.get("last_used_at"):
suffix = f", last used {datetime.fromtimestamp(skill['last_used_at']).strftime('%b %d')}"
lines.append(
f" {skill['skill']}{skill['view_count']:,} loads, {skill['manage_count']:,} edits{suffix}"
)
lines.append("")
# Activity summary
act = report.get("activity", {})
if act.get("busiest_day") and act.get("busiest_hour"):
+2 -14
View File
@@ -28,7 +28,6 @@ Usage in run_agent.py:
from __future__ import annotations
import json
import logging
import re
from typing import Any, Dict, List, Optional
@@ -44,22 +43,11 @@ logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
_FENCE_TAG_RE = re.compile(r'</?\s*memory-context\s*>', re.IGNORECASE)
_INTERNAL_CONTEXT_RE = re.compile(
r'<\s*memory-context\s*>[\s\S]*?</\s*memory-context\s*>',
re.IGNORECASE,
)
_INTERNAL_NOTE_RE = re.compile(
r'\[System note:\s*The following is recalled memory context,\s*NOT new user input\.\s*Treat as informational background data\.\]\s*',
re.IGNORECASE,
)
def sanitize_context(text: str) -> str:
"""Strip fence tags, injected context blocks, and system notes from provider output."""
text = _INTERNAL_CONTEXT_RE.sub('', text)
text = _INTERNAL_NOTE_RE.sub('', text)
text = _FENCE_TAG_RE.sub('', text)
return text
"""Strip fence-escape sequences from provider output."""
return _FENCE_TAG_RE.sub('', text)
def build_memory_context_block(raw_context: str) -> str:
+15 -105
View File
@@ -14,8 +14,6 @@ from urllib.parse import urlparse
import requests
import yaml
from utils import base_url_host_matches, base_url_hostname
from hermes_constants import OPENROUTER_MODELS_URL
logger = logging.getLogger(__name__)
@@ -25,7 +23,7 @@ logger = logging.getLogger(__name__)
# are preserved so the full model name reaches cache lookups and server queries.
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
"gemini", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
"qwen-oauth",
"xiaomi",
@@ -35,12 +33,9 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
"google", "google-gemini", "google-ai-studio",
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
"github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
"ollama",
"opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
"mimo", "xiaomi-mimo",
"arcee-ai", "arceeai",
"xai", "x-ai", "x.ai", "grok",
"nvidia", "nim", "nvidia-nim", "nemotron",
"qwen-portal",
})
@@ -105,8 +100,6 @@ DEFAULT_CONTEXT_LENGTHS = {
# fuzzy-match collisions (e.g. "anthropic/claude-sonnet-4" is a
# substring of "anthropic/claude-sonnet-4.6").
# OpenRouter-prefixed models resolve via OpenRouter live API or models.dev.
"claude-opus-4-7": 1000000,
"claude-opus-4.7": 1000000,
"claude-opus-4-6": 1000000,
"claude-sonnet-4-6": 1000000,
"claude-opus-4.6": 1000000,
@@ -118,6 +111,7 @@ DEFAULT_CONTEXT_LENGTHS = {
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
"gpt-5.3-codex-spark": 128000, # Spark variant has reduced 128k context
"gpt-5.1-chat": 128000, # Chat variant has 128k context
"gpt-5": 400000, # GPT-5.x base, mini, codex variants (400k)
"gpt-4.1": 1047576,
@@ -126,6 +120,7 @@ DEFAULT_CONTEXT_LENGTHS = {
"gemini": 1048576,
# Gemma (open models served via AI Studio)
"gemma-4-31b": 256000,
"gemma-4-26b": 256000,
"gemma-3": 131072,
"gemma": 8192, # fallback for older gemma models
# DeepSeek
@@ -159,8 +154,6 @@ DEFAULT_CONTEXT_LENGTHS = {
"grok": 131072, # catch-all (grok-beta, unknown grok-*)
# Kimi
"kimi": 262144,
# Nemotron — NVIDIA's open-weights series (128K context across all sizes)
"nemotron": 131072,
# Arcee
"trinity": 262144,
# OpenRouter
@@ -170,7 +163,6 @@ DEFAULT_CONTEXT_LENGTHS = {
"Qwen/Qwen3.5-35B-A3B": 131072,
"deepseek-ai/DeepSeek-V3.2": 65536,
"moonshotai/Kimi-K2.5": 262144,
"moonshotai/Kimi-K2.6": 262144,
"moonshotai/Kimi-K2-Thinking": 262144,
"MiniMaxAI/MiniMax-M2.5": 204800,
"XiaomiMiMo/MiMo-V2-Flash": 256000,
@@ -213,15 +205,8 @@ def _normalize_base_url(base_url: str) -> str:
return (base_url or "").strip().rstrip("/")
def _auth_headers(api_key: str = "") -> Dict[str, str]:
token = str(api_key or "").strip()
if not token:
return {}
return {"Authorization": f"Bearer {token}"}
def _is_openrouter_base_url(base_url: str) -> bool:
return base_url_host_matches(base_url, "openrouter.ai")
return "openrouter.ai" in _normalize_base_url(base_url).lower()
def _is_custom_endpoint(base_url: str) -> bool:
@@ -251,10 +236,8 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"api.fireworks.ai": "fireworks",
"opencode.ai": "opencode-go",
"api.x.ai": "xai",
"integrate.api.nvidia.com": "nvidia",
"api.xiaomimimo.com": "xiaomi",
"xiaomimimo.com": "xiaomi",
"ollama.com": "ollama-cloud",
}
@@ -319,7 +302,7 @@ def is_local_endpoint(base_url: str) -> bool:
return False
def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
def detect_local_server_type(base_url: str) -> Optional[str]:
"""Detect which local server is running at base_url by probing known endpoints.
Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None.
@@ -331,10 +314,8 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
if server_url.endswith("/v1"):
server_url = server_url[:-3]
headers = _auth_headers(api_key)
try:
with httpx.Client(timeout=2.0, headers=headers) as client:
with httpx.Client(timeout=2.0) as client:
# LM Studio exposes /api/v1/models — check first (most specific)
try:
r = client.get(f"{server_url}/api/v1/models")
@@ -521,59 +502,6 @@ def fetch_endpoint_model_metadata(
headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
last_error: Optional[Exception] = None
if is_local_endpoint(normalized):
try:
if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
response = requests.get(
server_url.rstrip("/") + "/api/v1/models",
headers=headers,
timeout=10,
)
response.raise_for_status()
payload = response.json()
cache: Dict[str, Dict[str, Any]] = {}
for model in payload.get("models", []):
if not isinstance(model, dict):
continue
model_id = model.get("key") or model.get("id")
if not model_id:
continue
entry: Dict[str, Any] = {"name": model.get("name", model_id)}
context_length = None
for inst in model.get("loaded_instances", []) or []:
if not isinstance(inst, dict):
continue
cfg = inst.get("config", {})
ctx = cfg.get("context_length") if isinstance(cfg, dict) else None
if isinstance(ctx, int) and ctx > 0:
context_length = ctx
break
if context_length is None:
context_length = _extract_context_length(model)
if context_length is not None:
entry["context_length"] = context_length
max_completion_tokens = _extract_max_completion_tokens(model)
if max_completion_tokens is not None:
entry["max_completion_tokens"] = max_completion_tokens
pricing = _extract_pricing(model)
if pricing:
entry["pricing"] = pricing
_add_model_aliases(cache, model_id, entry)
alt_id = model.get("id")
if isinstance(alt_id, str) and alt_id and alt_id != model_id:
_add_model_aliases(cache, alt_id, entry)
_endpoint_model_metadata_cache[normalized] = cache
_endpoint_model_metadata_cache_time[normalized] = time.time()
return cache
except Exception as exc:
last_error = exc
for candidate in candidates:
url = candidate.rstrip("/") + "/models"
try:
@@ -780,7 +708,7 @@ def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
return False
def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Optional[int]:
def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
"""Query an Ollama server for the model's context length.
Returns the model's maximum context from GGUF metadata via ``/api/show``,
@@ -798,16 +726,14 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
server_url = server_url[:-3]
try:
server_type = detect_local_server_type(base_url, api_key=api_key)
server_type = detect_local_server_type(base_url)
except Exception:
return None
if server_type != "ollama":
return None
headers = _auth_headers(api_key)
try:
with httpx.Client(timeout=3.0, headers=headers) as client:
with httpx.Client(timeout=3.0) as client:
resp = client.post(f"{server_url}/api/show", json={"name": bare_model})
if resp.status_code != 200:
return None
@@ -835,7 +761,7 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
return None
def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]:
def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
"""Query a local server for the model's context length."""
import httpx
@@ -848,15 +774,13 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
if server_url.endswith("/v1"):
server_url = server_url[:-3]
headers = _auth_headers(api_key)
try:
server_type = detect_local_server_type(base_url, api_key=api_key)
server_type = detect_local_server_type(base_url)
except Exception:
server_type = None
try:
with httpx.Client(timeout=3.0, headers=headers) as client:
with httpx.Client(timeout=3.0) as client:
# Ollama: /api/show returns model details with context info
if server_type == "ollama":
resp = client.post(f"{server_url}/api/show", json={"name": model})
@@ -1067,7 +991,7 @@ def get_model_context_length(
if not _is_known_provider_base_url(base_url):
# 3. Try querying local server directly
if is_local_endpoint(base_url):
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
local_ctx = _query_local_context_length(model, base_url)
if local_ctx and local_ctx > 0:
save_context_length(model, base_url, local_ctx)
return local_ctx
@@ -1081,26 +1005,12 @@ def get_model_context_length(
# 4. Anthropic /v1/models API (only for regular API keys, not OAuth)
if provider == "anthropic" or (
base_url and base_url_hostname(base_url) == "api.anthropic.com"
base_url and "api.anthropic.com" in base_url
):
ctx = _query_anthropic_context_length(model, base_url or "https://api.anthropic.com", api_key)
if ctx:
return ctx
# 4b. AWS Bedrock — use static context length table.
# Bedrock's ListFoundationModels doesn't expose context window sizes,
# so we maintain a curated table in bedrock_adapter.py.
if provider == "bedrock" or (
base_url
and base_url_hostname(base_url).startswith("bedrock-runtime.")
and base_url_host_matches(base_url, "amazonaws.com")
):
try:
from agent.bedrock_adapter import get_bedrock_context_length
return get_bedrock_context_length(model)
except ImportError:
pass # boto3 not installed — fall through to generic resolution
# 5. Provider-aware lookups (before generic OpenRouter cache)
# These are provider-specific and take priority over the generic OR cache,
# since the same model can have different context limits per provider
@@ -1141,7 +1051,7 @@ def get_model_context_length(
# 9. Query local server as last resort
if base_url and is_local_endpoint(base_url):
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
local_ctx = _query_local_context_length(model, base_url)
if local_ctx and local_ctx > 0:
save_context_length(model, base_url, local_ctx)
return local_ctx
+3 -44
View File
@@ -169,7 +169,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
"togetherai": "togetherai",
"perplexity": "perplexity",
"cohere": "cohere",
"ollama-cloud": "ollama-cloud",
}
# Reverse mapping: models.dev → Hermes (built lazily)
@@ -420,10 +419,7 @@ def list_provider_models(provider: str) -> List[str]:
models = _get_provider_models(provider)
if models is None:
return []
return [
mid for mid in models.keys()
if not _should_hide_from_provider_catalog(provider, mid)
]
return list(models.keys())
# Patterns that indicate non-agentic or noise models (TTS, embedding,
@@ -435,43 +431,6 @@ _NOISE_PATTERNS: re.Pattern = re.compile(
re.IGNORECASE,
)
# Google's live Gemini catalogs currently include a mix of stale slugs and
# Gemma models whose TPM quotas are too small for normal Hermes agent traffic.
# Keep capability metadata available for direct/manual use, but hide these from
# the Gemini model catalogs we surface in setup and model selection.
_GOOGLE_HIDDEN_MODELS = frozenset({
# Low-TPM Gemma models that trip Google input-token quota walls under
# agent-style traffic despite advertising large context windows.
"gemma-4-31b-it",
"gemma-4-26b-it",
"gemma-4-26b-a4b-it",
"gemma-3-1b",
"gemma-3-1b-it",
"gemma-3-2b",
"gemma-3-2b-it",
"gemma-3-4b",
"gemma-3-4b-it",
"gemma-3-12b",
"gemma-3-12b-it",
"gemma-3-27b",
"gemma-3-27b-it",
# Stale/retired Google slugs that still surface through models.dev-backed
# Gemini selection but 404 on the current Google endpoints.
"gemini-1.5-flash",
"gemini-1.5-pro",
"gemini-1.5-flash-8b",
"gemini-2.0-flash",
"gemini-2.0-flash-lite",
})
def _should_hide_from_provider_catalog(provider: str, model_id: str) -> bool:
provider_lower = (provider or "").strip().lower()
model_lower = (model_id or "").strip().lower()
if provider_lower in {"gemini", "google"} and model_lower in _GOOGLE_HIDDEN_MODELS:
return True
return False
def list_agentic_models(provider: str) -> List[str]:
"""Return model IDs suitable for agentic use from models.dev.
@@ -488,8 +447,6 @@ def list_agentic_models(provider: str) -> List[str]:
for mid, entry in models.items():
if not isinstance(entry, dict):
continue
if _should_hide_from_provider_catalog(provider, mid):
continue
if not entry.get("tool_call", False):
continue
if _NOISE_PATTERNS.search(mid):
@@ -624,3 +581,5 @@ def get_model_info(
return _parse_model_info(mid, mdata, mdev_id)
return None
-182
View File
@@ -1,182 +0,0 @@
"""Cross-session rate limit guard for Nous Portal.
Writes rate limit state to a shared file so all sessions (CLI, gateway,
cron, auxiliary) can check whether Nous Portal is currently rate-limited
before making requests. Prevents retry amplification when RPH is tapped.
Each 429 from Nous triggers up to 9 API calls per conversation turn
(3 SDK retries x 3 Hermes retries), and every one of those calls counts
against RPH. By recording the rate limit state on first 429 and checking
it before subsequent attempts, we eliminate the amplification effect.
"""
from __future__ import annotations
import json
import logging
import os
import tempfile
import time
from typing import Any, Mapping, Optional
logger = logging.getLogger(__name__)
_STATE_SUBDIR = "rate_limits"
_STATE_FILENAME = "nous.json"
def _state_path() -> str:
"""Return the path to the Nous rate limit state file."""
try:
from hermes_constants import get_hermes_home
base = get_hermes_home()
except ImportError:
base = os.path.join(os.path.expanduser("~"), ".hermes")
return os.path.join(base, _STATE_SUBDIR, _STATE_FILENAME)
def _parse_reset_seconds(headers: Optional[Mapping[str, str]]) -> Optional[float]:
"""Extract the best available reset-time estimate from response headers.
Priority:
1. x-ratelimit-reset-requests-1h (hourly RPH window most useful)
2. x-ratelimit-reset-requests (per-minute RPM window)
3. retry-after (generic HTTP header)
Returns seconds-from-now, or None if no usable header found.
"""
if not headers:
return None
lowered = {k.lower(): v for k, v in headers.items()}
for key in (
"x-ratelimit-reset-requests-1h",
"x-ratelimit-reset-requests",
"retry-after",
):
raw = lowered.get(key)
if raw is not None:
try:
val = float(raw)
if val > 0:
return val
except (TypeError, ValueError):
pass
return None
def record_nous_rate_limit(
*,
headers: Optional[Mapping[str, str]] = None,
error_context: Optional[dict[str, Any]] = None,
default_cooldown: float = 300.0,
) -> None:
"""Record that Nous Portal is rate-limited.
Parses the reset time from response headers or error context.
Falls back to ``default_cooldown`` (5 minutes) if no reset info
is available. Writes to a shared file that all sessions can read.
Args:
headers: HTTP response headers from the 429 error.
error_context: Structured error context from _extract_api_error_context().
default_cooldown: Fallback cooldown in seconds when no header data.
"""
now = time.time()
reset_at = None
# Try headers first (most accurate)
header_seconds = _parse_reset_seconds(headers)
if header_seconds is not None:
reset_at = now + header_seconds
# Try error_context reset_at (from body parsing)
if reset_at is None and isinstance(error_context, dict):
ctx_reset = error_context.get("reset_at")
if isinstance(ctx_reset, (int, float)) and ctx_reset > now:
reset_at = float(ctx_reset)
# Default cooldown
if reset_at is None:
reset_at = now + default_cooldown
path = _state_path()
try:
state_dir = os.path.dirname(path)
os.makedirs(state_dir, exist_ok=True)
state = {
"reset_at": reset_at,
"recorded_at": now,
"reset_seconds": reset_at - now,
}
# Atomic write: write to temp file + rename
fd, tmp_path = tempfile.mkstemp(dir=state_dir, suffix=".tmp")
try:
with os.fdopen(fd, "w") as f:
json.dump(state, f)
os.replace(tmp_path, path)
except Exception:
# Clean up temp file on failure
try:
os.unlink(tmp_path)
except OSError:
pass
raise
logger.info(
"Nous rate limit recorded: resets in %.0fs (at %.0f)",
reset_at - now, reset_at,
)
except Exception as exc:
logger.debug("Failed to write Nous rate limit state: %s", exc)
def nous_rate_limit_remaining() -> Optional[float]:
"""Check if Nous Portal is currently rate-limited.
Returns:
Seconds remaining until reset, or None if not rate-limited.
"""
path = _state_path()
try:
with open(path) as f:
state = json.load(f)
reset_at = state.get("reset_at", 0)
remaining = reset_at - time.time()
if remaining > 0:
return remaining
# Expired — clean up
try:
os.unlink(path)
except OSError:
pass
return None
except (FileNotFoundError, json.JSONDecodeError, KeyError, TypeError):
return None
def clear_nous_rate_limit() -> None:
"""Clear the rate limit state (e.g., after a successful Nous request)."""
try:
os.unlink(_state_path())
except FileNotFoundError:
pass
except OSError as exc:
logger.debug("Failed to clear Nous rate limit state: %s", exc)
def format_remaining(seconds: float) -> str:
"""Format seconds remaining into human-readable duration."""
s = max(0, int(seconds))
if s < 60:
return f"{s}s"
if s < 3600:
m, sec = divmod(s, 60)
return f"{m}m {sec}s" if sec else f"{m}m"
h, remainder = divmod(s, 3600)
m = remainder // 60
return f"{h}h {m}m" if m else f"{h}h"
+10 -19
View File
@@ -152,13 +152,7 @@ MEMORY_GUIDANCE = (
"Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO "
"state to memory; use session_search to recall those from past transcripts. "
"If you've discovered a new way to do something, solved a problem that could be "
"necessary later, save it as a skill with the skill tool.\n"
"Write memories as declarative facts, not instructions to yourself. "
"'User prefers concise responses' ✓ — 'Always respond concisely' ✗. "
"'Project uses pytest with xdist' ✓ — 'Run tests with pytest -n 4' ✗. "
"Imperative phrasing gets re-read as a directive in later sessions and can "
"cause repeated work or override the user's current request. Procedures and "
"workflows belong in skills, not memory."
"necessary later, save it as a skill with the skill tool."
)
SESSION_SEARCH_GUIDANCE = (
@@ -301,9 +295,7 @@ PLATFORM_HINTS = {
),
"telegram": (
"You are on a text messaging communication platform, Telegram. "
"Standard markdown is automatically converted to Telegram format. "
"Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
"`inline code`, ```code blocks```, [links](url), and ## headers. "
"Please do not use markdown as it does not render. "
"You can send media files natively: to deliver a file to the user, "
"include MEDIA:/absolute/path/to/file in your response. Images "
"(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
@@ -619,14 +611,12 @@ def build_skills_system_prompt(
or get_session_env("HERMES_SESSION_PLATFORM")
or ""
)
disabled = get_disabled_skill_names()
cache_key = (
str(skills_dir.resolve()),
tuple(str(d) for d in external_dirs),
tuple(sorted(str(t) for t in (available_tools or set()))),
tuple(sorted(str(ts) for ts in (available_toolsets or set()))),
_platform_hint,
tuple(sorted(disabled)),
)
with _SKILLS_PROMPT_CACHE_LOCK:
cached = _SKILLS_PROMPT_CACHE.get(cache_key)
@@ -634,6 +624,8 @@ def build_skills_system_prompt(
_SKILLS_PROMPT_CACHE.move_to_end(cache_key)
return cached
disabled = get_disabled_skill_names()
# ── Layer 2: disk snapshot ────────────────────────────────────────
snapshot = _load_skills_snapshot(skills_dir)
@@ -660,7 +652,7 @@ def build_skills_system_prompt(
):
continue
skills_by_category.setdefault(category, []).append(
(frontmatter_name, entry.get("description", ""))
(skill_name, entry.get("description", ""))
)
category_descriptions = {
str(k): str(v)
@@ -685,7 +677,7 @@ def build_skills_system_prompt(
):
continue
skills_by_category.setdefault(entry["category"], []).append(
(entry["frontmatter_name"], entry["description"])
(skill_name, entry["description"])
)
# Read category-level DESCRIPTION.md files
@@ -728,10 +720,9 @@ def build_skills_system_prompt(
continue
entry = _build_snapshot_entry(skill_file, ext_dir, frontmatter, desc)
skill_name = entry["skill_name"]
frontmatter_name = entry["frontmatter_name"]
if frontmatter_name in seen_skill_names:
if skill_name in seen_skill_names:
continue
if frontmatter_name in disabled or skill_name in disabled:
if entry["frontmatter_name"] in disabled or skill_name in disabled:
continue
if not _skill_should_show(
extract_skill_conditions(frontmatter),
@@ -739,9 +730,9 @@ def build_skills_system_prompt(
available_toolsets,
):
continue
seen_skill_names.add(frontmatter_name)
seen_skill_names.add(skill_name)
skills_by_category.setdefault(entry["category"], []).append(
(frontmatter_name, entry["description"])
(skill_name, entry["description"])
)
except Exception as e:
logger.debug("Error reading external skill %s: %s", skill_file, e)
-159
View File
@@ -13,48 +13,6 @@ import re
logger = logging.getLogger(__name__)
# Sensitive query-string parameter names (case-insensitive exact match).
# Ported from nearai/ironclaw#2529 — catches tokens whose values don't match
# any known vendor prefix regex (e.g. opaque tokens, short OAuth codes).
_SENSITIVE_QUERY_PARAMS = frozenset({
"access_token",
"refresh_token",
"id_token",
"token",
"api_key",
"apikey",
"client_secret",
"password",
"auth",
"jwt",
"session",
"secret",
"key",
"code", # OAuth authorization codes
"signature", # pre-signed URL signatures
"x-amz-signature",
})
# Sensitive form-urlencoded / JSON body key names (case-insensitive exact match).
# Exact match, NOT substring — "token_count" and "session_id" must NOT match.
# Ported from nearai/ironclaw#2529.
_SENSITIVE_BODY_KEYS = frozenset({
"access_token",
"refresh_token",
"id_token",
"token",
"api_key",
"apikey",
"client_secret",
"password",
"auth",
"jwt",
"secret",
"private_key",
"authorization",
"key",
})
# Snapshot at import time so runtime env mutations (e.g. LLM-generated
# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
@@ -135,45 +93,10 @@ _DB_CONNSTR_RE = re.compile(
re.IGNORECASE,
)
# JWT tokens: header.payload[.signature] — always start with "eyJ" (base64 for "{")
# Matches 1-part (header only), 2-part (header.payload), and full 3-part JWTs.
_JWT_RE = re.compile(
r"eyJ[A-Za-z0-9_-]{10,}" # Header (always starts with eyJ)
r"(?:\.[A-Za-z0-9_=-]{4,}){0,2}" # Optional payload and/or signature
)
# Discord user/role mentions: <@123456789012345678> or <@!123456789012345678>
# Snowflake IDs are 17-20 digit integers that resolve to specific Discord accounts.
_DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
# E.164 phone numbers: +<country><number>, 7-15 digits
# Negative lookahead prevents matching hex strings or identifiers
_SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
# URLs containing query strings — matches `scheme://...?...[# or end]`.
# Used to scan text for URLs whose query params may contain secrets.
# Ported from nearai/ironclaw#2529.
_URL_WITH_QUERY_RE = re.compile(
r"(https?|wss?|ftp)://" # scheme
r"([^\s/?#]+)" # authority (may include userinfo)
r"([^\s?#]*)" # path
r"\?([^\s#]+)" # query (required)
r"(#\S*)?", # optional fragment
)
# URLs containing userinfo — `scheme://user:password@host` for ANY scheme
# (not just DB protocols already covered by _DB_CONNSTR_RE above).
# Catches things like `https://user:token@api.example.com/v1/foo`.
_URL_USERINFO_RE = re.compile(
r"(https?|wss?|ftp)://([^/\s:@]+):([^/\s@]+)@",
)
# Form-urlencoded body detection: conservative — only applies when the entire
# text looks like a query string (k=v&k=v pattern with no newlines).
_FORM_BODY_RE = re.compile(
r"^[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*(?:&[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*)+$"
)
# Compile known prefix patterns into one alternation
_PREFIX_RE = re.compile(
r"(?<![A-Za-z0-9_-])(" + "|".join(_PREFIX_PATTERNS) + r")(?![A-Za-z0-9_-])"
@@ -187,72 +110,6 @@ def _mask_token(token: str) -> str:
return f"{token[:6]}...{token[-4:]}"
def _redact_query_string(query: str) -> str:
"""Redact sensitive parameter values in a URL query string.
Handles `k=v&k=v` format. Sensitive keys (case-insensitive) have values
replaced with `***`. Non-sensitive keys pass through unchanged.
Empty or malformed pairs are preserved as-is.
"""
if not query:
return query
parts = []
for pair in query.split("&"):
if "=" not in pair:
parts.append(pair)
continue
key, _, value = pair.partition("=")
if key.lower() in _SENSITIVE_QUERY_PARAMS:
parts.append(f"{key}=***")
else:
parts.append(pair)
return "&".join(parts)
def _redact_url_query_params(text: str) -> str:
"""Scan text for URLs with query strings and redact sensitive params.
Catches opaque tokens that don't match vendor prefix regexes, e.g.
`https://example.com/cb?code=ABC123&state=xyz` `...?code=***&state=xyz`.
"""
def _sub(m: re.Match) -> str:
scheme = m.group(1)
authority = m.group(2)
path = m.group(3)
query = _redact_query_string(m.group(4))
fragment = m.group(5) or ""
return f"{scheme}://{authority}{path}?{query}{fragment}"
return _URL_WITH_QUERY_RE.sub(_sub, text)
def _redact_url_userinfo(text: str) -> str:
"""Strip `user:password@` from HTTP/WS/FTP URLs.
DB protocols (postgres, mysql, mongodb, redis, amqp) are handled
separately by `_DB_CONNSTR_RE`.
"""
return _URL_USERINFO_RE.sub(
lambda m: f"{m.group(1)}://{m.group(2)}:***@",
text,
)
def _redact_form_body(text: str) -> str:
"""Redact sensitive values in a form-urlencoded body.
Only applies when the entire input looks like a pure form body
(k=v&k=v with no newlines, no other text). Single-line non-form
text passes through unchanged. This is a conservative pass the
`_redact_url_query_params` function handles embedded query strings.
"""
if not text or "\n" in text or "&" not in text:
return text
# The body-body form check is strict: only trigger on clean k=v&k=v.
if not _FORM_BODY_RE.match(text.strip()):
return text
return _redact_query_string(text.strip())
def redact_sensitive_text(text: str) -> str:
"""Apply all redaction patterns to a block of text.
@@ -302,22 +159,6 @@ def redact_sensitive_text(text: str) -> str:
# Database connection string passwords
text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
# JWT tokens (eyJ... — base64-encoded JSON headers)
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
# URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes.
# DB schemes are handled above by _DB_CONNSTR_RE.
text = _redact_url_userinfo(text)
# URL query params containing opaque tokens (?access_token=…&code=…)
text = _redact_url_query_params(text)
# Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
text = _redact_form_body(text)
# Discord user/role mentions (<@snowflake_id>)
text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)
# E.164 phone numbers (Signal, WhatsApp)
def _redact_phone(m):
phone = m.group(1)
-831
View File
@@ -1,831 +0,0 @@
"""
Shell-script hooks bridge.
Reads the ``hooks:`` block from ``cli-config.yaml``, prompts the user for
consent on first use of each ``(event, command)`` pair, and registers
callbacks on the existing plugin hook manager so every existing
``invoke_hook()`` site dispatches to the configured shell scripts with
zero changes to call sites.
Design notes
------------
* Python plugins and shell hooks compose naturally: both flow through
:func:`hermes_cli.plugins.invoke_hook` and its aggregators. Python
plugins are registered first (via ``discover_and_load()``) so their
block decisions win ties over shell-hook blocks.
* Subprocess execution uses ``shlex.split(os.path.expanduser(command))``
with ``shell=False`` no shell injection footguns. Users that need
pipes/redirection wrap their logic in a script.
* First-use consent is gated by the allowlist under
``~/.hermes/shell-hooks-allowlist.json``. Non-TTY callers must pass
``accept_hooks=True`` (resolved from ``--accept-hooks``,
``HERMES_ACCEPT_HOOKS``, or ``hooks_auto_accept: true`` in config)
for registration to succeed without a prompt.
* Registration is idempotent safe to invoke from both the CLI entry
point (``hermes_cli/main.py``) and the gateway entry point
(``gateway/run.py``).
Wire protocol
-------------
**stdin** (JSON, piped to the script)::
{
"hook_event_name": "pre_tool_call",
"tool_name": "terminal",
"tool_input": {"command": "rm -rf /"},
"session_id": "sess_abc123",
"cwd": "/home/user/project",
"extra": {...} # event-specific kwargs
}
**stdout** (JSON, optional anything else is ignored)::
# Block a pre_tool_call (either shape accepted; normalised internally):
{"decision": "block", "reason": "Forbidden command"} # Claude-Code-style
{"action": "block", "message": "Forbidden command"} # Hermes-canonical
# Inject context for pre_llm_call:
{"context": "Today is Friday"}
# Silent no-op:
<empty or any non-matching JSON object>
"""
from __future__ import annotations
import difflib
import json
import logging
import os
import re
import shlex
import subprocess
import sys
import tempfile
import threading
import time
from contextlib import contextmanager
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple
try:
import fcntl # POSIX only; Windows falls back to best-effort without flock.
except ImportError: # pragma: no cover
fcntl = None # type: ignore[assignment]
from hermes_constants import get_hermes_home
logger = logging.getLogger(__name__)
DEFAULT_TIMEOUT_SECONDS = 60
MAX_TIMEOUT_SECONDS = 300
ALLOWLIST_FILENAME = "shell-hooks-allowlist.json"
# (event, matcher, command) triples that have been wired to the plugin
# manager in the current process. Matcher is part of the key because
# the same script can legitimately register for different matchers under
# the same event (e.g. one entry per tool the user wants to gate).
# Second registration attempts for the exact same triple become no-ops
# so the CLI and gateway can both call register_from_config() safely.
_registered: Set[Tuple[str, Optional[str], str]] = set()
_registered_lock = threading.Lock()
# Intra-process lock for allowlist read-modify-write on platforms that
# lack ``fcntl`` (non-POSIX). Kept separate from ``_registered_lock``
# because ``register_from_config`` already holds ``_registered_lock`` when
# it triggers ``_record_approval`` — reusing it here would self-deadlock
# (``threading.Lock`` is non-reentrant). POSIX callers use the sibling
# ``.lock`` file via ``fcntl.flock`` and bypass this.
_allowlist_write_lock = threading.Lock()
@dataclass
class ShellHookSpec:
"""Parsed and validated representation of a single ``hooks:`` entry."""
event: str
command: str
matcher: Optional[str] = None
timeout: int = DEFAULT_TIMEOUT_SECONDS
compiled_matcher: Optional[re.Pattern] = field(default=None, repr=False)
def __post_init__(self) -> None:
# Strip whitespace introduced by YAML quirks (e.g. multi-line string
# folding) — a matcher of " terminal" would otherwise silently fail
# to match "terminal" without any diagnostic.
if isinstance(self.matcher, str):
stripped = self.matcher.strip()
self.matcher = stripped if stripped else None
if self.matcher:
try:
self.compiled_matcher = re.compile(self.matcher)
except re.error as exc:
logger.warning(
"shell hook matcher %r is invalid (%s) — treating as "
"literal equality", self.matcher, exc,
)
self.compiled_matcher = None
def matches_tool(self, tool_name: Optional[str]) -> bool:
if not self.matcher:
return True
if tool_name is None:
return False
if self.compiled_matcher is not None:
return self.compiled_matcher.fullmatch(tool_name) is not None
# compiled_matcher is None only when the regex failed to compile,
# in which case we already warned and fall back to literal equality.
return tool_name == self.matcher
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def register_from_config(
cfg: Optional[Dict[str, Any]],
*,
accept_hooks: bool = False,
) -> List[ShellHookSpec]:
"""Register every configured shell hook on the plugin manager.
``cfg`` is the full parsed config dict (``hermes_cli.config.load_config``
output). The ``hooks:`` key is read out of it. Missing, empty, or
non-dict ``hooks`` is treated as zero configured hooks.
``accept_hooks=True`` skips the TTY consent prompt the caller is
promising that the user has opted in via a flag, env var, or config
setting. ``HERMES_ACCEPT_HOOKS=1`` and ``hooks_auto_accept: true`` are
also honored inside this function so either CLI or gateway call sites
pick them up.
Returns the list of :class:`ShellHookSpec` entries that ended up wired
up on the plugin manager. Skipped entries (unknown events, malformed,
not allowlisted, already registered) are logged but not returned.
"""
if not isinstance(cfg, dict):
return []
effective_accept = _resolve_effective_accept(cfg, accept_hooks)
specs = _parse_hooks_block(cfg.get("hooks"))
if not specs:
return []
registered: List[ShellHookSpec] = []
# Import lazily — avoids circular imports at module-load time.
from hermes_cli.plugins import get_plugin_manager
manager = get_plugin_manager()
# Idempotence + allowlist read happen under the lock; the TTY
# prompt runs outside so other threads aren't parked on a blocking
# input(). Mutation re-takes the lock with a defensive idempotence
# re-check in case two callers ever race through the prompt.
for spec in specs:
key = (spec.event, spec.matcher, spec.command)
with _registered_lock:
if key in _registered:
continue
already_allowlisted = _is_allowlisted(spec.event, spec.command)
if not already_allowlisted:
if not _prompt_and_record(
spec.event, spec.command, accept_hooks=effective_accept,
):
logger.warning(
"shell hook for %s (%s) not allowlisted — skipped. "
"Use --accept-hooks / HERMES_ACCEPT_HOOKS=1 / "
"hooks_auto_accept: true, or approve at the TTY "
"prompt next run.",
spec.event, spec.command,
)
continue
with _registered_lock:
if key in _registered:
continue
manager._hooks.setdefault(spec.event, []).append(_make_callback(spec))
_registered.add(key)
registered.append(spec)
logger.info(
"shell hook registered: %s -> %s (matcher=%s, timeout=%ds)",
spec.event, spec.command, spec.matcher, spec.timeout,
)
return registered
def iter_configured_hooks(cfg: Optional[Dict[str, Any]]) -> List[ShellHookSpec]:
"""Return the parsed ``ShellHookSpec`` entries from config without
registering anything. Used by ``hermes hooks list`` and ``doctor``."""
if not isinstance(cfg, dict):
return []
return _parse_hooks_block(cfg.get("hooks"))
def reset_for_tests() -> None:
"""Clear the idempotence set. Test-only helper."""
with _registered_lock:
_registered.clear()
# ---------------------------------------------------------------------------
# Config parsing
# ---------------------------------------------------------------------------
def _parse_hooks_block(hooks_cfg: Any) -> List[ShellHookSpec]:
"""Normalise the ``hooks:`` dict into a flat list of ``ShellHookSpec``.
Malformed entries warn-and-skip we never raise from config parsing
because a broken hook must not crash the agent.
"""
from hermes_cli.plugins import VALID_HOOKS
if not isinstance(hooks_cfg, dict):
return []
specs: List[ShellHookSpec] = []
for event_name, entries in hooks_cfg.items():
if event_name not in VALID_HOOKS:
suggestion = difflib.get_close_matches(
str(event_name), VALID_HOOKS, n=1, cutoff=0.6,
)
if suggestion:
logger.warning(
"unknown hook event %r in hooks: config — did you mean %r?",
event_name, suggestion[0],
)
else:
logger.warning(
"unknown hook event %r in hooks: config (valid: %s)",
event_name, ", ".join(sorted(VALID_HOOKS)),
)
continue
if entries is None:
continue
if not isinstance(entries, list):
logger.warning(
"hooks.%s must be a list of hook definitions; got %s",
event_name, type(entries).__name__,
)
continue
for i, raw in enumerate(entries):
spec = _parse_single_entry(event_name, i, raw)
if spec is not None:
specs.append(spec)
return specs
def _parse_single_entry(
event: str, index: int, raw: Any,
) -> Optional[ShellHookSpec]:
if not isinstance(raw, dict):
logger.warning(
"hooks.%s[%d] must be a mapping with a 'command' key; got %s",
event, index, type(raw).__name__,
)
return None
command = raw.get("command")
if not isinstance(command, str) or not command.strip():
logger.warning(
"hooks.%s[%d] is missing a non-empty 'command' field",
event, index,
)
return None
matcher = raw.get("matcher")
if matcher is not None and not isinstance(matcher, str):
logger.warning(
"hooks.%s[%d].matcher must be a string regex; ignoring",
event, index,
)
matcher = None
if matcher is not None and event not in ("pre_tool_call", "post_tool_call"):
logger.warning(
"hooks.%s[%d].matcher=%r will be ignored at runtime — the "
"matcher field is only honored for pre_tool_call / "
"post_tool_call. The hook will fire on every %s event.",
event, index, matcher, event,
)
matcher = None
timeout_raw = raw.get("timeout", DEFAULT_TIMEOUT_SECONDS)
try:
timeout = int(timeout_raw)
except (TypeError, ValueError):
logger.warning(
"hooks.%s[%d].timeout must be an int (got %r); using default %ds",
event, index, timeout_raw, DEFAULT_TIMEOUT_SECONDS,
)
timeout = DEFAULT_TIMEOUT_SECONDS
if timeout < 1:
logger.warning(
"hooks.%s[%d].timeout must be >=1; using default %ds",
event, index, DEFAULT_TIMEOUT_SECONDS,
)
timeout = DEFAULT_TIMEOUT_SECONDS
if timeout > MAX_TIMEOUT_SECONDS:
logger.warning(
"hooks.%s[%d].timeout=%ds exceeds max %ds; clamping",
event, index, timeout, MAX_TIMEOUT_SECONDS,
)
timeout = MAX_TIMEOUT_SECONDS
return ShellHookSpec(
event=event,
command=command.strip(),
matcher=matcher,
timeout=timeout,
)
# ---------------------------------------------------------------------------
# Subprocess callback
# ---------------------------------------------------------------------------
_TOP_LEVEL_PAYLOAD_KEYS = {"tool_name", "args", "session_id", "parent_session_id"}
def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
"""Run ``spec.command`` as a subprocess with ``stdin_json`` on stdin.
Returns a diagnostic dict with the same keys for every outcome
(``returncode``, ``stdout``, ``stderr``, ``timed_out``,
``elapsed_seconds``, ``error``). This is the single place the
subprocess is actually invoked both the live callback path
(:func:`_make_callback`) and the CLI test helper (:func:`run_once`)
go through it.
"""
result: Dict[str, Any] = {
"returncode": None,
"stdout": "",
"stderr": "",
"timed_out": False,
"elapsed_seconds": 0.0,
"error": None,
}
try:
argv = shlex.split(os.path.expanduser(spec.command))
except ValueError as exc:
result["error"] = f"command {spec.command!r} cannot be parsed: {exc}"
return result
if not argv:
result["error"] = "empty command"
return result
t0 = time.monotonic()
try:
proc = subprocess.run(
argv,
input=stdin_json,
capture_output=True,
timeout=spec.timeout,
text=True,
shell=False,
)
except subprocess.TimeoutExpired:
result["timed_out"] = True
result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
return result
except FileNotFoundError:
result["error"] = "command not found"
return result
except PermissionError:
result["error"] = "command not executable"
return result
except Exception as exc: # pragma: no cover — defensive
result["error"] = str(exc)
return result
result["returncode"] = proc.returncode
result["stdout"] = proc.stdout or ""
result["stderr"] = proc.stderr or ""
result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
return result
def _make_callback(spec: ShellHookSpec) -> Callable[..., Optional[Dict[str, Any]]]:
"""Build the closure that ``invoke_hook()`` will call per firing."""
def _callback(**kwargs: Any) -> Optional[Dict[str, Any]]:
# Matcher gate — only meaningful for tool-scoped events.
if spec.event in ("pre_tool_call", "post_tool_call"):
if not spec.matches_tool(kwargs.get("tool_name")):
return None
r = _spawn(spec, _serialize_payload(spec.event, kwargs))
if r["error"]:
logger.warning(
"shell hook failed (event=%s command=%s): %s",
spec.event, spec.command, r["error"],
)
return None
if r["timed_out"]:
logger.warning(
"shell hook timed out after %.2fs (event=%s command=%s)",
r["elapsed_seconds"], spec.event, spec.command,
)
return None
stderr = r["stderr"].strip()
if stderr:
logger.debug(
"shell hook stderr (event=%s command=%s): %s",
spec.event, spec.command, stderr[:400],
)
# Non-zero exits: log but still parse stdout so scripts that
# signal failure via exit code can also return a block directive.
if r["returncode"] != 0:
logger.warning(
"shell hook exited %d (event=%s command=%s); stderr=%s",
r["returncode"], spec.event, spec.command, stderr[:400],
)
return _parse_response(spec.event, r["stdout"])
_callback.__name__ = f"shell_hook[{spec.event}:{spec.command}]"
_callback.__qualname__ = _callback.__name__
return _callback
def _serialize_payload(event: str, kwargs: Dict[str, Any]) -> str:
"""Render the stdin JSON payload. Unserialisable values are
stringified via ``default=str`` rather than dropped."""
extras = {k: v for k, v in kwargs.items() if k not in _TOP_LEVEL_PAYLOAD_KEYS}
try:
cwd = str(Path.cwd())
except OSError:
cwd = ""
payload = {
"hook_event_name": event,
"tool_name": kwargs.get("tool_name"),
"tool_input": kwargs.get("args") if isinstance(kwargs.get("args"), dict) else None,
"session_id": kwargs.get("session_id") or kwargs.get("parent_session_id") or "",
"cwd": cwd,
"extra": extras,
}
return json.dumps(payload, ensure_ascii=False, default=str)
def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]:
"""Translate stdout JSON into a Hermes wire-shape dict.
For ``pre_tool_call`` the Claude-Code-style ``{"decision": "block",
"reason": "..."}`` payload is translated into the canonical Hermes
``{"action": "block", "message": "..."}`` shape expected by
:func:`hermes_cli.plugins.get_pre_tool_call_block_message`. This is
the single most important correctness invariant in this module
skipping the translation silently breaks every ``pre_tool_call``
block directive.
For ``pre_llm_call``, ``{"context": "..."}`` is passed through
unchanged to match the existing plugin-hook contract.
Anything else returns ``None``.
"""
stdout = (stdout or "").strip()
if not stdout:
return None
try:
data = json.loads(stdout)
except json.JSONDecodeError:
logger.warning(
"shell hook stdout was not valid JSON (event=%s): %s",
event, stdout[:200],
)
return None
if not isinstance(data, dict):
return None
if event == "pre_tool_call":
if data.get("action") == "block":
message = data.get("message") or data.get("reason") or ""
if isinstance(message, str) and message:
return {"action": "block", "message": message}
if data.get("decision") == "block":
message = data.get("reason") or data.get("message") or ""
if isinstance(message, str) and message:
return {"action": "block", "message": message}
return None
context = data.get("context")
if isinstance(context, str) and context.strip():
return {"context": context}
return None
# ---------------------------------------------------------------------------
# Allowlist / consent
# ---------------------------------------------------------------------------
def allowlist_path() -> Path:
"""Path to the per-user shell-hook allowlist file."""
return get_hermes_home() / ALLOWLIST_FILENAME
def load_allowlist() -> Dict[str, Any]:
"""Return the parsed allowlist, or an empty skeleton if absent."""
try:
raw = json.loads(allowlist_path().read_text())
except (FileNotFoundError, json.JSONDecodeError, OSError):
return {"approvals": []}
if not isinstance(raw, dict):
return {"approvals": []}
approvals = raw.get("approvals")
if not isinstance(approvals, list):
raw["approvals"] = []
return raw
def save_allowlist(data: Dict[str, Any]) -> None:
"""Atomically persist the allowlist via per-process ``mkstemp`` +
``os.replace``. Cross-process read-modify-write races are handled
by :func:`_locked_update_approvals` (``fcntl.flock``). On OSError
the failure is logged; the in-process hook still registers but
the approval won't survive across runs."""
p = allowlist_path()
try:
p.parent.mkdir(parents=True, exist_ok=True)
fd, tmp_path = tempfile.mkstemp(
prefix=f"{p.name}.", suffix=".tmp", dir=str(p.parent),
)
try:
with os.fdopen(fd, "w") as fh:
fh.write(json.dumps(data, indent=2, sort_keys=True))
os.replace(tmp_path, p)
except Exception:
try:
os.unlink(tmp_path)
except OSError:
pass
raise
except OSError as exc:
logger.warning(
"Failed to persist shell hook allowlist to %s: %s. "
"The approval is in-memory for this run, but the next "
"startup will re-prompt (or skip registration on non-TTY "
"runs without --accept-hooks / HERMES_ACCEPT_HOOKS).",
p, exc,
)
def _is_allowlisted(event: str, command: str) -> bool:
data = load_allowlist()
return any(
isinstance(e, dict)
and e.get("event") == event
and e.get("command") == command
for e in data.get("approvals", [])
)
@contextmanager
def _locked_update_approvals() -> Iterator[Dict[str, Any]]:
"""Serialise read-modify-write on the allowlist across processes.
Holds an exclusive ``flock`` on a sibling lock file for the duration
of the update so concurrent ``_record_approval``/``revoke`` callers
cannot clobber each other's changes (the race Codex reproduced with
2050 simultaneous writers). Falls back to an in-process lock on
platforms without ``fcntl``.
"""
p = allowlist_path()
p.parent.mkdir(parents=True, exist_ok=True)
lock_path = p.with_suffix(p.suffix + ".lock")
if fcntl is None: # pragma: no cover — non-POSIX fallback
with _allowlist_write_lock:
data = load_allowlist()
yield data
save_allowlist(data)
return
with open(lock_path, "a+") as lock_fh:
fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)
try:
data = load_allowlist()
yield data
save_allowlist(data)
finally:
fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN)
def _prompt_and_record(
event: str, command: str, *, accept_hooks: bool,
) -> bool:
"""Decide whether to approve an unseen ``(event, command)`` pair.
Returns ``True`` iff the approval was granted and recorded.
"""
if accept_hooks:
_record_approval(event, command)
logger.info(
"shell hook auto-approved via --accept-hooks / env / config: "
"%s -> %s", event, command,
)
return True
if not sys.stdin.isatty():
return False
print(
f"\n⚠ Hermes is about to register a shell hook that will run a\n"
f" command on your behalf.\n\n"
f" Event: {event}\n"
f" Command: {command}\n\n"
f" Commands run with your full user credentials. Only approve\n"
f" commands you trust."
)
try:
answer = input("Allow this hook to run? [y/N]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
print() # keep the terminal tidy after ^C
return False
if answer in ("y", "yes"):
_record_approval(event, command)
return True
return False
def _record_approval(event: str, command: str) -> None:
entry = {
"event": event,
"command": command,
"approved_at": _utc_now_iso(),
"script_mtime_at_approval": script_mtime_iso(command),
}
with _locked_update_approvals() as data:
data["approvals"] = [
e for e in data.get("approvals", [])
if not (
isinstance(e, dict)
and e.get("event") == event
and e.get("command") == command
)
] + [entry]
def _utc_now_iso() -> str:
return datetime.now(tz=timezone.utc).isoformat().replace("+00:00", "Z")
def revoke(command: str) -> int:
"""Remove every allowlist entry matching ``command``.
Returns the number of entries removed. Does not unregister any
callbacks that are already live on the plugin manager in the current
process restart the CLI / gateway to drop them.
"""
with _locked_update_approvals() as data:
before = len(data.get("approvals", []))
data["approvals"] = [
e for e in data.get("approvals", [])
if not (isinstance(e, dict) and e.get("command") == command)
]
after = len(data["approvals"])
return before - after
_SCRIPT_EXTENSIONS: Tuple[str, ...] = (
".sh", ".bash", ".zsh", ".fish",
".py", ".pyw",
".rb", ".pl", ".lua",
".js", ".mjs", ".cjs", ".ts",
)
def _command_script_path(command: str) -> str:
"""Return the script path from ``command`` for doctor / drift checks.
Prefers a token ending in a known script extension, then a token
containing ``/`` or leading ``~``, then the first token. Handles
``python3 /path/hook.py``, ``/usr/bin/env bash hook.sh``, and the
common bare-path form.
"""
try:
parts = shlex.split(command)
except ValueError:
return command
if not parts:
return command
for part in parts:
if part.lower().endswith(_SCRIPT_EXTENSIONS):
return part
for part in parts:
if "/" in part or part.startswith("~"):
return part
return parts[0]
# ---------------------------------------------------------------------------
# Helpers for accept-hooks resolution
# ---------------------------------------------------------------------------
def _resolve_effective_accept(
cfg: Dict[str, Any], accept_hooks_arg: bool,
) -> bool:
"""Combine all three opt-in channels into a single boolean.
Precedence (any truthy source flips us on):
1. ``--accept-hooks`` flag (CLI) / explicit argument
2. ``HERMES_ACCEPT_HOOKS`` env var
3. ``hooks_auto_accept: true`` in ``cli-config.yaml``
"""
if accept_hooks_arg:
return True
env = os.environ.get("HERMES_ACCEPT_HOOKS", "").strip().lower()
if env in ("1", "true", "yes", "on"):
return True
cfg_val = cfg.get("hooks_auto_accept", False)
return bool(cfg_val)
# ---------------------------------------------------------------------------
# Introspection (used by `hermes hooks` CLI)
# ---------------------------------------------------------------------------
def allowlist_entry_for(event: str, command: str) -> Optional[Dict[str, Any]]:
"""Return the allowlist record for this pair, if any."""
for e in load_allowlist().get("approvals", []):
if (
isinstance(e, dict)
and e.get("event") == event
and e.get("command") == command
):
return e
return None
def script_mtime_iso(command: str) -> Optional[str]:
"""ISO-8601 mtime of the resolved script path, or ``None`` if the
script is missing."""
path = _command_script_path(command)
if not path:
return None
try:
expanded = os.path.expanduser(path)
return datetime.fromtimestamp(
os.path.getmtime(expanded), tz=timezone.utc,
).isoformat().replace("+00:00", "Z")
except OSError:
return None
def script_is_executable(command: str) -> bool:
"""Return ``True`` iff ``command`` is runnable as configured.
For a bare invocation (``/path/hook.sh``) the script itself must be
executable. For interpreter-prefixed commands (``python3
/path/hook.py``, ``/usr/bin/env bash hook.sh``) the script just has
to be readable the interpreter doesn't care about the ``X_OK``
bit. Mirrors what ``_spawn`` would actually do at runtime."""
path = _command_script_path(command)
if not path:
return False
expanded = os.path.expanduser(path)
if not os.path.isfile(expanded):
return False
try:
argv = shlex.split(command)
except ValueError:
return False
is_bare_invocation = bool(argv) and argv[0] == path
required = os.X_OK if is_bare_invocation else os.R_OK
return os.access(expanded, required)
def run_once(
spec: ShellHookSpec, kwargs: Dict[str, Any],
) -> Dict[str, Any]:
"""Fire a single shell-hook invocation with a synthetic payload.
Used by ``hermes hooks test`` and ``hermes hooks doctor``.
``kwargs`` is the same dict that :func:`hermes_cli.plugins.invoke_hook`
would pass at runtime. It is routed through :func:`_serialize_payload`
so the synthetic stdin exactly matches what a real hook firing would
produce otherwise scripts tested via ``hermes hooks test`` could
diverge silently from production behaviour.
Returns the :func:`_spawn` diagnostic dict plus a ``parsed`` field
holding the canonical Hermes-wire-shape response."""
stdin_json = _serialize_payload(spec.event, kwargs)
result = _spawn(spec, stdin_json)
result["parsed"] = _parse_response(spec.event, result["stdout"])
return result
+5 -145
View File
@@ -8,13 +8,10 @@ can invoke skills via /skill-name commands and prompt-only built-ins like
import json
import logging
import re
import subprocess
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional
from hermes_constants import display_hermes_home
logger = logging.getLogger(__name__)
_skill_commands: Dict[str, Dict[str, Any]] = {}
@@ -23,110 +20,6 @@ _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
# left as-is so the user can debug them.
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
# Matches inline shell snippets like: !`date +%Y-%m-%d`
# Non-greedy, single-line only — no newlines inside the backticks.
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
# Cap inline-shell output so a runaway command can't blow out the context.
_INLINE_SHELL_MAX_OUTPUT = 4000
def _load_skills_config() -> dict:
"""Load the ``skills`` section of config.yaml (best-effort)."""
try:
from hermes_cli.config import load_config
cfg = load_config() or {}
skills_cfg = cfg.get("skills")
if isinstance(skills_cfg, dict):
return skills_cfg
except Exception:
logger.debug("Could not read skills config", exc_info=True)
return {}
def _substitute_template_vars(
content: str,
skill_dir: Path | None,
session_id: str | None,
) -> str:
"""Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
Only substitutes tokens for which a concrete value is available
unresolved tokens are left in place so the author can spot them.
"""
if not content:
return content
skill_dir_str = str(skill_dir) if skill_dir else None
def _replace(match: re.Match) -> str:
token = match.group(1)
if token == "HERMES_SKILL_DIR" and skill_dir_str:
return skill_dir_str
if token == "HERMES_SESSION_ID" and session_id:
return str(session_id)
return match.group(0)
return _SKILL_TEMPLATE_RE.sub(_replace, content)
def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
"""Execute a single inline-shell snippet and return its stdout (trimmed).
Failures return a short ``[inline-shell error: ...]`` marker instead of
raising, so one bad snippet can't wreck the whole skill message.
"""
try:
completed = subprocess.run(
["bash", "-c", command],
cwd=str(cwd) if cwd else None,
capture_output=True,
text=True,
timeout=max(1, int(timeout)),
check=False,
)
except subprocess.TimeoutExpired:
return f"[inline-shell timeout after {timeout}s: {command}]"
except FileNotFoundError:
return f"[inline-shell error: bash not found]"
except Exception as exc:
return f"[inline-shell error: {exc}]"
output = (completed.stdout or "").rstrip("\n")
if not output and completed.stderr:
output = completed.stderr.rstrip("\n")
if len(output) > _INLINE_SHELL_MAX_OUTPUT:
output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
return output
def _expand_inline_shell(
content: str,
skill_dir: Path | None,
timeout: int,
) -> str:
"""Replace every !`cmd` snippet in ``content`` with its stdout.
Runs each snippet with the skill directory as CWD so relative paths in
the snippet work the way the author expects.
"""
if "!`" not in content:
return content
def _replace(match: re.Match) -> str:
cmd = match.group(1).strip()
if not cmd:
return ""
return _run_inline_shell(cmd, skill_dir, timeout)
return _INLINE_SHELL_RE.sub(_replace, content)
def build_plan_path(
user_instruction: str = "",
@@ -177,14 +70,7 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
skill_name = str(loaded_skill.get("name") or normalized)
skill_path = str(loaded_skill.get("path") or "")
skill_dir = None
# Prefer the absolute skill_dir returned by skill_view() — this is
# correct for both local and external skills. Fall back to the old
# SKILLS_DIR-relative reconstruction only when skill_dir is absent
# (e.g. legacy skill_view responses).
abs_skill_dir = loaded_skill.get("skill_dir")
if abs_skill_dir:
skill_dir = Path(abs_skill_dir)
elif skill_path:
if skill_path:
try:
skill_dir = SKILLS_DIR / Path(skill_path).parent
except Exception:
@@ -222,7 +108,7 @@ def _inject_skill_config(loaded_skill: dict[str, Any], parts: list[str]) -> None
if not resolved:
return
lines = ["", f"[Skill config (from {display_hermes_home()}/config.yaml):"]
lines = ["", "[Skill config (from ~/.hermes/config.yaml):"]
for key, value in resolved.items():
display_val = str(value) if value else "(not set)"
lines.append(f" {key} = {display_val}")
@@ -238,36 +124,14 @@ def _build_skill_message(
activation_note: str,
user_instruction: str = "",
runtime_note: str = "",
session_id: str | None = None,
) -> str:
"""Format a loaded skill into a user/system message payload."""
from tools.skills_tool import SKILLS_DIR
content = str(loaded_skill.get("content") or "")
# ── Template substitution and inline-shell expansion ──
# Done before anything else so downstream blocks (setup notes,
# supporting-file hints) see the expanded content.
skills_cfg = _load_skills_config()
if skills_cfg.get("template_vars", True):
content = _substitute_template_vars(content, skill_dir, session_id)
if skills_cfg.get("inline_shell", False):
timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10)
content = _expand_inline_shell(content, skill_dir, timeout)
parts = [activation_note, "", content.strip()]
# ── Inject the absolute skill directory so the agent can reference
# bundled scripts without an extra skill_view() round-trip. ──
if skill_dir:
parts.append("")
parts.append(f"[Skill directory: {skill_dir}]")
parts.append(
"Resolve any relative paths in this skill (e.g. `scripts/foo.js`, "
"`templates/config.yaml`) against that directory, then run them "
"with the terminal tool using the absolute path."
)
# ── Inject resolved skill config values ──
_inject_skill_config(loaded_skill, parts)
@@ -315,13 +179,11 @@ def _build_skill_message(
# Skill is from an external dir — use the skill name instead
skill_view_target = skill_dir.name
parts.append("")
parts.append("[This skill has supporting files:]")
parts.append("[This skill has supporting files you can load with the skill_view tool:]")
for sf in supporting:
parts.append(f"- {sf} -> {skill_dir / sf}")
parts.append(f"- {sf}")
parts.append(
f'\nLoad any of these with skill_view(name="{skill_view_target}", '
f'file_path="<path>"), or run scripts directly by absolute path '
f"(e.g. `node {skill_dir}/scripts/foo.js`)."
f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="<path>")'
)
if user_instruction:
@@ -461,7 +323,6 @@ def build_skill_invocation_message(
activation_note,
user_instruction=user_instruction,
runtime_note=runtime_note,
session_id=task_id,
)
@@ -500,7 +361,6 @@ def build_preloaded_skills_prompt(
loaded_skill,
skill_dir,
activation_note,
session_id=task_id,
)
)
loaded_names.append(skill_name)
+195
View File
@@ -0,0 +1,195 @@
"""Helpers for optional cheap-vs-strong model routing."""
from __future__ import annotations
import os
import re
from typing import Any, Dict, Optional
from utils import is_truthy_value
_COMPLEX_KEYWORDS = {
"debug",
"debugging",
"implement",
"implementation",
"refactor",
"patch",
"traceback",
"stacktrace",
"exception",
"error",
"analyze",
"analysis",
"investigate",
"architecture",
"design",
"compare",
"benchmark",
"optimize",
"optimise",
"review",
"terminal",
"shell",
"tool",
"tools",
"pytest",
"test",
"tests",
"plan",
"planning",
"delegate",
"subagent",
"cron",
"docker",
"kubernetes",
}
_URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE)
def _coerce_bool(value: Any, default: bool = False) -> bool:
return is_truthy_value(value, default=default)
def _coerce_int(value: Any, default: int) -> int:
try:
return int(value)
except (TypeError, ValueError):
return default
def choose_cheap_model_route(user_message: str, routing_config: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
"""Return the configured cheap-model route when a message looks simple.
Conservative by design: if the message has signs of code/tool/debugging/
long-form work, keep the primary model.
"""
cfg = routing_config or {}
if not _coerce_bool(cfg.get("enabled"), False):
return None
cheap_model = cfg.get("cheap_model") or {}
if not isinstance(cheap_model, dict):
return None
provider = str(cheap_model.get("provider") or "").strip().lower()
model = str(cheap_model.get("model") or "").strip()
if not provider or not model:
return None
text = (user_message or "").strip()
if not text:
return None
max_chars = _coerce_int(cfg.get("max_simple_chars"), 160)
max_words = _coerce_int(cfg.get("max_simple_words"), 28)
if len(text) > max_chars:
return None
if len(text.split()) > max_words:
return None
if text.count("\n") > 1:
return None
if "```" in text or "`" in text:
return None
if _URL_RE.search(text):
return None
lowered = text.lower()
words = {token.strip(".,:;!?()[]{}\"'`") for token in lowered.split()}
if words & _COMPLEX_KEYWORDS:
return None
route = dict(cheap_model)
route["provider"] = provider
route["model"] = model
route["routing_reason"] = "simple_turn"
return route
def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any]], primary: Dict[str, Any]) -> Dict[str, Any]:
"""Resolve the effective model/runtime for one turn.
Returns a dict with model/runtime/signature/label fields.
"""
route = choose_cheap_model_route(user_message, routing_config)
if not route:
return {
"model": primary.get("model"),
"runtime": {
"api_key": primary.get("api_key"),
"base_url": primary.get("base_url"),
"provider": primary.get("provider"),
"api_mode": primary.get("api_mode"),
"command": primary.get("command"),
"args": list(primary.get("args") or []),
"credential_pool": primary.get("credential_pool"),
},
"label": None,
"signature": (
primary.get("model"),
primary.get("provider"),
primary.get("base_url"),
primary.get("api_mode"),
primary.get("command"),
tuple(primary.get("args") or ()),
),
}
from hermes_cli.runtime_provider import resolve_runtime_provider
explicit_api_key = None
api_key_env = str(route.get("api_key_env") or "").strip()
if api_key_env:
explicit_api_key = os.getenv(api_key_env) or None
try:
runtime = resolve_runtime_provider(
requested=route.get("provider"),
explicit_api_key=explicit_api_key,
explicit_base_url=route.get("base_url"),
)
except Exception:
return {
"model": primary.get("model"),
"runtime": {
"api_key": primary.get("api_key"),
"base_url": primary.get("base_url"),
"provider": primary.get("provider"),
"api_mode": primary.get("api_mode"),
"command": primary.get("command"),
"args": list(primary.get("args") or []),
"credential_pool": primary.get("credential_pool"),
},
"label": None,
"signature": (
primary.get("model"),
primary.get("provider"),
primary.get("base_url"),
primary.get("api_mode"),
primary.get("command"),
tuple(primary.get("args") or ()),
),
}
return {
"model": route.get("model"),
"runtime": {
"api_key": runtime.get("api_key"),
"base_url": runtime.get("base_url"),
"provider": runtime.get("provider"),
"api_mode": runtime.get("api_mode"),
"command": runtime.get("command"),
"args": list(runtime.get("args") or []),
"credential_pool": runtime.get("credential_pool"),
},
"label": f"smart route → {route.get('model')} ({runtime.get('provider')})",
"signature": (
route.get("model"),
runtime.get("provider"),
runtime.get("base_url"),
runtime.get("api_mode"),
runtime.get("command"),
tuple(runtime.get("args") or ()),
),
}
-39
View File
@@ -1,39 +0,0 @@
"""Transport layer types and registry for provider response normalization.
Usage:
from agent.transports import get_transport
transport = get_transport("anthropic_messages")
result = transport.normalize_response(raw_response)
"""
from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401
_REGISTRY: dict = {}
def register_transport(api_mode: str, transport_cls: type) -> None:
"""Register a transport class for an api_mode string."""
_REGISTRY[api_mode] = transport_cls
def get_transport(api_mode: str):
"""Get a transport instance for the given api_mode.
Returns None if no transport is registered for this api_mode.
This allows gradual migration call sites can check for None
and fall back to the legacy code path.
"""
if not _REGISTRY:
_discover_transports()
cls = _REGISTRY.get(api_mode)
if cls is None:
return None
return cls()
def _discover_transports() -> None:
"""Import all transport modules to trigger auto-registration."""
try:
import agent.transports.anthropic # noqa: F401
except ImportError:
pass
-129
View File
@@ -1,129 +0,0 @@
"""Anthropic Messages API transport.
Delegates to the existing adapter functions in agent/anthropic_adapter.py.
This transport owns format conversion and normalization NOT client lifecycle.
"""
from typing import Any, Dict, List, Optional
from agent.transports.base import ProviderTransport
from agent.transports.types import NormalizedResponse
class AnthropicTransport(ProviderTransport):
"""Transport for api_mode='anthropic_messages'.
Wraps the existing functions in anthropic_adapter.py behind the
ProviderTransport ABC. Each method delegates no logic is duplicated.
"""
@property
def api_mode(self) -> str:
return "anthropic_messages"
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
"""Convert OpenAI messages to Anthropic (system, messages) tuple.
kwargs:
base_url: Optional[str] affects thinking signature handling.
"""
from agent.anthropic_adapter import convert_messages_to_anthropic
base_url = kwargs.get("base_url")
return convert_messages_to_anthropic(messages, base_url=base_url)
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
"""Convert OpenAI tool schemas to Anthropic input_schema format."""
from agent.anthropic_adapter import convert_tools_to_anthropic
return convert_tools_to_anthropic(tools)
def build_kwargs(
self,
model: str,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
**params,
) -> Dict[str, Any]:
"""Build Anthropic messages.create() kwargs.
Calls convert_messages and convert_tools internally.
params (all optional):
max_tokens: int
reasoning_config: dict | None
tool_choice: str | None
is_oauth: bool
preserve_dots: bool
context_length: int | None
base_url: str | None
fast_mode: bool
"""
from agent.anthropic_adapter import build_anthropic_kwargs
return build_anthropic_kwargs(
model=model,
messages=messages,
tools=tools,
max_tokens=params.get("max_tokens", 16384),
reasoning_config=params.get("reasoning_config"),
tool_choice=params.get("tool_choice"),
is_oauth=params.get("is_oauth", False),
preserve_dots=params.get("preserve_dots", False),
context_length=params.get("context_length"),
base_url=params.get("base_url"),
fast_mode=params.get("fast_mode", False),
)
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize Anthropic response to NormalizedResponse.
kwargs:
strip_tool_prefix: bool strip 'mcp_mcp_' prefixes from tool names.
"""
from agent.anthropic_adapter import normalize_anthropic_response_v2
strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
return normalize_anthropic_response_v2(response, strip_tool_prefix=strip_tool_prefix)
def validate_response(self, response: Any) -> bool:
"""Check Anthropic response structure is valid."""
if response is None:
return False
content_blocks = getattr(response, "content", None)
if not isinstance(content_blocks, list):
return False
if not content_blocks:
return False
return True
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
"""Extract Anthropic cache_read and cache_creation token counts."""
usage = getattr(response, "usage", None)
if usage is None:
return None
cached = getattr(usage, "cache_read_input_tokens", 0) or 0
written = getattr(usage, "cache_creation_input_tokens", 0) or 0
if cached or written:
return {"cached_tokens": cached, "creation_tokens": written}
return None
# Promote the adapter's canonical mapping to module level so it's shared
_STOP_REASON_MAP = {
"end_turn": "stop",
"tool_use": "tool_calls",
"max_tokens": "length",
"stop_sequence": "stop",
"refusal": "content_filter",
"model_context_window_exceeded": "length",
}
def map_finish_reason(self, raw_reason: str) -> str:
"""Map Anthropic stop_reason to OpenAI finish_reason."""
return self._STOP_REASON_MAP.get(raw_reason, "stop")
# Auto-register on import
from agent.transports import register_transport # noqa: E402
register_transport("anthropic_messages", AnthropicTransport)
-89
View File
@@ -1,89 +0,0 @@
"""Abstract base for provider transports.
A transport owns the data path for one api_mode:
convert_messages convert_tools build_kwargs normalize_response
It does NOT own: client construction, streaming, credential refresh,
prompt caching, interrupt handling, or retry logic. Those stay on AIAgent.
"""
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
from agent.transports.types import NormalizedResponse
class ProviderTransport(ABC):
"""Base class for provider-specific format conversion and normalization."""
@property
@abstractmethod
def api_mode(self) -> str:
"""The api_mode string this transport handles (e.g. 'anthropic_messages')."""
...
@abstractmethod
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
"""Convert OpenAI-format messages to provider-native format.
Returns provider-specific structure (e.g. (system, messages) for Anthropic,
or the messages list unchanged for chat_completions).
"""
...
@abstractmethod
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
"""Convert OpenAI-format tool definitions to provider-native format.
Returns provider-specific tool list (e.g. Anthropic input_schema format).
"""
...
@abstractmethod
def build_kwargs(
self,
model: str,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
**params,
) -> Dict[str, Any]:
"""Build the complete API call kwargs dict.
This is the primary entry point it typically calls convert_messages()
and convert_tools() internally, then adds model-specific config.
Returns a dict ready to be passed to the provider's SDK client.
"""
...
@abstractmethod
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize a raw provider response to the shared NormalizedResponse type.
This is the only method that returns a transport-layer type.
"""
...
def validate_response(self, response: Any) -> bool:
"""Optional: check if the raw response is structurally valid.
Returns True if valid, False if the response should be treated as invalid.
Default implementation always returns True.
"""
return True
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
"""Optional: extract provider-specific cache hit/creation stats.
Returns dict with 'cached_tokens' and 'creation_tokens', or None.
Default returns None.
"""
return None
def map_finish_reason(self, raw_reason: str) -> str:
"""Optional: map provider-specific stop reason to OpenAI equivalent.
Default returns the raw reason unchanged. Override for providers
with different stop reason vocabularies.
"""
return raw_reason
-100
View File
@@ -1,100 +0,0 @@
"""Shared types for normalized provider responses.
These dataclasses define the canonical shape that all provider adapters
normalize responses to. The shared surface is intentionally minimal
only fields that every downstream consumer reads are top-level.
Protocol-specific state goes in ``provider_data`` dicts (response-level
and per-tool-call) so that protocol-aware code paths can access it
without polluting the shared type.
"""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
@dataclass
class ToolCall:
"""A normalized tool call from any provider.
``id`` is the protocol's canonical identifier — what gets used in
``tool_call_id`` / ``tool_use_id`` when constructing tool result
messages. May be ``None`` when the provider omits it; the agent
fills it via ``_deterministic_call_id()`` before storing in history.
``provider_data`` carries per-tool-call protocol metadata that only
protocol-aware code reads:
* Codex: ``{"call_id": "call_XXX", "response_item_id": "fc_XXX"}``
* Gemini: ``{"extra_content": {"google": {"thought_signature": "..."}}}``
* Others: ``None``
"""
id: Optional[str]
name: str
arguments: str # JSON string
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
@dataclass
class Usage:
"""Token usage from an API response."""
prompt_tokens: int = 0
completion_tokens: int = 0
total_tokens: int = 0
cached_tokens: int = 0
@dataclass
class NormalizedResponse:
"""Normalized API response from any provider.
Shared fields are truly cross-provider every caller can rely on
them without branching on api_mode. Protocol-specific state goes in
``provider_data`` so that only protocol-aware code paths read it.
Response-level ``provider_data`` examples:
* Anthropic: ``{"reasoning_details": [...]}``
* Codex: ``{"codex_reasoning_items": [...]}``
* Others: ``None``
"""
content: Optional[str]
tool_calls: Optional[List[ToolCall]]
finish_reason: str # "stop", "tool_calls", "length", "content_filter"
reasoning: Optional[str] = None
usage: Optional[Usage] = None
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
# ---------------------------------------------------------------------------
# Factory helpers
# ---------------------------------------------------------------------------
def build_tool_call(
id: Optional[str],
name: str,
arguments: Any,
**provider_fields: Any,
) -> ToolCall:
"""Build a ``ToolCall``, auto-serialising *arguments* if it's a dict.
Any extra keyword arguments are collected into ``provider_data``.
"""
args_str = json.dumps(arguments) if isinstance(arguments, dict) else str(arguments)
pd = dict(provider_fields) if provider_fields else None
return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
"""Translate a provider-specific stop reason to the normalised set.
Falls back to ``"stop"`` for unknown or ``None`` reasons.
"""
if reason is None:
return "stop"
return mapping.get(reason, "stop")
+1 -76
View File
@@ -6,7 +6,6 @@ from decimal import Decimal
from typing import Any, Dict, Literal, Optional
from agent.model_metadata import fetch_endpoint_model_metadata, fetch_model_metadata
from utils import base_url_host_matches
DEFAULT_PRICING = {"input": 0.0, "output": 0.0}
@@ -285,80 +284,6 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
source_url="https://ai.google.dev/pricing",
pricing_version="google-pricing-2026-03-16",
),
# AWS Bedrock — pricing per the Bedrock pricing page.
# Bedrock charges the same per-token rates as the model provider but
# through AWS billing. These are the on-demand prices (no commitment).
# Source: https://aws.amazon.com/bedrock/pricing/
(
"bedrock",
"anthropic.claude-opus-4-6",
): PricingEntry(
input_cost_per_million=Decimal("15.00"),
output_cost_per_million=Decimal("75.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"anthropic.claude-sonnet-4-6",
): PricingEntry(
input_cost_per_million=Decimal("3.00"),
output_cost_per_million=Decimal("15.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"anthropic.claude-sonnet-4-5",
): PricingEntry(
input_cost_per_million=Decimal("3.00"),
output_cost_per_million=Decimal("15.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"anthropic.claude-haiku-4-5",
): PricingEntry(
input_cost_per_million=Decimal("0.80"),
output_cost_per_million=Decimal("4.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"amazon.nova-pro",
): PricingEntry(
input_cost_per_million=Decimal("0.80"),
output_cost_per_million=Decimal("3.20"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"amazon.nova-lite",
): PricingEntry(
input_cost_per_million=Decimal("0.06"),
output_cost_per_million=Decimal("0.24"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
(
"bedrock",
"amazon.nova-micro",
): PricingEntry(
input_cost_per_million=Decimal("0.035"),
output_cost_per_million=Decimal("0.14"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
),
}
@@ -394,7 +319,7 @@ def resolve_billing_route(
if provider_name == "openai-codex":
return BillingRoute(provider="openai-codex", model=model, base_url=base_url or "", billing_mode="subscription_included")
if provider_name == "openrouter" or base_url_host_matches(base_url or "", "openrouter.ai"):
if provider_name == "openrouter" or "openrouter.ai" in base:
return BillingRoute(provider="openrouter", model=model, base_url=base_url or "", billing_mode="official_models_api")
if provider_name == "anthropic":
return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
+5 -9
View File
@@ -444,7 +444,6 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]:
if not reasoning.get("has_any_reasoning", True):
print(f" 🚫 Prompt {prompt_index} discarded (no reasoning in any turn)")
discarded_no_reasoning += 1
completed_in_batch.append(prompt_index)
continue
# Get and normalize tool stats for consistent schema across all entries
@@ -562,10 +561,7 @@ class BatchRunner:
provider_sort (str): Sort providers by price/throughput/latency (optional)
max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
reasoning_config (Dict): OpenRouter reasoning config override (e.g. {"effort": "none"} to disable thinking)
prefill_messages (List[Dict]): Messages to prepend as prefilled conversation context (few-shot priming).
NOTE: Anthropic Sonnet 4.6+ and Opus 4.6+ reject a trailing assistant-role prefill
(400 error). For those models use output_config.format or structured-output
schemas instead. Safe here for user-role priming and for older Claude / non-Claude models.
prefill_messages (List[Dict]): Messages to prepend as prefilled conversation context (few-shot priming)
max_samples (int): Only process the first N samples from the dataset (optional, processes all if not set)
"""
self.dataset_file = Path(dataset_file)
@@ -1190,12 +1186,12 @@ def main(
"""
# Handle list distributions
if list_distributions:
from toolset_distributions import print_distribution_info
from toolset_distributions import list_distributions as get_all_dists, print_distribution_info
print("📊 Available Toolset Distributions")
print("=" * 70)
all_dists = list_distributions()
all_dists = get_all_dists()
for dist_name in sorted(all_dists.keys()):
print_distribution_info(dist_name)
+22 -96
View File
@@ -16,7 +16,7 @@ model:
# "nous" - Nous Portal OAuth (requires: hermes login)
# "nous-api" - Nous Portal API key (requires: NOUS_API_KEY)
# "anthropic" - Direct Anthropic API (requires: ANTHROPIC_API_KEY)
# "openai-codex" - OpenAI Codex (requires: hermes auth)
# "openai-codex" - OpenAI Codex (requires: hermes login --provider openai-codex)
# "copilot" - GitHub Copilot / GitHub Models (requires: GITHUB_TOKEN)
# "gemini" - Use Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
# "zai" - Use z.ai / ZhipuAI GLM models (requires: GLM_API_KEY)
@@ -24,10 +24,8 @@ model:
# "minimax" - MiniMax global (requires: MINIMAX_API_KEY)
# "minimax-cn" - MiniMax China (requires: MINIMAX_CN_API_KEY)
# "huggingface" - Hugging Face Inference (requires: HF_TOKEN)
# "nvidia" - NVIDIA NIM / build.nvidia.com (requires: NVIDIA_API_KEY)
# "xiaomi" - Xiaomi MiMo (requires: XIAOMI_API_KEY)
# "arcee" - Arcee AI Trinity models (requires: ARCEEAI_API_KEY)
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
# "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY)
# "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
#
@@ -39,6 +37,12 @@ model:
# base_url: "http://localhost:1234/v1"
# No API key needed — local servers typically ignore auth.
#
# For Ollama Cloud (https://ollama.com/pricing):
# provider: "custom"
# base_url: "https://ollama.com/v1"
# Set OLLAMA_API_KEY in .env — automatically picked up when base_url
# points to ollama.com.
#
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
provider: "auto"
@@ -63,38 +67,7 @@ model:
# Leave unset to use the model's native output ceiling (recommended).
# Set only if you want to deliberately limit individual response length.
#
# max_tokens: 8192
# Named provider overrides (optional)
# Use this for per-provider request timeouts, non-stream stale timeouts,
# and per-model exceptions.
# Applies to the primary turn client on every api_mode (OpenAI-wire, native
# Anthropic, and Anthropic-compatible providers), the fallback chain, and
# client rebuilds during credential rotation. For OpenAI-wire chat
# completions (streaming and non-streaming) the configured value is also
# used as the per-request ``timeout=`` kwarg so it wins over the legacy
# HERMES_API_TIMEOUT env var (which still applies when no config is set).
# ``stale_timeout_seconds`` controls the non-streaming stale-call detector and
# wins over the legacy HERMES_API_CALL_STALE_TIMEOUT env var. Leaving these
# unset keeps the legacy defaults (HERMES_API_TIMEOUT=1800s,
# HERMES_API_CALL_STALE_TIMEOUT=300s, native Anthropic 900s).
#
# Not currently wired for AWS Bedrock (bedrock_converse + AnthropicBedrock
# SDK paths) — those use boto3 with its own timeout configuration.
#
# providers:
# ollama-local:
# request_timeout_seconds: 300 # Longer timeout for local cold-starts
# stale_timeout_seconds: 900 # Explicitly re-enable stale detection on local endpoints
# anthropic:
# request_timeout_seconds: 30 # Fast-fail cloud requests
# models:
# claude-opus-4.6:
# timeout_seconds: 600 # Longer timeout for extended-thinking Opus calls
# openai-codex:
# models:
# gpt-5.4:
# stale_timeout_seconds: 1800 # Longer non-stream stale timeout for slow large-context turns
# max_tokens: 8192
# =============================================================================
# OpenRouter Provider Routing (only applies when using OpenRouter)
@@ -122,6 +95,20 @@ model:
# # Data policy: "allow" (default) or "deny" to exclude providers that may store data
# # data_collection: "deny"
# =============================================================================
# Smart Model Routing (optional)
# =============================================================================
# Use a cheaper model for short/simple turns while keeping your main model for
# more complex requests. Disabled by default.
#
# smart_model_routing:
# enabled: true
# max_simple_chars: 160
# max_simple_words: 28
# cheap_model:
# provider: openrouter
# model: google/gemini-2.5-flash
# =============================================================================
# Git Worktree Isolation
# =============================================================================
@@ -350,7 +337,6 @@ compression:
# "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY)
# "nous" - Force Nous Portal (requires: hermes login)
# "gemini" - Force Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY)
# "codex" - Force Codex OAuth (requires: hermes model → Codex).
# Uses gpt-5.3-codex which supports vision.
# "main" - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY).
@@ -374,18 +360,6 @@ compression:
# web_extract:
# provider: "auto"
# model: ""
#
# # Session search — summarizes matching past sessions
# session_search:
# provider: "auto"
# model: ""
# timeout: 30
# max_concurrency: 3 # Limit parallel summaries to reduce request-burst 429s
# extra_body: {} # Provider-specific OpenAI-compatible request fields
# # Example for providers that support request-body
# # reasoning controls:
# # extra_body:
# # enable_thinking: false
# =============================================================================
# Persistent Memory
@@ -590,18 +564,6 @@ platform_toolsets:
homeassistant: [hermes-homeassistant]
qqbot: [hermes-qqbot]
# =============================================================================
# Gateway Platform Settings
# =============================================================================
# Optional per-platform messaging settings.
# Platform-specific knobs live under `extra`.
#
# platforms:
# telegram:
# reply_to_mode: "first" # off | first | all
# extra:
# disable_link_previews: false # Set true to suppress Telegram URL previews in bot messages
# ─────────────────────────────────────────────────────────────────────────────
# Available toolsets (use these names in platform_toolsets or the toolsets list)
#
@@ -917,39 +879,3 @@ display:
# # Names and usernames are NOT affected (user-chosen, publicly visible).
# # Routing/delivery still uses the original values internally.
# redact_pii: false
# =============================================================================
# Shell-script hooks
# =============================================================================
# Register shell scripts as plugin-hook callbacks. Each entry is executed as
# a subprocess (shell=False, shlex.split) with a JSON payload on stdin. On
# stdout the script may return JSON that either blocks the tool call or
# injects context into the next LLM call.
#
# Valid events (mirror hermes_cli.plugins.VALID_HOOKS):
# pre_tool_call, post_tool_call, pre_llm_call, post_llm_call,
# pre_api_request, post_api_request, on_session_start, on_session_end,
# on_session_finalize, on_session_reset, subagent_stop
#
# First-use consent: each (event, command) pair prompts once on a TTY, then
# is persisted to ~/.hermes/shell-hooks-allowlist.json. Non-interactive
# runs (gateway, cron) need --accept-hooks, HERMES_ACCEPT_HOOKS=1, or the
# hooks_auto_accept key below.
#
# See website/docs/user-guide/features/hooks.md for the full JSON wire
# protocol and worked examples.
#
# hooks:
# pre_tool_call:
# - matcher: "terminal"
# command: "~/.hermes/agent-hooks/block-rm-rf.sh"
# timeout: 10
# post_tool_call:
# - matcher: "write_file|patch"
# command: "~/.hermes/agent-hooks/auto-format.sh"
# pre_llm_call:
# - command: "~/.hermes/agent-hooks/inject-cwd-context.sh"
# subagent_stop:
# - command: "~/.hermes/agent-hooks/log-orchestration.sh"
#
# hooks_auto_accept: false
+308 -1267
View File
File diff suppressed because it is too large Load Diff
+46 -60
View File
@@ -9,7 +9,6 @@ import copy
import json
import logging
import tempfile
import threading
import os
import re
import uuid
@@ -35,11 +34,6 @@ except ImportError:
HERMES_DIR = get_hermes_home().resolve()
CRON_DIR = HERMES_DIR / "cron"
JOBS_FILE = CRON_DIR / "jobs.json"
# In-process lock protecting load_jobs→modify→save_jobs cycles.
# Required when tick() runs jobs in parallel threads — without this,
# concurrent mark_job_run / advance_next_run calls can clobber each other.
_jobs_file_lock = threading.Lock()
OUTPUT_DIR = CRON_DIR / "output"
ONESHOT_GRACE_SECONDS = 120
@@ -507,12 +501,6 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]
if schedule_changed:
updated_schedule = updated["schedule"]
# The API may pass schedule as a raw string (e.g. "every 10m")
# instead of a pre-parsed dict. Normalize it the same way
# create_job() does so downstream code can call .get() safely.
if isinstance(updated_schedule, str):
updated_schedule = parse_schedule(updated_schedule)
updated["schedule"] = updated_schedule
updated["schedule_display"] = updates.get(
"schedule_display",
updated_schedule.get("display", updated.get("schedule_display")),
@@ -600,44 +588,43 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
``delivery_error`` is tracked separately from the agent error a job
can succeed (agent produced output) but fail delivery (platform down).
"""
with _jobs_file_lock:
jobs = load_jobs()
for i, job in enumerate(jobs):
if job["id"] == job_id:
now = _hermes_now().isoformat()
job["last_run_at"] = now
job["last_status"] = "ok" if success else "error"
job["last_error"] = error if not success else None
# Track delivery failures separately — cleared on successful delivery
job["last_delivery_error"] = delivery_error
jobs = load_jobs()
for i, job in enumerate(jobs):
if job["id"] == job_id:
now = _hermes_now().isoformat()
job["last_run_at"] = now
job["last_status"] = "ok" if success else "error"
job["last_error"] = error if not success else None
# Track delivery failures separately — cleared on successful delivery
job["last_delivery_error"] = delivery_error
# Increment completed count
if job.get("repeat"):
job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
# Increment completed count
if job.get("repeat"):
job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
# Check if we've hit the repeat limit
times = job["repeat"].get("times")
completed = job["repeat"]["completed"]
if times is not None and times > 0 and completed >= times:
# Remove the job (limit reached)
jobs.pop(i)
save_jobs(jobs)
return
# Compute next run
job["next_run_at"] = compute_next_run(job["schedule"], now)
# Check if we've hit the repeat limit
times = job["repeat"].get("times")
completed = job["repeat"]["completed"]
if times is not None and times > 0 and completed >= times:
# Remove the job (limit reached)
jobs.pop(i)
save_jobs(jobs)
return
# Compute next run
job["next_run_at"] = compute_next_run(job["schedule"], now)
# If no next run (one-shot completed), disable
if job["next_run_at"] is None:
job["enabled"] = False
job["state"] = "completed"
elif job.get("state") != "paused":
job["state"] = "scheduled"
# If no next run (one-shot completed), disable
if job["next_run_at"] is None:
job["enabled"] = False
job["state"] = "completed"
elif job.get("state") != "paused":
job["state"] = "scheduled"
save_jobs(jobs)
return
save_jobs(jobs)
return
logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
def advance_next_run(job_id: str) -> bool:
@@ -652,21 +639,20 @@ def advance_next_run(job_id: str) -> bool:
Returns True if next_run_at was advanced, False otherwise.
"""
with _jobs_file_lock:
jobs = load_jobs()
for job in jobs:
if job["id"] == job_id:
kind = job.get("schedule", {}).get("kind")
if kind not in ("cron", "interval"):
return False
now = _hermes_now().isoformat()
new_next = compute_next_run(job["schedule"], now)
if new_next and new_next != job.get("next_run_at"):
job["next_run_at"] = new_next
save_jobs(jobs)
return True
jobs = load_jobs()
for job in jobs:
if job["id"] == job_id:
kind = job.get("schedule", {}).get("kind")
if kind not in ("cron", "interval"):
return False
return False
now = _hermes_now().isoformat()
new_next = compute_next_run(job["schedule"], now)
if new_next and new_next != job.get("next_run_at"):
job["next_run_at"] = new_next
save_jobs(jobs)
return True
return False
return False
def get_due_jobs() -> List[Dict[str, Any]]:
+168 -349
View File
@@ -10,7 +10,6 @@ runs at a time if multiple processes overlap.
import asyncio
import concurrent.futures
import contextvars
import json
import logging
import os
@@ -27,7 +26,7 @@ except ImportError:
except ImportError:
msvcrt = None
from pathlib import Path
from typing import List, Optional
from typing import Optional
# Add parent directory to path for imports BEFORE repo-level imports.
# Without this, standalone invocations (e.g. after `hermes update` reloads
@@ -49,33 +48,6 @@ _KNOWN_DELIVERY_PLATFORMS = frozenset({
"qqbot",
})
# Platforms that support a configured cron/notification home target, mapped to
# the environment variable used by gateway setup/runtime config.
_HOME_TARGET_ENV_VARS = {
"matrix": "MATRIX_HOME_ROOM",
"telegram": "TELEGRAM_HOME_CHANNEL",
"discord": "DISCORD_HOME_CHANNEL",
"slack": "SLACK_HOME_CHANNEL",
"signal": "SIGNAL_HOME_CHANNEL",
"mattermost": "MATTERMOST_HOME_CHANNEL",
"sms": "SMS_HOME_CHANNEL",
"email": "EMAIL_HOME_ADDRESS",
"dingtalk": "DINGTALK_HOME_CHANNEL",
"feishu": "FEISHU_HOME_CHANNEL",
"wecom": "WECOM_HOME_CHANNEL",
"weixin": "WEIXIN_HOME_CHANNEL",
"bluebubbles": "BLUEBUBBLES_HOME_CHANNEL",
"qqbot": "QQBOT_HOME_CHANNEL",
}
# Legacy env var names kept for back-compat. Each entry is the current
# primary env var → the previous name. _get_home_target_chat_id falls
# back to the legacy name if the primary is unset, so users who set the
# old name before the rename keep working until they migrate.
_LEGACY_HOME_TARGET_ENV_VARS = {
"QQBOT_HOME_CHANNEL": "QQ_HOME_CHANNEL",
}
from cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_run
# Sentinel: when a cron agent has nothing new to report, it can start its
@@ -103,28 +75,15 @@ def _resolve_origin(job: dict) -> Optional[dict]:
return None
def _get_home_target_chat_id(platform_name: str) -> str:
"""Return the configured home target chat/room ID for a delivery platform."""
env_var = _HOME_TARGET_ENV_VARS.get(platform_name.lower())
if not env_var:
return ""
value = os.getenv(env_var, "")
if not value:
legacy = _LEGACY_HOME_TARGET_ENV_VARS.get(env_var)
if legacy:
value = os.getenv(legacy, "")
return value
def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[dict]:
"""Resolve one concrete auto-delivery target for a cron job."""
def _resolve_delivery_target(job: dict) -> Optional[dict]:
"""Resolve the concrete auto-delivery target for a cron job, if any."""
deliver = job.get("deliver", "local")
origin = _resolve_origin(job)
if deliver_value == "local":
if deliver == "local":
return None
if deliver_value == "origin":
if deliver == "origin":
if origin:
return {
"platform": origin["platform"],
@@ -133,8 +92,8 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
}
# Origin missing (e.g. job created via API/script) — try each
# platform's home channel as a fallback instead of silently dropping.
for platform_name in _HOME_TARGET_ENV_VARS:
chat_id = _get_home_target_chat_id(platform_name)
for platform_name in ("matrix", "telegram", "discord", "slack", "bluebubbles"):
chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "")
if chat_id:
logger.info(
"Job '%s' has deliver=origin but no origin; falling back to %s home channel",
@@ -148,8 +107,8 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
}
return None
if ":" in deliver_value:
platform_name, rest = deliver_value.split(":", 1)
if ":" in deliver:
platform_name, rest = deliver.split(":", 1)
platform_key = platform_name.lower()
from tools.send_message_tool import _parse_target_ref
@@ -179,7 +138,7 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
"thread_id": thread_id,
}
platform_name = deliver_value
platform_name = deliver
if origin and origin.get("platform") == platform_name:
return {
"platform": platform_name,
@@ -189,7 +148,7 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
if platform_name.lower() not in _KNOWN_DELIVERY_PLATFORMS:
return None
chat_id = _get_home_target_chat_id(platform_name)
chat_id = os.getenv(f"{platform_name.upper()}_HOME_CHANNEL", "")
if not chat_id:
return None
@@ -200,30 +159,6 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
}
def _resolve_delivery_targets(job: dict) -> List[dict]:
"""Resolve all concrete auto-delivery targets for a cron job (supports comma-separated deliver)."""
deliver = job.get("deliver", "local")
if deliver == "local":
return []
parts = [p.strip() for p in str(deliver).split(",") if p.strip()]
seen = set()
targets = []
for part in parts:
target = _resolve_single_delivery_target(job, part)
if target:
key = (target["platform"].lower(), str(target["chat_id"]), target.get("thread_id"))
if key not in seen:
seen.add(key)
targets.append(target)
return targets
def _resolve_delivery_target(job: dict) -> Optional[dict]:
"""Resolve the concrete auto-delivery target for a cron job, if any."""
targets = _resolve_delivery_targets(job)
return targets[0] if targets else None
# Media extension sets — keep in sync with gateway/platforms/base.py:_process_message_background
_AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a'})
_VIDEO_EXTS = frozenset({'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'})
@@ -252,11 +187,7 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)
future = asyncio.run_coroutine_threadsafe(coro, loop)
try:
result = future.result(timeout=30)
except TimeoutError:
future.cancel()
raise
result = future.result(timeout=30)
if result and not getattr(result, "success", True):
logger.warning(
"Job '%s': media send failed for %s: %s",
@@ -268,7 +199,7 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Optional[str]:
"""
Deliver job output to the configured target(s) (origin chat, specific platform, etc.).
Deliver job output to the configured target (origin chat, specific platform, etc.).
When ``adapters`` and ``loop`` are provided (gateway is running), tries to
use the live adapter first this supports E2EE rooms (e.g. Matrix) where
@@ -277,14 +208,33 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
Returns None on success, or an error string on failure.
"""
targets = _resolve_delivery_targets(job)
if not targets:
target = _resolve_delivery_target(job)
if not target:
if job.get("deliver", "local") != "local":
msg = f"no delivery target resolved for deliver={job.get('deliver', 'local')}"
logger.warning("Job '%s': %s", job["id"], msg)
return msg
return None # local-only jobs don't deliver — not a failure
platform_name = target["platform"]
chat_id = target["chat_id"]
thread_id = target.get("thread_id")
# Diagnostic: log thread_id for topic-aware delivery debugging
origin = job.get("origin") or {}
origin_thread = origin.get("thread_id")
if origin_thread and not thread_id:
logger.warning(
"Job '%s': origin has thread_id=%s but delivery target lost it "
"(deliver=%s, target=%s)",
job["id"], origin_thread, job.get("deliver", "local"), target,
)
elif thread_id:
logger.debug(
"Job '%s': delivering to %s:%s thread_id=%s",
job["id"], platform_name, chat_id, thread_id,
)
from tools.send_message_tool import _send_to_platform
from gateway.config import load_gateway_config, Platform
@@ -307,6 +257,24 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
"bluebubbles": Platform.BLUEBUBBLES,
"qqbot": Platform.QQBOT,
}
platform = platform_map.get(platform_name.lower())
if not platform:
msg = f"unknown platform '{platform_name}'"
logger.warning("Job '%s': %s", job["id"], msg)
return msg
try:
config = load_gateway_config()
except Exception as e:
msg = f"failed to load gateway config: {e}"
logger.error("Job '%s': %s", job["id"], msg)
return msg
pconfig = config.platforms.get(platform)
if not pconfig or not pconfig.enabled:
msg = f"platform '{platform_name}' not configured/enabled"
logger.warning("Job '%s': %s", job["id"], msg)
return msg
# Optionally wrap the content with a header/footer so the user knows this
# is a cron delivery. Wrapping is on by default; set cron.wrap_response: false
@@ -320,13 +288,11 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
if wrap_response:
task_name = job.get("name", job["id"])
job_id = job.get("id", "")
delivery_content = (
f"Cronjob Response: {task_name}\n"
f"(job_id: {job_id})\n"
f"-------------\n\n"
f"{content}\n\n"
f"To stop or manage this job, send me a new message (e.g. \"stop reminder {task_name}\")."
f"Note: The agent cannot see this message, and therefore cannot respond to it."
)
else:
delivery_content = content
@@ -335,120 +301,67 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
from gateway.platforms.base import BasePlatformAdapter
media_files, cleaned_delivery_content = BasePlatformAdapter.extract_media(delivery_content)
# Prefer the live adapter when the gateway is running — this supports E2EE
# rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt.
runtime_adapter = (adapters or {}).get(platform)
if runtime_adapter is not None and loop is not None and getattr(loop, "is_running", lambda: False)():
send_metadata = {"thread_id": thread_id} if thread_id else None
try:
# Send cleaned text (MEDIA tags stripped) — not the raw content
text_to_send = cleaned_delivery_content.strip()
adapter_ok = True
if text_to_send:
future = asyncio.run_coroutine_threadsafe(
runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
loop,
)
send_result = future.result(timeout=60)
if send_result and not getattr(send_result, "success", True):
err = getattr(send_result, "error", "unknown")
logger.warning(
"Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
job["id"], platform_name, chat_id, err,
)
adapter_ok = False # fall through to standalone path
# Send extracted media files as native attachments via the live adapter
if adapter_ok and media_files:
_send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job)
if adapter_ok:
logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id)
return None
except Exception as e:
logger.warning(
"Job '%s': live adapter delivery to %s:%s failed (%s), falling back to standalone",
job["id"], platform_name, chat_id, e,
)
# Standalone path: run the async send in a fresh event loop (safe from any thread)
coro = _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)
try:
config = load_gateway_config()
result = asyncio.run(coro)
except RuntimeError:
# asyncio.run() checks for a running loop before awaiting the coroutine;
# when it raises, the original coro was never started — close it to
# prevent "coroutine was never awaited" RuntimeWarning, then retry in a
# fresh thread that has no running loop.
coro.close()
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
result = future.result(timeout=30)
except Exception as e:
msg = f"failed to load gateway config: {e}"
msg = f"delivery to {platform_name}:{chat_id} failed: {e}"
logger.error("Job '%s': %s", job["id"], msg)
return msg
delivery_errors = []
if result and result.get("error"):
msg = f"delivery error: {result['error']}"
logger.error("Job '%s': %s", job["id"], msg)
return msg
for target in targets:
platform_name = target["platform"]
chat_id = target["chat_id"]
thread_id = target.get("thread_id")
# Diagnostic: log thread_id for topic-aware delivery debugging
origin = job.get("origin") or {}
origin_thread = origin.get("thread_id")
if origin_thread and not thread_id:
logger.warning(
"Job '%s': origin has thread_id=%s but delivery target lost it "
"(deliver=%s, target=%s)",
job["id"], origin_thread, job.get("deliver", "local"), target,
)
elif thread_id:
logger.debug(
"Job '%s': delivering to %s:%s thread_id=%s",
job["id"], platform_name, chat_id, thread_id,
)
platform = platform_map.get(platform_name.lower())
if not platform:
msg = f"unknown platform '{platform_name}'"
logger.warning("Job '%s': %s", job["id"], msg)
delivery_errors.append(msg)
continue
# Prefer the live adapter when the gateway is running — this supports E2EE
# rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt.
runtime_adapter = (adapters or {}).get(platform)
delivered = False
if runtime_adapter is not None and loop is not None and getattr(loop, "is_running", lambda: False)():
send_metadata = {"thread_id": thread_id} if thread_id else None
try:
# Send cleaned text (MEDIA tags stripped) — not the raw content
text_to_send = cleaned_delivery_content.strip()
adapter_ok = True
if text_to_send:
future = asyncio.run_coroutine_threadsafe(
runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
loop,
)
try:
send_result = future.result(timeout=60)
except TimeoutError:
future.cancel()
raise
if send_result and not getattr(send_result, "success", True):
err = getattr(send_result, "error", "unknown")
logger.warning(
"Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone",
job["id"], platform_name, chat_id, err,
)
adapter_ok = False # fall through to standalone path
# Send extracted media files as native attachments via the live adapter
if adapter_ok and media_files:
_send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job)
if adapter_ok:
logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id)
delivered = True
except Exception as e:
logger.warning(
"Job '%s': live adapter delivery to %s:%s failed (%s), falling back to standalone",
job["id"], platform_name, chat_id, e,
)
if not delivered:
pconfig = config.platforms.get(platform)
if not pconfig or not pconfig.enabled:
msg = f"platform '{platform_name}' not configured/enabled"
logger.warning("Job '%s': %s", job["id"], msg)
delivery_errors.append(msg)
continue
# Standalone path: run the async send in a fresh event loop (safe from any thread)
coro = _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)
try:
result = asyncio.run(coro)
except RuntimeError:
# asyncio.run() checks for a running loop before awaiting the coroutine;
# when it raises, the original coro was never started — close it to
# prevent "coroutine was never awaited" RuntimeWarning, then retry in a
# fresh thread that has no running loop.
coro.close()
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
result = future.result(timeout=30)
except Exception as e:
msg = f"delivery to {platform_name}:{chat_id} failed: {e}"
logger.error("Job '%s': %s", job["id"], msg)
delivery_errors.append(msg)
continue
if result and result.get("error"):
msg = f"delivery error: {result['error']}"
logger.error("Job '%s': %s", job["id"], msg)
delivery_errors.append(msg)
continue
logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)
if delivery_errors:
return "; ".join(delivery_errors)
logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)
return None
@@ -571,53 +484,15 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
return False, f"Script execution failed: {exc}"
def _parse_wake_gate(script_output: str) -> bool:
"""Parse the last non-empty stdout line of a cron job's pre-check script
as a wake gate.
The convention (ported from nanoclaw #1232): if the last stdout line is
JSON like ``{"wakeAgent": false}``, the agent is skipped entirely no
LLM run, no delivery. Any other output (non-JSON, missing flag, gate
absent, or ``wakeAgent: true``) means wake the agent normally.
Returns True if the agent should wake, False to skip.
"""
if not script_output:
return True
stripped_lines = [line for line in script_output.splitlines() if line.strip()]
if not stripped_lines:
return True
last_line = stripped_lines[-1].strip()
try:
gate = json.loads(last_line)
except (json.JSONDecodeError, ValueError):
return True
if not isinstance(gate, dict):
return True
return gate.get("wakeAgent", True) is not False
def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
"""Build the effective prompt for a cron job, optionally loading one or more skills first.
Args:
job: The cron job dict.
prerun_script: Optional ``(success, stdout)`` from a script that has
already been executed by the caller (e.g. for a wake-gate check).
When provided, the script is not re-executed and the cached
result is used for prompt injection. When omitted, the script
(if any) runs inline as before.
"""
def _build_job_prompt(job: dict) -> str:
"""Build the effective prompt for a cron job, optionally loading one or more skills first."""
prompt = job.get("prompt", "")
skills = job.get("skills")
# Run data-collection script if configured, inject output as context.
script_path = job.get("script")
if script_path:
if prerun_script is not None:
success, script_output = prerun_script
else:
success, script_output = _run_job_script(script_path)
success, script_output = _run_job_script(script_path)
if success:
if script_output:
prompt = (
@@ -719,52 +594,21 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
job_id = job["id"]
job_name = job["name"]
# Wake-gate: if this job has a pre-check script, run it BEFORE building
# the prompt so a ``{"wakeAgent": false}`` response can short-circuit
# the whole agent run. We pass the result into _build_job_prompt so
# the script is only executed once.
prerun_script = None
script_path = job.get("script")
if script_path:
prerun_script = _run_job_script(script_path)
_ran_ok, _script_output = prerun_script
if _ran_ok and not _parse_wake_gate(_script_output):
logger.info(
"Job '%s' (ID: %s): wakeAgent=false, skipping agent run",
job_name, job_id,
)
silent_doc = (
f"# Cron Job: {job_name}\n\n"
f"**Job ID:** {job_id}\n"
f"**Run Time:** {_hermes_now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
"Script gate returned `wakeAgent=false` — agent skipped.\n"
)
return True, silent_doc, SILENT_MARKER, None
prompt = _build_job_prompt(job, prerun_script=prerun_script)
prompt = _build_job_prompt(job)
origin = _resolve_origin(job)
_cron_session_id = f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}"
logger.info("Running job '%s' (ID: %s)", job_name, job_id)
logger.info("Prompt: %s", prompt[:100])
# Mark this as a cron session so the approval system can apply cron_mode.
# This env var is process-wide and persists for the lifetime of the
# scheduler process — every job this process runs is a cron job.
os.environ["HERMES_CRON_SESSION"] = "1"
# Use ContextVars for per-job session/delivery state so parallel jobs
# don't clobber each other's targets (os.environ is process-global).
from gateway.session_context import set_session_vars, clear_session_vars, _VAR_MAP
_ctx_tokens = set_session_vars(
platform=origin["platform"] if origin else "",
chat_id=str(origin["chat_id"]) if origin else "",
chat_name=origin.get("chat_name", "") if origin else "",
)
try:
# Inject origin context so the agent's send_message tool knows the chat.
# Must be INSIDE the try block so the finally cleanup always runs.
if origin:
os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
if origin.get("chat_name"):
os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
# Re-read .env and config.yaml fresh every run so provider/key
# changes take effect without a gateway restart.
from dotenv import load_dotenv
@@ -775,10 +619,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
delivery_target = _resolve_delivery_target(job)
if delivery_target:
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"])
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"]))
os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"]
os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"])
if delivery_target.get("thread_id") is not None:
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"]))
os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])
model = job.get("model") or os.getenv("HERMES_MODEL") or ""
@@ -817,13 +661,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
prefill_messages = None
prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "")
if prefill_file:
import json as _json
pfpath = Path(prefill_file).expanduser()
if not pfpath.is_absolute():
pfpath = _hermes_home / pfpath
if pfpath.exists():
try:
with open(pfpath, "r", encoding="utf-8") as _pf:
prefill_messages = json.load(_pf)
prefill_messages = _json.load(_pf)
if not isinstance(prefill_messages, list):
prefill_messages = None
except Exception as e:
@@ -835,6 +680,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
# Provider routing
pr = _cfg.get("provider_routing", {})
smart_routing = _cfg.get("smart_model_routing", {}) or {}
from hermes_cli.runtime_provider import (
resolve_runtime_provider,
@@ -851,9 +697,24 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
message = format_runtime_provider_error(exc)
raise RuntimeError(message) from exc
from agent.smart_model_routing import resolve_turn_route
turn_route = resolve_turn_route(
prompt,
smart_routing,
{
"model": model,
"api_key": runtime.get("api_key"),
"base_url": runtime.get("base_url"),
"provider": runtime.get("provider"),
"api_mode": runtime.get("api_mode"),
"command": runtime.get("command"),
"args": list(runtime.get("args") or []),
},
)
fallback_model = _cfg.get("fallback_providers") or _cfg.get("fallback_model") or None
credential_pool = None
runtime_provider = str(runtime.get("provider") or "").strip().lower()
runtime_provider = str(turn_route["runtime"].get("provider") or "").strip().lower()
if runtime_provider:
try:
from agent.credential_pool import load_pool
@@ -870,13 +731,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
logger.debug("Job '%s': failed to load credential pool for %s: %s", job_id, runtime_provider, e)
agent = AIAgent(
model=model,
api_key=runtime.get("api_key"),
base_url=runtime.get("base_url"),
provider=runtime.get("provider"),
api_mode=runtime.get("api_mode"),
acp_command=runtime.get("command"),
acp_args=runtime.get("args"),
model=turn_route["model"],
api_key=turn_route["runtime"].get("api_key"),
base_url=turn_route["runtime"].get("base_url"),
provider=turn_route["runtime"].get("provider"),
api_mode=turn_route["runtime"].get("api_mode"),
acp_command=turn_route["runtime"].get("command"),
acp_args=turn_route["runtime"].get("args"),
max_iterations=max_iterations,
reasoning_config=reasoning_config,
prefill_messages=prefill_messages,
@@ -907,11 +768,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
_cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
_POLL_INTERVAL = 5.0
_cron_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
# Preserve scheduler-scoped ContextVar state (for example skill-declared
# env passthrough registrations) when the cron run hops into the worker
# thread used for inactivity timeout monitoring.
_cron_context = contextvars.copy_context()
_cron_future = _cron_pool.submit(_cron_context.run, agent.run_conversation, prompt)
_cron_future = _cron_pool.submit(agent.run_conversation, prompt)
_inactivity_timeout = False
try:
if _cron_inactivity_limit is None:
@@ -973,9 +830,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
)
final_response = result.get("final_response", "") or ""
# Strip leaked placeholder text that upstream may inject on empty completions.
if final_response.strip() == "(No response generated)":
final_response = ""
# Use a separate variable for log display; keep final_response clean
# for delivery logic (empty response = no delivery).
logged_response = final_response if final_response else "(No response generated)"
@@ -1021,8 +875,16 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
return False, output, "", error_msg
finally:
# Clean up ContextVar session/delivery state for this job.
clear_session_vars(_ctx_tokens)
# Clean up injected env vars so they don't leak to other jobs
for key in (
"HERMES_SESSION_PLATFORM",
"HERMES_SESSION_CHAT_ID",
"HERMES_SESSION_CHAT_NAME",
"HERMES_CRON_AUTO_DELIVER_PLATFORM",
"HERMES_CRON_AUTO_DELIVER_CHAT_ID",
"HERMES_CRON_AUTO_DELIVER_THREAD_ID",
):
os.environ.pop(key, None)
if _session_db:
try:
_session_db.end_session(_cron_session_id, "cron_complete")
@@ -1075,41 +937,15 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
if verbose:
logger.info("%s - %s job(s) due", _hermes_now().strftime('%H:%M:%S'), len(due_jobs))
# Advance next_run_at for all recurring jobs FIRST, under the file lock,
# before any execution begins. This preserves at-most-once semantics.
executed = 0
for job in due_jobs:
advance_next_run(job["id"])
# Resolve max parallel workers: env var > config.yaml > unbounded.
# Set HERMES_CRON_MAX_PARALLEL=1 to restore old serial behaviour.
_max_workers: Optional[int] = None
try:
_env_par = os.getenv("HERMES_CRON_MAX_PARALLEL", "").strip()
if _env_par:
_max_workers = int(_env_par) or None
except (ValueError, TypeError):
logger.warning("Invalid HERMES_CRON_MAX_PARALLEL value; defaulting to unbounded")
if _max_workers is None:
try:
_ucfg = load_config() or {}
_cfg_par = (
_ucfg.get("cron", {}) if isinstance(_ucfg, dict) else {}
).get("max_parallel_jobs")
if _cfg_par is not None:
_max_workers = int(_cfg_par) or None
except Exception:
pass
# For recurring jobs (cron/interval), advance next_run_at to the
# next future occurrence BEFORE execution. This way, if the
# process crashes mid-run, the job won't re-fire on restart.
# One-shot jobs are left alone so they can retry on restart.
advance_next_run(job["id"])
if verbose:
logger.info(
"Running %d job(s) in parallel (max_workers=%s)",
len(due_jobs),
_max_workers if _max_workers else "unbounded",
)
def _process_job(job: dict) -> bool:
"""Run one due job end-to-end: execute, save, deliver, mark."""
try:
success, output, final_response, error = run_job(job)
output_file = save_job_output(job["id"], output)
@@ -1133,31 +969,14 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
delivery_error = str(de)
logger.error("Delivery failed for job %s: %s", job["id"], de)
# Treat empty final_response as a soft failure so last_status
# is not "ok" — the agent ran but produced nothing useful.
# (issue #8585)
if success and not final_response:
success = False
error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"
mark_job_run(job["id"], success, error, delivery_error=delivery_error)
return True
executed += 1
except Exception as e:
logger.error("Error processing job %s: %s", job['id'], e)
mark_job_run(job["id"], False, str(e))
return False
# Run all due jobs concurrently, each in its own ContextVar copy
# so session/delivery state stays isolated per-thread.
with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
_futures = []
for job in due_jobs:
_ctx = contextvars.copy_context()
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
_results = [f.result() for f in _futures]
return sum(_results)
return executed
finally:
if fcntl:
fcntl.flock(lock_fd, fcntl.LOCK_UN)
Executable → Regular
+6 -13
View File
@@ -1,14 +1,13 @@
#!/bin/bash
# Docker/Podman entrypoint: bootstrap config files into the mounted volume, then run hermes.
# Docker entrypoint: bootstrap config files into the mounted volume, then run hermes.
set -e
HERMES_HOME="${HERMES_HOME:-/opt/data}"
HERMES_HOME="/opt/data"
INSTALL_DIR="/opt/hermes"
# --- Privilege dropping via gosu ---
# When started as root (the default for Docker, or fakeroot in rootless Podman),
# optionally remap the hermes user/group to match host-side ownership, fix volume
# permissions, then re-exec as hermes.
# When started as root (the default), optionally remap the hermes user/group
# to match host-side ownership, fix volume permissions, then re-exec as hermes.
if [ "$(id -u)" = "0" ]; then
if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "$(id -u hermes)" ]; then
echo "Changing hermes UID to $HERMES_UID"
@@ -17,19 +16,13 @@ if [ "$(id -u)" = "0" ]; then
if [ -n "$HERMES_GID" ] && [ "$HERMES_GID" != "$(id -g hermes)" ]; then
echo "Changing hermes GID to $HERMES_GID"
# -o allows non-unique GID (e.g. macOS GID 20 "staff" may already exist
# as "dialout" in the Debian-based container image)
groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true
groupmod -g "$HERMES_GID" hermes
fi
actual_hermes_uid=$(id -u hermes)
if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing"
# In rootless Podman the container's "root" is mapped to an unprivileged
# host UID — chown will fail. That's fine: the volume is already owned
# by the mapped user on the host side.
chown -R hermes:hermes "$HERMES_HOME" 2>/dev/null || \
echo "Warning: chown failed (rootless container?) — continuing anyway"
chown -R hermes:hermes "$HERMES_HOME"
fi
echo "Dropping root privileges"
+228
View File
@@ -0,0 +1,228 @@
# Hermes Agent — ACP (Agent Client Protocol) Setup Guide
Hermes Agent supports the **Agent Client Protocol (ACP)**, allowing it to run as
a coding agent inside your editor. ACP lets your IDE send tasks to Hermes, and
Hermes responds with file edits, terminal commands, and explanations — all shown
natively in the editor UI.
---
## Prerequisites
- Hermes Agent installed and configured (`hermes setup` completed)
- An API key / provider set up in `~/.hermes/.env` or via `hermes login`
- Python 3.11+
Install the ACP extra:
```bash
pip install -e ".[acp]"
```
---
## VS Code Setup
### 1. Install the ACP Client extension
Open VS Code and install **ACP Client** from the marketplace:
- Press `Ctrl+Shift+X` (or `Cmd+Shift+X` on macOS)
- Search for **"ACP Client"**
- Click **Install**
Or install from the command line:
```bash
code --install-extension anysphere.acp-client
```
### 2. Configure settings.json
Open your VS Code settings (`Ctrl+,` → click the `{}` icon for JSON) and add:
```json
{
"acpClient.agents": [
{
"name": "hermes-agent",
"registryDir": "/path/to/hermes-agent/acp_registry"
}
]
}
```
Replace `/path/to/hermes-agent` with the actual path to your Hermes Agent
installation (e.g. `~/.hermes/hermes-agent`).
Alternatively, if `hermes` is on your PATH, the ACP Client can discover it
automatically via the registry directory.
### 3. Restart VS Code
After configuring, restart VS Code. You should see **Hermes Agent** appear in
the ACP agent picker in the chat/agent panel.
---
## Zed Setup
Zed has built-in ACP support.
### 1. Configure Zed settings
Open Zed settings (`Cmd+,` on macOS or `Ctrl+,` on Linux) and add to your
`settings.json`:
```json
{
"agent_servers": {
"hermes-agent": {
"type": "custom",
"command": "hermes",
"args": ["acp"],
},
},
}
```
### 2. Restart Zed
Hermes Agent will appear in the agent panel. Select it and start a conversation.
---
## JetBrains Setup (IntelliJ, PyCharm, WebStorm, etc.)
### 1. Install the ACP plugin
- Open **Settings****Plugins** → **Marketplace**
- Search for **"ACP"** or **"Agent Client Protocol"**
- Install and restart the IDE
### 2. Configure the agent
- Open **Settings****Tools** → **ACP Agents**
- Click **+** to add a new agent
- Set the registry directory to your `acp_registry/` folder:
`/path/to/hermes-agent/acp_registry`
- Click **OK**
### 3. Use the agent
Open the ACP panel (usually in the right sidebar) and select **Hermes Agent**.
---
## What You Will See
Once connected, your editor provides a native interface to Hermes Agent:
### Chat Panel
A conversational interface where you can describe tasks, ask questions, and
give instructions. Hermes responds with explanations and actions.
### File Diffs
When Hermes edits files, you see standard diffs in the editor. You can:
- **Accept** individual changes
- **Reject** changes you don't want
- **Review** the full diff before applying
### Terminal Commands
When Hermes needs to run shell commands (builds, tests, installs), the editor
shows them in an integrated terminal. Depending on your settings:
- Commands may run automatically
- Or you may be prompted to **approve** each command
### Approval Flow
For potentially destructive operations, the editor will prompt you for
approval before Hermes proceeds. This includes:
- File deletions
- Shell commands
- Git operations
---
## Configuration
Hermes Agent under ACP uses the **same configuration** as the CLI:
- **API keys / providers**: `~/.hermes/.env`
- **Agent config**: `~/.hermes/config.yaml`
- **Skills**: `~/.hermes/skills/`
- **Sessions**: `~/.hermes/state.db`
You can run `hermes setup` to configure providers, or edit `~/.hermes/.env`
directly.
### Changing the model
Edit `~/.hermes/config.yaml`:
```yaml
model: openrouter/nous/hermes-3-llama-3.1-70b
```
Or set the `HERMES_MODEL` environment variable.
### Toolsets
ACP sessions use the curated `hermes-acp` toolset by default. It is designed for editor workflows and intentionally excludes things like messaging delivery, cronjob management, and audio-first UX features.
---
## Troubleshooting
### Agent doesn't appear in the editor
1. **Check the registry path** — make sure the `acp_registry/` directory path
in your editor settings is correct and contains `agent.json`.
2. **Check `hermes` is on PATH** — run `which hermes` in a terminal. If not
found, you may need to activate your virtualenv or add it to PATH.
3. **Restart the editor** after changing settings.
### Agent starts but errors immediately
1. Run `hermes doctor` to check your configuration.
2. Check that you have a valid API key: `hermes status`
3. Try running `hermes acp` directly in a terminal to see error output.
### "Module not found" errors
Make sure you installed the ACP extra:
```bash
pip install -e ".[acp]"
```
### Slow responses
- ACP streams responses, so you should see incremental output. If the agent
appears stuck, check your network connection and API provider status.
- Some providers have rate limits. Try switching to a different model/provider.
### Permission denied for terminal commands
If the editor blocks terminal commands, check your ACP Client extension
settings for auto-approval or manual-approval preferences.
### Logs
Hermes logs are written to stderr when running in ACP mode. Check:
- VS Code: **Output** panel → select **ACP Client** or **Hermes Agent**
- Zed: **View****Toggle Terminal** and check the process output
- JetBrains: **Event Log** or the ACP tool window
You can also enable verbose logging:
```bash
HERMES_LOG_LEVEL=DEBUG hermes acp
```
---
## Further Reading
- [ACP Specification](https://github.com/anysphere/acp)
- [Hermes Agent Documentation](https://github.com/NousResearch/hermes-agent)
- Run `hermes --help` for all CLI options
+698
View File
@@ -0,0 +1,698 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>honcho-integration-spec</title>
<style>
:root {
--bg: #0b0e14;
--bg-surface: #11151c;
--bg-elevated: #181d27;
--bg-code: #0d1018;
--fg: #c9d1d9;
--fg-bright: #e6edf3;
--fg-muted: #6e7681;
--fg-subtle: #484f58;
--accent: #7eb8f6;
--accent-dim: #3d6ea5;
--accent-glow: rgba(126, 184, 246, 0.08);
--green: #7ee6a8;
--green-dim: #2ea04f;
--orange: #e6a855;
--red: #f47067;
--purple: #bc8cff;
--cyan: #56d4dd;
--border: #21262d;
--border-subtle: #161b22;
--radius: 6px;
--font-sans: 'New York', ui-serif, 'Iowan Old Style', 'Apple Garamond', Baskerville, 'Times New Roman', 'Noto Emoji', serif;
--font-mono: 'Departure Mono', 'Noto Emoji', monospace;
}
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
html { scroll-behavior: smooth; scroll-padding-top: 2rem; }
body {
font-family: var(--font-sans);
background: var(--bg);
color: var(--fg);
line-height: 1.7;
font-size: 15px;
-webkit-font-smoothing: antialiased;
}
.container { max-width: 860px; margin: 0 auto; padding: 3rem 2rem 6rem; }
.hero {
text-align: center;
padding: 4rem 0 3rem;
border-bottom: 1px solid var(--border);
margin-bottom: 3rem;
}
.hero h1 { font-family: var(--font-mono); font-size: 2.2rem; font-weight: 700; color: var(--fg-bright); letter-spacing: -0.03em; margin-bottom: 0.5rem; }
.hero h1 span { color: var(--accent); }
.hero .subtitle { font-family: var(--font-sans); color: var(--fg-muted); font-size: 0.92rem; max-width: 560px; margin: 0 auto; line-height: 1.6; }
.hero .meta { margin-top: 1.5rem; display: flex; justify-content: center; gap: 1.5rem; flex-wrap: wrap; }
.hero .meta span { font-size: 0.8rem; color: var(--fg-subtle); font-family: var(--font-mono); }
.toc { background: var(--bg-surface); border: 1px solid var(--border); border-radius: var(--radius); padding: 1.5rem 2rem; margin-bottom: 3rem; }
.toc h2 { font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.1em; color: var(--fg-muted); margin-bottom: 1rem; }
.toc ol { list-style: none; counter-reset: toc; columns: 2; column-gap: 2rem; }
.toc li { counter-increment: toc; break-inside: avoid; margin-bottom: 0.35rem; }
.toc li::before { content: counter(toc, decimal-leading-zero) " "; color: var(--fg-subtle); font-family: var(--font-mono); font-size: 0.75rem; margin-right: 0.25rem; }
.toc a { font-family: var(--font-mono); color: var(--fg); text-decoration: none; font-size: 0.82rem; transition: color 0.15s; }
.toc a:hover { color: var(--accent); }
section { margin-bottom: 4rem; }
section + section { padding-top: 1rem; }
h2 { font-family: var(--font-mono); font-size: 1.3rem; font-weight: 700; color: var(--fg-bright); letter-spacing: -0.01em; margin-bottom: 1.25rem; padding-bottom: 0.5rem; border-bottom: 1px solid var(--border); }
h3 { font-family: var(--font-mono); font-size: 1rem; font-weight: 600; color: var(--fg-bright); margin-top: 2rem; margin-bottom: 0.75rem; }
h4 { font-family: var(--font-mono); font-size: 0.9rem; font-weight: 600; color: var(--accent); margin-top: 1.5rem; margin-bottom: 0.5rem; }
p { margin-bottom: 1rem; font-size: 0.95rem; line-height: 1.75; }
strong { color: var(--fg-bright); font-weight: 600; }
a { color: var(--accent); text-decoration: none; }
a:hover { text-decoration: underline; }
ul, ol { margin-bottom: 1rem; padding-left: 1.5rem; font-size: 0.93rem; line-height: 1.7; }
li { margin-bottom: 0.35rem; }
li::marker { color: var(--fg-subtle); }
.table-wrap { overflow-x: auto; margin-bottom: 1.5rem; }
table { width: 100%; border-collapse: collapse; font-size: 0.88rem; }
th, td { text-align: left; padding: 0.6rem 1rem; border-bottom: 1px solid var(--border-subtle); }
th { font-family: var(--font-mono); font-size: 0.72rem; text-transform: uppercase; letter-spacing: 0.06em; color: var(--fg-muted); background: var(--bg-surface); border-bottom-color: var(--border); white-space: nowrap; }
td { font-family: var(--font-sans); font-size: 0.88rem; color: var(--fg); }
tr:hover td { background: var(--accent-glow); }
td code { background: var(--bg-elevated); padding: 0.15em 0.4em; border-radius: 3px; font-family: var(--font-mono); font-size: 0.82em; color: var(--cyan); }
pre { background: var(--bg-code); border: 1px solid var(--border); border-radius: var(--radius); padding: 1.25rem 1.5rem; overflow-x: auto; margin-bottom: 1.5rem; font-family: var(--font-mono); font-size: 0.82rem; line-height: 1.65; color: var(--fg); }
pre code { background: none; padding: 0; color: inherit; font-size: inherit; }
code { font-family: var(--font-mono); font-size: 0.85em; }
p code, li code { background: var(--bg-elevated); padding: 0.15em 0.4em; border-radius: 3px; color: var(--cyan); font-size: 0.85em; }
.kw { color: var(--purple); }
.str { color: var(--green); }
.cm { color: var(--fg-subtle); font-style: italic; }
.num { color: var(--orange); }
.key { color: var(--accent); }
.mermaid { margin: 1.5rem 0 2rem; text-align: center; }
.mermaid svg { max-width: 100%; height: auto; }
.callout { font-family: var(--font-sans); background: var(--bg-surface); border-left: 3px solid var(--accent-dim); border-radius: 0 var(--radius) var(--radius) 0; padding: 1rem 1.25rem; margin-bottom: 1.5rem; font-size: 0.88rem; color: var(--fg-muted); line-height: 1.6; }
.callout strong { font-family: var(--font-mono); color: var(--fg-bright); }
.callout.success { border-left-color: var(--green-dim); }
.callout.warn { border-left-color: var(--orange); }
.badge { display: inline-block; font-family: var(--font-mono); font-size: 0.65rem; font-weight: 600; text-transform: uppercase; letter-spacing: 0.05em; padding: 0.2em 0.6em; border-radius: 3px; vertical-align: middle; margin-left: 0.4rem; }
.badge-done { background: var(--green-dim); color: #fff; }
.badge-wip { background: var(--orange); color: #0b0e14; }
.badge-todo { background: var(--fg-subtle); color: var(--fg); }
.checklist { list-style: none; padding-left: 0; }
.checklist li { padding-left: 1.5rem; position: relative; margin-bottom: 0.5rem; }
.checklist li::before { position: absolute; left: 0; font-family: var(--font-mono); font-size: 0.85rem; }
.checklist li.done { color: var(--fg-muted); }
.checklist li.done::before { content: "\2713"; color: var(--green); }
.checklist li.todo::before { content: "\25CB"; color: var(--fg-subtle); }
.checklist li.wip::before { content: "\25D4"; color: var(--orange); }
.compare { display: grid; grid-template-columns: 1fr 1fr; gap: 1rem; margin-bottom: 2rem; }
.compare-card { background: var(--bg-surface); border: 1px solid var(--border); border-radius: var(--radius); padding: 1.25rem; }
.compare-card h4 { margin-top: 0; font-size: 0.82rem; }
.compare-card.after { border-color: var(--accent-dim); }
.compare-card ul { font-family: var(--font-mono); padding-left: 1.25rem; font-size: 0.8rem; }
hr { border: none; border-top: 1px solid var(--border); margin: 3rem 0; }
.progress-bar { position: fixed; top: 0; left: 0; height: 2px; background: var(--accent); z-index: 999; transition: width 0.1s linear; }
@media (max-width: 640px) {
.container { padding: 2rem 1rem 4rem; }
.hero h1 { font-size: 1.6rem; }
.toc ol { columns: 1; }
.compare { grid-template-columns: 1fr; }
table { font-size: 0.8rem; }
th, td { padding: 0.4rem 0.6rem; }
}
</style>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link href="https://fonts.googleapis.com/css2?family=Noto+Emoji&display=swap" rel="stylesheet">
<style>
@font-face {
font-family: 'Departure Mono';
src: url('https://cdn.jsdelivr.net/gh/rektdeckard/departure-mono@latest/fonts/DepartureMono-Regular.woff2') format('woff2');
font-weight: normal;
font-style: normal;
font-display: swap;
}
</style>
</head>
<body>
<div class="progress-bar" id="progress"></div>
<div class="container">
<header class="hero">
<h1>honcho<span>-integration-spec</span></h1>
<p class="subtitle">Comparison of Hermes Agent vs. openclaw-honcho — and a porting spec for bringing Hermes patterns into other Honcho integrations.</p>
<div class="meta">
<span>hermes-agent / openclaw-honcho</span>
<span>Python + TypeScript</span>
<span>2026-03-09</span>
</div>
</header>
<nav class="toc">
<h2>Contents</h2>
<ol>
<li><a href="#overview">Overview</a></li>
<li><a href="#architecture">Architecture comparison</a></li>
<li><a href="#diff-table">Diff table</a></li>
<li><a href="#patterns">Hermes patterns to port</a></li>
<li><a href="#spec-async">Spec: async prefetch</a></li>
<li><a href="#spec-reasoning">Spec: dynamic reasoning level</a></li>
<li><a href="#spec-modes">Spec: per-peer memory modes</a></li>
<li><a href="#spec-identity">Spec: AI peer identity formation</a></li>
<li><a href="#spec-sessions">Spec: session naming strategies</a></li>
<li><a href="#spec-cli">Spec: CLI surface injection</a></li>
<li><a href="#openclaw-checklist">openclaw-honcho checklist</a></li>
<li><a href="#nanobot-checklist">nanobot-honcho checklist</a></li>
</ol>
</nav>
<!-- OVERVIEW -->
<section id="overview">
<h2>Overview</h2>
<p>Two independent Honcho integrations have been built for two different agent runtimes: <strong>Hermes Agent</strong> (Python, baked into the runner) and <strong>openclaw-honcho</strong> (TypeScript plugin via hook/tool API). Both use the same Honcho peer paradigm — dual peer model, <code>session.context()</code>, <code>peer.chat()</code> — but they made different tradeoffs at every layer.</p>
<p>This document maps those tradeoffs and defines a porting spec: a set of Hermes-originated patterns, each stated as an integration-agnostic interface, that any Honcho integration can adopt regardless of runtime or language.</p>
<div class="callout">
<strong>Scope</strong> Both integrations work correctly today. This spec is about the delta — patterns in Hermes that are worth propagating and patterns in openclaw-honcho that Hermes should eventually adopt. The spec is additive, not prescriptive.
</div>
</section>
<!-- ARCHITECTURE -->
<section id="architecture">
<h2>Architecture comparison</h2>
<h3>Hermes: baked-in runner</h3>
<p>Honcho is initialised directly inside <code>AIAgent.__init__</code>. There is no plugin boundary. Session management, context injection, async prefetch, and CLI surface are all first-class concerns of the runner. Context is injected once per session (baked into <code>_cached_system_prompt</code>) and never re-fetched mid-session — this maximises prefix cache hits at the LLM provider.</p>
<div class="mermaid">
%%{init: {'theme': 'dark', 'themeVariables': { 'primaryColor': '#1f3150', 'primaryTextColor': '#c9d1d9', 'primaryBorderColor': '#3d6ea5', 'lineColor': '#3d6ea5', 'secondaryColor': '#162030', 'tertiaryColor': '#11151c' }}}%%
flowchart TD
U["user message"] --> P["_honcho_prefetch()<br/>(reads cache — no HTTP)"]
P --> SP["_build_system_prompt()<br/>(first turn only, cached)"]
SP --> LLM["LLM call"]
LLM --> R["response"]
R --> FP["_honcho_fire_prefetch()<br/>(daemon threads, turn end)"]
FP --> C1["prefetch_context() thread"]
FP --> C2["prefetch_dialectic() thread"]
C1 --> CACHE["_context_cache / _dialectic_cache"]
C2 --> CACHE
style U fill:#162030,stroke:#3d6ea5,color:#c9d1d9
style P fill:#1f3150,stroke:#3d6ea5,color:#c9d1d9
style SP fill:#1f3150,stroke:#3d6ea5,color:#c9d1d9
style LLM fill:#162030,stroke:#3d6ea5,color:#c9d1d9
style R fill:#162030,stroke:#3d6ea5,color:#c9d1d9
style FP fill:#2a1a40,stroke:#bc8cff,color:#c9d1d9
style C1 fill:#2a1a40,stroke:#bc8cff,color:#c9d1d9
style C2 fill:#2a1a40,stroke:#bc8cff,color:#c9d1d9
style CACHE fill:#11151c,stroke:#484f58,color:#6e7681
</div>
<h3>openclaw-honcho: hook-based plugin</h3>
<p>The plugin registers hooks against OpenClaw's event bus. Context is fetched synchronously inside <code>before_prompt_build</code> on every turn. Message capture happens in <code>agent_end</code>. The multi-agent hierarchy is tracked via <code>subagent_spawned</code>. This model is correct but every turn pays a blocking Honcho round-trip before the LLM call can begin.</p>
<div class="mermaid">
%%{init: {'theme': 'dark', 'themeVariables': { 'primaryColor': '#1f3150', 'primaryTextColor': '#c9d1d9', 'primaryBorderColor': '#3d6ea5', 'lineColor': '#3d6ea5', 'secondaryColor': '#162030', 'tertiaryColor': '#11151c' }}}%%
flowchart TD
U2["user message"] --> BPB["before_prompt_build<br/>(BLOCKING HTTP — every turn)"]
BPB --> CTX["session.context()"]
CTX --> SP2["system prompt assembled"]
SP2 --> LLM2["LLM call"]
LLM2 --> R2["response"]
R2 --> AE["agent_end hook"]
AE --> SAVE["session.addMessages()<br/>session.setMetadata()"]
style U2 fill:#162030,stroke:#3d6ea5,color:#c9d1d9
style BPB fill:#3a1515,stroke:#f47067,color:#c9d1d9
style CTX fill:#3a1515,stroke:#f47067,color:#c9d1d9
style SP2 fill:#1f3150,stroke:#3d6ea5,color:#c9d1d9
style LLM2 fill:#162030,stroke:#3d6ea5,color:#c9d1d9
style R2 fill:#162030,stroke:#3d6ea5,color:#c9d1d9
style AE fill:#162030,stroke:#3d6ea5,color:#c9d1d9
style SAVE fill:#11151c,stroke:#484f58,color:#6e7681
</div>
</section>
<!-- DIFF TABLE -->
<section id="diff-table">
<h2>Diff table</h2>
<div class="table-wrap">
<table>
<thead>
<tr>
<th>Dimension</th>
<th>Hermes Agent</th>
<th>openclaw-honcho</th>
</tr>
</thead>
<tbody>
<tr>
<td><strong>Context injection timing</strong></td>
<td>Once per session (cached). Zero HTTP on response path after turn 1.</td>
<td>Every turn, blocking. Fresh context per turn but adds latency.</td>
</tr>
<tr>
<td><strong>Prefetch strategy</strong></td>
<td>Daemon threads fire at turn end; consumed next turn from cache.</td>
<td>None. Blocking call at prompt-build time.</td>
</tr>
<tr>
<td><strong>Dialectic (peer.chat)</strong></td>
<td>Prefetched async; result injected into system prompt next turn.</td>
<td>On-demand via <code>honcho_recall</code> / <code>honcho_analyze</code> tools.</td>
</tr>
<tr>
<td><strong>Reasoning level</strong></td>
<td>Dynamic: scales with message length. Floor = config default. Cap = "high".</td>
<td>Fixed per tool: recall=minimal, analyze=medium.</td>
</tr>
<tr>
<td><strong>Memory modes</strong></td>
<td><code>user_memory_mode</code> / <code>agent_memory_mode</code>: hybrid / honcho / local.</td>
<td>None. Always writes to Honcho.</td>
</tr>
<tr>
<td><strong>Write frequency</strong></td>
<td>async (background queue), turn, session, N turns.</td>
<td>After every agent_end (no control).</td>
</tr>
<tr>
<td><strong>AI peer identity</strong></td>
<td><code>observe_me=True</code>, <code>seed_ai_identity()</code>, <code>get_ai_representation()</code>, SOUL.md → AI peer.</td>
<td>Agent files uploaded to agent peer at setup. No ongoing self-observation seeding.</td>
</tr>
<tr>
<td><strong>Context scope</strong></td>
<td>User peer + AI peer representation, both injected.</td>
<td>User peer (owner) representation + conversation summary. <code>peerPerspective</code> on context call.</td>
</tr>
<tr>
<td><strong>Session naming</strong></td>
<td>per-directory / global / manual map / title-based.</td>
<td>Derived from platform session key.</td>
</tr>
<tr>
<td><strong>Multi-agent</strong></td>
<td>Single-agent only.</td>
<td>Parent observer hierarchy via <code>subagent_spawned</code>.</td>
</tr>
<tr>
<td><strong>Tool surface</strong></td>
<td>Single <code>query_user_context</code> tool (on-demand dialectic).</td>
<td>6 tools: session, profile, search, context (fast) + recall, analyze (LLM).</td>
</tr>
<tr>
<td><strong>Platform metadata</strong></td>
<td>Not stripped.</td>
<td>Explicitly stripped before Honcho storage.</td>
</tr>
<tr>
<td><strong>Message dedup</strong></td>
<td>None (sends on every save cycle).</td>
<td><code>lastSavedIndex</code> in session metadata prevents re-sending.</td>
</tr>
<tr>
<td><strong>CLI surface in prompt</strong></td>
<td>Management commands injected into system prompt. Agent knows its own CLI.</td>
<td>Not injected.</td>
</tr>
<tr>
<td><strong>AI peer name in identity</strong></td>
<td>Replaces "Hermes Agent" in DEFAULT_AGENT_IDENTITY when configured.</td>
<td>Not implemented.</td>
</tr>
<tr>
<td><strong>QMD / local file search</strong></td>
<td>Not implemented.</td>
<td>Passthrough tools when QMD backend configured.</td>
</tr>
<tr>
<td><strong>Workspace metadata</strong></td>
<td>Not implemented.</td>
<td><code>agentPeerMap</code> in workspace metadata tracks agent&#8594;peer ID.</td>
</tr>
</tbody>
</table>
</div>
</section>
<!-- PATTERNS -->
<section id="patterns">
<h2>Hermes patterns to port</h2>
<p>Six patterns from Hermes are worth adopting in any Honcho integration. They are described below as integration-agnostic interfaces — the implementation will differ per runtime, but the contract is the same.</p>
<div class="compare">
<div class="compare-card">
<h4>Patterns Hermes contributes</h4>
<ul>
<li>Async prefetch (zero-latency)</li>
<li>Dynamic reasoning level</li>
<li>Per-peer memory modes</li>
<li>AI peer identity formation</li>
<li>Session naming strategies</li>
<li>CLI surface injection</li>
</ul>
</div>
<div class="compare-card after">
<h4>Patterns openclaw contributes back</h4>
<ul>
<li>lastSavedIndex dedup</li>
<li>Platform metadata stripping</li>
<li>Multi-agent observer hierarchy</li>
<li>peerPerspective on context()</li>
<li>Tiered tool surface (fast/LLM)</li>
<li>Workspace agentPeerMap</li>
</ul>
</div>
</div>
</section>
<!-- SPEC: ASYNC PREFETCH -->
<section id="spec-async">
<h2>Spec: async prefetch</h2>
<h3>Problem</h3>
<p>Calling <code>session.context()</code> and <code>peer.chat()</code> synchronously before each LLM call adds 200800ms of Honcho round-trip latency to every turn. Users experience this as the agent "thinking slowly."</p>
<h3>Pattern</h3>
<p>Fire both calls as non-blocking background work at the <strong>end</strong> of each turn. Store results in a per-session cache keyed by session ID. At the <strong>start</strong> of the next turn, pop from cache — the HTTP is already done. First turn is cold (empty cache); all subsequent turns are zero-latency on the response path.</p>
<h3>Interface contract</h3>
<pre><code><span class="cm">// TypeScript (openclaw / nanobot plugin shape)</span>
<span class="kw">interface</span> <span class="key">AsyncPrefetch</span> {
<span class="cm">// Fire context + dialectic fetches at turn end. Non-blocking.</span>
firePrefetch(sessionId: <span class="str">string</span>, userMessage: <span class="str">string</span>): <span class="kw">void</span>;
<span class="cm">// Pop cached results at turn start. Returns empty if cache is cold.</span>
popContextResult(sessionId: <span class="str">string</span>): ContextResult | <span class="kw">null</span>;
popDialecticResult(sessionId: <span class="str">string</span>): <span class="str">string</span> | <span class="kw">null</span>;
}
<span class="kw">type</span> <span class="key">ContextResult</span> = {
representation: <span class="str">string</span>;
card: <span class="str">string</span>[];
aiRepresentation?: <span class="str">string</span>; <span class="cm">// AI peer context if enabled</span>
summary?: <span class="str">string</span>; <span class="cm">// conversation summary if fetched</span>
};</code></pre>
<h3>Implementation notes</h3>
<ul>
<li>Python: <code>threading.Thread(daemon=True)</code>. Write to <code>dict[session_id, result]</code> — GIL makes this safe for simple writes.</li>
<li>TypeScript: <code>Promise</code> stored in <code>Map&lt;string, Promise&lt;ContextResult&gt;&gt;</code>. Await at pop time. If not resolved yet, skip (return null) — do not block.</li>
<li>The pop is destructive: clears the cache entry after reading so stale data never accumulates.</li>
<li>Prefetch should also fire on first turn (even though it won't be consumed until turn 2) — this ensures turn 2 is never cold.</li>
</ul>
<h3>openclaw-honcho adoption</h3>
<p>Move <code>session.context()</code> from <code>before_prompt_build</code> to a post-<code>agent_end</code> background task. Store result in <code>state.contextCache</code>. In <code>before_prompt_build</code>, read from cache instead of calling Honcho. If cache is empty (turn 1), inject nothing — the prompt is still valid without Honcho context on the first turn.</p>
</section>
<!-- SPEC: DYNAMIC REASONING LEVEL -->
<section id="spec-reasoning">
<h2>Spec: dynamic reasoning level</h2>
<h3>Problem</h3>
<p>Honcho's dialectic endpoint supports reasoning levels from <code>minimal</code> to <code>max</code>. A fixed level per tool wastes budget on simple queries and under-serves complex ones.</p>
<h3>Pattern</h3>
<p>Select the reasoning level dynamically based on the user's message. Use the configured default as a floor. Bump by message length. Cap auto-selection at <code>high</code> — never select <code>max</code> automatically.</p>
<h3>Interface contract</h3>
<pre><code><span class="cm">// Shared helper — identical logic in any language</span>
<span class="kw">const</span> LEVELS = [<span class="str">"minimal"</span>, <span class="str">"low"</span>, <span class="str">"medium"</span>, <span class="str">"high"</span>, <span class="str">"max"</span>];
<span class="kw">function</span> <span class="key">dynamicReasoningLevel</span>(
query: <span class="str">string</span>,
configDefault: <span class="str">string</span> = <span class="str">"low"</span>
): <span class="str">string</span> {
<span class="kw">const</span> baseIdx = Math.max(<span class="num">0</span>, LEVELS.indexOf(configDefault));
<span class="kw">const</span> n = query.length;
<span class="kw">const</span> bump = n &lt; <span class="num">120</span> ? <span class="num">0</span> : n &lt; <span class="num">400</span> ? <span class="num">1</span> : <span class="num">2</span>;
<span class="kw">return</span> LEVELS[Math.min(baseIdx + bump, <span class="num">3</span>)]; <span class="cm">// cap at "high" (idx 3)</span>
}</code></pre>
<h3>Config key</h3>
<p>Add a <code>dialecticReasoningLevel</code> config field (string, default <code>"low"</code>). This sets the floor. Users can raise or lower it. The dynamic bump always applies on top.</p>
<h3>openclaw-honcho adoption</h3>
<p>Apply in <code>honcho_recall</code> and <code>honcho_analyze</code>: replace the fixed <code>reasoningLevel</code> with the dynamic selector. <code>honcho_recall</code> should use floor <code>"minimal"</code> and <code>honcho_analyze</code> floor <code>"medium"</code> — both still bump with message length.</p>
</section>
<!-- SPEC: PER-PEER MEMORY MODES -->
<section id="spec-modes">
<h2>Spec: per-peer memory modes</h2>
<h3>Problem</h3>
<p>Users want independent control over whether user context and agent context are written locally, to Honcho, or both. A single <code>memoryMode</code> shorthand is not granular enough.</p>
<h3>Pattern</h3>
<p>Three modes per peer: <code>hybrid</code> (write both local + Honcho), <code>honcho</code> (Honcho only, disable local files), <code>local</code> (local files only, skip Honcho sync for this peer). Two orthogonal axes: user peer and agent peer.</p>
<h3>Config schema</h3>
<pre><code><span class="cm">// ~/.openclaw/openclaw.json (or ~/.nanobot/config.json)</span>
{
<span class="str">"plugins"</span>: {
<span class="str">"openclaw-honcho"</span>: {
<span class="str">"config"</span>: {
<span class="str">"apiKey"</span>: <span class="str">"..."</span>,
<span class="str">"memoryMode"</span>: <span class="str">"hybrid"</span>, <span class="cm">// shorthand: both peers</span>
<span class="str">"userMemoryMode"</span>: <span class="str">"honcho"</span>, <span class="cm">// override for user peer</span>
<span class="str">"agentMemoryMode"</span>: <span class="str">"hybrid"</span> <span class="cm">// override for agent peer</span>
}
}
}
}</code></pre>
<h3>Resolution order</h3>
<ol>
<li>Per-peer field (<code>userMemoryMode</code> / <code>agentMemoryMode</code>) — wins if present.</li>
<li>Shorthand <code>memoryMode</code> — applies to both peers as default.</li>
<li>Hardcoded default: <code>"hybrid"</code>.</li>
</ol>
<h3>Effect on Honcho sync</h3>
<ul>
<li><code>userMemoryMode=local</code>: skip adding user peer messages to Honcho.</li>
<li><code>agentMemoryMode=local</code>: skip adding assistant peer messages to Honcho.</li>
<li>Both local: skip <code>session.addMessages()</code> entirely.</li>
<li><code>userMemoryMode=honcho</code>: disable local USER.md writes.</li>
<li><code>agentMemoryMode=honcho</code>: disable local MEMORY.md / SOUL.md writes.</li>
</ul>
</section>
<!-- SPEC: AI PEER IDENTITY -->
<section id="spec-identity">
<h2>Spec: AI peer identity formation</h2>
<h3>Problem</h3>
<p>Honcho builds the user's representation organically by observing what the user says. The same mechanism exists for the AI peer — but only if <code>observe_me=True</code> is set for the agent peer. Without it, the agent peer accumulates nothing and Honcho's AI-side model never forms.</p>
<p>Additionally, existing persona files (SOUL.md, IDENTITY.md) should seed the AI peer's Honcho representation at first activation, rather than waiting for it to emerge from scratch.</p>
<h3>Part A: observe_me=True for agent peer</h3>
<pre><code><span class="cm">// TypeScript — in session.addPeers() call</span>
<span class="kw">await</span> session.addPeers([
[ownerPeer.id, { observeMe: <span class="kw">true</span>, observeOthers: <span class="kw">false</span> }],
[agentPeer.id, { observeMe: <span class="kw">true</span>, observeOthers: <span class="kw">true</span> }], <span class="cm">// was false</span>
]);</code></pre>
<p>This is a one-line change but foundational. Without it, Honcho's AI peer representation stays empty regardless of what the agent says.</p>
<h3>Part B: seedAiIdentity()</h3>
<pre><code><span class="kw">async function</span> <span class="key">seedAiIdentity</span>(
session: HonchoSession,
agentPeer: Peer,
content: <span class="str">string</span>,
source: <span class="str">string</span>
): Promise&lt;<span class="kw">boolean</span>&gt; {
<span class="kw">const</span> wrapped = [
<span class="str">`&lt;ai_identity_seed&gt;`</span>,
<span class="str">`&lt;source&gt;${source}&lt;/source&gt;`</span>,
<span class="str">``</span>,
content.trim(),
<span class="str">`&lt;/ai_identity_seed&gt;`</span>,
].join(<span class="str">"\n"</span>);
<span class="kw">await</span> agentPeer.addMessage(<span class="str">"assistant"</span>, wrapped);
<span class="kw">return true</span>;
}</code></pre>
<h3>Part C: migrate agent files at setup</h3>
<p>During <code>openclaw honcho setup</code>, upload agent-self files (SOUL.md, IDENTITY.md, AGENTS.md, BOOTSTRAP.md) to the agent peer using <code>seedAiIdentity()</code> instead of <code>session.uploadFile()</code>. This routes the content through Honcho's observation pipeline rather than the file store.</p>
<h3>Part D: AI peer name in identity</h3>
<p>When the agent has a configured name (non-default), inject it into the agent's self-identity prefix. In OpenClaw this means adding to the injected system prompt section:</p>
<pre><code><span class="cm">// In context hook return value</span>
<span class="kw">return</span> {
systemPrompt: [
agentName ? <span class="str">`You are ${agentName}.`</span> : <span class="str">""</span>,
<span class="str">"## User Memory Context"</span>,
...sections,
].filter(Boolean).join(<span class="str">"\n\n"</span>)
};</code></pre>
<h3>CLI surface: honcho identity subcommand</h3>
<pre><code>openclaw honcho identity &lt;file&gt; <span class="cm"># seed from file</span>
openclaw honcho identity --show <span class="cm"># show current AI peer representation</span></code></pre>
</section>
<!-- SPEC: SESSION NAMING -->
<section id="spec-sessions">
<h2>Spec: session naming strategies</h2>
<h3>Problem</h3>
<p>When Honcho is used across multiple projects or directories, a single global session means every project shares the same context. Per-directory sessions provide isolation without requiring users to name sessions manually.</p>
<h3>Strategies</h3>
<div class="table-wrap">
<table>
<thead><tr><th>Strategy</th><th>Session key</th><th>When to use</th></tr></thead>
<tbody>
<tr><td><code>per-directory</code></td><td>basename of CWD</td><td>Default. Each project gets its own session.</td></tr>
<tr><td><code>global</code></td><td>fixed string <code>"global"</code></td><td>Single cross-project session.</td></tr>
<tr><td>manual map</td><td>user-configured per path</td><td><code>sessions</code> config map overrides directory basename.</td></tr>
<tr><td>title-based</td><td>sanitized session title</td><td>When agent supports named sessions; title set mid-conversation.</td></tr>
</tbody>
</table>
</div>
<h3>Config schema</h3>
<pre><code>{
<span class="str">"sessionStrategy"</span>: <span class="str">"per-directory"</span>, <span class="cm">// "per-directory" | "global"</span>
<span class="str">"sessionPeerPrefix"</span>: <span class="kw">false</span>, <span class="cm">// prepend peer name to session key</span>
<span class="str">"sessions"</span>: { <span class="cm">// manual overrides</span>
<span class="str">"/home/user/projects/foo"</span>: <span class="str">"foo-project"</span>
}
}</code></pre>
<h3>CLI surface</h3>
<pre><code>openclaw honcho sessions <span class="cm"># list all mappings</span>
openclaw honcho map &lt;name&gt; <span class="cm"># map cwd to session name</span>
openclaw honcho map <span class="cm"># no-arg = list mappings</span></code></pre>
<p>Resolution order: manual map wins &rarr; session title &rarr; directory basename &rarr; platform key.</p>
</section>
<!-- SPEC: CLI SURFACE INJECTION -->
<section id="spec-cli">
<h2>Spec: CLI surface injection</h2>
<h3>Problem</h3>
<p>When a user asks "how do I change my memory settings?" or "what Honcho commands are available?" the agent either hallucinates or says it doesn't know. The agent should know its own management interface.</p>
<h3>Pattern</h3>
<p>When Honcho is active, append a compact command reference to the system prompt. The agent can cite these commands directly instead of guessing.</p>
<pre><code><span class="cm">// In context hook, append to systemPrompt</span>
<span class="kw">const</span> honchoSection = [
<span class="str">"# Honcho memory integration"</span>,
<span class="str">`Active. Session: ${sessionKey}. Mode: ${mode}.`</span>,
<span class="str">"Management commands:"</span>,
<span class="str">" openclaw honcho status — show config + connection"</span>,
<span class="str">" openclaw honcho mode [hybrid|honcho|local] — show or set memory mode"</span>,
<span class="str">" openclaw honcho sessions — list session mappings"</span>,
<span class="str">" openclaw honcho map &lt;name&gt; — map directory to session"</span>,
<span class="str">" openclaw honcho identity [file] [--show] — seed or show AI identity"</span>,
<span class="str">" openclaw honcho setup — full interactive wizard"</span>,
].join(<span class="str">"\n"</span>);</code></pre>
<div class="callout warn">
<strong>Keep it compact.</strong> This section is injected every turn. Keep it under 300 chars of context. List commands, not explanations — the agent can explain them on request.
</div>
</section>
<!-- OPENCLAW CHECKLIST -->
<section id="openclaw-checklist">
<h2>openclaw-honcho checklist</h2>
<p>Ordered by impact. Each item maps to a spec section above.</p>
<ul class="checklist">
<li class="todo"><strong>Async prefetch</strong> — move <code>session.context()</code> out of <code>before_prompt_build</code> into post-<code>agent_end</code> background Promise. Pop from cache at prompt build. (<a href="#spec-async">spec</a>)</li>
<li class="todo"><strong>observe_me=True for agent peer</strong> — one-line change in <code>session.addPeers()</code> config for agent peer. (<a href="#spec-identity">spec</a>)</li>
<li class="todo"><strong>Dynamic reasoning level</strong> — add <code>dynamicReasoningLevel()</code> helper; apply in <code>honcho_recall</code> and <code>honcho_analyze</code>. Add <code>dialecticReasoningLevel</code> to config schema. (<a href="#spec-reasoning">spec</a>)</li>
<li class="todo"><strong>Per-peer memory modes</strong> — add <code>userMemoryMode</code> / <code>agentMemoryMode</code> to config; gate Honcho sync and local writes accordingly. (<a href="#spec-modes">spec</a>)</li>
<li class="todo"><strong>seedAiIdentity()</strong> — add helper; apply during setup migration for SOUL.md / IDENTITY.md instead of <code>session.uploadFile()</code>. (<a href="#spec-identity">spec</a>)</li>
<li class="todo"><strong>Session naming strategies</strong> — add <code>sessionStrategy</code>, <code>sessions</code> map, <code>sessionPeerPrefix</code> to config; implement resolution function. (<a href="#spec-sessions">spec</a>)</li>
<li class="todo"><strong>CLI surface injection</strong> — append command reference to <code>before_prompt_build</code> return value when Honcho is active. (<a href="#spec-cli">spec</a>)</li>
<li class="todo"><strong>honcho identity subcommand</strong> — add <code>openclaw honcho identity</code> CLI command. (<a href="#spec-identity">spec</a>)</li>
<li class="todo"><strong>AI peer name injection</strong> — if <code>aiPeer</code> name configured, prepend to injected system prompt. (<a href="#spec-identity">spec</a>)</li>
<li class="todo"><strong>honcho mode / honcho sessions / honcho map</strong> — CLI parity with Hermes. (<a href="#spec-sessions">spec</a>)</li>
</ul>
<div class="callout success">
<strong>Already done in openclaw-honcho (do not re-implement):</strong> lastSavedIndex dedup, platform metadata stripping, multi-agent parent observer hierarchy, peerPerspective on context(), tiered tool surface (fast/LLM), workspace agentPeerMap, QMD passthrough, self-hosted Honcho support.
</div>
</section>
<!-- NANOBOT CHECKLIST -->
<section id="nanobot-checklist">
<h2>nanobot-honcho checklist</h2>
<p>nanobot-honcho is a greenfield integration. Start from openclaw-honcho's architecture (hook-based, dual peer) and apply all Hermes patterns from day one rather than retrofitting. Priority order:</p>
<h3>Phase 1 — core correctness</h3>
<ul class="checklist">
<li class="todo">Dual peer model (owner + agent peer), both with <code>observe_me=True</code></li>
<li class="todo">Message capture at turn end with <code>lastSavedIndex</code> dedup</li>
<li class="todo">Platform metadata stripping before Honcho storage</li>
<li class="todo">Async prefetch from day one — do not implement blocking context injection</li>
<li class="todo">Legacy file migration at first activation (USER.md → owner peer, SOUL.md → <code>seedAiIdentity()</code>)</li>
</ul>
<h3>Phase 2 — configuration</h3>
<ul class="checklist">
<li class="todo">Config schema: <code>apiKey</code>, <code>workspaceId</code>, <code>baseUrl</code>, <code>memoryMode</code>, <code>userMemoryMode</code>, <code>agentMemoryMode</code>, <code>dialecticReasoningLevel</code>, <code>sessionStrategy</code>, <code>sessions</code></li>
<li class="todo">Per-peer memory mode gating</li>
<li class="todo">Dynamic reasoning level</li>
<li class="todo">Session naming strategies</li>
</ul>
<h3>Phase 3 — tools and CLI</h3>
<ul class="checklist">
<li class="todo">Tool surface: <code>honcho_profile</code>, <code>honcho_recall</code>, <code>honcho_analyze</code>, <code>honcho_search</code>, <code>honcho_context</code></li>
<li class="todo">CLI: <code>setup</code>, <code>status</code>, <code>sessions</code>, <code>map</code>, <code>mode</code>, <code>identity</code></li>
<li class="todo">CLI surface injection into system prompt</li>
<li class="todo">AI peer name wired into agent identity</li>
</ul>
</section>
</div>
<script type="module">
import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';
mermaid.initialize({ startOnLoad: true, securityLevel: 'loose', fontFamily: 'Departure Mono, Noto Emoji, monospace' });
</script>
<script>
window.addEventListener('scroll', () => {
const bar = document.getElementById('progress');
const max = document.documentElement.scrollHeight - window.innerHeight;
bar.style.width = (max > 0 ? (window.scrollY / max) * 100 : 0) + '%';
});
</script>
</body>
</html>
+377
View File
@@ -0,0 +1,377 @@
# honcho-integration-spec
Comparison of Hermes Agent vs. openclaw-honcho — and a porting spec for bringing Hermes patterns into other Honcho integrations.
---
## Overview
Two independent Honcho integrations have been built for two different agent runtimes: **Hermes Agent** (Python, baked into the runner) and **openclaw-honcho** (TypeScript plugin via hook/tool API). Both use the same Honcho peer paradigm — dual peer model, `session.context()`, `peer.chat()` — but they made different tradeoffs at every layer.
This document maps those tradeoffs and defines a porting spec: a set of Hermes-originated patterns, each stated as an integration-agnostic interface, that any Honcho integration can adopt regardless of runtime or language.
> **Scope** Both integrations work correctly today. This spec is about the delta — patterns in Hermes that are worth propagating and patterns in openclaw-honcho that Hermes should eventually adopt. The spec is additive, not prescriptive.
---
## Architecture comparison
### Hermes: baked-in runner
Honcho is initialised directly inside `AIAgent.__init__`. There is no plugin boundary. Session management, context injection, async prefetch, and CLI surface are all first-class concerns of the runner. Context is injected once per session (baked into `_cached_system_prompt`) and never re-fetched mid-session — this maximises prefix cache hits at the LLM provider.
Turn flow:
```
user message
→ _honcho_prefetch() (reads cache — no HTTP)
→ _build_system_prompt() (first turn only, cached)
→ LLM call
→ response
→ _honcho_fire_prefetch() (daemon threads, turn end)
→ prefetch_context() thread ──┐
→ prefetch_dialectic() thread ─┴→ _context_cache / _dialectic_cache
```
### openclaw-honcho: hook-based plugin
The plugin registers hooks against OpenClaw's event bus. Context is fetched synchronously inside `before_prompt_build` on every turn. Message capture happens in `agent_end`. The multi-agent hierarchy is tracked via `subagent_spawned`. This model is correct but every turn pays a blocking Honcho round-trip before the LLM call can begin.
Turn flow:
```
user message
→ before_prompt_build (BLOCKING HTTP — every turn)
→ session.context()
→ system prompt assembled
→ LLM call
→ response
→ agent_end hook
→ session.addMessages()
→ session.setMetadata()
```
---
## Diff table
| Dimension | Hermes Agent | openclaw-honcho |
|---|---|---|
| **Context injection timing** | Once per session (cached). Zero HTTP on response path after turn 1. | Every turn, blocking. Fresh context per turn but adds latency. |
| **Prefetch strategy** | Daemon threads fire at turn end; consumed next turn from cache. | None. Blocking call at prompt-build time. |
| **Dialectic (peer.chat)** | Prefetched async; result injected into system prompt next turn. | On-demand via `honcho_recall` / `honcho_analyze` tools. |
| **Reasoning level** | Dynamic: scales with message length. Floor = config default. Cap = "high". | Fixed per tool: recall=minimal, analyze=medium. |
| **Memory modes** | `user_memory_mode` / `agent_memory_mode`: hybrid / honcho / local. | None. Always writes to Honcho. |
| **Write frequency** | async (background queue), turn, session, N turns. | After every agent_end (no control). |
| **AI peer identity** | `observe_me=True`, `seed_ai_identity()`, `get_ai_representation()`, SOUL.md → AI peer. | Agent files uploaded to agent peer at setup. No ongoing self-observation. |
| **Context scope** | User peer + AI peer representation, both injected. | User peer (owner) representation + conversation summary. `peerPerspective` on context call. |
| **Session naming** | per-directory / global / manual map / title-based. | Derived from platform session key. |
| **Multi-agent** | Single-agent only. | Parent observer hierarchy via `subagent_spawned`. |
| **Tool surface** | Single `query_user_context` tool (on-demand dialectic). | 6 tools: session, profile, search, context (fast) + recall, analyze (LLM). |
| **Platform metadata** | Not stripped. | Explicitly stripped before Honcho storage. |
| **Message dedup** | None. | `lastSavedIndex` in session metadata prevents re-sending. |
| **CLI surface in prompt** | Management commands injected into system prompt. Agent knows its own CLI. | Not injected. |
| **AI peer name in identity** | Replaces "Hermes Agent" in DEFAULT_AGENT_IDENTITY when configured. | Not implemented. |
| **QMD / local file search** | Not implemented. | Passthrough tools when QMD backend configured. |
| **Workspace metadata** | Not implemented. | `agentPeerMap` in workspace metadata tracks agent→peer ID. |
---
## Patterns
Six patterns from Hermes are worth adopting in any Honcho integration. Each is described as an integration-agnostic interface.
**Hermes contributes:**
- Async prefetch (zero-latency)
- Dynamic reasoning level
- Per-peer memory modes
- AI peer identity formation
- Session naming strategies
- CLI surface injection
**openclaw-honcho contributes back (Hermes should adopt):**
- `lastSavedIndex` dedup
- Platform metadata stripping
- Multi-agent observer hierarchy
- `peerPerspective` on `context()`
- Tiered tool surface (fast/LLM)
- Workspace `agentPeerMap`
---
## Spec: async prefetch
### Problem
Calling `session.context()` and `peer.chat()` synchronously before each LLM call adds 200800ms of Honcho round-trip latency to every turn.
### Pattern
Fire both calls as non-blocking background work at the **end** of each turn. Store results in a per-session cache keyed by session ID. At the **start** of the next turn, pop from cache — the HTTP is already done. First turn is cold (empty cache); all subsequent turns are zero-latency on the response path.
### Interface contract
```typescript
interface AsyncPrefetch {
// Fire context + dialectic fetches at turn end. Non-blocking.
firePrefetch(sessionId: string, userMessage: string): void;
// Pop cached results at turn start. Returns empty if cache is cold.
popContextResult(sessionId: string): ContextResult | null;
popDialecticResult(sessionId: string): string | null;
}
type ContextResult = {
representation: string;
card: string[];
aiRepresentation?: string; // AI peer context if enabled
summary?: string; // conversation summary if fetched
};
```
### Implementation notes
- **Python:** `threading.Thread(daemon=True)`. Write to `dict[session_id, result]` — GIL makes this safe for simple writes.
- **TypeScript:** `Promise` stored in `Map<string, Promise<ContextResult>>`. Await at pop time. If not resolved yet, return null — do not block.
- The pop is destructive: clears the cache entry after reading so stale data never accumulates.
- Prefetch should also fire on first turn (even though it won't be consumed until turn 2).
### openclaw-honcho adoption
Move `session.context()` from `before_prompt_build` to a post-`agent_end` background task. Store result in `state.contextCache`. In `before_prompt_build`, read from cache instead of calling Honcho. If cache is empty (turn 1), inject nothing — the prompt is still valid without Honcho context on the first turn.
---
## Spec: dynamic reasoning level
### Problem
Honcho's dialectic endpoint supports reasoning levels from `minimal` to `max`. A fixed level per tool wastes budget on simple queries and under-serves complex ones.
### Pattern
Select the reasoning level dynamically based on the user's message. Use the configured default as a floor. Bump by message length. Cap auto-selection at `high` — never select `max` automatically.
### Logic
```
< 120 chars → default (typically "low")
120400 chars → one level above default (cap at "high")
> 400 chars → two levels above default (cap at "high")
```
### Config key
Add `dialecticReasoningLevel` (string, default `"low"`). This sets the floor. The dynamic bump always applies on top.
### openclaw-honcho adoption
Apply in `honcho_recall` and `honcho_analyze`: replace fixed `reasoningLevel` with the dynamic selector. `honcho_recall` uses floor `"minimal"`, `honcho_analyze` uses floor `"medium"` — both still bump with message length.
---
## Spec: per-peer memory modes
### Problem
Users want independent control over whether user context and agent context are written locally, to Honcho, or both.
### Modes
| Mode | Effect |
|---|---|
| `hybrid` | Write to both local files and Honcho (default) |
| `honcho` | Honcho only — disable corresponding local file writes |
| `local` | Local files only — skip Honcho sync for this peer |
### Config schema
```json
{
"memoryMode": "hybrid",
"userMemoryMode": "honcho",
"agentMemoryMode": "hybrid"
}
```
Resolution order: per-peer field wins → shorthand `memoryMode` → default `"hybrid"`.
### Effect on Honcho sync
- `userMemoryMode=local`: skip adding user peer messages to Honcho
- `agentMemoryMode=local`: skip adding assistant peer messages to Honcho
- Both local: skip `session.addMessages()` entirely
- `userMemoryMode=honcho`: disable local USER.md writes
- `agentMemoryMode=honcho`: disable local MEMORY.md / SOUL.md writes
---
## Spec: AI peer identity formation
### Problem
Honcho builds the user's representation organically by observing what the user says. The same mechanism exists for the AI peer — but only if `observe_me=True` is set for the agent peer. Without it, the agent peer accumulates nothing.
Additionally, existing persona files (SOUL.md, IDENTITY.md) should seed the AI peer's Honcho representation at first activation.
### Part A: observe_me=True for agent peer
```typescript
await session.addPeers([
[ownerPeer.id, { observeMe: true, observeOthers: false }],
[agentPeer.id, { observeMe: true, observeOthers: true }], // was false
]);
```
One-line change. Foundational. Without it, the AI peer representation stays empty regardless of what the agent says.
### Part B: seedAiIdentity()
```typescript
async function seedAiIdentity(
agentPeer: Peer,
content: string,
source: string
): Promise<boolean> {
const wrapped = [
`<ai_identity_seed>`,
`<source>${source}</source>`,
``,
content.trim(),
`</ai_identity_seed>`,
].join("\n");
await agentPeer.addMessage("assistant", wrapped);
return true;
}
```
### Part C: migrate agent files at setup
During `honcho setup`, upload agent-self files (SOUL.md, IDENTITY.md, AGENTS.md) to the agent peer via `seedAiIdentity()` instead of `session.uploadFile()`. This routes content through Honcho's observation pipeline.
### Part D: AI peer name in identity
When the agent has a configured name, prepend it to the injected system prompt:
```typescript
const namePrefix = agentName ? `You are ${agentName}.\n\n` : "";
return { systemPrompt: namePrefix + "## User Memory Context\n\n" + sections };
```
### CLI surface
```
honcho identity <file> # seed from file
honcho identity --show # show current AI peer representation
```
---
## Spec: session naming strategies
### Problem
A single global session means every project shares the same Honcho context. Per-directory sessions provide isolation without requiring users to name sessions manually.
### Strategies
| Strategy | Session key | When to use |
|---|---|---|
| `per-directory` | basename of CWD | Default. Each project gets its own session. |
| `global` | fixed string `"global"` | Single cross-project session. |
| manual map | user-configured per path | `sessions` config map overrides directory basename. |
| title-based | sanitized session title | When agent supports named sessions set mid-conversation. |
### Config schema
```json
{
"sessionStrategy": "per-directory",
"sessionPeerPrefix": false,
"sessions": {
"/home/user/projects/foo": "foo-project"
}
}
```
### CLI surface
```
honcho sessions # list all mappings
honcho map <name> # map cwd to session name
honcho map # no-arg = list mappings
```
Resolution order: manual map → session title → directory basename → platform key.
---
## Spec: CLI surface injection
### Problem
When a user asks "how do I change my memory settings?" the agent either hallucinates or says it doesn't know. The agent should know its own management interface.
### Pattern
When Honcho is active, append a compact command reference to the system prompt. Keep it under 300 chars.
```
# Honcho memory integration
Active. Session: {sessionKey}. Mode: {mode}.
Management commands:
honcho status — show config + connection
honcho mode [hybrid|honcho|local] — show or set memory mode
honcho sessions — list session mappings
honcho map <name> — map directory to session
honcho identity [file] [--show] — seed or show AI identity
honcho setup — full interactive wizard
```
---
## openclaw-honcho checklist
Ordered by impact:
- [ ] **Async prefetch** — move `session.context()` out of `before_prompt_build` into post-`agent_end` background Promise
- [ ] **observe_me=True for agent peer** — one-line change in `session.addPeers()`
- [ ] **Dynamic reasoning level** — add helper; apply in `honcho_recall` and `honcho_analyze`; add `dialecticReasoningLevel` to config
- [ ] **Per-peer memory modes** — add `userMemoryMode` / `agentMemoryMode` to config; gate Honcho sync and local writes
- [ ] **seedAiIdentity()** — add helper; use during setup migration for SOUL.md / IDENTITY.md
- [ ] **Session naming strategies** — add `sessionStrategy`, `sessions` map, `sessionPeerPrefix`
- [ ] **CLI surface injection** — append command reference to `before_prompt_build` return value
- [ ] **honcho identity subcommand** — seed from file or `--show` current representation
- [ ] **AI peer name injection** — if `aiPeer` name configured, prepend to injected system prompt
- [ ] **honcho mode / sessions / map** — CLI parity with Hermes
Already done in openclaw-honcho (do not re-implement): `lastSavedIndex` dedup, platform metadata stripping, multi-agent parent observer, `peerPerspective` on `context()`, tiered tool surface, workspace `agentPeerMap`, QMD passthrough, self-hosted Honcho.
---
## nanobot-honcho checklist
Greenfield integration. Start from openclaw-honcho's architecture and apply all Hermes patterns from day one.
### Phase 1 — core correctness
- [ ] Dual peer model (owner + agent peer), both with `observe_me=True`
- [ ] Message capture at turn end with `lastSavedIndex` dedup
- [ ] Platform metadata stripping before Honcho storage
- [ ] Async prefetch from day one — do not implement blocking context injection
- [ ] Legacy file migration at first activation (USER.md → owner peer, SOUL.md → `seedAiIdentity()`)
### Phase 2 — configuration
- [ ] Config schema: `apiKey`, `workspaceId`, `baseUrl`, `memoryMode`, `userMemoryMode`, `agentMemoryMode`, `dialecticReasoningLevel`, `sessionStrategy`, `sessions`
- [ ] Per-peer memory mode gating
- [ ] Dynamic reasoning level
- [ ] Session naming strategies
### Phase 3 — tools and CLI
- [ ] Tool surface: `honcho_profile`, `honcho_recall`, `honcho_analyze`, `honcho_search`, `honcho_context`
- [ ] CLI: `setup`, `status`, `sessions`, `map`, `mode`, `identity`
- [ ] CLI surface injection into system prompt
- [ ] AI peer name wired into agent identity
+142
View File
@@ -0,0 +1,142 @@
# Migrating from OpenClaw to Hermes Agent
This guide covers how to import your OpenClaw settings, memories, skills, and API keys into Hermes Agent.
## Three Ways to Migrate
### 1. Automatic (during first-time setup)
When you run `hermes setup` for the first time and Hermes detects `~/.openclaw`, it automatically offers to import your OpenClaw data before configuration begins. Just accept the prompt and everything is handled for you.
### 2. CLI Command (quick, scriptable)
```bash
hermes claw migrate # Preview then migrate (always shows preview first)
hermes claw migrate --dry-run # Preview only, no changes
hermes claw migrate --preset user-data # Migrate without API keys/secrets
hermes claw migrate --yes # Skip confirmation prompt
```
The migration always shows a full preview of what will be imported before making any changes. You review the preview and confirm before anything is written.
**All options:**
| Flag | Description |
|------|-------------|
| `--source PATH` | Path to OpenClaw directory (default: `~/.openclaw`) |
| `--dry-run` | Preview only — no files are modified |
| `--preset {user-data,full}` | Migration preset (default: `full`). `user-data` excludes secrets |
| `--overwrite` | Overwrite existing files (default: skip conflicts) |
| `--migrate-secrets` | Include allowlisted secrets (auto-enabled with `full` preset) |
| `--workspace-target PATH` | Copy workspace instructions (AGENTS.md) to this absolute path |
| `--skill-conflict {skip,overwrite,rename}` | How to handle skill name conflicts (default: `skip`) |
| `--yes`, `-y` | Skip confirmation prompts |
### 3. Agent-Guided (interactive, with previews)
Ask the agent to run the migration for you:
```
> Migrate my OpenClaw setup to Hermes
```
The agent will use the `openclaw-migration` skill to:
1. Run a preview first to show what would change
2. Ask about conflict resolution (SOUL.md, skills, etc.)
3. Let you choose between `user-data` and `full` presets
4. Execute the migration with your choices
5. Print a detailed summary of what was migrated
## What Gets Migrated
### `user-data` preset
| Item | Source | Destination |
|------|--------|-------------|
| SOUL.md | `~/.openclaw/workspace/SOUL.md` | `~/.hermes/SOUL.md` |
| Memory entries | `~/.openclaw/workspace/MEMORY.md` | `~/.hermes/memories/MEMORY.md` |
| User profile | `~/.openclaw/workspace/USER.md` | `~/.hermes/memories/USER.md` |
| Skills | `~/.openclaw/workspace/skills/` | `~/.hermes/skills/openclaw-imports/` |
| Command allowlist | `~/.openclaw/workspace/exec_approval_patterns.yaml` | Merged into `~/.hermes/config.yaml` |
| Messaging settings | `~/.openclaw/config.yaml` (TELEGRAM_ALLOWED_USERS, MESSAGING_CWD) | `~/.hermes/.env` |
| TTS assets | `~/.openclaw/workspace/tts/` | `~/.hermes/tts/` |
Workspace files are also checked at `workspace.default/` and `workspace-main/` as fallback paths (OpenClaw renamed `workspace/` to `workspace-main/` in recent versions).
### `full` preset (adds to `user-data`)
| Item | Source | Destination |
|------|--------|-------------|
| Telegram bot token | `openclaw.json` channels config | `~/.hermes/.env` |
| OpenRouter API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` |
| OpenAI API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` |
| Anthropic API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` |
| ElevenLabs API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` |
API keys are searched across four sources: inline config values, `~/.openclaw/.env`, the `openclaw.json` `"env"` sub-object, and per-agent auth profiles.
Only allowlisted secrets are ever imported. Other credentials are skipped and reported.
## OpenClaw Schema Compatibility
The migration handles both old and current OpenClaw config layouts:
- **Channel tokens**: Reads from flat paths (`channels.telegram.botToken`) and the newer `accounts.default` layout (`channels.telegram.accounts.default.botToken`)
- **TTS provider**: OpenClaw renamed "edge" to "microsoft" — both are recognized and mapped to Hermes' "edge"
- **Provider API types**: Both short (`openai`, `anthropic`) and hyphenated (`openai-completions`, `anthropic-messages`, `google-generative-ai`) values are mapped correctly
- **thinkingDefault**: All enum values are handled including newer ones (`minimal`, `xhigh`, `adaptive`)
- **Matrix**: Uses `accessToken` field (not `botToken`)
- **SecretRef formats**: Plain strings, env templates (`${VAR}`), and `source: "env"` SecretRefs are resolved. `source: "file"` and `source: "exec"` SecretRefs produce a warning — add those keys manually after migration.
## Conflict Handling
By default, the migration **will not overwrite** existing Hermes data:
- **SOUL.md** — skipped if one already exists in `~/.hermes/`
- **Memory entries** — skipped if memories already exist (to avoid duplicates)
- **Skills** — skipped if a skill with the same name already exists
- **API keys** — skipped if the key is already set in `~/.hermes/.env`
To overwrite conflicts, use `--overwrite`. The migration creates backups before overwriting.
For skills, you can also use `--skill-conflict rename` to import conflicting skills under a new name (e.g., `skill-name-imported`).
## Migration Report
Every migration produces a report showing:
- **Migrated items** — what was successfully imported
- **Conflicts** — items skipped because they already exist
- **Skipped items** — items not found in the source
- **Errors** — items that failed to import
For executed migrations, the full report is saved to `~/.hermes/migration/openclaw/<timestamp>/`.
## Post-Migration Notes
- **Skills require a new session** — imported skills take effect after restarting your agent or starting a new chat.
- **WhatsApp requires re-pairing** — WhatsApp uses QR-code pairing, not token-based auth. Run `hermes whatsapp` to pair.
- **Archive cleanup** — after migration, you'll be offered to rename `~/.openclaw/` to `.openclaw.pre-migration/` to prevent state confusion. You can also run `hermes claw cleanup` later.
## Troubleshooting
### "OpenClaw directory not found"
The migration looks for `~/.openclaw` by default, then tries `~/.clawdbot` and `~/.moltbot`. If your OpenClaw is installed elsewhere, use `--source`:
```bash
hermes claw migrate --source /path/to/.openclaw
```
### "Migration script not found"
The migration script ships with Hermes Agent. If you installed via pip (not git clone), the `optional-skills/` directory may not be present. Install the skill from the Skills Hub:
```bash
hermes skills install openclaw-migration
```
### Memory overflow
If your OpenClaw MEMORY.md or USER.md exceeds Hermes' character limits, excess entries are exported to an overflow file in the migration report directory. You can manually review and add the most important ones.
### API keys not found
Keys might be stored in different places depending on your OpenClaw setup:
- `~/.openclaw/.env` file
- Inline in `openclaw.json` under `models.providers.*.apiKey`
- In `openclaw.json` under the `"env"` or `"env.vars"` sub-objects
- In `~/.openclaw/agents/main/agent/auth-profiles.json`
The migration checks all four. If keys use `source: "file"` or `source: "exec"` SecretRefs, they can't be resolved automatically — add them via `hermes config set`.
@@ -0,0 +1,608 @@
# Pricing Accuracy Architecture
Date: 2026-03-16
## Goal
Hermes should only show dollar costs when they are backed by an official source for the user's actual billing path.
This design replaces the current static, heuristic pricing flow in:
- `run_agent.py`
- `agent/usage_pricing.py`
- `agent/insights.py`
- `cli.py`
with a provider-aware pricing system that:
- handles cache billing correctly
- distinguishes `actual` vs `estimated` vs `included` vs `unknown`
- reconciles post-hoc costs when providers expose authoritative billing data
- supports direct providers, OpenRouter, subscriptions, enterprise pricing, and custom endpoints
## Problems In The Current Design
Current Hermes behavior has four structural issues:
1. It stores only `prompt_tokens` and `completion_tokens`, which is insufficient for providers that bill cache reads and cache writes separately.
2. It uses a static model price table and fuzzy heuristics, which can drift from current official pricing.
3. It assumes public API list pricing matches the user's real billing path.
4. It has no distinction between live estimates and reconciled billed cost.
## Design Principles
1. Normalize usage before pricing.
2. Never fold cached tokens into plain input cost.
3. Track certainty explicitly.
4. Treat the billing path as part of the model identity.
5. Prefer official machine-readable sources over scraped docs.
6. Use post-hoc provider cost APIs when available.
7. Show `n/a` rather than inventing precision.
## High-Level Architecture
The new system has four layers:
1. `usage_normalization`
Converts raw provider usage into a canonical usage record.
2. `pricing_source_resolution`
Determines the billing path, source of truth, and applicable pricing source.
3. `cost_estimation_and_reconciliation`
Produces an immediate estimate when possible, then replaces or annotates it with actual billed cost later.
4. `presentation`
`/usage`, `/insights`, and the status bar display cost with certainty metadata.
## Canonical Usage Record
Add a canonical usage model that every provider path maps into before any pricing math happens.
Suggested structure:
```python
@dataclass
class CanonicalUsage:
provider: str
billing_provider: str
model: str
billing_route: str
input_tokens: int = 0
output_tokens: int = 0
cache_read_tokens: int = 0
cache_write_tokens: int = 0
reasoning_tokens: int = 0
request_count: int = 1
raw_usage: dict[str, Any] | None = None
raw_usage_fields: dict[str, str] | None = None
computed_fields: set[str] | None = None
provider_request_id: str | None = None
provider_generation_id: str | None = None
provider_response_id: str | None = None
```
Rules:
- `input_tokens` means non-cached input only.
- `cache_read_tokens` and `cache_write_tokens` are never merged into `input_tokens`.
- `output_tokens` excludes cache metrics.
- `reasoning_tokens` is telemetry unless a provider officially bills it separately.
This is the same normalization pattern used by `opencode`, extended with provenance and reconciliation ids.
## Provider Normalization Rules
### OpenAI Direct
Source usage fields:
- `prompt_tokens`
- `completion_tokens`
- `prompt_tokens_details.cached_tokens`
Normalization:
- `cache_read_tokens = cached_tokens`
- `input_tokens = prompt_tokens - cached_tokens`
- `cache_write_tokens = 0` unless OpenAI exposes it in the relevant route
- `output_tokens = completion_tokens`
### Anthropic Direct
Source usage fields:
- `input_tokens`
- `output_tokens`
- `cache_read_input_tokens`
- `cache_creation_input_tokens`
Normalization:
- `input_tokens = input_tokens`
- `output_tokens = output_tokens`
- `cache_read_tokens = cache_read_input_tokens`
- `cache_write_tokens = cache_creation_input_tokens`
### OpenRouter
Estimate-time usage normalization should use the response usage payload with the same rules as the underlying provider when possible.
Reconciliation-time records should also store:
- OpenRouter generation id
- native token fields when available
- `total_cost`
- `cache_discount`
- `upstream_inference_cost`
- `is_byok`
### Gemini / Vertex
Use official Gemini or Vertex usage fields where available.
If cached content tokens are exposed:
- map them to `cache_read_tokens`
If a route exposes no cache creation metric:
- store `cache_write_tokens = 0`
- preserve the raw usage payload for later extension
### DeepSeek And Other Direct Providers
Normalize only the fields that are officially exposed.
If a provider does not expose cache buckets:
- do not infer them unless the provider explicitly documents how to derive them
### Subscription / Included-Cost Routes
These still use the canonical usage model.
Tokens are tracked normally. Cost depends on billing mode, not on whether usage exists.
## Billing Route Model
Hermes must stop keying pricing solely by `model`.
Introduce a billing route descriptor:
```python
@dataclass
class BillingRoute:
provider: str
base_url: str | None
model: str
billing_mode: str
organization_hint: str | None = None
```
`billing_mode` values:
- `official_cost_api`
- `official_generation_api`
- `official_models_api`
- `official_docs_snapshot`
- `subscription_included`
- `user_override`
- `custom_contract`
- `unknown`
Examples:
- OpenAI direct API with Costs API access: `official_cost_api`
- Anthropic direct API with Usage & Cost API access: `official_cost_api`
- OpenRouter request before reconciliation: `official_models_api`
- OpenRouter request after generation lookup: `official_generation_api`
- GitHub Copilot style subscription route: `subscription_included`
- local OpenAI-compatible server: `unknown`
- enterprise contract with configured rates: `custom_contract`
## Cost Status Model
Every displayed cost should have:
```python
@dataclass
class CostResult:
amount_usd: Decimal | None
status: Literal["actual", "estimated", "included", "unknown"]
source: Literal[
"provider_cost_api",
"provider_generation_api",
"provider_models_api",
"official_docs_snapshot",
"user_override",
"custom_contract",
"none",
]
label: str
fetched_at: datetime | None
pricing_version: str | None
notes: list[str]
```
Presentation rules:
- `actual`: show dollar amount as final
- `estimated`: show dollar amount with estimate labeling
- `included`: show `included` or `$0.00 (included)` depending on UX choice
- `unknown`: show `n/a`
## Official Source Hierarchy
Resolve cost using this order:
1. Request-level or account-level official billed cost
2. Official machine-readable model pricing
3. Official docs snapshot
4. User override or custom contract
5. Unknown
The system must never skip to a lower level if a higher-confidence source exists for the current billing route.
## Provider-Specific Truth Rules
### OpenAI Direct
Preferred truth:
1. Costs API for reconciled spend
2. Official pricing page for live estimate
### Anthropic Direct
Preferred truth:
1. Usage & Cost API for reconciled spend
2. Official pricing docs for live estimate
### OpenRouter
Preferred truth:
1. `GET /api/v1/generation` for reconciled `total_cost`
2. `GET /api/v1/models` pricing for live estimate
Do not use underlying provider public pricing as the source of truth for OpenRouter billing.
### Gemini / Vertex
Preferred truth:
1. official billing export or billing API for reconciled spend when available for the route
2. official pricing docs for estimate
### DeepSeek
Preferred truth:
1. official machine-readable cost source if available in the future
2. official pricing docs snapshot today
### Subscription-Included Routes
Preferred truth:
1. explicit route config marking the model as included in subscription
These should display `included`, not an API list-price estimate.
### Custom Endpoint / Local Model
Preferred truth:
1. user override
2. custom contract config
3. unknown
These should default to `unknown`.
## Pricing Catalog
Replace the current `MODEL_PRICING` dict with a richer pricing catalog.
Suggested record:
```python
@dataclass
class PricingEntry:
provider: str
route_pattern: str
model_pattern: str
input_cost_per_million: Decimal | None = None
output_cost_per_million: Decimal | None = None
cache_read_cost_per_million: Decimal | None = None
cache_write_cost_per_million: Decimal | None = None
request_cost: Decimal | None = None
image_cost: Decimal | None = None
source: str = "official_docs_snapshot"
source_url: str | None = None
fetched_at: datetime | None = None
pricing_version: str | None = None
```
The catalog should be route-aware:
- `openai:gpt-5`
- `anthropic:claude-opus-4-6`
- `openrouter:anthropic/claude-opus-4.6`
- `copilot:gpt-4o`
This avoids conflating direct-provider billing with aggregator billing.
## Pricing Sync Architecture
Introduce a pricing sync subsystem instead of manually maintaining a single hardcoded table.
Suggested modules:
- `agent/pricing/catalog.py`
- `agent/pricing/sources.py`
- `agent/pricing/sync.py`
- `agent/pricing/reconcile.py`
- `agent/pricing/types.py`
### Sync Sources
- OpenRouter models API
- official provider docs snapshots where no API exists
- user overrides from config
### Sync Output
Cache pricing entries locally with:
- source URL
- fetch timestamp
- version/hash
- confidence/source type
### Sync Frequency
- startup warm cache
- background refresh every 6 to 24 hours depending on source
- manual `hermes pricing sync`
## Reconciliation Architecture
Live requests may produce only an estimate initially. Hermes should reconcile them later when a provider exposes actual billed cost.
Suggested flow:
1. Agent call completes.
2. Hermes stores canonical usage plus reconciliation ids.
3. Hermes computes an immediate estimate if a pricing source exists.
4. A reconciliation worker fetches actual cost when supported.
5. Session and message records are updated with `actual` cost.
This can run:
- inline for cheap lookups
- asynchronously for delayed provider accounting
## Persistence Changes
Session storage should stop storing only aggregate prompt/completion totals.
Add fields for both usage and cost certainty:
- `input_tokens`
- `output_tokens`
- `cache_read_tokens`
- `cache_write_tokens`
- `reasoning_tokens`
- `estimated_cost_usd`
- `actual_cost_usd`
- `cost_status`
- `cost_source`
- `pricing_version`
- `billing_provider`
- `billing_mode`
If schema expansion is too large for one PR, add a new pricing events table:
```text
session_cost_events
id
session_id
request_id
provider
model
billing_mode
input_tokens
output_tokens
cache_read_tokens
cache_write_tokens
estimated_cost_usd
actual_cost_usd
cost_status
cost_source
pricing_version
created_at
updated_at
```
## Hermes Touchpoints
### `run_agent.py`
Current responsibility:
- parse raw provider usage
- update session token counters
New responsibility:
- build `CanonicalUsage`
- update canonical counters
- store reconciliation ids
- emit usage event to pricing subsystem
### `agent/usage_pricing.py`
Current responsibility:
- static lookup table
- direct cost arithmetic
New responsibility:
- move or replace with pricing catalog facade
- no fuzzy model-family heuristics
- no direct pricing without billing-route context
### `cli.py`
Current responsibility:
- compute session cost directly from prompt/completion totals
New responsibility:
- display `CostResult`
- show status badges:
- `actual`
- `estimated`
- `included`
- `n/a`
### `agent/insights.py`
Current responsibility:
- recompute historical estimates from static pricing
New responsibility:
- aggregate stored pricing events
- prefer actual cost over estimate
- surface estimates only when reconciliation is unavailable
## UX Rules
### Status Bar
Show one of:
- `$1.42`
- `~$1.42`
- `included`
- `cost n/a`
Where:
- `$1.42` means `actual`
- `~$1.42` means `estimated`
- `included` means subscription-backed or explicitly zero-cost route
- `cost n/a` means unknown
### `/usage`
Show:
- token buckets
- estimated cost
- actual cost if available
- cost status
- pricing source
### `/insights`
Aggregate:
- actual cost totals
- estimated-only totals
- unknown-cost sessions count
- included-cost sessions count
## Config And Overrides
Add user-configurable pricing overrides in config:
```yaml
pricing:
mode: hybrid
sync_on_startup: true
sync_interval_hours: 12
overrides:
- provider: openrouter
model: anthropic/claude-opus-4.6
billing_mode: custom_contract
input_cost_per_million: 4.25
output_cost_per_million: 22.0
cache_read_cost_per_million: 0.5
cache_write_cost_per_million: 6.0
included_routes:
- provider: copilot
model: "*"
- provider: codex-subscription
model: "*"
```
Overrides must win over catalog defaults for the matching billing route.
## Rollout Plan
### Phase 1
- add canonical usage model
- split cache token buckets in `run_agent.py`
- stop pricing cache-inflated prompt totals
- preserve current UI with improved backend math
### Phase 2
- add route-aware pricing catalog
- integrate OpenRouter models API sync
- add `estimated` vs `included` vs `unknown`
### Phase 3
- add reconciliation for OpenRouter generation cost
- add actual cost persistence
- update `/insights` to prefer actual cost
### Phase 4
- add direct OpenAI and Anthropic reconciliation paths
- add user overrides and contract pricing
- add pricing sync CLI command
## Testing Strategy
Add tests for:
- OpenAI cached token subtraction
- Anthropic cache read/write separation
- OpenRouter estimated vs actual reconciliation
- subscription-backed models showing `included`
- custom endpoints showing `n/a`
- override precedence
- stale catalog fallback behavior
Current tests that assume heuristic pricing should be replaced with route-aware expectations.
## Non-Goals
- exact enterprise billing reconstruction without an official source or user override
- backfilling perfect historical cost for old sessions that lack cache bucket data
- scraping arbitrary provider web pages at request time
## Recommendation
Do not expand the existing `MODEL_PRICING` dict.
That path cannot satisfy the product requirement. Hermes should instead migrate to:
- canonical usage normalization
- route-aware pricing sources
- estimate-then-reconcile cost lifecycle
- explicit certainty states in the UI
This is the minimum architecture that makes the statement "Hermes pricing is backed by official sources where possible, and otherwise clearly labeled" defensible.
+97
View File
@@ -0,0 +1,97 @@
# ============================================================================
# Hermes Agent — Example Skin Template
# ============================================================================
#
# Copy this file to ~/.hermes/skins/<name>.yaml to create a custom skin.
# All fields are optional — missing values inherit from the default skin.
# Activate with: /skin <name> or display.skin: <name> in config.yaml
#
# See hermes_cli/skin_engine.py for the full schema reference.
# ============================================================================
# Required: unique skin name (used in /skin command and config)
name: example
description: An example custom skin — copy and modify this template
# ── Colors ──────────────────────────────────────────────────────────────────
# Hex color values for Rich markup. These control the CLI's visual palette.
colors:
# Banner panel (the startup welcome box)
banner_border: "#CD7F32" # Panel border
banner_title: "#FFD700" # Panel title text
banner_accent: "#FFBF00" # Section headers (Available Tools, Skills, etc.)
banner_dim: "#B8860B" # Dim/muted text (separators, model info)
banner_text: "#FFF8DC" # Body text (tool names, skill names)
# UI elements
ui_accent: "#FFBF00" # General accent color
ui_label: "#4dd0e1" # Labels
ui_ok: "#4caf50" # Success indicators
ui_error: "#ef5350" # Error indicators
ui_warn: "#ffa726" # Warning indicators
# Input area
prompt: "#FFF8DC" # Prompt text color
input_rule: "#CD7F32" # Horizontal rule around input
# Response box
response_border: "#FFD700" # Response box border (ANSI color)
# Session display
session_label: "#DAA520" # Session label
session_border: "#8B8682" # Session ID dim color
# TUI surfaces
status_bar_bg: "#1a1a2e" # Status / usage bar background
voice_status_bg: "#1a1a2e" # Voice-mode badge background
completion_menu_bg: "#1a1a2e" # Completion list background
completion_menu_current_bg: "#333355" # Active completion row background
completion_menu_meta_bg: "#1a1a2e" # Completion meta column background
completion_menu_meta_current_bg: "#333355" # Active completion meta background
# ── Spinner ─────────────────────────────────────────────────────────────────
# Customize the animated spinner shown during API calls and tool execution.
spinner:
# Faces shown while waiting for the API response
waiting_faces:
- "(。◕‿◕。)"
- "(◕‿◕✿)"
- "٩(◕‿◕。)۶"
# Faces shown during extended thinking/reasoning
thinking_faces:
- "(。•́︿•̀。)"
- "(◔_◔)"
- "(¬‿¬)"
# Verbs used in spinner messages (e.g., "pondering your request...")
thinking_verbs:
- "pondering"
- "contemplating"
- "musing"
- "ruminating"
# Optional: left/right decorations around the spinner
# Each entry is a [left, right] pair. Omit entirely for no wings.
# wings:
# - ["⟪⚔", "⚔⟫"]
# - ["⟪▲", "▲⟫"]
# ── Branding ────────────────────────────────────────────────────────────────
# Text strings used throughout the CLI interface.
branding:
agent_name: "Hermes Agent" # Banner title, about display
welcome: "Welcome! Type your message or /help for commands."
goodbye: "Goodbye! ⚕" # Exit message
response_label: " ⚕ Hermes " # Response box header label
prompt_symbol: " " # Input prompt symbol
help_header: "(^_^)? Available Commands" # /help header text
# ── Tool Output ─────────────────────────────────────────────────────────────
# Character used as the prefix for tool output lines.
# Default is "┊" (thin dotted vertical line). Some alternatives:
# "╎" (light triple dash vertical)
# "▏" (left one-eighth block)
# "│" (box drawing light vertical)
# "┃" (box drawing heavy vertical)
tool_prefix: "┊"
+329
View File
@@ -0,0 +1,329 @@
# Container-Aware CLI Review Fixes Spec
**PR:** NousResearch/hermes-agent#7543
**Review:** cursor[bot] bugbot review (4094049442) + two prior rounds
**Date:** 2026-04-12
**Branch:** `feat/container-aware-cli-clean`
## Review Issues Summary
Six issues were raised across three bugbot review rounds. Three were fixed in intermediate commits (38277a6a, 726cf90f). This spec addresses remaining design concerns surfaced by those reviews and simplifies the implementation based on interview decisions.
| # | Issue | Severity | Status |
|---|-------|----------|--------|
| 1 | `os.execvp` retry loop unreachable | Medium | Fixed in 79e8cd12 (switched to subprocess.run) |
| 2 | Redundant `shutil.which("sudo")` | Medium | Fixed in 38277a6a (reuses `sudo` var) |
| 3 | Missing `chown -h` on symlink update | Low | Fixed in 38277a6a |
| 4 | Container routing after `parse_args()` | High | Fixed in 726cf90f |
| 5 | Hardcoded `/home/${user}` | Medium | Fixed in 726cf90f |
| 6 | Group membership not gated on `container.enable` | Low | Fixed in 726cf90f |
The mechanical fixes are in place but the overall design needs revision. The retry loop, error swallowing, and process model have deeper issues than what the bugbot flagged.
---
## Spec: Revised `_exec_in_container`
### Design Principles
1. **Let it crash.** No silent fallbacks. If `.container-mode` exists but something goes wrong, the error propagates naturally (Python traceback). The only case where container routing is skipped is when `.container-mode` doesn't exist or `HERMES_DEV=1`.
2. **No retries.** Probe once for sudo, exec once. If it fails, docker/podman's stderr reaches the user verbatim.
3. **Completely transparent.** No error wrapping, no prefixes, no spinners. Docker's output goes straight through.
4. **`os.execvp` on the happy path.** Replace the Python process entirely so there's no idle parent during interactive sessions. Note: `execvp` never returns on success (process is replaced) and raises `OSError` on failure (it does not return a value). The container process's exit code becomes the process exit code by definition — no explicit propagation needed.
5. **One human-readable exception to "let it crash".** `subprocess.TimeoutExpired` from the sudo probe gets a specific catch with a readable message, since a raw traceback for "your Docker daemon is slow" is confusing. All other exceptions propagate naturally.
### Execution Flow
```
1. get_container_exec_info()
- HERMES_DEV=1 → return None (skip routing)
- Inside container → return None (skip routing)
- .container-mode doesn't exist → return None (skip routing)
- .container-mode exists → parse and return dict
- .container-mode exists but malformed/unreadable → LET IT CRASH (no try/except)
2. _exec_in_container(container_info, sys.argv[1:])
a. shutil.which(backend) → if None, print "{backend} not found on PATH" and sys.exit(1)
b. Sudo probe: subprocess.run([runtime, "inspect", "--format", "ok", container_name], timeout=15)
- If succeeds → needs_sudo = False
- If fails → try subprocess.run([sudo, "-n", runtime, "inspect", ...], timeout=15)
- If succeeds → needs_sudo = True
- If fails → print error with sudoers hint (including why -n is required) and sys.exit(1)
- If TimeoutExpired → catch specifically, print human-readable message about slow daemon
c. Build exec_cmd: [sudo? + runtime, "exec", tty_flags, "-u", exec_user, env_flags, container, hermes_bin, *cli_args]
d. os.execvp(exec_cmd[0], exec_cmd)
- On success: process is replaced — Python is gone, container exit code IS the process exit code
- On OSError: let it crash (natural traceback)
```
### Changes to `hermes_cli/main.py`
#### `_exec_in_container` — rewrite
Remove:
- The entire retry loop (`max_retries`, `for attempt in range(...)`)
- Spinner logic (`"Waiting for container..."`, dots)
- Exit code classification (125/126/127 handling)
- `subprocess.run` for the exec call (keep it only for the sudo probe)
- Special TTY vs non-TTY retry counts
- The `time` import (no longer needed)
Change:
- Use `os.execvp(exec_cmd[0], exec_cmd)` as the final call
- Keep the `subprocess` import only for the sudo probe
- Keep TTY detection for the `-it` vs `-i` flag
- Keep env var forwarding (TERM, COLORTERM, LANG, LC_ALL)
- Keep the sudo probe as-is (it's the one "smart" part)
- Bump probe `timeout` from 5s to 15s — cold podman on a loaded machine needs headroom
- Catch `subprocess.TimeoutExpired` specifically on both probe calls — print a readable message about the daemon being unresponsive instead of a raw traceback
- Expand the sudoers hint error message to explain *why* `-n` (non-interactive) is required: a password prompt would hang the CLI or break piped commands
The function becomes roughly:
```python
def _exec_in_container(container_info: dict, cli_args: list):
"""Replace the current process with a command inside the managed container.
Probes whether sudo is needed (rootful containers), then os.execvp
into the container. If exec fails, the OS error propagates naturally.
"""
import shutil
import subprocess
backend = container_info["backend"]
container_name = container_info["container_name"]
exec_user = container_info["exec_user"]
hermes_bin = container_info["hermes_bin"]
runtime = shutil.which(backend)
if not runtime:
print(f"Error: {backend} not found on PATH. Cannot route to container.",
file=sys.stderr)
sys.exit(1)
# Probe whether we need sudo to see the rootful container.
# Timeout is 15s — cold podman on a loaded machine can take a while.
# TimeoutExpired is caught specifically for a human-readable message;
# all other exceptions propagate naturally.
needs_sudo = False
sudo = None
try:
probe = subprocess.run(
[runtime, "inspect", "--format", "ok", container_name],
capture_output=True, text=True, timeout=15,
)
except subprocess.TimeoutExpired:
print(
f"Error: timed out waiting for {backend} to respond.\n"
f"The {backend} daemon may be unresponsive or starting up.",
file=sys.stderr,
)
sys.exit(1)
if probe.returncode != 0:
sudo = shutil.which("sudo")
if sudo:
try:
probe2 = subprocess.run(
[sudo, "-n", runtime, "inspect", "--format", "ok", container_name],
capture_output=True, text=True, timeout=15,
)
except subprocess.TimeoutExpired:
print(
f"Error: timed out waiting for sudo {backend} to respond.",
file=sys.stderr,
)
sys.exit(1)
if probe2.returncode == 0:
needs_sudo = True
else:
print(
f"Error: container '{container_name}' not found via {backend}.\n"
f"\n"
f"The NixOS service runs the container as root. Your user cannot\n"
f"see it because {backend} uses per-user namespaces.\n"
f"\n"
f"Fix: grant passwordless sudo for {backend}. The -n (non-interactive)\n"
f"flag is required because the CLI calls sudo non-interactively —\n"
f"a password prompt would hang or break piped commands:\n"
f"\n"
f' security.sudo.extraRules = [{{\n'
f' users = [ "{os.getenv("USER", "your-user")}" ];\n'
f' commands = [{{ command = "{runtime}"; options = [ "NOPASSWD" ]; }}];\n'
f' }}];\n'
f"\n"
f"Or run: sudo hermes {' '.join(cli_args)}",
file=sys.stderr,
)
sys.exit(1)
else:
print(
f"Error: container '{container_name}' not found via {backend}.\n"
f"The container may be running under root. Try: sudo hermes {' '.join(cli_args)}",
file=sys.stderr,
)
sys.exit(1)
is_tty = sys.stdin.isatty()
tty_flags = ["-it"] if is_tty else ["-i"]
env_flags = []
for var in ("TERM", "COLORTERM", "LANG", "LC_ALL"):
val = os.environ.get(var)
if val:
env_flags.extend(["-e", f"{var}={val}"])
cmd_prefix = [sudo, "-n", runtime] if needs_sudo else [runtime]
exec_cmd = (
cmd_prefix + ["exec"]
+ tty_flags
+ ["-u", exec_user]
+ env_flags
+ [container_name, hermes_bin]
+ cli_args
)
# execvp replaces this process entirely — it never returns on success.
# On failure it raises OSError, which propagates naturally.
os.execvp(exec_cmd[0], exec_cmd)
```
#### Container routing call site in `main()` — remove try/except
Current:
```python
try:
from hermes_cli.config import get_container_exec_info
container_info = get_container_exec_info()
if container_info:
_exec_in_container(container_info, sys.argv[1:])
sys.exit(1) # exec failed if we reach here
except SystemExit:
raise
except Exception:
pass # Container routing unavailable, proceed locally
```
Revised:
```python
from hermes_cli.config import get_container_exec_info
container_info = get_container_exec_info()
if container_info:
_exec_in_container(container_info, sys.argv[1:])
# Unreachable: os.execvp never returns on success (process is replaced)
# and raises OSError on failure (which propagates as a traceback).
# This line exists only as a defensive assertion.
sys.exit(1)
```
No try/except. If `.container-mode` doesn't exist, `get_container_exec_info()` returns `None` and we skip routing. If it exists but is broken, the exception propagates with a natural traceback.
Note: `sys.exit(1)` after `_exec_in_container` is dead code in all paths — `os.execvp` either replaces the process or raises. It's kept as a belt-and-suspenders assertion with a comment marking it unreachable, not as actual error handling.
### Changes to `hermes_cli/config.py`
#### `get_container_exec_info` — remove inner try/except
Current code catches `(OSError, IOError)` and returns `None`. This silently hides permission errors, corrupt files, etc.
Change: Remove the try/except around file reading. Keep the early returns for `HERMES_DEV=1` and `_is_inside_container()`. The `FileNotFoundError` from `open()` when `.container-mode` doesn't exist should still return `None` (this is the "container mode not enabled" case). All other exceptions propagate.
```python
def get_container_exec_info() -> Optional[dict]:
if os.environ.get("HERMES_DEV") == "1":
return None
if _is_inside_container():
return None
container_mode_file = get_hermes_home() / ".container-mode"
try:
with open(container_mode_file, "r") as f:
# ... parse key=value lines ...
except FileNotFoundError:
return None
# All other exceptions (PermissionError, malformed data, etc.) propagate
return { ... }
```
---
## Spec: NixOS Module Changes
### Symlink creation — simplify to two branches
Current: 4 branches (symlink exists, directory exists, other file, doesn't exist).
Revised: 2 branches.
```bash
if [ -d "${symlinkPath}" ] && [ ! -L "${symlinkPath}" ]; then
# Real directory — back it up, then create symlink
_backup="${symlinkPath}.bak.$(date +%s)"
echo "hermes-agent: backing up existing ${symlinkPath} to $_backup"
mv "${symlinkPath}" "$_backup"
fi
# For everything else (symlink, doesn't exist, etc.) — just force-create
ln -sfn "${target}" "${symlinkPath}"
chown -h ${user}:${cfg.group} "${symlinkPath}"
```
`ln -sfn` handles: existing symlink (replaces), doesn't exist (creates), and after the `mv` above (creates). The only case that needs special handling is a real directory, because `ln -sfn` cannot atomically replace a directory.
Note: there is a theoretical race between the `[ -d ... ]` check and the `mv` (something could create/remove the directory in between). In practice this is a NixOS activation script running as root during `nixos-rebuild switch` — no other process should be touching `~/.hermes` at that moment. Not worth adding locking for.
### Sudoers — document, don't auto-configure
Do NOT add `security.sudo.extraRules` to the module. Document the sudoers requirement in the module's description/comments and in the error message the CLI prints when sudo probe fails.
### Group membership gating — keep as-is
The fix in 726cf90f (`cfg.container.enable && cfg.container.hostUsers != []`) is correct. Leftover group membership when container mode is disabled is harmless. No cleanup needed.
---
## Spec: Test Rewrite
The existing test file (`tests/hermes_cli/test_container_aware_cli.py`) has 16 tests. With the simplified exec model, several are obsolete.
### Tests to keep (update as needed)
- `test_is_inside_container_dockerenv` — unchanged
- `test_is_inside_container_containerenv` — unchanged
- `test_is_inside_container_cgroup_docker` — unchanged
- `test_is_inside_container_false_on_host` — unchanged
- `test_get_container_exec_info_returns_metadata` — unchanged
- `test_get_container_exec_info_none_inside_container` — unchanged
- `test_get_container_exec_info_none_without_file` — unchanged
- `test_get_container_exec_info_skipped_when_hermes_dev` — unchanged
- `test_get_container_exec_info_not_skipped_when_hermes_dev_zero` — unchanged
- `test_get_container_exec_info_defaults` — unchanged
- `test_get_container_exec_info_docker_backend` — unchanged
### Tests to add
- `test_get_container_exec_info_crashes_on_permission_error` — verify that `PermissionError` propagates (no silent `None` return)
- `test_exec_in_container_calls_execvp` — verify `os.execvp` is called with correct args (runtime, tty flags, user, env, container, binary, cli args)
- `test_exec_in_container_sudo_probe_sets_prefix` — verify that when first probe fails and sudo probe succeeds, `os.execvp` is called with `sudo -n` prefix
- `test_exec_in_container_no_runtime_hard_fails` — keep existing, verify `sys.exit(1)` when `shutil.which` returns None
- `test_exec_in_container_non_tty_uses_i_only` — update to check `os.execvp` args instead of `subprocess.run` args
- `test_exec_in_container_probe_timeout_prints_message` — verify that `subprocess.TimeoutExpired` from the probe produces a human-readable error and `sys.exit(1)`, not a raw traceback
- `test_exec_in_container_container_not_running_no_sudo` — verify the path where runtime exists (`shutil.which` returns a path) but probe returns non-zero and no sudo is available. Should print the "container may be running under root" error. This is distinct from `no_runtime_hard_fails` which covers `shutil.which` returning None.
### Tests to delete
- `test_exec_in_container_tty_retries_on_container_failure` — retry loop removed
- `test_exec_in_container_non_tty_retries_silently_exits_126` — retry loop removed
- `test_exec_in_container_propagates_hermes_exit_code` — no subprocess.run to check exit codes; execvp replaces the process. Note: exit code propagation still works correctly — when `os.execvp` succeeds, the container's process *becomes* this process, so its exit code is the process exit code by OS semantics. No application code needed, no test needed. A comment in the function docstring documents this intent for future readers.
---
## Out of Scope
- Auto-configuring sudoers rules in the NixOS module
- Any changes to `get_container_exec_info` parsing logic beyond the try/except narrowing
- Changes to `.container-mode` file format
- Changes to the `HERMES_DEV=1` bypass
- Changes to container detection logic (`_is_inside_container`)
+1
View File
@@ -53,6 +53,7 @@ def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str)
try:
loop = asyncio.get_running_loop()
# We're in an async context -- need to run in thread
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(
handle_function_call, tool_name, arguments, task_id
Generated
-21
View File
@@ -36,26 +36,6 @@
"type": "github"
}
},
"npm-lockfile-fix": {
"inputs": {
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1775903712,
"narHash": "sha256-2GV79U6iVH4gKAPWYrxUReB0S41ty/Y3dBLquU8AlaA=",
"owner": "jeslie0",
"repo": "npm-lockfile-fix",
"rev": "c6093acb0c0548e0f9b8b3d82918823721930fe8",
"type": "github"
},
"original": {
"owner": "jeslie0",
"repo": "npm-lockfile-fix",
"type": "github"
}
},
"pyproject-build-systems": {
"inputs": {
"nixpkgs": [
@@ -144,7 +124,6 @@
"inputs": {
"flake-parts": "flake-parts",
"nixpkgs": "nixpkgs",
"npm-lockfile-fix": "npm-lockfile-fix",
"pyproject-build-systems": "pyproject-build-systems",
"pyproject-nix": "pyproject-nix_2",
"uv2nix": "uv2nix_2"
+2 -11
View File
@@ -19,20 +19,11 @@
url = "github:pyproject-nix/build-system-pkgs";
inputs.nixpkgs.follows = "nixpkgs";
};
npm-lockfile-fix = {
url = "github:jeslie0/npm-lockfile-fix";
inputs.nixpkgs.follows = "nixpkgs";
};
};
outputs =
inputs:
outputs = inputs:
inputs.flake-parts.lib.mkFlake { inherit inputs; } {
systems = [
"x86_64-linux"
"aarch64-linux"
"aarch64-darwin"
];
systems = [ "x86_64-linux" "aarch64-linux" "aarch64-darwin" ];
imports = [
./nix/packages.nix
+1 -19
View File
@@ -100,7 +100,7 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
def _build_discord(adapter) -> List[Dict[str, str]]:
"""Enumerate all text channels and forum channels the Discord bot can see."""
"""Enumerate all text channels the Discord bot can see."""
channels = []
client = getattr(adapter, "_client", None)
if not client:
@@ -119,15 +119,6 @@ def _build_discord(adapter) -> List[Dict[str, str]]:
"guild": guild.name,
"type": "channel",
})
# Forum channels (type 15) — creating a message auto-spawns a thread post.
forums = getattr(guild, "forum_channels", None) or []
for ch in forums:
channels.append({
"id": str(ch.id),
"name": ch.name,
"guild": guild.name,
"type": "forum",
})
# Also include DM-capable users we've interacted with is not
# feasible via guild enumeration; those come from sessions.
@@ -200,15 +191,6 @@ def load_directory() -> Dict[str, Any]:
return {"updated_at": None, "platforms": {}}
def lookup_channel_type(platform_name: str, chat_id: str) -> Optional[str]:
"""Return the channel ``type`` string (e.g. ``"channel"``, ``"forum"``) for *chat_id*, or *None* if unknown."""
directory = load_directory()
for ch in directory.get("platforms", {}).get(platform_name, []):
if ch.get("id") == chat_id:
return ch.get("type")
return None
def resolve_channel_name(platform_name: str, name: str) -> Optional[str]:
"""
Resolve a human-friendly channel name to a numeric ID.
+4 -129
View File
@@ -258,13 +258,6 @@ class GatewayConfig:
# Streaming configuration
streaming: StreamingConfig = field(default_factory=StreamingConfig)
# Session store pruning: drop SessionEntry records older than this many
# days from the in-memory dict and sessions.json. Keeps the store from
# growing unbounded in gateways serving many chats/threads/users over
# months. Pruning is invisible to users — if they resume, they get a
# fresh session exactly as if the reset policy had fired. 0 = disabled.
session_store_max_age_days: int = 90
def get_connected_platforms(self) -> List[Platform]:
"""Return list of platforms that are enabled and configured."""
connected = []
@@ -314,14 +307,6 @@ class GatewayConfig:
# QQBot uses extra dict for app credentials
elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"):
connected.append(platform)
# DingTalk uses client_id/client_secret from config.extra or env vars
elif platform == Platform.DINGTALK and (
config.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID")
) and (
config.extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET")
):
connected.append(platform)
return connected
def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
@@ -372,7 +357,6 @@ class GatewayConfig:
"thread_sessions_per_user": self.thread_sessions_per_user,
"unauthorized_dm_behavior": self.unauthorized_dm_behavior,
"streaming": self.streaming.to_dict(),
"session_store_max_age_days": self.session_store_max_age_days,
}
@classmethod
@@ -420,13 +404,6 @@ class GatewayConfig:
"pair",
)
try:
session_store_max_age_days = int(data.get("session_store_max_age_days", 90))
if session_store_max_age_days < 0:
session_store_max_age_days = 0
except (TypeError, ValueError):
session_store_max_age_days = 90
return cls(
platforms=platforms,
default_reset_policy=default_policy,
@@ -441,7 +418,6 @@ class GatewayConfig:
thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
unauthorized_dm_behavior=unauthorized_dm_behavior,
streaming=StreamingConfig.from_dict(data.get("streaming", {})),
session_store_max_age_days=session_store_max_age_days,
)
def get_unauthorized_dm_behavior(self, platform: Optional[Platform] = None) -> str:
@@ -576,22 +552,8 @@ def load_gateway_config() -> GatewayConfig:
bridged["free_response_channels"] = platform_cfg["free_response_channels"]
if "mention_patterns" in platform_cfg:
bridged["mention_patterns"] = platform_cfg["mention_patterns"]
if "dm_policy" in platform_cfg:
bridged["dm_policy"] = platform_cfg["dm_policy"]
if "allow_from" in platform_cfg:
bridged["allow_from"] = platform_cfg["allow_from"]
if "group_policy" in platform_cfg:
bridged["group_policy"] = platform_cfg["group_policy"]
if "group_allow_from" in platform_cfg:
bridged["group_allow_from"] = platform_cfg["group_allow_from"]
if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
if "channel_prompts" in platform_cfg:
channel_prompts = platform_cfg["channel_prompts"]
if isinstance(channel_prompts, dict):
bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
else:
bridged["channel_prompts"] = channel_prompts
if not bridged:
continue
plat_data = platforms_data.setdefault(plat.value, {})
@@ -649,20 +611,6 @@ def load_gateway_config() -> GatewayConfig:
if isinstance(ntc, list):
ntc = ",".join(str(v) for v in ntc)
os.environ["DISCORD_NO_THREAD_CHANNELS"] = str(ntc)
# allow_mentions: granular control over what the bot can ping.
# Safe defaults (no @everyone/roles) are applied in the adapter;
# these YAML keys only override when set and let users opt back
# into unsafe modes (e.g. roles=true) if they actually want it.
allow_mentions_cfg = discord_cfg.get("allow_mentions")
if isinstance(allow_mentions_cfg, dict):
for yaml_key, env_key in (
("everyone", "DISCORD_ALLOW_MENTION_EVERYONE"),
("roles", "DISCORD_ALLOW_MENTION_ROLES"),
("users", "DISCORD_ALLOW_MENTION_USERS"),
("replied_user", "DISCORD_ALLOW_MENTION_REPLIED_USER"),
):
if yaml_key in allow_mentions_cfg and not os.getenv(env_key):
os.environ[env_key] = str(allow_mentions_cfg[yaml_key]).lower()
# Telegram settings → env vars (env vars take precedence)
telegram_cfg = yaml_cfg.get("telegram", {})
@@ -670,7 +618,8 @@ def load_gateway_config() -> GatewayConfig:
if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"):
os.environ["TELEGRAM_REQUIRE_MENTION"] = str(telegram_cfg["require_mention"]).lower()
if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"):
os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"])
import json as _json
os.environ["TELEGRAM_MENTION_PATTERNS"] = _json.dumps(telegram_cfg["mention_patterns"])
frc = telegram_cfg.get("free_response_chats")
if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"):
if isinstance(frc, list):
@@ -683,18 +632,6 @@ def load_gateway_config() -> GatewayConfig:
os.environ["TELEGRAM_IGNORED_THREADS"] = str(ignored_threads)
if "reactions" in telegram_cfg and not os.getenv("TELEGRAM_REACTIONS"):
os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
if "disable_link_previews" in telegram_cfg:
plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
if not isinstance(plat_data, dict):
plat_data = {}
platforms_data[Platform.TELEGRAM.value] = plat_data
extra = plat_data.setdefault("extra", {})
if not isinstance(extra, dict):
extra = {}
plat_data["extra"] = extra
extra["disable_link_previews"] = telegram_cfg["disable_link_previews"]
whatsapp_cfg = yaml_cfg.get("whatsapp", {})
if isinstance(whatsapp_cfg, dict):
@@ -707,38 +644,6 @@ def load_gateway_config() -> GatewayConfig:
if isinstance(frc, list):
frc = ",".join(str(v) for v in frc)
os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc)
if "dm_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_DM_POLICY"):
os.environ["WHATSAPP_DM_POLICY"] = str(whatsapp_cfg["dm_policy"]).lower()
af = whatsapp_cfg.get("allow_from")
if af is not None and not os.getenv("WHATSAPP_ALLOWED_USERS"):
if isinstance(af, list):
af = ",".join(str(v) for v in af)
os.environ["WHATSAPP_ALLOWED_USERS"] = str(af)
if "group_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_GROUP_POLICY"):
os.environ["WHATSAPP_GROUP_POLICY"] = str(whatsapp_cfg["group_policy"]).lower()
gaf = whatsapp_cfg.get("group_allow_from")
if gaf is not None and not os.getenv("WHATSAPP_GROUP_ALLOWED_USERS"):
if isinstance(gaf, list):
gaf = ",".join(str(v) for v in gaf)
os.environ["WHATSAPP_GROUP_ALLOWED_USERS"] = str(gaf)
# DingTalk settings → env vars (env vars take precedence)
dingtalk_cfg = yaml_cfg.get("dingtalk", {})
if isinstance(dingtalk_cfg, dict):
if "require_mention" in dingtalk_cfg and not os.getenv("DINGTALK_REQUIRE_MENTION"):
os.environ["DINGTALK_REQUIRE_MENTION"] = str(dingtalk_cfg["require_mention"]).lower()
if "mention_patterns" in dingtalk_cfg and not os.getenv("DINGTALK_MENTION_PATTERNS"):
os.environ["DINGTALK_MENTION_PATTERNS"] = json.dumps(dingtalk_cfg["mention_patterns"])
frc = dingtalk_cfg.get("free_response_chats")
if frc is not None and not os.getenv("DINGTALK_FREE_RESPONSE_CHATS"):
if isinstance(frc, list):
frc = ",".join(str(v) for v in frc)
os.environ["DINGTALK_FREE_RESPONSE_CHATS"] = str(frc)
allowed = dingtalk_cfg.get("allowed_users")
if allowed is not None and not os.getenv("DINGTALK_ALLOWED_USERS"):
if isinstance(allowed, list):
allowed = ",".join(str(v) for v in allowed)
os.environ["DINGTALK_ALLOWED_USERS"] = str(allowed)
# Matrix settings → env vars (env vars take precedence)
matrix_cfg = yaml_cfg.get("matrix", {})
@@ -1083,25 +988,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
if webhook_secret:
config.platforms[Platform.WEBHOOK].extra["secret"] = webhook_secret
# DingTalk
dingtalk_client_id = os.getenv("DINGTALK_CLIENT_ID")
dingtalk_client_secret = os.getenv("DINGTALK_CLIENT_SECRET")
if dingtalk_client_id and dingtalk_client_secret:
if Platform.DINGTALK not in config.platforms:
config.platforms[Platform.DINGTALK] = PlatformConfig()
config.platforms[Platform.DINGTALK].enabled = True
config.platforms[Platform.DINGTALK].extra.update({
"client_id": dingtalk_client_id,
"client_secret": dingtalk_client_secret,
})
dingtalk_home = os.getenv("DINGTALK_HOME_CHANNEL")
if dingtalk_home:
config.platforms[Platform.DINGTALK].home_channel = HomeChannel(
platform=Platform.DINGTALK,
chat_id=dingtalk_home,
name=os.getenv("DINGTALK_HOME_CHANNEL_NAME", "Home"),
)
# Feishu / Lark
feishu_app_id = os.getenv("FEISHU_APP_ID")
feishu_app_secret = os.getenv("FEISHU_APP_SECRET")
@@ -1250,23 +1136,12 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
qq_group_allowed = os.getenv("QQ_GROUP_ALLOWED_USERS", "").strip()
if qq_group_allowed:
extra["group_allow_from"] = qq_group_allowed
qq_home = os.getenv("QQBOT_HOME_CHANNEL", "").strip()
qq_home_name_env = "QQBOT_HOME_CHANNEL_NAME"
if not qq_home:
# Back-compat: accept the pre-rename name and log a one-time warning.
legacy_home = os.getenv("QQ_HOME_CHANNEL", "").strip()
if legacy_home:
qq_home = legacy_home
qq_home_name_env = "QQ_HOME_CHANNEL_NAME"
logging.getLogger(__name__).warning(
"QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL "
"in your .env for consistency with the platform key."
)
qq_home = os.getenv("QQ_HOME_CHANNEL", "").strip()
if qq_home:
config.platforms[Platform.QQBOT].home_channel = HomeChannel(
platform=Platform.QQBOT,
chat_id=qq_home,
name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"),
name=os.getenv("QQ_HOME_CHANNEL_NAME", "Home"),
)
# Session settings
+25 -1
View File
@@ -3,11 +3,12 @@ Event Hook System
A lightweight event-driven system that fires handlers at key lifecycle points.
Hooks are discovered from ~/.hermes/hooks/ directories, each containing:
- HOOK.yaml (metadata: name, description, events list)
- HOOK.yaml (metadata: name, description, events list, optional startup_readiness)
- handler.py (Python handler with async def handle(event_type, context))
Events:
- gateway:startup -- Gateway process starts
- gateway:shutdown -- Gateway process is shutting down
- session:start -- New session created (first message of a new session)
- session:end -- Session ends (user ran /new or /reset)
- session:reset -- Session reset completed (new session entry created)
@@ -31,6 +32,26 @@ from hermes_cli.config import get_hermes_home
HOOKS_DIR = get_hermes_home() / "hooks"
def _normalize_startup_readiness(hook_name: str, manifest: dict[str, Any]) -> Optional[dict[str, Any]]:
"""Validate and normalize optional startup readiness metadata."""
readiness = manifest.get("startup_readiness")
if readiness is None:
return None
if not isinstance(readiness, dict):
print(f"[hooks] Ignoring startup_readiness for {hook_name}: expected mapping", flush=True)
return None
check_id = str(readiness.get("id", "")).strip()
if not check_id:
print(f"[hooks] Ignoring startup_readiness for {hook_name}: missing id", flush=True)
return None
return {
"id": check_id,
"required": bool(readiness.get("required", True)),
}
class HookRegistry:
"""
Discovers, loads, and fires event hooks.
@@ -62,6 +83,7 @@ class HookRegistry:
"description": "Run ~/.hermes/BOOT.md on gateway startup",
"events": ["gateway:startup"],
"path": "(builtin)",
"startup_readiness": None,
})
except Exception as e:
print(f"[hooks] Could not load built-in boot-md hook: {e}", flush=True)
@@ -102,6 +124,7 @@ class HookRegistry:
if not events:
print(f"[hooks] Skipping {hook_name}: no events declared", flush=True)
continue
startup_readiness = _normalize_startup_readiness(hook_name, manifest)
# Dynamically load the handler module
spec = importlib.util.spec_from_file_location(
@@ -128,6 +151,7 @@ class HookRegistry:
"description": manifest.get("description", ""),
"events": events,
"path": str(hook_dir),
"startup_readiness": startup_readiness,
})
print(f"[hooks] Loaded hook '{hook_name}' for events: {events}", flush=True)
File diff suppressed because it is too large Load Diff
+45 -339
View File
@@ -6,7 +6,6 @@ and implement the required methods.
"""
import asyncio
import inspect
import ipaddress
import logging
import os
@@ -19,8 +18,6 @@ import uuid
from abc import ABC, abstractmethod
from urllib.parse import urlsplit
from utils import normalize_proxy_url
logger = logging.getLogger(__name__)
@@ -161,13 +158,13 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
if platform_env_var:
value = (os.environ.get(platform_env_var) or "").strip()
if value:
return normalize_proxy_url(value)
return value
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
"https_proxy", "http_proxy", "all_proxy"):
value = (os.environ.get(key) or "").strip()
if value:
return normalize_proxy_url(value)
return normalize_proxy_url(_detect_macos_system_proxy())
return value
return _detect_macos_system_proxy()
def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
@@ -393,9 +390,12 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
if not is_safe_url(url):
raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")
import asyncio
import httpx
_log = logging.getLogger(__name__)
import logging as _logging
_log = _logging.getLogger(__name__)
last_exc = None
async with httpx.AsyncClient(
timeout=30.0,
follow_redirects=True,
@@ -413,6 +413,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
response.raise_for_status()
return cache_image_from_bytes(response.content, ext)
except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
last_exc = exc
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
raise
if attempt < retries:
@@ -428,6 +429,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
await asyncio.sleep(wait)
continue
raise
raise last_exc
def cleanup_image_cache(max_age_hours: int = 24) -> int:
@@ -507,9 +509,12 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
if not is_safe_url(url):
raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")
import asyncio
import httpx
_log = logging.getLogger(__name__)
import logging as _logging
_log = _logging.getLogger(__name__)
last_exc = None
async with httpx.AsyncClient(
timeout=30.0,
follow_redirects=True,
@@ -527,6 +532,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
response.raise_for_status()
return cache_audio_from_bytes(response.content, ext)
except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
last_exc = exc
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
raise
if attempt < retries:
@@ -542,39 +548,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
await asyncio.sleep(wait)
continue
raise
# ---------------------------------------------------------------------------
# Video cache utilities
#
# Same pattern as image/audio cache -- videos from platforms are downloaded
# here so the agent can reference them by local file path.
# ---------------------------------------------------------------------------
VIDEO_CACHE_DIR = get_hermes_dir("cache/videos", "video_cache")
SUPPORTED_VIDEO_TYPES = {
".mp4": "video/mp4",
".mov": "video/quicktime",
".webm": "video/webm",
".mkv": "video/x-matroska",
".avi": "video/x-msvideo",
}
def get_video_cache_dir() -> Path:
"""Return the video cache directory, creating it if it doesn't exist."""
VIDEO_CACHE_DIR.mkdir(parents=True, exist_ok=True)
return VIDEO_CACHE_DIR
def cache_video_from_bytes(data: bytes, ext: str = ".mp4") -> str:
"""Save raw video bytes to the cache and return the absolute file path."""
cache_dir = get_video_cache_dir()
filename = f"video_{uuid.uuid4().hex[:12]}{ext}"
filepath = cache_dir / filename
filepath.write_bytes(data)
return str(filepath)
raise last_exc
# ---------------------------------------------------------------------------
@@ -695,15 +669,6 @@ class MessageEvent:
# Original platform data
raw_message: Any = None
message_id: Optional[str] = None
# Platform-specific update identifier. For Telegram this is the
# ``update_id`` from the PTB Update wrapper; other platforms currently
# ignore it. Used by ``/restart`` to record the triggering update so the
# new gateway can advance the Telegram offset past it and avoid processing
# the same ``/restart`` twice if PTB's graceful-shutdown ACK times out
# ("Error while calling `get_updates` one more time to mark all fetched
# updates" in gateway.log).
platform_update_id: Optional[int] = None
# Media attachments
# media_urls: local file paths (for vision tool access)
@@ -717,10 +682,6 @@ class MessageEvent:
# Auto-loaded skill(s) for topic/channel bindings (e.g., Telegram DM Topics,
# Discord channel_skill_bindings). A single name or ordered list.
auto_skill: Optional[str | list[str]] = None
# Per-channel ephemeral system prompt (e.g. Discord channel_prompts).
# Applied at API call time and never persisted to transcript history.
channel_prompt: Optional[str] = None
# Internal flag — set for synthetic events (e.g. background process
# completion notifications) that must bypass user authorization checks.
@@ -769,56 +730,25 @@ def merge_pending_message_event(
pending_messages: Dict[str, MessageEvent],
session_key: str,
event: MessageEvent,
*,
merge_text: bool = False,
) -> None:
"""Store or merge a pending event for a session.
Photo bursts/albums often arrive as multiple near-simultaneous PHOTO
events. Merge those into the existing queued event so the next turn sees
the whole burst.
When ``merge_text`` is enabled, rapid follow-up TEXT events are appended
instead of replacing the pending turn. This is used for Telegram bursty
follow-ups so a multi-part user thought is not silently truncated to only
the last queued fragment.
the whole burst, while non-photo follow-ups still replace the pending
event normally.
"""
existing = pending_messages.get(session_key)
if existing:
existing_is_photo = getattr(existing, "message_type", None) == MessageType.PHOTO
incoming_is_photo = event.message_type == MessageType.PHOTO
existing_has_media = bool(existing.media_urls)
incoming_has_media = bool(event.media_urls)
if existing_is_photo and incoming_is_photo:
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
if event.text:
existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
return
if existing_has_media or incoming_has_media:
if incoming_has_media:
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
if event.text:
if existing.text:
existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
else:
existing.text = event.text
if existing_is_photo or incoming_is_photo:
existing.message_type = MessageType.PHOTO
return
if (
merge_text
and getattr(existing, "message_type", None) == MessageType.TEXT
and event.message_type == MessageType.TEXT
):
if event.text:
existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
return
if (
existing
and getattr(existing, "message_type", None) == MessageType.PHOTO
and event.message_type == MessageType.PHOTO
):
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
if event.text:
existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
return
pending_messages[session_key] = event
@@ -846,36 +776,6 @@ _RETRYABLE_ERROR_PATTERNS = (
MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]]
def resolve_channel_prompt(
config_extra: dict,
channel_id: str,
parent_id: str | None = None,
) -> str | None:
"""Resolve a per-channel ephemeral prompt from platform config.
Looks up ``channel_prompts`` in the adapter's ``config.extra`` dict.
Prefers an exact match on *channel_id*; falls back to *parent_id*
(useful for forum threads / child channels inheriting a parent prompt).
Returns the prompt string, or None if no match is found. Blank/whitespace-
only prompts are treated as absent.
"""
prompts = config_extra.get("channel_prompts") or {}
if not isinstance(prompts, dict):
return None
for key in (channel_id, parent_id):
if not key:
continue
prompt = prompts.get(key)
if prompt is None:
continue
prompt = str(prompt).strip()
if prompt:
return prompt
return None
class BasePlatformAdapter(ABC):
"""
Base class for platform adapters.
@@ -905,12 +805,6 @@ class BasePlatformAdapter(ABC):
# Gateway shutdown cancels these so an old gateway instance doesn't keep
# working on a task after --replace or manual restarts.
self._background_tasks: set[asyncio.Task] = set()
# One-shot callbacks to fire after the main response is delivered.
# Keyed by session_key. Values are either a bare callback (legacy) or
# a ``(generation, callback)`` tuple so GatewayRunner can make deferred
# deliveries generation-aware and avoid stale runs clearing callbacks
# registered by a fresher run for the same session.
self._post_delivery_callbacks: Dict[str, Any] = {}
self._expected_cancelled_tasks: set[asyncio.Task] = set()
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
# Chats where auto-TTS on voice input is disabled (set by /voice off)
@@ -1081,40 +975,16 @@ class BasePlatformAdapter(ABC):
"""
pass
# Default: the adapter treats ``finalize=True`` on edit_message as a
# no-op and is happy to have the stream consumer skip redundant final
# edits. Subclasses that *require* an explicit finalize call to close
# out the message lifecycle (e.g. rich card / AI assistant surfaces
# such as DingTalk AI Cards) override this to True (class attribute or
# property) so the stream consumer knows not to short-circuit.
REQUIRES_EDIT_FINALIZE: bool = False
async def edit_message(
self,
chat_id: str,
message_id: str,
content: str,
*,
finalize: bool = False,
) -> SendResult:
"""
Edit a previously sent message. Optional platforms that don't
support editing return success=False and callers fall back to
sending a new message.
``finalize`` signals that this is the last edit in a streaming
sequence. Most platforms (Telegram, Slack, Discord, Matrix,
etc.) treat it as a no-op because their edit APIs have no notion
of message lifecycle state an edit is an edit. Platforms that
render streaming updates with a distinct "in progress" state and
require explicit closure (e.g. rich card / AI assistant surfaces
such as DingTalk AI Cards) use it to finalize the message and
transition the UI out of the streaming indicator those should
also set ``REQUIRES_EDIT_FINALIZE = True`` so callers route a
final edit through even when content is unchanged. Callers
should set ``finalize=True`` on the final edit of a streamed
response (typically when ``got_done`` fires in the stream
consumer) and leave it ``False`` on intermediate edits.
"""
return SendResult(success=False, error="Not supported")
@@ -1351,7 +1221,7 @@ class BasePlatformAdapter(ABC):
path = path[1:-1].strip()
path = path.lstrip("`\"'").rstrip("`\"',.;:)}]")
if path:
media.append((os.path.expanduser(path), has_voice_tag))
media.append((path, has_voice_tag))
# Remove MEDIA tags from content (including surrounding quote/backtick wrappers)
if media:
@@ -1428,13 +1298,7 @@ class BasePlatformAdapter(ABC):
return paths, cleaned
async def _keep_typing(
self,
chat_id: str,
interval: float = 2.0,
metadata=None,
stop_event: asyncio.Event | None = None,
) -> None:
async def _keep_typing(self, chat_id: str, interval: float = 2.0, metadata=None) -> None:
"""
Continuously send typing indicator until cancelled.
@@ -1448,18 +1312,9 @@ class BasePlatformAdapter(ABC):
"""
try:
while True:
if stop_event is not None and stop_event.is_set():
return
if chat_id not in self._typing_paused:
await self.send_typing(chat_id, metadata=metadata)
if stop_event is None:
await asyncio.sleep(interval)
continue
try:
await asyncio.wait_for(stop_event.wait(), timeout=interval)
except asyncio.TimeoutError:
continue
return
await asyncio.sleep(interval)
except asyncio.CancelledError:
pass # Normal cancellation when handler completes
finally:
@@ -1486,59 +1341,6 @@ class BasePlatformAdapter(ABC):
"""Resume typing indicator for a chat after approval resolves."""
self._typing_paused.discard(chat_id)
async def interrupt_session_activity(self, session_key: str, chat_id: str) -> None:
"""Signal the active session loop to stop and clear typing immediately."""
if session_key:
interrupt_event = self._active_sessions.get(session_key)
if interrupt_event is not None:
interrupt_event.set()
try:
await self.stop_typing(chat_id)
except Exception:
pass
def register_post_delivery_callback(
self,
session_key: str,
callback: Callable,
*,
generation: int | None = None,
) -> None:
"""Register a deferred callback to fire after the main response.
``generation`` lets callers tie the callback to a specific gateway run
generation so stale runs cannot clear callbacks owned by a fresher run.
"""
if not session_key or not callable(callback):
return
if generation is None:
self._post_delivery_callbacks[session_key] = callback
else:
self._post_delivery_callbacks[session_key] = (int(generation), callback)
def pop_post_delivery_callback(
self,
session_key: str,
*,
generation: int | None = None,
) -> Callable | None:
"""Pop a deferred callback, optionally requiring generation ownership."""
if not session_key:
return None
entry = self._post_delivery_callbacks.get(session_key)
if entry is None:
return None
if isinstance(entry, tuple) and len(entry) == 2:
entry_generation, callback = entry
if generation is not None and int(entry_generation) != int(generation):
return None
self._post_delivery_callbacks.pop(session_key, None)
return callback if callable(callback) else None
if generation is not None:
return None
self._post_delivery_callbacks.pop(session_key, None)
return entry if callable(entry) else None
# ── Processing lifecycle hooks ──────────────────────────────────────────
# Subclasses override these to react to message processing events
# (e.g. Discord adds 👀/✅/❌ reactions).
@@ -1707,9 +1509,7 @@ class BasePlatformAdapter(ABC):
# session lifecycle and its cleanup races with the running task
# (see PR #4926).
cmd = event.get_command()
from hermes_cli.commands import should_bypass_active_session
if should_bypass_active_session(cmd):
if cmd in ("approve", "deny", "status", "stop", "new", "reset", "background", "restart"):
logger.debug(
"[%s] Command '/%s' bypassing active-session guard for %s",
self.name, cmd, session_key,
@@ -1779,6 +1579,8 @@ class BasePlatformAdapter(ABC):
HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode)
HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode)
"""
import random
mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
if mode == "off":
return 0.0
@@ -1807,23 +1609,10 @@ class BasePlatformAdapter(ABC):
# Fall back to a new Event only if the entry was removed externally.
interrupt_event = self._active_sessions.get(session_key) or asyncio.Event()
self._active_sessions[session_key] = interrupt_event
callback_generation = getattr(interrupt_event, "_hermes_run_generation", None)
# Start continuous typing indicator (refreshes every 2 seconds)
_thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None
_keep_typing_kwargs = {"metadata": _thread_metadata}
try:
_keep_typing_sig = inspect.signature(self._keep_typing)
except (TypeError, ValueError):
_keep_typing_sig = None
if _keep_typing_sig is None or "stop_event" in _keep_typing_sig.parameters:
_keep_typing_kwargs["stop_event"] = interrupt_event
typing_task = asyncio.create_task(
self._keep_typing(
event.source.chat_id,
**_keep_typing_kwargs,
)
)
typing_task = asyncio.create_task(self._keep_typing(event.source.chat_id, metadata=_thread_metadata))
try:
await self._run_processing_hook("on_processing_start", event)
@@ -1835,21 +1624,6 @@ class BasePlatformAdapter(ABC):
# streaming already delivered the text (already_sent=True) or
# when the message was queued behind an active agent. Log at
# DEBUG to avoid noisy warnings for expected behavior.
#
# Suppress stale response when the session was interrupted by a
# new message that hasn't been consumed yet. The pending message
# is processed by the pending-message handler below (#8221/#2483).
if (
response
and interrupt_event.is_set()
and session_key in self._pending_messages
):
logger.info(
"[%s] Suppressing stale response for interrupted session %s",
self.name,
session_key,
)
response = None
if not response:
logger.debug("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
if response:
@@ -2032,18 +1806,9 @@ class BasePlatformAdapter(ABC):
if session_key in self._pending_messages:
pending_event = self._pending_messages.pop(session_key)
logger.debug("[%s] Processing queued message from interrupt", self.name)
# Keep the _active_sessions entry live across the turn chain
# and only CLEAR the interrupt Event — do NOT delete the entry.
# If we deleted here, a concurrent inbound message arriving
# during the awaits below would pass the Level-1 guard, spawn
# its own _process_message_background, and run simultaneously
# with the recursive drain below. Two agents on one
# session_key = duplicate responses, duplicate tool calls.
# Clearing the Event keeps the guard live so follow-ups take
# the busy-handler path (queue + interrupt) as intended.
_active = self._active_sessions.get(session_key)
if _active is not None:
_active.clear()
# Clean up current session before processing pending
if session_key in self._active_sessions:
del self._active_sessions[session_key]
typing_task.cancel()
try:
await typing_task
@@ -2080,21 +1845,6 @@ class BasePlatformAdapter(ABC):
except Exception:
pass # Last resort — don't let error reporting crash the handler
finally:
# Fire any one-shot post-delivery callback registered for this
# session (e.g. deferred background-review notifications).
_callback_generation = callback_generation
if hasattr(self, "pop_post_delivery_callback"):
_post_cb = self.pop_post_delivery_callback(
session_key,
generation=_callback_generation,
)
else:
_post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None)
if callable(_post_cb):
try:
_post_cb()
except Exception:
pass
# Stop typing indicator
typing_task.cancel()
try:
@@ -2108,37 +1858,9 @@ class BasePlatformAdapter(ABC):
await self.stop_typing(event.source.chat_id)
except Exception:
pass
# Late-arrival drain: a message may have arrived during the
# cleanup awaits above (typing_task cancel, stop_typing). Such
# messages passed the Level-1 guard (entry still live, Event
# possibly set) and landed in _pending_messages via the
# busy-handler path. Without this block, we would delete the
# active-session entry and the queued message would be silently
# dropped (user never gets a reply).
late_pending = self._pending_messages.pop(session_key, None)
if late_pending is not None:
logger.debug(
"[%s] Late-arrival pending message during cleanup — spawning drain task",
self.name,
)
_active = self._active_sessions.get(session_key)
if _active is not None:
_active.clear()
drain_task = asyncio.create_task(
self._process_message_background(late_pending, session_key)
)
try:
self._background_tasks.add(drain_task)
drain_task.add_done_callback(self._background_tasks.discard)
except TypeError:
# Tests stub create_task() with non-hashable sentinels; tolerate.
pass
# Leave _active_sessions[session_key] populated — the drain
# task's own lifecycle will clean it up.
else:
# Clean up session tracking
if session_key in self._active_sessions:
del self._active_sessions[session_key]
# Clean up session tracking
if session_key in self._active_sessions:
del self._active_sessions[session_key]
async def cancel_background_tasks(self) -> None:
"""Cancel any in-flight background message-processing tasks.
@@ -2146,26 +1868,12 @@ class BasePlatformAdapter(ABC):
Used during gateway shutdown/replacement so active sessions from the old
process do not keep running after adapters are being torn down.
"""
# Loop until no new tasks appear. Without this, a message
# arriving during the `await asyncio.gather` below would spawn
# a fresh _process_message_background task (added to
# self._background_tasks at line ~1668 via handle_message),
# and the _background_tasks.clear() at the end of this method
# would drop the reference — the task runs untracked against a
# disconnecting adapter, logs send-failures, and may linger
# until it completes on its own. Retrying the drain until the
# task set stabilizes closes the window.
MAX_DRAIN_ROUNDS = 5
for _ in range(MAX_DRAIN_ROUNDS):
tasks = [task for task in self._background_tasks if not task.done()]
if not tasks:
break
for task in tasks:
self._expected_cancelled_tasks.add(task)
task.cancel()
tasks = [task for task in self._background_tasks if not task.done()]
for task in tasks:
self._expected_cancelled_tasks.add(task)
task.cancel()
if tasks:
await asyncio.gather(*tasks, return_exceptions=True)
# Loop: late-arrival tasks spawned during the gather above
# will be in self._background_tasks now. Re-check.
self._background_tasks.clear()
self._expected_cancelled_tasks.clear()
self._pending_messages.clear()
@@ -2190,7 +1898,6 @@ class BasePlatformAdapter(ABC):
chat_topic: Optional[str] = None,
user_id_alt: Optional[str] = None,
chat_id_alt: Optional[str] = None,
is_bot: bool = False,
) -> SessionSource:
"""Helper to build a SessionSource for this platform."""
# Normalize empty topic to None
@@ -2207,7 +1914,6 @@ class BasePlatformAdapter(ABC):
chat_topic=chat_topic.strip() if chat_topic else None,
user_id_alt=user_id_alt,
chat_id_alt=chat_id_alt,
is_bot=is_bot,
)
@abstractmethod
+1 -1
View File
@@ -75,7 +75,7 @@ def _redact(text: str) -> str:
def check_bluebubbles_requirements() -> bool:
try:
import aiohttp # noqa: F401
import httpx # noqa: F401
import httpx as _httpx # noqa: F401
except ImportError:
return False
return True
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+39 -413
View File
@@ -8,8 +8,7 @@ Supports:
- Gateway allowlist integration via FEISHU_ALLOWED_USERS
- Persistent dedup state across restarts
- Per-chat serial message processing (matches openclaw createChatQueue)
- Processing status reactions: Typing while working, removed on success,
swapped for CrossMark on failure
- Persistent ACK emoji reaction on inbound messages
- Reaction events routed as synthetic text events (matches openclaw)
- Interactive card button-click events routed as synthetic COMMAND events
- Webhook anomaly tracking (matches openclaw createWebhookAnomalyTracker)
@@ -30,7 +29,6 @@ import re
import threading
import time
import uuid
from collections import OrderedDict
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
@@ -100,7 +98,6 @@ from gateway.platforms.base import (
BasePlatformAdapter,
MessageEvent,
MessageType,
ProcessingOutcome,
SendResult,
SUPPORTED_DOCUMENT_TYPES,
cache_document_from_bytes,
@@ -122,8 +119,6 @@ _MARKDOWN_HINT_RE = re.compile(
re.MULTILINE,
)
_MARKDOWN_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
_MARKDOWN_FENCE_OPEN_RE = re.compile(r"^```([^\n`]*)\s*$")
_MARKDOWN_FENCE_CLOSE_RE = re.compile(r"^```\s*$")
_MENTION_RE = re.compile(r"@_user_\d+")
_MULTISPACE_RE = re.compile(r"[ \t]{2,}")
_POST_CONTENT_INVALID_RE = re.compile(r"content format of the post type is incorrect", re.IGNORECASE)
@@ -193,17 +188,7 @@ _APPROVAL_LABEL_MAP: Dict[str, str] = {
}
_FEISHU_BOT_MSG_TRACK_SIZE = 512 # LRU size for tracking sent message IDs
_FEISHU_REPLY_FALLBACK_CODES = frozenset({230011, 231003}) # reply target withdrawn/missing → create fallback
# Feishu reactions render as prominent badges, unlike Discord/Telegram's
# small footer emoji — a success badge on every message would add noise, so
# we only mark start (Typing) and failure (CrossMark); the reply itself is
# the success signal.
_FEISHU_REACTION_IN_PROGRESS = "Typing"
_FEISHU_REACTION_FAILURE = "CrossMark"
# Bound on the (message_id → reaction_id) handle cache. Happy-path entries
# drain on completion; the cap is a safeguard against unbounded growth from
# delete-failures, not a capacity plan.
_FEISHU_PROCESSING_REACTION_CACHE_SIZE = 1024
_FEISHU_ACK_EMOJI = "OK"
# QR onboarding constants
_ONBOARD_ACCOUNTS_URLS = {
@@ -445,66 +430,23 @@ def _coerce_required_int(value: Any, default: int, min_value: int = 0) -> int:
def _build_markdown_post_payload(content: str) -> str:
rows = _build_markdown_post_rows(content)
return json.dumps(
{
"zh_cn": {
"content": rows,
"content": [
[
{
"tag": "md",
"text": content,
}
]
],
}
},
ensure_ascii=False,
)
def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
"""Build Feishu post rows while isolating fenced code blocks.
Feishu's `md` renderer can swallow trailing content when a fenced code block
appears inside one large markdown element. Split the reply at real fence
lines so prose before/after the code block remains visible while code stays
in a dedicated row.
"""
if not content:
return [[{"tag": "md", "text": ""}]]
if "```" not in content:
return [[{"tag": "md", "text": content}]]
rows: List[List[Dict[str, str]]] = []
current: List[str] = []
in_code_block = False
def _flush_current() -> None:
nonlocal current
if not current:
return
segment = "\n".join(current)
if segment.strip():
rows.append([{"tag": "md", "text": segment}])
current = []
for raw_line in content.splitlines():
stripped_line = raw_line.strip()
is_fence = bool(
_MARKDOWN_FENCE_CLOSE_RE.match(stripped_line)
if in_code_block
else _MARKDOWN_FENCE_OPEN_RE.match(stripped_line)
)
if is_fence:
if not in_code_block:
_flush_current()
current.append(raw_line)
in_code_block = not in_code_block
if not in_code_block:
_flush_current()
continue
current.append(raw_line)
_flush_current()
return rows or [[{"tag": "md", "text": content}]]
def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
resolved = _resolve_post_payload(payload)
if not resolved:
@@ -1131,13 +1073,6 @@ class FeishuAdapter(BasePlatformAdapter):
self._webhook_rate_counts: Dict[str, tuple[int, float]] = {} # rate_key → (count, window_start)
self._webhook_anomaly_counts: Dict[str, tuple[int, str, float]] = {} # ip → (count, last_status, first_seen)
self._card_action_tokens: Dict[str, float] = {} # token → first_seen_time
# Inbound events that arrived before the adapter loop was ready
# (e.g. during startup/restart or network-flap reconnect). A single
# drainer thread replays them as soon as the loop becomes available.
self._pending_inbound_events: List[Any] = []
self._pending_inbound_lock = threading.Lock()
self._pending_drain_scheduled = False
self._pending_inbound_max_depth = 1000 # cap queue; drop oldest beyond
self._chat_locks: Dict[str, asyncio.Lock] = {} # chat_id → lock (per-chat serial processing)
self._sent_message_ids_to_chat: Dict[str, str] = {} # message_id → chat_id (for reaction routing)
self._sent_message_id_order: List[str] = [] # LRU order for _sent_message_ids_to_chat
@@ -1154,9 +1089,6 @@ class FeishuAdapter(BasePlatformAdapter):
# Exec approval button state (approval_id → {session_key, message_id, chat_id})
self._approval_state: Dict[int, Dict[str, str]] = {}
self._approval_counter = itertools.count(1)
# Feishu reaction deletion requires the opaque reaction_id returned
# by create, so we cache it per message_id.
self._pending_processing_reactions: "OrderedDict[str, str]" = OrderedDict()
self._load_seen_message_ids()
@staticmethod
@@ -1287,12 +1219,6 @@ class FeishuAdapter(BasePlatformAdapter):
.register_p2_card_action_trigger(self._on_card_action_trigger)
.register_p2_im_chat_member_bot_added_v1(self._on_bot_added_to_chat)
.register_p2_im_chat_member_bot_deleted_v1(self._on_bot_removed_from_chat)
.register_p2_im_chat_access_event_bot_p2p_chat_entered_v1(self._on_p2p_chat_entered)
.register_p2_im_message_recalled_v1(self._on_message_recalled)
.register_p2_customized_event(
"drive.notice.comment_add_v1",
self._on_drive_comment_event,
)
.build()
)
@@ -1484,8 +1410,6 @@ class FeishuAdapter(BasePlatformAdapter):
chat_id: str,
message_id: str,
content: str,
*,
finalize: bool = False,
) -> SendResult:
"""Edit a previously sent Feishu text/post message."""
if not self._client:
@@ -1833,22 +1757,10 @@ class FeishuAdapter(BasePlatformAdapter):
# =========================================================================
def _on_message_event(self, data: Any) -> None:
"""Normalize Feishu inbound events into MessageEvent.
Called by the lark_oapi SDK's event dispatcher on a background thread.
If the adapter loop is not currently accepting callbacks (brief window
during startup/restart or network-flap reconnect), the event is queued
for replay instead of dropped.
"""
"""Normalize Feishu inbound events into MessageEvent."""
loop = self._loop
if not self._loop_accepts_callbacks(loop):
start_drainer = self._enqueue_pending_inbound_event(data)
if start_drainer:
threading.Thread(
target=self._drain_pending_inbound_events,
name="feishu-pending-inbound-drainer",
daemon=True,
).start()
if loop is None or bool(getattr(loop, "is_closed", lambda: False)()):
logger.warning("[Feishu] Dropping inbound message before adapter loop is ready")
return
future = asyncio.run_coroutine_threadsafe(
self._handle_message_event_data(data),
@@ -1856,124 +1768,6 @@ class FeishuAdapter(BasePlatformAdapter):
)
future.add_done_callback(self._log_background_failure)
def _enqueue_pending_inbound_event(self, data: Any) -> bool:
"""Append an event to the pending-inbound queue.
Returns True if the caller should spawn a drainer thread (no drainer
currently scheduled), False if a drainer is already running and will
pick up the new event on its next pass.
"""
with self._pending_inbound_lock:
if len(self._pending_inbound_events) >= self._pending_inbound_max_depth:
# Queue full — drop the oldest to make room. This happens only
# if the loop stays unavailable for an extended period AND the
# WS keeps firing callbacks. Still better than silent drops.
dropped = self._pending_inbound_events.pop(0)
try:
event = getattr(dropped, "event", None)
message = getattr(event, "message", None)
message_id = str(getattr(message, "message_id", "") or "unknown")
except Exception:
message_id = "unknown"
logger.error(
"[Feishu] Pending-inbound queue full (%d); dropped oldest event %s",
self._pending_inbound_max_depth,
message_id,
)
self._pending_inbound_events.append(data)
depth = len(self._pending_inbound_events)
should_start = not self._pending_drain_scheduled
if should_start:
self._pending_drain_scheduled = True
logger.warning(
"[Feishu] Queued inbound event for replay (loop not ready, queue depth=%d)",
depth,
)
return should_start
def _drain_pending_inbound_events(self) -> None:
"""Replay queued inbound events once the adapter loop is ready.
Runs in a dedicated daemon thread. Polls ``_running`` and
``_loop_accepts_callbacks`` until events can be dispatched or the
adapter shuts down. A single drainer handles the entire queue;
concurrent ``_on_message_event`` calls just append.
"""
poll_interval = 0.25
max_wait_seconds = 120.0 # safety cap: drop queue after 2 minutes
waited = 0.0
try:
while True:
if not getattr(self, "_running", True):
# Adapter shutting down — drop queued events rather than
# holding them against a closed loop.
with self._pending_inbound_lock:
dropped = len(self._pending_inbound_events)
self._pending_inbound_events.clear()
if dropped:
logger.warning(
"[Feishu] Dropped %d queued inbound event(s) during shutdown",
dropped,
)
return
loop = self._loop
if self._loop_accepts_callbacks(loop):
with self._pending_inbound_lock:
batch = self._pending_inbound_events[:]
self._pending_inbound_events.clear()
if not batch:
# Queue emptied between check and grab; done.
with self._pending_inbound_lock:
if not self._pending_inbound_events:
return
continue
dispatched = 0
requeue: List[Any] = []
for event in batch:
try:
fut = asyncio.run_coroutine_threadsafe(
self._handle_message_event_data(event),
loop,
)
fut.add_done_callback(self._log_background_failure)
dispatched += 1
except RuntimeError:
# Loop closed between check and submit — requeue
# and poll again.
requeue.append(event)
if requeue:
with self._pending_inbound_lock:
self._pending_inbound_events[:0] = requeue
if dispatched:
logger.info(
"[Feishu] Replayed %d queued inbound event(s)",
dispatched,
)
if not requeue:
# Successfully drained; check if more arrived while
# we were dispatching and exit if not.
with self._pending_inbound_lock:
if not self._pending_inbound_events:
return
# More events queued or requeue pending — loop again.
continue
if waited >= max_wait_seconds:
with self._pending_inbound_lock:
dropped = len(self._pending_inbound_events)
self._pending_inbound_events.clear()
logger.error(
"[Feishu] Adapter loop unavailable for %.0fs; "
"dropped %d queued inbound event(s)",
max_wait_seconds,
dropped,
)
return
time.sleep(poll_interval)
waited += poll_interval
finally:
with self._pending_inbound_lock:
self._pending_drain_scheduled = False
async def _handle_message_event_data(self, data: Any) -> None:
"""Shared inbound message handling for websocket and webhook transports."""
event = getattr(data, "event", None)
@@ -1988,8 +1782,8 @@ class FeishuAdapter(BasePlatformAdapter):
if not message_id or self._is_duplicate(message_id):
logger.debug("[Feishu] Dropping duplicate/missing message_id: %s", message_id)
return
if self._is_self_sent_bot_message(event):
logger.debug("[Feishu] Dropping self-sent bot event: %s", message_id)
if getattr(sender, "sender_type", "") == "bot":
logger.debug("[Feishu] Dropping bot-originated event: %s", message_id)
return
chat_type = getattr(message, "chat_type", "p2p")
@@ -2026,31 +1820,6 @@ class FeishuAdapter(BasePlatformAdapter):
logger.info("[Feishu] Bot removed from chat: %s", chat_id)
self._chat_info_cache.pop(chat_id, None)
def _on_p2p_chat_entered(self, data: Any) -> None:
logger.debug("[Feishu] User entered P2P chat with bot")
def _on_message_recalled(self, data: Any) -> None:
logger.debug("[Feishu] Message recalled by user")
def _on_drive_comment_event(self, data: Any) -> None:
"""Handle drive document comment notification (drive.notice.comment_add_v1).
Delegates to :mod:`gateway.platforms.feishu_comment` for parsing,
logging, and reaction. Scheduling follows the same
``run_coroutine_threadsafe`` pattern used by ``_on_message_event``.
"""
from gateway.platforms.feishu_comment import handle_drive_comment_event
loop = self._loop
if not self._loop_accepts_callbacks(loop):
logger.warning("[Feishu] Dropping drive comment event before adapter loop is ready")
return
future = asyncio.run_coroutine_threadsafe(
handle_drive_comment_event(self._client, data, self_open_id=self._bot_open_id),
loop,
)
future.add_done_callback(self._log_background_failure)
def _on_reaction_event(self, event_type: str, data: Any) -> None:
"""Route user reactions on bot messages as synthetic text events."""
event = getattr(data, "event", None)
@@ -2066,12 +1835,12 @@ class FeishuAdapter(BasePlatformAdapter):
operator_type,
emoji_type,
)
# Drop bot/app-origin reactions to break the feedback loop from our
# own lifecycle reactions. A human reacting with the same emoji (e.g.
# clicking Typing on a bot message) is still routed through.
# Only process reactions from real users. Ignore app/bot-generated reactions
# and Hermes' own ACK emoji to avoid feedback loops.
loop = self._loop
if (
operator_type in {"bot", "app"}
or emoji_type == _FEISHU_ACK_EMOJI
or not message_id
or loop is None
or bool(getattr(loop, "is_closed", lambda: False)())
@@ -2295,35 +2064,33 @@ class FeishuAdapter(BasePlatformAdapter):
async def _handle_message_with_guards(self, event: MessageEvent) -> None:
"""Dispatch a single event through the agent pipeline with per-chat serialization
before handing the event off to the agent.
and a persistent ACK emoji reaction before processing starts.
Per-chat lock ensures messages in the same chat are processed one at a
time (matches openclaw's createChatQueue serial queue behaviour).
- Per-chat lock: ensures messages in the same chat are processed one at a time
(matches openclaw's createChatQueue serial queue behaviour).
- ACK indicator: adds a CHECK reaction to the triggering message before handing
off to the agent and leaves it in place as a receipt marker.
"""
chat_id = getattr(event.source, "chat_id", "") or "" if event.source else ""
chat_lock = self._get_chat_lock(chat_id)
async with chat_lock:
message_id = event.message_id
if message_id:
await self._add_ack_reaction(message_id)
await self.handle_message(event)
# =========================================================================
# Processing status reactions
# =========================================================================
def _reactions_enabled(self) -> bool:
return os.getenv("FEISHU_REACTIONS", "true").strip().lower() not in ("false", "0", "no")
async def _add_reaction(self, message_id: str, emoji_type: str) -> Optional[str]:
"""Return the reaction_id on success, else None. The id is needed later for deletion."""
if not self._client or not message_id or not emoji_type:
async def _add_ack_reaction(self, message_id: str) -> Optional[str]:
"""Add a persistent ACK emoji reaction to signal the message was received."""
if not self._client or not message_id:
return None
try:
from lark_oapi.api.im.v1 import (
from lark_oapi.api.im.v1 import ( # lazy import — keeps optional dep optional
CreateMessageReactionRequest,
CreateMessageReactionRequestBody,
)
body = (
CreateMessageReactionRequestBody.builder()
.reaction_type({"emoji_type": emoji_type})
.reaction_type({"emoji_type": _FEISHU_ACK_EMOJI})
.build()
)
request = (
@@ -2336,93 +2103,16 @@ class FeishuAdapter(BasePlatformAdapter):
if response and getattr(response, "success", lambda: False)():
data = getattr(response, "data", None)
return getattr(data, "reaction_id", None)
logger.debug(
"[Feishu] Add reaction %s on %s rejected: code=%s msg=%s",
emoji_type,
logger.warning(
"[Feishu] Failed to add ack reaction to %s: code=%s msg=%s",
message_id,
getattr(response, "code", None),
getattr(response, "msg", None),
)
except Exception:
logger.warning(
"[Feishu] Add reaction %s on %s raised",
emoji_type,
message_id,
exc_info=True,
)
logger.warning("[Feishu] Failed to add ack reaction to %s", message_id, exc_info=True)
return None
async def _remove_reaction(self, message_id: str, reaction_id: str) -> bool:
if not self._client or not message_id or not reaction_id:
return False
try:
from lark_oapi.api.im.v1 import DeleteMessageReactionRequest
request = (
DeleteMessageReactionRequest.builder()
.message_id(message_id)
.reaction_id(reaction_id)
.build()
)
response = await asyncio.to_thread(self._client.im.v1.message_reaction.delete, request)
if response and getattr(response, "success", lambda: False)():
return True
logger.debug(
"[Feishu] Remove reaction %s on %s rejected: code=%s msg=%s",
reaction_id,
message_id,
getattr(response, "code", None),
getattr(response, "msg", None),
)
except Exception:
logger.warning(
"[Feishu] Remove reaction %s on %s raised",
reaction_id,
message_id,
exc_info=True,
)
return False
def _remember_processing_reaction(self, message_id: str, reaction_id: str) -> None:
cache = self._pending_processing_reactions
cache[message_id] = reaction_id
cache.move_to_end(message_id)
while len(cache) > _FEISHU_PROCESSING_REACTION_CACHE_SIZE:
cache.popitem(last=False)
def _pop_processing_reaction(self, message_id: str) -> Optional[str]:
return self._pending_processing_reactions.pop(message_id, None)
async def on_processing_start(self, event: MessageEvent) -> None:
if not self._reactions_enabled():
return
message_id = event.message_id
if not message_id or message_id in self._pending_processing_reactions:
return
reaction_id = await self._add_reaction(message_id, _FEISHU_REACTION_IN_PROGRESS)
if reaction_id:
self._remember_processing_reaction(message_id, reaction_id)
async def on_processing_complete(
self, event: MessageEvent, outcome: ProcessingOutcome
) -> None:
if not self._reactions_enabled():
return
message_id = event.message_id
if not message_id:
return
start_reaction_id = self._pending_processing_reactions.get(message_id)
if start_reaction_id:
if not await self._remove_reaction(message_id, start_reaction_id):
# Don't stack a second badge on top of a Typing we couldn't
# remove — UI would read as both "working" and "done/failed"
# simultaneously. Keep the handle so LRU eventually evicts it.
return
self._pop_processing_reaction(message_id)
if outcome is ProcessingOutcome.FAILURE:
await self._add_reaction(message_id, _FEISHU_REACTION_FAILURE)
# =========================================================================
# Webhook server and security
# =========================================================================
@@ -2755,8 +2445,6 @@ class FeishuAdapter(BasePlatformAdapter):
self._on_reaction_event(event_type, data)
elif event_type == "card.action.trigger":
self._on_card_action_trigger(data)
elif event_type == "drive.notice.comment_add_v1":
self._on_drive_comment_event(data)
else:
logger.debug("[Feishu] Ignoring webhook event type: %s", event_type or "unknown")
return web.json_response({"code": 0, "msg": "ok"})
@@ -3391,23 +3079,6 @@ class FeishuAdapter(BasePlatformAdapter):
return self._post_mentions_bot(normalized.mentioned_ids)
return False
def _is_self_sent_bot_message(self, event: Any) -> bool:
"""Return True only for Feishu events emitted by this Hermes bot."""
sender = getattr(event, "sender", None)
sender_type = str(getattr(sender, "sender_type", "") or "").strip().lower()
if sender_type not in {"bot", "app"}:
return False
sender_id = getattr(sender, "sender_id", None)
sender_open_id = str(getattr(sender_id, "open_id", "") or "").strip()
sender_user_id = str(getattr(sender_id, "user_id", "") or "").strip()
if self._bot_open_id and sender_open_id == self._bot_open_id:
return True
if self._bot_user_id and sender_user_id == self._bot_user_id:
return True
return False
def _message_mentions_bot(self, mentions: List[Any]) -> bool:
"""Check whether any mention targets the configured or inferred bot identity."""
for mention in mentions:
@@ -3435,55 +3106,10 @@ class FeishuAdapter(BasePlatformAdapter):
return False
async def _hydrate_bot_identity(self) -> None:
"""Best-effort discovery of bot identity for precise group mention gating
and self-sent bot event filtering.
Populates ``_bot_open_id`` and ``_bot_name`` from /open-apis/bot/v3/info
(no extra scopes required beyond the tenant access token). Falls back to
the application info endpoint for ``_bot_name`` only when the first probe
doesn't return it. Each field is hydrated independently — a value already
supplied via env vars (FEISHU_BOT_OPEN_ID / FEISHU_BOT_USER_ID /
FEISHU_BOT_NAME) is preserved and skips its probe.
"""
"""Best-effort discovery of bot identity for precise group mention gating."""
if not self._client:
return
if self._bot_open_id and self._bot_name:
# Everything the self-send filter and precise mention gate need is
# already in place; nothing to probe.
return
# Primary probe: /open-apis/bot/v3/info — returns bot_name + open_id, no
# extra scopes required. This is the same endpoint the onboarding wizard
# uses via probe_bot().
if not self._bot_open_id or not self._bot_name:
try:
resp = await asyncio.to_thread(
self._client.request,
method="GET",
url="/open-apis/bot/v3/info",
body=None,
raw_response=True,
)
content = getattr(resp, "content", None)
if content:
payload = json.loads(content)
parsed = _parse_bot_response(payload) or {}
open_id = (parsed.get("bot_open_id") or "").strip()
bot_name = (parsed.get("bot_name") or "").strip()
if open_id and not self._bot_open_id:
self._bot_open_id = open_id
if bot_name and not self._bot_name:
self._bot_name = bot_name
except Exception:
logger.debug(
"[Feishu] /bot/v3/info probe failed during hydration",
exc_info=True,
)
# Fallback probe for _bot_name only: application info endpoint. Needs
# admin:app.info:readonly or application:application:self_manage scope,
# so it's best-effort.
if self._bot_name:
if any((self._bot_open_id, self._bot_user_id, self._bot_name)):
return
try:
request = self._build_get_application_request(app_id=self._app_id, lang="en_us")
@@ -3492,17 +3118,17 @@ class FeishuAdapter(BasePlatformAdapter):
code = getattr(response, "code", None)
if code == 99991672:
logger.warning(
"[Feishu] Unable to hydrate bot name from application info. "
"[Feishu] Unable to hydrate bot identity from application info. "
"Grant admin:app.info:readonly or application:application:self_manage "
"so group @mention gating can resolve the bot name precisely."
)
return
app = getattr(getattr(response, "data", None), "app", None)
app_name = (getattr(app, "app_name", None) or "").strip()
if app_name and not self._bot_name:
if app_name:
self._bot_name = app_name
except Exception:
logger.debug("[Feishu] Failed to hydrate bot name from application info", exc_info=True)
logger.debug("[Feishu] Failed to hydrate bot identity", exc_info=True)
# =========================================================================
# Deduplication — seen message ID cache (persistent)
File diff suppressed because it is too large Load Diff
-429
View File
@@ -1,429 +0,0 @@
"""
Feishu document comment access-control rules.
3-tier rule resolution: exact doc > wildcard "*" > top-level > code defaults.
Each field (enabled/policy/allow_from) falls back independently.
Config: ~/.hermes/feishu_comment_rules.json (mtime-cached, hot-reload).
Pairing store: ~/.hermes/feishu_comment_pairing.json.
"""
from __future__ import annotations
import json
import logging
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Optional
from hermes_constants import get_hermes_home
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
#
# Uses the canonical ``get_hermes_home()`` helper (HERMES_HOME-aware and
# profile-safe). Resolved at import time; this module is lazy-imported by
# the Feishu comment event handler, which runs long after profile overrides
# have been applied, so freezing paths here is safe.
RULES_FILE = get_hermes_home() / "feishu_comment_rules.json"
PAIRING_FILE = get_hermes_home() / "feishu_comment_pairing.json"
# ---------------------------------------------------------------------------
# Data models
# ---------------------------------------------------------------------------
_VALID_POLICIES = ("allowlist", "pairing")
@dataclass(frozen=True)
class CommentDocumentRule:
"""Per-document rule. ``None`` means 'inherit from lower tier'."""
enabled: Optional[bool] = None
policy: Optional[str] = None
allow_from: Optional[frozenset] = None
@dataclass(frozen=True)
class CommentsConfig:
"""Top-level comment access config."""
enabled: bool = True
policy: str = "pairing"
allow_from: frozenset = field(default_factory=frozenset)
documents: Dict[str, CommentDocumentRule] = field(default_factory=dict)
@dataclass(frozen=True)
class ResolvedCommentRule:
"""Fully resolved rule after field-by-field fallback."""
enabled: bool
policy: str
allow_from: frozenset
match_source: str # e.g. "exact:docx:xxx" | "wildcard" | "top" | "default"
# ---------------------------------------------------------------------------
# Mtime-cached file loading
# ---------------------------------------------------------------------------
class _MtimeCache:
"""Generic mtime-based file cache. ``stat()`` per access, re-read only on change."""
def __init__(self, path: Path):
self._path = path
self._mtime: float = 0.0
self._data: Optional[dict] = None
def load(self) -> dict:
try:
st = self._path.stat()
mtime = st.st_mtime
except FileNotFoundError:
self._mtime = 0.0
self._data = {}
return {}
if mtime == self._mtime and self._data is not None:
return self._data
try:
with open(self._path, "r", encoding="utf-8") as f:
data = json.load(f)
if not isinstance(data, dict):
data = {}
except (json.JSONDecodeError, OSError):
logger.warning("[Feishu-Rules] Failed to read %s, using empty config", self._path)
data = {}
self._mtime = mtime
self._data = data
return data
_rules_cache = _MtimeCache(RULES_FILE)
_pairing_cache = _MtimeCache(PAIRING_FILE)
# ---------------------------------------------------------------------------
# Config parsing
# ---------------------------------------------------------------------------
def _parse_frozenset(raw: Any) -> Optional[frozenset]:
"""Parse a list of strings into a frozenset; return None if key absent."""
if raw is None:
return None
if isinstance(raw, (list, tuple)):
return frozenset(str(u).strip() for u in raw if str(u).strip())
return None
def _parse_document_rule(raw: dict) -> CommentDocumentRule:
enabled = raw.get("enabled")
if enabled is not None:
enabled = bool(enabled)
policy = raw.get("policy")
if policy is not None:
policy = str(policy).strip().lower()
if policy not in _VALID_POLICIES:
policy = None
allow_from = _parse_frozenset(raw.get("allow_from"))
return CommentDocumentRule(enabled=enabled, policy=policy, allow_from=allow_from)
def load_config() -> CommentsConfig:
"""Load comment rules from disk (mtime-cached)."""
raw = _rules_cache.load()
if not raw:
return CommentsConfig()
documents: Dict[str, CommentDocumentRule] = {}
raw_docs = raw.get("documents", {})
if isinstance(raw_docs, dict):
for key, rule_raw in raw_docs.items():
if isinstance(rule_raw, dict):
documents[str(key)] = _parse_document_rule(rule_raw)
policy = str(raw.get("policy", "pairing")).strip().lower()
if policy not in _VALID_POLICIES:
policy = "pairing"
return CommentsConfig(
enabled=raw.get("enabled", True),
policy=policy,
allow_from=_parse_frozenset(raw.get("allow_from")) or frozenset(),
documents=documents,
)
# ---------------------------------------------------------------------------
# Rule resolution (§8.4 field-by-field fallback)
# ---------------------------------------------------------------------------
def has_wiki_keys(cfg: CommentsConfig) -> bool:
"""Check if any document rule key starts with 'wiki:'."""
return any(k.startswith("wiki:") for k in cfg.documents)
def resolve_rule(
cfg: CommentsConfig,
file_type: str,
file_token: str,
wiki_token: str = "",
) -> ResolvedCommentRule:
"""Resolve effective rule: exact doc → wiki key → wildcard → top-level → defaults."""
exact_key = f"{file_type}:{file_token}"
exact = cfg.documents.get(exact_key)
exact_src = f"exact:{exact_key}"
if exact is None and wiki_token:
wiki_key = f"wiki:{wiki_token}"
exact = cfg.documents.get(wiki_key)
exact_src = f"exact:{wiki_key}"
wildcard = cfg.documents.get("*")
layers = []
if exact is not None:
layers.append((exact, exact_src))
if wildcard is not None:
layers.append((wildcard, "wildcard"))
def _pick(field_name: str):
for layer, source in layers:
val = getattr(layer, field_name)
if val is not None:
return val, source
return getattr(cfg, field_name), "top"
enabled, en_src = _pick("enabled")
policy, pol_src = _pick("policy")
allow_from, _ = _pick("allow_from")
# match_source = highest-priority tier that contributed any field
priority_order = {"exact": 0, "wildcard": 1, "top": 2}
best_src = min(
[en_src, pol_src],
key=lambda s: priority_order.get(s.split(":")[0], 3),
)
return ResolvedCommentRule(
enabled=enabled,
policy=policy,
allow_from=allow_from,
match_source=best_src,
)
# ---------------------------------------------------------------------------
# Pairing store
# ---------------------------------------------------------------------------
def _load_pairing_approved() -> set:
"""Return set of approved user open_ids (mtime-cached)."""
data = _pairing_cache.load()
approved = data.get("approved", {})
if isinstance(approved, dict):
return set(approved.keys())
if isinstance(approved, list):
return set(str(u) for u in approved if u)
return set()
def _save_pairing(data: dict) -> None:
PAIRING_FILE.parent.mkdir(parents=True, exist_ok=True)
tmp = PAIRING_FILE.with_suffix(".tmp")
with open(tmp, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2, ensure_ascii=False)
tmp.replace(PAIRING_FILE)
# Invalidate cache so next load picks up change
_pairing_cache._mtime = 0.0
_pairing_cache._data = None
def pairing_add(user_open_id: str) -> bool:
"""Add a user to the pairing-approved list. Returns True if newly added."""
data = _pairing_cache.load()
approved = data.get("approved", {})
if not isinstance(approved, dict):
approved = {}
if user_open_id in approved:
return False
approved[user_open_id] = {"approved_at": time.time()}
data["approved"] = approved
_save_pairing(data)
return True
def pairing_remove(user_open_id: str) -> bool:
"""Remove a user from the pairing-approved list. Returns True if removed."""
data = _pairing_cache.load()
approved = data.get("approved", {})
if not isinstance(approved, dict):
return False
if user_open_id not in approved:
return False
del approved[user_open_id]
data["approved"] = approved
_save_pairing(data)
return True
def pairing_list() -> Dict[str, Any]:
"""Return the approved dict {user_open_id: {approved_at: ...}}."""
data = _pairing_cache.load()
approved = data.get("approved", {})
return dict(approved) if isinstance(approved, dict) else {}
# ---------------------------------------------------------------------------
# Access check (public API for feishu_comment.py)
# ---------------------------------------------------------------------------
def is_user_allowed(rule: ResolvedCommentRule, user_open_id: str) -> bool:
"""Check if user passes the resolved rule's policy gate."""
if user_open_id in rule.allow_from:
return True
if rule.policy == "pairing":
return user_open_id in _load_pairing_approved()
return False
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def _print_status() -> None:
cfg = load_config()
print(f"Rules file: {RULES_FILE}")
print(f" exists: {RULES_FILE.exists()}")
print(f"Pairing file: {PAIRING_FILE}")
print(f" exists: {PAIRING_FILE.exists()}")
print()
print(f"Top-level:")
print(f" enabled: {cfg.enabled}")
print(f" policy: {cfg.policy}")
print(f" allow_from: {sorted(cfg.allow_from) if cfg.allow_from else '[]'}")
print()
if cfg.documents:
print(f"Document rules ({len(cfg.documents)}):")
for key, rule in sorted(cfg.documents.items()):
parts = []
if rule.enabled is not None:
parts.append(f"enabled={rule.enabled}")
if rule.policy is not None:
parts.append(f"policy={rule.policy}")
if rule.allow_from is not None:
parts.append(f"allow_from={sorted(rule.allow_from)}")
print(f" [{key}] {', '.join(parts) if parts else '(empty — inherits all)'}")
else:
print("Document rules: (none)")
print()
approved = pairing_list()
print(f"Pairing approved ({len(approved)}):")
for uid, meta in sorted(approved.items()):
ts = meta.get("approved_at", 0)
print(f" {uid} (approved_at={ts})")
def _do_check(doc_key: str, user_open_id: str) -> None:
cfg = load_config()
parts = doc_key.split(":", 1)
if len(parts) != 2:
print(f"Error: doc_key must be 'fileType:fileToken', got '{doc_key}'")
return
file_type, file_token = parts
rule = resolve_rule(cfg, file_type, file_token)
allowed = is_user_allowed(rule, user_open_id)
print(f"Document: {doc_key}")
print(f"User: {user_open_id}")
print(f"Resolved rule:")
print(f" enabled: {rule.enabled}")
print(f" policy: {rule.policy}")
print(f" allow_from: {sorted(rule.allow_from) if rule.allow_from else '[]'}")
print(f" match_source: {rule.match_source}")
print(f"Result: {'ALLOWED' if allowed else 'DENIED'}")
def _main() -> int:
import sys
try:
from hermes_cli.env_loader import load_hermes_dotenv
load_hermes_dotenv()
except Exception:
pass
usage = (
"Usage: python -m gateway.platforms.feishu_comment_rules <command> [args]\n"
"\n"
"Commands:\n"
" status Show rules config and pairing state\n"
" check <fileType:token> <user> Simulate access check\n"
" pairing add <user_open_id> Add user to pairing-approved list\n"
" pairing remove <user_open_id> Remove user from pairing-approved list\n"
" pairing list List pairing-approved users\n"
"\n"
f"Rules config file: {RULES_FILE}\n"
" Edit this JSON file directly to configure policies and document rules.\n"
" Changes take effect on the next comment event (no restart needed).\n"
)
args = sys.argv[1:]
if not args:
print(usage)
return 1
cmd = args[0]
if cmd == "status":
_print_status()
elif cmd == "check":
if len(args) < 3:
print("Usage: check <fileType:fileToken> <user_open_id>")
return 1
_do_check(args[1], args[2])
elif cmd == "pairing":
if len(args) < 2:
print("Usage: pairing <add|remove|list> [args]")
return 1
sub = args[1]
if sub == "add":
if len(args) < 3:
print("Usage: pairing add <user_open_id>")
return 1
if pairing_add(args[2]):
print(f"Added: {args[2]}")
else:
print(f"Already approved: {args[2]}")
elif sub == "remove":
if len(args) < 3:
print("Usage: pairing remove <user_open_id>")
return 1
if pairing_remove(args[2]):
print(f"Removed: {args[2]}")
else:
print(f"Not in approved list: {args[2]}")
elif sub == "list":
approved = pairing_list()
if not approved:
print("(no approved users)")
for uid, meta in sorted(approved.items()):
print(f" {uid} approved_at={meta.get('approved_at', '?')}")
else:
print(f"Unknown pairing subcommand: {sub}")
return 1
else:
print(f"Unknown command: {cmd}\n")
print(usage)
return 1
return 0
if __name__ == "__main__":
import sys
sys.exit(_main())
+1 -4
View File
@@ -49,10 +49,7 @@ class MessageDeduplicator:
return False
now = time.time()
if msg_id in self._seen:
if now - self._seen[msg_id] < self._ttl:
return True
# Entry has expired — remove it and treat as new
del self._seen[msg_id]
return True
self._seen[msg_id] = now
if len(self._seen) > self._max_size:
cutoff = now - self._ttl
File diff suppressed because it is too large Load Diff
+2 -8
View File
@@ -304,7 +304,7 @@ class MattermostAdapter(BasePlatformAdapter):
)
async def edit_message(
self, chat_id: str, message_id: str, content: str, *, finalize: bool = False
self, chat_id: str, message_id: str, content: str
) -> SendResult:
"""Edit an existing post."""
formatted = self.format_message(content)
@@ -410,6 +410,7 @@ class MattermostAdapter(BasePlatformAdapter):
logger.warning("Mattermost: blocked unsafe URL (SSRF protection)")
return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to)
import asyncio
import aiohttp
last_exc = None
@@ -717,12 +718,6 @@ class MattermostAdapter(BasePlatformAdapter):
thread_id=thread_id,
)
# Per-channel ephemeral prompt
from gateway.platforms.base import resolve_channel_prompt
_channel_prompt = resolve_channel_prompt(
self.config.extra, channel_id, None,
)
msg_event = MessageEvent(
text=message_text,
message_type=msg_type,
@@ -731,7 +726,6 @@ class MattermostAdapter(BasePlatformAdapter):
message_id=post_id,
media_urls=media_urls if media_urls else None,
media_types=media_types if media_types else None,
channel_prompt=_channel_prompt,
)
await self.handle_message(msg_event)
File diff suppressed because it is too large Load Diff
-57
View File
@@ -1,57 +0,0 @@
"""
QQBot platform package.
Re-exports the main adapter symbols from ``adapter.py`` (the original
``qqbot.py``) so that **all existing import paths remain unchanged**::
from gateway.platforms.qqbot import QQAdapter # works
from gateway.platforms.qqbot import check_qq_requirements # works
New modules:
- ``constants`` shared constants (API URLs, timeouts, message types)
- ``utils`` User-Agent builder, config helpers
- ``crypto`` AES-256-GCM key generation and decryption
- ``onboard`` QR-code scan-to-configure flow
"""
# -- Adapter (original qqbot.py) ------------------------------------------
from .adapter import ( # noqa: F401
QQAdapter,
QQCloseError,
check_qq_requirements,
_coerce_list,
_ssrf_redirect_guard,
)
# -- Onboard (QR-code scan-to-configure) -----------------------------------
from .onboard import ( # noqa: F401
BindStatus,
create_bind_task,
poll_bind_result,
build_connect_url,
)
from .crypto import decrypt_secret, generate_bind_key # noqa: F401
# -- Utils -----------------------------------------------------------------
from .utils import build_user_agent, get_api_headers, coerce_list # noqa: F401
__all__ = [
# adapter
"QQAdapter",
"QQCloseError",
"check_qq_requirements",
"_coerce_list",
"_ssrf_redirect_guard",
# onboard
"BindStatus",
"create_bind_task",
"poll_bind_result",
"build_connect_url",
# crypto
"decrypt_secret",
"generate_bind_key",
# utils
"build_user_agent",
"get_api_headers",
"coerce_list",
]
-74
View File
@@ -1,74 +0,0 @@
"""QQBot package-level constants shared across adapter, onboard, and other modules."""
from __future__ import annotations
import os
# ---------------------------------------------------------------------------
# QQBot adapter version — bump on functional changes to the adapter package.
# ---------------------------------------------------------------------------
QQBOT_VERSION = "1.1.0"
# ---------------------------------------------------------------------------
# API endpoints
# ---------------------------------------------------------------------------
# The portal domain is configurable via QQ_API_HOST for corporate proxies
# or test environments. Default: q.qq.com (production).
PORTAL_HOST = os.getenv("QQ_PORTAL_HOST", "q.qq.com")
API_BASE = "https://api.sgroup.qq.com"
TOKEN_URL = "https://bots.qq.com/app/getAppAccessToken"
GATEWAY_URL_PATH = "/gateway"
# QR-code onboard endpoints (on the portal host)
ONBOARD_CREATE_PATH = "/lite/create_bind_task"
ONBOARD_POLL_PATH = "/lite/poll_bind_result"
QR_URL_TEMPLATE = (
"https://q.qq.com/qqbot/openclaw/connect.html"
"?task_id={task_id}&_wv=2&source=hermes"
)
# ---------------------------------------------------------------------------
# Timeouts & retry
# ---------------------------------------------------------------------------
DEFAULT_API_TIMEOUT = 30.0
FILE_UPLOAD_TIMEOUT = 120.0
CONNECT_TIMEOUT_SECONDS = 20.0
RECONNECT_BACKOFF = [2, 5, 10, 30, 60]
MAX_RECONNECT_ATTEMPTS = 100
RATE_LIMIT_DELAY = 60 # seconds
QUICK_DISCONNECT_THRESHOLD = 5.0 # seconds
MAX_QUICK_DISCONNECT_COUNT = 3
ONBOARD_POLL_INTERVAL = 2.0 # seconds between poll_bind_result calls
ONBOARD_API_TIMEOUT = 10.0
# ---------------------------------------------------------------------------
# Message limits
# ---------------------------------------------------------------------------
MAX_MESSAGE_LENGTH = 4000
DEDUP_WINDOW_SECONDS = 300
DEDUP_MAX_SIZE = 1000
# ---------------------------------------------------------------------------
# QQ Bot message types
# ---------------------------------------------------------------------------
MSG_TYPE_TEXT = 0
MSG_TYPE_MARKDOWN = 2
MSG_TYPE_MEDIA = 7
MSG_TYPE_INPUT_NOTIFY = 6
# ---------------------------------------------------------------------------
# QQ Bot file media types
# ---------------------------------------------------------------------------
MEDIA_TYPE_IMAGE = 1
MEDIA_TYPE_VIDEO = 2
MEDIA_TYPE_VOICE = 3
MEDIA_TYPE_FILE = 4
-45
View File
@@ -1,45 +0,0 @@
"""AES-256-GCM utilities for QQBot scan-to-configure credential decryption."""
from __future__ import annotations
import base64
import os
def generate_bind_key() -> str:
"""Generate a 256-bit random AES key and return it as base64.
The key is passed to ``create_bind_task`` so the server can encrypt
the bot's *client_secret* before returning it. Only this CLI holds
the key, ensuring the secret never travels in plaintext.
"""
return base64.b64encode(os.urandom(32)).decode()
def decrypt_secret(encrypted_base64: str, key_base64: str) -> str:
"""Decrypt a base64-encoded AES-256-GCM ciphertext.
Ciphertext layout (after base64-decoding)::
IV (12 bytes) ciphertext (N bytes) AuthTag (16 bytes)
Args:
encrypted_base64: The ``bot_encrypt_secret`` value from
``poll_bind_result``.
key_base64: The base64 AES key generated by
:func:`generate_bind_key`.
Returns:
The decrypted *client_secret* as a UTF-8 string.
"""
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
key = base64.b64decode(key_base64)
raw = base64.b64decode(encrypted_base64)
iv = raw[:12]
ciphertext_with_tag = raw[12:] # AESGCM expects ciphertext + tag concatenated
aesgcm = AESGCM(key)
plaintext = aesgcm.decrypt(iv, ciphertext_with_tag, None)
return plaintext.decode("utf-8")
-124
View File
@@ -1,124 +0,0 @@
"""
QQBot scan-to-configure (QR code onboard) module.
Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to
generate a QR-code URL and poll for scan completion. On success the caller
receives the bot's *app_id*, *client_secret* (decrypted locally), and the
scanner's *user_openid* — enough to fully configure the QQBot gateway.
Reference: https://bot.q.qq.com/wiki/develop/api-v2/
"""
from __future__ import annotations
import logging
from enum import IntEnum
from typing import Tuple
from urllib.parse import quote
from .constants import (
ONBOARD_API_TIMEOUT,
ONBOARD_CREATE_PATH,
ONBOARD_POLL_PATH,
PORTAL_HOST,
QR_URL_TEMPLATE,
)
from .crypto import generate_bind_key
from .utils import get_api_headers
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Bind status
# ---------------------------------------------------------------------------
class BindStatus(IntEnum):
"""Status codes returned by ``poll_bind_result``."""
NONE = 0
PENDING = 1
COMPLETED = 2
EXPIRED = 3
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
async def create_bind_task(
timeout: float = ONBOARD_API_TIMEOUT,
) -> Tuple[str, str]:
"""Create a bind task and return *(task_id, aes_key_base64)*.
The AES key is generated locally and sent to the server so it can
encrypt the bot credentials before returning them.
Raises:
RuntimeError: If the API returns a non-zero ``retcode``.
"""
import httpx
url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}"
key = generate_bind_key()
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
resp = await client.post(url, json={"key": key}, headers=get_api_headers())
resp.raise_for_status()
data = resp.json()
if data.get("retcode") != 0:
raise RuntimeError(data.get("msg", "create_bind_task failed"))
task_id = data.get("data", {}).get("task_id")
if not task_id:
raise RuntimeError("create_bind_task: missing task_id in response")
logger.debug("create_bind_task ok: task_id=%s", task_id)
return task_id, key
async def poll_bind_result(
task_id: str,
timeout: float = ONBOARD_API_TIMEOUT,
) -> Tuple[BindStatus, str, str, str]:
"""Poll the bind result for *task_id*.
Returns:
A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``.
* ``bot_encrypt_secret`` is AES-256-GCM encrypted decrypt it with
:func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the
key from :func:`create_bind_task`.
* ``user_openid`` is the OpenID of the person who scanned the code
(available when ``status == COMPLETED``).
Raises:
RuntimeError: If the API returns a non-zero ``retcode``.
"""
import httpx
url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}"
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers())
resp.raise_for_status()
data = resp.json()
if data.get("retcode") != 0:
raise RuntimeError(data.get("msg", "poll_bind_result failed"))
d = data.get("data", {})
return (
BindStatus(d.get("status", 0)),
str(d.get("bot_appid", "")),
d.get("bot_encrypt_secret", ""),
d.get("user_openid", ""),
)
def build_connect_url(task_id: str) -> str:
"""Build the QR-code target URL for a given *task_id*."""
return QR_URL_TEMPLATE.format(task_id=quote(task_id))
-71
View File
@@ -1,71 +0,0 @@
"""QQBot shared utilities — User-Agent, HTTP helpers, config coercion."""
from __future__ import annotations
import platform
import sys
from typing import Any, Dict, List
from .constants import QQBOT_VERSION
# ---------------------------------------------------------------------------
# User-Agent
# ---------------------------------------------------------------------------
def _get_hermes_version() -> str:
"""Return the hermes-agent package version, or 'dev' if unavailable."""
try:
from importlib.metadata import version
return version("hermes-agent")
except Exception:
return "dev"
def build_user_agent() -> str:
"""Build a descriptive User-Agent string.
Format::
QQBotAdapter/<qqbot_version> (Python/<py_version>; <os>; Hermes/<hermes_version>)
Example::
QQBotAdapter/1.0.0 (Python/3.11.15; darwin; Hermes/0.9.0)
"""
py_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
os_name = platform.system().lower()
hermes_version = _get_hermes_version()
return f"QQBotAdapter/{QQBOT_VERSION} (Python/{py_version}; {os_name}; Hermes/{hermes_version})"
def get_api_headers() -> Dict[str, str]:
"""Return standard HTTP headers for QQBot API requests.
Includes ``Content-Type``, ``Accept``, and a dynamic ``User-Agent``.
``q.qq.com`` requires ``Accept: application/json`` without it,
the server returns a JavaScript anti-bot challenge page.
"""
return {
"Content-Type": "application/json",
"Accept": "application/json",
"User-Agent": build_user_agent(),
}
# ---------------------------------------------------------------------------
# Config helpers
# ---------------------------------------------------------------------------
def coerce_list(value: Any) -> List[str]:
"""Coerce config values into a trimmed string list.
Accepts comma-separated strings, lists, tuples, sets, or single values.
"""
if value is None:
return []
if isinstance(value, str):
return [item.strip() for item in value.split(",") if item.strip()]
if isinstance(value, (list, tuple, set)):
return [str(item).strip() for item in value if str(item).strip()]
return [str(value).strip()] if str(value).strip() else []
+24 -192
View File
@@ -18,7 +18,6 @@ import logging
import os
import random
import time
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional, Any
@@ -128,27 +127,6 @@ def _render_mentions(text: str, mentions: list) -> str:
return text
def _is_signal_service_id(value: str) -> bool:
"""Return True if *value* already looks like a Signal service identifier."""
if not value:
return False
if value.startswith("PNI:") or value.startswith("u:"):
return True
try:
uuid.UUID(value)
return True
except (ValueError, AttributeError, TypeError):
return False
def _looks_like_e164_number(value: str) -> bool:
"""Return True for a plausible E.164 phone number."""
if not value or not value.startswith("+"):
return False
digits = value[1:]
return digits.isdigit() and 7 <= len(digits) <= 15
def check_signal_requirements() -> bool:
"""Check if Signal is configured (has URL and account)."""
return bool(os.getenv("SIGNAL_HTTP_URL") and os.getenv("SIGNAL_ACCOUNT"))
@@ -182,14 +160,6 @@ class SignalAdapter(BasePlatformAdapter):
self._sse_task: Optional[asyncio.Task] = None
self._health_monitor_task: Optional[asyncio.Task] = None
self._typing_tasks: Dict[str, asyncio.Task] = {}
# Per-chat typing-indicator backoff. When signal-cli reports
# NETWORK_FAILURE (recipient offline / unroutable), base.py's
# _keep_typing refresh loop would otherwise hammer sendTyping every
# ~2s indefinitely, producing WARNING-level log spam and pointless
# RPC traffic. We track consecutive failures per chat and skip the
# RPC during a cooldown window instead.
self._typing_failures: Dict[str, int] = {}
self._typing_skip_until: Dict[str, float] = {}
self._running = False
self._last_sse_activity = 0.0
self._sse_response: Optional[httpx.Response] = None
@@ -201,12 +171,6 @@ class SignalAdapter(BasePlatformAdapter):
# in Note to Self / self-chat mode (mirrors WhatsApp recentlySentIds)
self._recent_sent_timestamps: set = set()
self._max_recent_timestamps = 50
# Signal increasingly exposes ACI/PNI UUIDs as stable recipient IDs.
# Keep a best-effort mapping so outbound sends can upgrade from a
# phone number to the corresponding UUID when signal-cli prefers it.
self._recipient_uuid_by_number: Dict[str, str] = {}
self._recipient_number_by_uuid: Dict[str, str] = {}
self._recipient_cache_lock = asyncio.Lock()
logger.info("Signal adapter initialized: url=%s account=%s groups=%s",
self.http_url, redact_phone(self.account),
@@ -223,40 +187,31 @@ class SignalAdapter(BasePlatformAdapter):
return False
# Acquire scoped lock to prevent duplicate Signal listeners for the same phone
lock_acquired = False
try:
if not self._acquire_platform_lock('signal-phone', self.account, 'Signal account'):
return False
lock_acquired = True
except Exception as e:
logger.warning("Signal: Could not acquire phone lock (non-fatal): %s", e)
self.client = httpx.AsyncClient(timeout=30.0)
# Health check — verify signal-cli daemon is reachable
try:
# Health check — verify signal-cli daemon is reachable
try:
resp = await self.client.get(f"{self.http_url}/api/v1/check", timeout=10.0)
if resp.status_code != 200:
logger.error("Signal: health check failed (status %d)", resp.status_code)
return False
except Exception as e:
logger.error("Signal: cannot reach signal-cli at %s: %s", self.http_url, e)
resp = await self.client.get(f"{self.http_url}/api/v1/check", timeout=10.0)
if resp.status_code != 200:
logger.error("Signal: health check failed (status %d)", resp.status_code)
return False
except Exception as e:
logger.error("Signal: cannot reach signal-cli at %s: %s", self.http_url, e)
return False
self._running = True
self._last_sse_activity = time.time()
self._sse_task = asyncio.create_task(self._sse_listener())
self._health_monitor_task = asyncio.create_task(self._health_monitor())
self._running = True
self._last_sse_activity = time.time()
self._sse_task = asyncio.create_task(self._sse_listener())
self._health_monitor_task = asyncio.create_task(self._health_monitor())
logger.info("Signal: connected to %s", self.http_url)
return True
finally:
if not self._running:
if self.client:
await self.client.aclose()
self.client = None
if lock_acquired:
self._release_platform_lock()
logger.info("Signal: connected to %s", self.http_url)
return True
async def disconnect(self) -> None:
"""Stop SSE listener and clean up."""
@@ -437,7 +392,6 @@ class SignalAdapter(BasePlatformAdapter):
)
sender_name = envelope_data.get("sourceName", "")
sender_uuid = envelope_data.get("sourceUuid", "")
self._remember_recipient_identifiers(sender, sender_uuid)
if not sender:
logger.debug("Signal: ignoring envelope with no sender")
@@ -556,64 +510,6 @@ class SignalAdapter(BasePlatformAdapter):
await self.handle_message(event)
def _remember_recipient_identifiers(self, number: Optional[str], service_id: Optional[str]) -> None:
"""Cache any number↔UUID mapping observed from Signal envelopes."""
if not number or not service_id or not _is_signal_service_id(service_id):
return
self._recipient_uuid_by_number[number] = service_id
self._recipient_number_by_uuid[service_id] = number
def _extract_contact_uuid(self, contact: Any, phone_number: str) -> Optional[str]:
"""Best-effort extraction of a Signal service ID from listContacts output."""
if not isinstance(contact, dict):
return None
number = contact.get("number")
recipient = contact.get("recipient")
service_id = contact.get("uuid") or contact.get("serviceId")
if not service_id:
profile = contact.get("profile")
if isinstance(profile, dict):
service_id = profile.get("serviceId") or profile.get("uuid")
if service_id and _is_signal_service_id(service_id):
matches_number = number == phone_number or recipient == phone_number
if matches_number:
return service_id
return None
async def _resolve_recipient(self, chat_id: str) -> str:
"""Return the preferred Signal recipient identifier for a direct chat."""
if (
not chat_id
or chat_id.startswith("group:")
or _is_signal_service_id(chat_id)
or not _looks_like_e164_number(chat_id)
):
return chat_id
cached = self._recipient_uuid_by_number.get(chat_id)
if cached:
return cached
async with self._recipient_cache_lock:
cached = self._recipient_uuid_by_number.get(chat_id)
if cached:
return cached
contacts = await self._rpc("listContacts", {
"account": self.account,
"allRecipients": True,
})
if isinstance(contacts, list):
for contact in contacts:
number = contact.get("number") if isinstance(contact, dict) else None
service_id = self._extract_contact_uuid(contact, chat_id)
if number and service_id:
self._remember_recipient_identifiers(number, service_id)
return self._recipient_uuid_by_number.get(chat_id, chat_id)
# ------------------------------------------------------------------
# Attachment Handling
# ------------------------------------------------------------------
@@ -652,22 +548,8 @@ class SignalAdapter(BasePlatformAdapter):
# JSON-RPC Communication
# ------------------------------------------------------------------
async def _rpc(
self,
method: str,
params: dict,
rpc_id: str = None,
*,
log_failures: bool = True,
) -> Any:
"""Send a JSON-RPC 2.0 request to signal-cli daemon.
When ``log_failures=False``, error and exception paths log at DEBUG
instead of WARNING used by the typing-indicator path to silence
repeated NETWORK_FAILURE spam for unreachable recipients while
still preserving visibility for the first occurrence and for
unrelated RPCs.
"""
async def _rpc(self, method: str, params: dict, rpc_id: str = None) -> Any:
"""Send a JSON-RPC 2.0 request to signal-cli daemon."""
if not self.client:
logger.warning("Signal: RPC called but client not connected")
return None
@@ -692,19 +574,13 @@ class SignalAdapter(BasePlatformAdapter):
data = resp.json()
if "error" in data:
if log_failures:
logger.warning("Signal RPC error (%s): %s", method, data["error"])
else:
logger.debug("Signal RPC error (%s): %s", method, data["error"])
logger.warning("Signal RPC error (%s): %s", method, data["error"])
return None
return data.get("result")
except Exception as e:
if log_failures:
logger.warning("Signal RPC %s failed: %s", method, e)
else:
logger.debug("Signal RPC %s failed: %s", method, e)
logger.warning("Signal RPC %s failed: %s", method, e)
return None
# ------------------------------------------------------------------
@@ -729,7 +605,7 @@ class SignalAdapter(BasePlatformAdapter):
if chat_id.startswith("group:"):
params["groupId"] = chat_id[6:]
else:
params["recipient"] = [await self._resolve_recipient(chat_id)]
params["recipient"] = [chat_id]
result = await self._rpc("send", params)
@@ -751,28 +627,7 @@ class SignalAdapter(BasePlatformAdapter):
self._recent_sent_timestamps.pop()
async def send_typing(self, chat_id: str, metadata=None) -> None:
"""Send a typing indicator.
base.py's ``_keep_typing`` refresh loop calls this every ~2s while
the agent is processing. If signal-cli returns NETWORK_FAILURE for
this recipient (offline, unroutable, group membership lost, etc.)
the unmitigated behaviour is: a WARNING log every 2 seconds for as
long as the agent keeps running. Instead we:
- silence the WARNING after the first consecutive failure (subsequent
attempts log at DEBUG) so transport issues are still visible once
but don't flood the log,
- skip the RPC entirely during an exponential cooldown window once
three consecutive failures have happened, so we stop hammering
signal-cli with requests it can't deliver.
A successful sendTyping clears the counters.
"""
now = time.monotonic()
skip_until = self._typing_skip_until.get(chat_id, 0.0)
if now < skip_until:
return
"""Send a typing indicator."""
params: Dict[str, Any] = {
"account": self.account,
}
@@ -780,28 +635,9 @@ class SignalAdapter(BasePlatformAdapter):
if chat_id.startswith("group:"):
params["groupId"] = chat_id[6:]
else:
params["recipient"] = [await self._resolve_recipient(chat_id)]
params["recipient"] = [chat_id]
fails = self._typing_failures.get(chat_id, 0)
result = await self._rpc(
"sendTyping",
params,
rpc_id="typing",
log_failures=(fails == 0),
)
if result is None:
fails += 1
self._typing_failures[chat_id] = fails
# After 3 consecutive failures, back off exponentially (16s,
# 32s, 60s cap) to stop spamming signal-cli for a recipient
# that clearly isn't reachable right now.
if fails >= 3:
backoff = min(60.0, 16.0 * (2 ** (fails - 3)))
self._typing_skip_until[chat_id] = now + backoff
else:
self._typing_failures.pop(chat_id, None)
self._typing_skip_until.pop(chat_id, None)
await self._rpc("sendTyping", params, rpc_id="typing")
async def send_image(
self,
@@ -841,7 +677,7 @@ class SignalAdapter(BasePlatformAdapter):
if chat_id.startswith("group:"):
params["groupId"] = chat_id[6:]
else:
params["recipient"] = [await self._resolve_recipient(chat_id)]
params["recipient"] = [chat_id]
result = await self._rpc("send", params)
if result is not None:
@@ -880,7 +716,7 @@ class SignalAdapter(BasePlatformAdapter):
if chat_id.startswith("group:"):
params["groupId"] = chat_id[6:]
else:
params["recipient"] = [await self._resolve_recipient(chat_id)]
params["recipient"] = [chat_id]
result = await self._rpc("send", params)
if result is not None:
@@ -953,10 +789,6 @@ class SignalAdapter(BasePlatformAdapter):
await task
except asyncio.CancelledError:
pass
# Reset per-chat typing backoff state so the next agent turn starts
# fresh rather than inheriting a cooldown from a prior conversation.
self._typing_failures.pop(chat_id, None)
self._typing_skip_until.pop(chat_id, None)
async def stop_typing(self, chat_id: str) -> None:
"""Public interface for stopping typing — called by base adapter's
+11 -35
View File
@@ -150,11 +150,9 @@ class SlackAdapter(BasePlatformAdapter):
except Exception as e:
logger.warning("[Slack] Failed to read %s: %s", tokens_file, e)
lock_acquired = False
try:
if not self._acquire_platform_lock('slack-app-token', app_token, 'Slack app token'):
return False
lock_acquired = True
# First token is the primary — used for AsyncApp / Socket Mode
primary_token = bot_tokens[0]
@@ -230,9 +228,6 @@ class SlackAdapter(BasePlatformAdapter):
except Exception as e: # pragma: no cover - defensive logging
logger.error("[Slack] Connection failed: %s", e, exc_info=True)
return False
finally:
if lock_acquired and not self._running:
self._release_platform_lock()
async def disconnect(self) -> None:
"""Disconnect from Slack."""
@@ -321,8 +316,6 @@ class SlackAdapter(BasePlatformAdapter):
chat_id: str,
message_id: str,
content: str,
*,
finalize: bool = False,
) -> SendResult:
"""Edit a previously sent Slack message."""
if not self._app:
@@ -373,20 +366,6 @@ class SlackAdapter(BasePlatformAdapter):
# in an assistant-enabled context. Falls back to reactions.
logger.debug("[Slack] assistant.threads.setStatus failed: %s", e)
def _dm_top_level_threads_as_sessions(self) -> bool:
"""Whether top-level Slack DMs get per-message session threads.
Defaults to ``True`` so each visible DM reply thread is isolated as its
own Hermes session matching the per-thread behavior channels already
have. Set ``platforms.slack.extra.dm_top_level_threads_as_sessions``
to ``false`` in config.yaml to revert to the legacy behavior where all
top-level DMs share one continuous session.
"""
raw = self.config.extra.get("dm_top_level_threads_as_sessions")
if raw is None:
return True # default: each DM thread is its own session
return str(raw).strip().lower() in ("1", "true", "yes", "on")
def _resolve_thread_ts(
self,
reply_to: Optional[str] = None,
@@ -1017,14 +996,10 @@ class SlackAdapter(BasePlatformAdapter):
# Build thread_ts for session keying.
# In channels: fall back to ts so each top-level @mention starts a
# new thread/session (the bot always replies in a thread).
# In DMs: fall back to ts so each top-level DM reply thread gets
# its own session key (matching channel behavior). Set
# dm_top_level_threads_as_sessions: false in config to revert to
# legacy single-session-per-DM-channel behavior.
# In DMs: only use the real thread_ts — top-level DMs should share
# one continuous session, threaded DMs get their own session.
if is_dm:
thread_ts = event.get("thread_ts") or assistant_meta.get("thread_ts")
if not thread_ts and self._dm_top_level_threads_as_sessions():
thread_ts = ts
thread_ts = event.get("thread_ts") or assistant_meta.get("thread_ts") # None for top-level DMs
else:
thread_ts = event.get("thread_ts") or ts # ts fallback for channels
@@ -1192,12 +1167,6 @@ class SlackAdapter(BasePlatformAdapter):
thread_id=thread_ts,
)
# Per-channel ephemeral prompt
from gateway.platforms.base import resolve_channel_prompt
_channel_prompt = resolve_channel_prompt(
self.config.extra, channel_id, None,
)
msg_event = MessageEvent(
text=text,
message_type=msg_type,
@@ -1207,7 +1176,6 @@ class SlackAdapter(BasePlatformAdapter):
media_urls=media_urls,
media_types=media_types,
reply_to_message_id=thread_ts if thread_ts != ts else None,
channel_prompt=_channel_prompt,
)
# Only react when bot is directly addressed (DM or @mention).
@@ -1600,9 +1568,11 @@ class SlackAdapter(BasePlatformAdapter):
async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str:
"""Download a Slack file using the bot token for auth, with retry."""
import asyncio
import httpx
bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
last_exc = None
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
for attempt in range(3):
@@ -1632,6 +1602,7 @@ class SlackAdapter(BasePlatformAdapter):
from gateway.platforms.base import cache_image_from_bytes
return cache_image_from_bytes(response.content, ext)
except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
last_exc = exc
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
raise
if attempt < 2:
@@ -1640,12 +1611,15 @@ class SlackAdapter(BasePlatformAdapter):
await asyncio.sleep(1.5 * (attempt + 1))
continue
raise
raise last_exc
async def _download_slack_file_bytes(self, url: str, team_id: str = "") -> bytes:
"""Download a Slack file and return raw bytes, with retry."""
import asyncio
import httpx
bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
last_exc = None
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
for attempt in range(3):
@@ -1657,6 +1631,7 @@ class SlackAdapter(BasePlatformAdapter):
response.raise_for_status()
return response.content
except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
last_exc = exc
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
raise
if attempt < 2:
@@ -1665,6 +1640,7 @@ class SlackAdapter(BasePlatformAdapter):
await asyncio.sleep(1.5 * (attempt + 1))
continue
raise
raise last_exc
# ── Channel mention gating ─────────────────────────────────────────────
+71 -355
View File
@@ -11,8 +11,6 @@ import asyncio
import json
import logging
import os
import tempfile
import html as _html
import re
from typing import Dict, List, Optional, Any
@@ -20,10 +18,6 @@ logger = logging.getLogger(__name__)
try:
from telegram import Update, Bot, Message, InlineKeyboardButton, InlineKeyboardMarkup
try:
from telegram import LinkPreviewOptions
except ImportError:
LinkPreviewOptions = None
from telegram.ext import (
Application,
CommandHandler,
@@ -42,7 +36,6 @@ except ImportError:
Message = Any
InlineKeyboardButton = Any
InlineKeyboardMarkup = Any
LinkPreviewOptions = None
Application = Any
CommandHandler = Any
CallbackQueryHandler = Any
@@ -71,10 +64,8 @@ from gateway.platforms.base import (
SendResult,
cache_image_from_bytes,
cache_audio_from_bytes,
cache_video_from_bytes,
cache_document_from_bytes,
resolve_proxy_url,
SUPPORTED_VIDEO_TYPES,
SUPPORTED_DOCUMENT_TYPES,
utf16_len,
_prefix_within_utf16_limit,
@@ -121,84 +112,6 @@ def _strip_mdv2(text: str) -> str:
return cleaned
# ---------------------------------------------------------------------------
# Markdown table → code block conversion
# ---------------------------------------------------------------------------
# Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
# so pipe tables render as noisy backslash-pipe text with no alignment.
# Wrapping the table in a fenced code block makes Telegram render it as
# monospace preformatted text with columns intact.
# Matches a GFM table delimiter row: optional outer pipes, cells containing
# only dashes (with optional leading/trailing colons for alignment) separated
# by '|'. Requires at least one internal '|' so lone '---' horizontal rules
# are NOT matched.
_TABLE_SEPARATOR_RE = re.compile(
r'^\s*\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*){1,}\|?\s*$'
)
def _is_table_row(line: str) -> bool:
"""Return True if *line* could plausibly be a table data row."""
stripped = line.strip()
return bool(stripped) and '|' in stripped
def _wrap_markdown_tables(text: str) -> str:
"""Wrap GFM-style pipe tables in ``` fences so Telegram renders them.
Detected by a row containing '|' immediately followed by a delimiter
row matching :data:`_TABLE_SEPARATOR_RE`. Subsequent pipe-containing
non-blank lines are consumed as the table body and included in the
wrapped block. Tables inside existing fenced code blocks are left
alone.
"""
if '|' not in text or '-' not in text:
return text
lines = text.split('\n')
out: list[str] = []
in_fence = False
i = 0
while i < len(lines):
line = lines[i]
stripped = line.lstrip()
# Track existing fenced code blocks — never touch content inside.
if stripped.startswith('```'):
in_fence = not in_fence
out.append(line)
i += 1
continue
if in_fence:
out.append(line)
i += 1
continue
# Look for a header row (contains '|') immediately followed by a
# delimiter row.
if (
'|' in line
and i + 1 < len(lines)
and _TABLE_SEPARATOR_RE.match(lines[i + 1])
):
table_block = [line, lines[i + 1]]
j = i + 2
while j < len(lines) and _is_table_row(lines[j]):
table_block.append(lines[j])
j += 1
out.append('```')
out.extend(table_block)
out.append('```')
i = j
continue
out.append(line)
i += 1
return '\n'.join(out)
class TelegramAdapter(BasePlatformAdapter):
"""
Telegram bot adapter.
@@ -216,7 +129,6 @@ class TelegramAdapter(BasePlatformAdapter):
# When a chunk is near this limit, a continuation is almost certain.
_SPLIT_THRESHOLD = 4000
MEDIA_GROUP_WAIT_SECONDS = 0.8
_GENERAL_TOPIC_THREAD_ID = "1"
def __init__(self, config: PlatformConfig):
super().__init__(config, Platform.TELEGRAM)
@@ -225,7 +137,6 @@ class TelegramAdapter(BasePlatformAdapter):
self._webhook_mode: bool = False
self._mention_patterns = self._compile_mention_patterns()
self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
self._disable_link_previews: bool = self._coerce_bool_extra("disable_link_previews", False)
# Buffer rapid/album photo updates so Telegram image bursts are handled
# as a single MessageEvent instead of self-interrupting multiple turns.
self._media_batch_delay_seconds = float(os.getenv("HERMES_TELEGRAM_MEDIA_BATCH_DELAY_SECONDS", "0.8"))
@@ -252,38 +163,6 @@ class TelegramAdapter(BasePlatformAdapter):
# Approval button state: message_id → session_key
self._approval_state: Dict[int, str] = {}
@staticmethod
def _is_callback_user_authorized(user_id: str) -> bool:
"""Return whether a Telegram inline-button caller may perform gated actions."""
allowed_csv = os.getenv("TELEGRAM_ALLOWED_USERS", "").strip()
if not allowed_csv:
return True
allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
return "*" in allowed_ids or user_id in allowed_ids
@classmethod
def _metadata_thread_id(cls, metadata: Optional[Dict[str, Any]]) -> Optional[str]:
if not metadata:
return None
thread_id = metadata.get("thread_id") or metadata.get("message_thread_id")
return str(thread_id) if thread_id is not None else None
@classmethod
def _message_thread_id_for_send(cls, thread_id: Optional[str]) -> Optional[int]:
if not thread_id or str(thread_id) == cls._GENERAL_TOPIC_THREAD_ID:
return None
return int(thread_id)
@classmethod
def _message_thread_id_for_typing(cls, thread_id: Optional[str]) -> Optional[int]:
if not thread_id:
return None
return int(thread_id)
@staticmethod
def _is_thread_not_found_error(error: Exception) -> bool:
return "thread not found" in str(error).lower()
def _fallback_ips(self) -> list[str]:
"""Return validated fallback IPs from config (populated by _apply_env_overrides)."""
configured = self.config.extra.get("fallback_ips", []) if getattr(self.config, "extra", None) else []
@@ -314,26 +193,6 @@ class TelegramAdapter(BasePlatformAdapter):
pass
return isinstance(error, OSError)
def _coerce_bool_extra(self, key: str, default: bool = False) -> bool:
value = self.config.extra.get(key) if getattr(self.config, "extra", None) else None
if value is None:
return default
if isinstance(value, str):
lowered = value.strip().lower()
if lowered in ("true", "1", "yes", "on"):
return True
if lowered in ("false", "0", "no", "off"):
return False
return default
return bool(value)
def _link_preview_kwargs(self) -> Dict[str, Any]:
if not getattr(self, "_disable_link_previews", False):
return {}
if LinkPreviewOptions is not None:
return {"link_preview_options": LinkPreviewOptions(is_disabled=True)}
return {"disable_web_page_preview": True}
async def _handle_polling_network_error(self, error: Exception) -> None:
"""Reconnect polling after a transient network interruption.
@@ -496,13 +355,6 @@ class TelegramAdapter(BasePlatformAdapter):
"[%s] DM topic '%s' already exists in chat %s (will be mapped from incoming messages)",
self.name, name, chat_id,
)
elif "not a forum" in error_text or "forums_disabled" in error_text:
logger.warning(
"[%s] Cannot create DM topic '%s' in chat %s: Topics mode is not enabled. "
"The user must open the DM with this bot in Telegram, tap the bot name "
"at the top, and enable 'Topics' in chat settings before topics can be created.",
self.name, name, chat_id,
)
else:
logger.warning(
"[%s] Failed to create DM topic '%s' in chat %s: %s",
@@ -544,23 +396,8 @@ class TelegramAdapter(BasePlatformAdapter):
break
if changed:
fd, tmp_path = tempfile.mkstemp(
dir=str(config_path.parent),
suffix=".tmp",
prefix=".config_",
)
try:
with os.fdopen(fd, "w", encoding="utf-8") as f:
_yaml.dump(config, f, default_flow_style=False, sort_keys=False)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, config_path)
except BaseException:
try:
os.unlink(tmp_path)
except OSError:
pass
raise
with open(config_path, "w") as f:
_yaml.dump(config, f, default_flow_style=False, sort_keys=False)
logger.info(
"[%s] Persisted thread_id=%s for topic '%s' in config.yaml",
self.name, thread_id, topic_name,
@@ -703,7 +540,7 @@ class TelegramAdapter(BasePlatformAdapter):
"write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
}
proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
proxy_url = resolve_proxy_url()
disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
fallback_ips = self._fallback_ips()
if not fallback_ips:
@@ -769,14 +606,14 @@ class TelegramAdapter(BasePlatformAdapter):
from telegram.error import NetworkError, TimedOut
except ImportError:
NetworkError = TimedOut = OSError # type: ignore[misc,assignment]
_max_connect = 8
_max_connect = 3
for _attempt in range(_max_connect):
try:
await self._app.initialize()
break
except (NetworkError, TimedOut, OSError) as init_err:
if _attempt < _max_connect - 1:
wait = min(2 ** _attempt, 15)
wait = 2 ** _attempt
logger.warning(
"[%s] Connect attempt %d/%d failed: %s — retrying in %ds",
self.name, _attempt + 1, _max_connect, init_err, wait,
@@ -794,28 +631,8 @@ class TelegramAdapter(BasePlatformAdapter):
# Telegram pushes updates to our HTTP endpoint. This
# enables cloud platforms (Fly.io, Railway) to auto-wake
# suspended machines on inbound HTTP traffic.
#
# SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it,
# python-telegram-bot passes secret_token=None and the
# webhook endpoint accepts any HTTP POST — attackers can
# inject forged updates as if from Telegram. Refuse to
# start rather than silently run in fail-open mode.
# See GHSA-3vpc-7q5r-276h.
webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443"))
webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip()
if not webhook_secret:
raise RuntimeError(
"TELEGRAM_WEBHOOK_SECRET is required when "
"TELEGRAM_WEBHOOK_URL is set. Without it, the "
"webhook endpoint accepts forged updates from "
"anyone who can reach it — see "
"https://github.com/NousResearch/hermes-agent/"
"security/advisories/GHSA-3vpc-7q5r-276h.\n\n"
"Generate a secret and set it in your .env:\n"
" export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n"
"Then register it with Telegram when setting the "
"webhook via setWebhook's secret_token parameter."
)
webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None
from urllib.parse import urlparse
webhook_path = urlparse(webhook_url).path or "/telegram"
@@ -997,7 +814,7 @@ class TelegramAdapter(BasePlatformAdapter):
]
message_ids = []
thread_id = self._metadata_thread_id(metadata)
thread_id = metadata.get("thread_id") if metadata else None
try:
from telegram.error import NetworkError as _NetErr
@@ -1017,7 +834,7 @@ class TelegramAdapter(BasePlatformAdapter):
for i, chunk in enumerate(chunks):
should_thread = self._should_thread_reply(reply_to, i)
reply_to_id = int(reply_to) if should_thread else None
effective_thread_id = self._message_thread_id_for_send(thread_id)
effective_thread_id = int(thread_id) if thread_id else None
msg = None
for _send_attempt in range(3):
@@ -1030,7 +847,6 @@ class TelegramAdapter(BasePlatformAdapter):
parse_mode=ParseMode.MARKDOWN_V2,
reply_to_message_id=reply_to_id,
message_thread_id=effective_thread_id,
**self._link_preview_kwargs(),
)
except Exception as md_error:
# Markdown parsing failed, try plain text
@@ -1043,7 +859,6 @@ class TelegramAdapter(BasePlatformAdapter):
parse_mode=None,
reply_to_message_id=reply_to_id,
message_thread_id=effective_thread_id,
**self._link_preview_kwargs(),
)
else:
raise
@@ -1054,7 +869,8 @@ class TelegramAdapter(BasePlatformAdapter):
# (not transient network issues). Detect and handle
# specific cases instead of blindly retrying.
if _BadReq and isinstance(send_err, _BadReq):
if self._is_thread_not_found_error(send_err) and effective_thread_id is not None:
err_lower = str(send_err).lower()
if "thread not found" in err_lower and effective_thread_id is not None:
# Thread doesn't exist — retry without
# message_thread_id so the message still
# reaches the chat.
@@ -1064,7 +880,6 @@ class TelegramAdapter(BasePlatformAdapter):
)
effective_thread_id = None
continue
err_lower = str(send_err).lower()
if "message to be replied not found" in err_lower and reply_to_id is not None:
# Original message was deleted before we
# could reply — clear reply target and retry
@@ -1126,8 +941,6 @@ class TelegramAdapter(BasePlatformAdapter):
chat_id: str,
message_id: str,
content: str,
*,
finalize: bool = False,
) -> SendResult:
"""Edit a previously sent Telegram message."""
if not self._bot:
@@ -1233,7 +1046,6 @@ class TelegramAdapter(BasePlatformAdapter):
text=text,
parse_mode=ParseMode.MARKDOWN,
reply_markup=keyboard,
**self._link_preview_kwargs(),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@@ -1256,13 +1068,15 @@ class TelegramAdapter(BasePlatformAdapter):
try:
cmd_preview = command[:3800] + "..." if len(command) > 3800 else command
text = (
f"⚠️ <b>Command Approval Required</b>\n\n"
f"<pre>{_html.escape(cmd_preview)}</pre>\n\n"
f"Reason: {_html.escape(description)}"
f"⚠️ *Command Approval Required*\n\n"
f"`{cmd_preview}`\n\n"
f"Reason: {description}"
)
# Resolve thread context for thread replies
thread_id = self._metadata_thread_id(metadata)
thread_id = None
if metadata:
thread_id = metadata.get("thread_id") or metadata.get("message_thread_id")
# We'll use the message_id as part of callback_data to look up session_key
# Send a placeholder first, then update — or use a counter.
@@ -1286,13 +1100,11 @@ class TelegramAdapter(BasePlatformAdapter):
kwargs: Dict[str, Any] = {
"chat_id": int(chat_id),
"text": text,
"parse_mode": ParseMode.HTML,
"parse_mode": ParseMode.MARKDOWN,
"reply_markup": keyboard,
**self._link_preview_kwargs(),
}
message_thread_id = self._message_thread_id_for_send(thread_id)
if message_thread_id is not None:
kwargs["message_thread_id"] = message_thread_id
if thread_id:
kwargs["message_thread_id"] = int(thread_id)
msg = await self._bot.send_message(**kwargs)
@@ -1360,7 +1172,6 @@ class TelegramAdapter(BasePlatformAdapter):
parse_mode=ParseMode.MARKDOWN,
reply_markup=keyboard,
message_thread_id=int(thread_id) if thread_id else None,
**self._link_preview_kwargs(),
)
# Store picker state keyed by chat_id
@@ -1629,9 +1440,12 @@ class TelegramAdapter(BasePlatformAdapter):
# Only authorized users may click approval buttons.
caller_id = str(getattr(query.from_user, "id", ""))
if not self._is_callback_user_authorized(caller_id):
await query.answer(text="⛔ You are not authorized to approve commands.")
return
allowed_csv = os.getenv("TELEGRAM_ALLOWED_USERS", "").strip()
if allowed_csv:
allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
if "*" not in allowed_ids and caller_id not in allowed_ids:
await query.answer(text="⛔ You are not authorized to approve commands.")
return
session_key = self._approval_state.pop(approval_id, None)
if not session_key:
@@ -1676,10 +1490,6 @@ class TelegramAdapter(BasePlatformAdapter):
if not data.startswith("update_prompt:"):
return
answer = data.split(":", 1)[1] # "y" or "n"
caller_id = str(getattr(query.from_user, "id", ""))
if not self._is_callback_user_authorized(caller_id):
await query.answer(text="⛔ You are not authorized to answer update prompts.")
return
await query.answer(text=f"Sent '{answer}' to the update process.")
# Edit the message to show the choice and remove buttons
label = "Yes" if answer == "y" else "No"
@@ -1704,21 +1514,6 @@ class TelegramAdapter(BasePlatformAdapter):
except Exception as exc:
logger.error("Failed to write update response from callback: %s", exc)
def _missing_media_path_error(self, label: str, path: str) -> str:
"""Build an actionable file-not-found error for gateway MEDIA delivery.
Paths like /workspace/... or /output/... often only exist inside the
Docker sandbox, while the gateway process runs on the host.
"""
error = f"{label} file not found: {path}"
if path.startswith(("/workspace/", "/output/", "/outputs/")):
error += (
" (path may only exist inside the Docker sandbox. "
"Bind-mount a host directory and emit the host-visible "
"path in MEDIA: for gateway file delivery.)"
)
return error
async def send_voice(
self,
chat_id: str,
@@ -1733,29 +1528,30 @@ class TelegramAdapter(BasePlatformAdapter):
return SendResult(success=False, error="Not connected")
try:
import os
if not os.path.exists(audio_path):
return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
return SendResult(success=False, error=f"Audio file not found: {audio_path}")
with open(audio_path, "rb") as audio_file:
# .ogg files -> send as voice (round playable bubble)
if audio_path.endswith((".ogg", ".opus")):
_voice_thread = self._metadata_thread_id(metadata)
_voice_thread = metadata.get("thread_id") if metadata else None
msg = await self._bot.send_voice(
chat_id=int(chat_id),
voice=audio_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_voice_thread),
message_thread_id=int(_voice_thread) if _voice_thread else None,
)
else:
# .mp3 and others -> send as audio file
_audio_thread = self._metadata_thread_id(metadata)
_audio_thread = metadata.get("thread_id") if metadata else None
msg = await self._bot.send_audio(
chat_id=int(chat_id),
audio=audio_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_audio_thread),
message_thread_id=int(_audio_thread) if _audio_thread else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@@ -1781,17 +1577,18 @@ class TelegramAdapter(BasePlatformAdapter):
return SendResult(success=False, error="Not connected")
try:
import os
if not os.path.exists(image_path):
return SendResult(success=False, error=self._missing_media_path_error("Image", image_path))
return SendResult(success=False, error=f"Image file not found: {image_path}")
_thread = self._metadata_thread_id(metadata)
_thread = metadata.get("thread_id") if metadata else None
with open(image_path, "rb") as image_file:
msg = await self._bot.send_photo(
chat_id=int(chat_id),
photo=image_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_thread),
message_thread_id=int(_thread) if _thread else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@@ -1819,10 +1616,10 @@ class TelegramAdapter(BasePlatformAdapter):
try:
if not os.path.exists(file_path):
return SendResult(success=False, error=self._missing_media_path_error("File", file_path))
return SendResult(success=False, error=f"File not found: {file_path}")
display_name = file_name or os.path.basename(file_path)
_thread = self._metadata_thread_id(metadata)
_thread = metadata.get("thread_id") if metadata else None
with open(file_path, "rb") as f:
msg = await self._bot.send_document(
@@ -1831,7 +1628,7 @@ class TelegramAdapter(BasePlatformAdapter):
filename=display_name,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_thread),
message_thread_id=int(_thread) if _thread else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@@ -1853,16 +1650,16 @@ class TelegramAdapter(BasePlatformAdapter):
try:
if not os.path.exists(video_path):
return SendResult(success=False, error=self._missing_media_path_error("Video", video_path))
return SendResult(success=False, error=f"Video file not found: {video_path}")
_thread = self._metadata_thread_id(metadata)
_thread = metadata.get("thread_id") if metadata else None
with open(video_path, "rb") as f:
msg = await self._bot.send_video(
chat_id=int(chat_id),
video=f,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_thread),
message_thread_id=int(_thread) if _thread else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@@ -1892,13 +1689,13 @@ class TelegramAdapter(BasePlatformAdapter):
try:
# Telegram can send photos directly from URLs (up to ~5MB)
_photo_thread = self._metadata_thread_id(metadata)
_photo_thread = metadata.get("thread_id") if metadata else None
msg = await self._bot.send_photo(
chat_id=int(chat_id),
photo=image_url,
caption=caption[:1024] if caption else None, # Telegram caption limit
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_photo_thread),
message_thread_id=int(_photo_thread) if _photo_thread else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@@ -1921,7 +1718,6 @@ class TelegramAdapter(BasePlatformAdapter):
photo=image_data,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_photo_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e2:
@@ -1947,13 +1743,13 @@ class TelegramAdapter(BasePlatformAdapter):
return SendResult(success=False, error="Not connected")
try:
_anim_thread = self._metadata_thread_id(metadata)
_anim_thread = metadata.get("thread_id") if metadata else None
msg = await self._bot.send_animation(
chat_id=int(chat_id),
animation=animation_url,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_anim_thread),
message_thread_id=int(_anim_thread) if _anim_thread else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@@ -1970,23 +1766,12 @@ class TelegramAdapter(BasePlatformAdapter):
"""Send typing indicator."""
if self._bot:
try:
_typing_thread = self._metadata_thread_id(metadata)
message_thread_id = self._message_thread_id_for_typing(_typing_thread)
try:
await self._bot.send_chat_action(
chat_id=int(chat_id),
action="typing",
message_thread_id=message_thread_id,
)
except Exception as e:
if message_thread_id is not None and self._is_thread_not_found_error(e):
await self._bot.send_chat_action(
chat_id=int(chat_id),
action="typing",
message_thread_id=None,
)
else:
raise
_typing_thread = metadata.get("thread_id") if metadata else None
await self._bot.send_chat_action(
chat_id=int(chat_id),
action="typing",
message_thread_id=int(_typing_thread) if _typing_thread else None,
)
except Exception as e:
# Typing failures are non-fatal; log at debug level only.
logger.debug(
@@ -2054,12 +1839,6 @@ class TelegramAdapter(BasePlatformAdapter):
text = content
# 0) Pre-wrap GFM-style pipe tables in ``` fences. Telegram can't
# render tables natively, but fenced code blocks render as
# monospace preformatted text with columns intact. The wrapped
# tables then flow through step (1) below as protected regions.
text = _wrap_markdown_tables(text)
# 1) Protect fenced code blocks (``` ... ```)
# Per MarkdownV2 spec, \ and ` inside pre/code must be escaped.
def _protect_fenced(m):
@@ -2093,7 +1872,7 @@ class TelegramAdapter(BasePlatformAdapter):
url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
return _ph(f'[{display}]({url})')
text = re.sub(r'\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)', _convert_link, text)
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
# 4) Convert markdown headers (## Title) → bold *Title*
def _convert_header(m):
@@ -2301,27 +2080,22 @@ class TelegramAdapter(BasePlatformAdapter):
bot_username = (getattr(self._bot, "username", None) or "").lstrip("@").lower()
bot_id = getattr(self._bot, "id", None)
expected = f"@{bot_username}" if bot_username else None
def _iter_sources():
yield getattr(message, "text", None) or "", getattr(message, "entities", None) or []
yield getattr(message, "caption", None) or "", getattr(message, "caption_entities", None) or []
# Telegram parses mentions server-side and emits MessageEntity objects
# (type=mention for @username, type=text_mention for @FirstName targeting
# a user without a public username). Only those entities are authoritative —
# raw substring matches like "foo@hermes_bot.example" are not mentions
# (bug #12545). Entities also correctly handle @handles inside URLs, code
# blocks, and quoted text, where a regex scan would over-match.
for source_text, entities in _iter_sources():
if bot_username and f"@{bot_username}" in source_text.lower():
return True
for entity in entities:
entity_type = str(getattr(entity, "type", "")).split(".")[-1].lower()
if entity_type == "mention" and expected:
if entity_type == "mention" and bot_username:
offset = int(getattr(entity, "offset", -1))
length = int(getattr(entity, "length", 0))
if offset < 0 or length <= 0:
continue
if source_text[offset:offset + length].strip().lower() == expected:
if source_text[offset:offset + length].strip().lower() == f"@{bot_username}":
return True
elif entity_type == "text_mention":
user = getattr(entity, "user", None)
@@ -2353,16 +2127,10 @@ class TelegramAdapter(BasePlatformAdapter):
DMs remain unrestricted. Group/supergroup messages are accepted when:
- the chat is explicitly allowlisted in ``free_response_chats``
- ``require_mention`` is disabled
- the message is a command
- the message replies to the bot
- the bot is @mentioned
- the text/caption matches a configured regex wake-word pattern
When ``require_mention`` is enabled, slash commands are not given
special treatment they must pass the same mention/reply checks
as any other group message. Users can still trigger commands via
the Telegram bot menu (``/command@botname``) or by explicitly
mentioning the bot (``@botname /command``), both of which are
recognised as mentions by :meth:`_message_mentions_bot`.
"""
if not self._is_group_chat(message):
return True
@@ -2377,6 +2145,8 @@ class TelegramAdapter(BasePlatformAdapter):
return True
if not self._telegram_require_mention():
return True
if is_command:
return True
if self._is_reply_to_bot(message):
return True
if self._message_mentions_bot(message):
@@ -2395,7 +2165,7 @@ class TelegramAdapter(BasePlatformAdapter):
if not self._should_process_message(update.message):
return
event = self._build_message_event(update.message, MessageType.TEXT, update_id=update.update_id)
event = self._build_message_event(update.message, MessageType.TEXT)
event.text = self._clean_bot_trigger_text(event.text)
self._enqueue_text_event(event)
@@ -2406,7 +2176,7 @@ class TelegramAdapter(BasePlatformAdapter):
if not self._should_process_message(update.message, is_command=True):
return
event = self._build_message_event(update.message, MessageType.COMMAND, update_id=update.update_id)
event = self._build_message_event(update.message, MessageType.COMMAND)
await self.handle_message(event)
async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
@@ -2442,7 +2212,7 @@ class TelegramAdapter(BasePlatformAdapter):
parts.append(f"Map: https://www.google.com/maps/search/?api=1&query={lat},{lon}")
parts.append("Ask what they'd like to find nearby (restaurants, cafes, etc.) and any preferences.")
event = self._build_message_event(msg, MessageType.LOCATION, update_id=update.update_id)
event = self._build_message_event(msg, MessageType.LOCATION)
event.text = "\n".join(parts)
await self.handle_message(event)
@@ -2593,7 +2363,7 @@ class TelegramAdapter(BasePlatformAdapter):
else:
msg_type = MessageType.DOCUMENT
event = self._build_message_event(msg, msg_type, update_id=update.update_id)
event = self._build_message_event(msg, msg_type)
# Add caption as text
if msg.caption:
@@ -2659,23 +2429,6 @@ class TelegramAdapter(BasePlatformAdapter):
except Exception as e:
logger.warning("[Telegram] Failed to cache audio: %s", e, exc_info=True)
elif msg.video:
try:
file_obj = await msg.video.get_file()
video_bytes = await file_obj.download_as_bytearray()
ext = ".mp4"
if getattr(file_obj, "file_path", None):
for candidate in SUPPORTED_VIDEO_TYPES:
if file_obj.file_path.lower().endswith(candidate):
ext = candidate
break
cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext)
event.media_urls = [cached_path]
event.media_types = [SUPPORTED_VIDEO_TYPES.get(ext, "video/mp4")]
logger.info("[Telegram] Cached user video at %s", cached_path)
except Exception as e:
logger.warning("[Telegram] Failed to cache video: %s", e, exc_info=True)
# Download document files to cache for agent processing
elif msg.document:
doc = msg.document
@@ -2692,21 +2445,6 @@ class TelegramAdapter(BasePlatformAdapter):
mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
ext = mime_to_ext.get(doc.mime_type, "")
if not ext and doc.mime_type:
video_mime_to_ext = {v: k for k, v in SUPPORTED_VIDEO_TYPES.items()}
ext = video_mime_to_ext.get(doc.mime_type, "")
if ext in SUPPORTED_VIDEO_TYPES:
file_obj = await doc.get_file()
video_bytes = await file_obj.download_as_bytearray()
cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext)
event.media_urls = [cached_path]
event.media_types = [SUPPORTED_VIDEO_TYPES[ext]]
event.message_type = MessageType.VIDEO
logger.info("[Telegram] Cached user video document at %s", cached_path)
await self.handle_message(event)
return
# Check if supported
if ext not in SUPPORTED_DOCUMENT_TYPES:
supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))
@@ -2845,11 +2583,13 @@ class TelegramAdapter(BasePlatformAdapter):
logger.info("[Telegram] Analyzing sticker at %s", cached_path)
from tools.vision_tools import vision_analyze_tool
import json as _json
result_json = await vision_analyze_tool(
image_url=cached_path,
user_prompt=STICKER_VISION_PROMPT,
)
result = json.loads(result_json)
result = _json.loads(result_json)
if result.get("success"):
description = result.get("analysis", "a sticker")
@@ -2962,19 +2702,8 @@ class TelegramAdapter(BasePlatformAdapter):
self.name, cache_key, thread_id,
)
def _build_message_event(
self,
message: Message,
msg_type: MessageType,
update_id: Optional[int] = None,
) -> MessageEvent:
"""Build a MessageEvent from a Telegram message.
``update_id`` is the ``Update.update_id`` from PTB; passing it through
lets ``/restart`` record the triggering offset so the new gateway
process can advance past it (prevents ``/restart`` being re-delivered
when PTB's graceful-shutdown ACK fails).
"""
def _build_message_event(self, message: Message, msg_type: MessageType) -> MessageEvent:
"""Build a MessageEvent from a Telegram message."""
chat = message.chat
user = message.from_user
@@ -2987,9 +2716,7 @@ class TelegramAdapter(BasePlatformAdapter):
# Resolve DM topic name and skill binding
thread_id_raw = message.message_thread_id
thread_id_str = str(thread_id_raw) if thread_id_raw is not None else None
if chat_type == "group" and thread_id_str is None and getattr(chat, "is_forum", False):
thread_id_str = self._GENERAL_TOPIC_THREAD_ID
thread_id_str = str(thread_id_raw) if thread_id_raw else None
chat_topic = None
topic_skill = None
@@ -3025,8 +2752,8 @@ class TelegramAdapter(BasePlatformAdapter):
chat_id=str(chat.id),
chat_name=chat.title or (chat.full_name if hasattr(chat, "full_name") else None),
chat_type=chat_type,
user_id=str(user.id) if user else (str(chat.id) if chat_type == "dm" else None),
user_name=user.full_name if user else (chat.full_name if hasattr(chat, "full_name") and chat_type == "dm" else None),
user_id=str(user.id) if user else None,
user_name=user.full_name if user else None,
thread_id=thread_id_str,
chat_topic=chat_topic,
)
@@ -3038,26 +2765,15 @@ class TelegramAdapter(BasePlatformAdapter):
reply_to_id = str(message.reply_to_message.message_id)
reply_to_text = message.reply_to_message.text or message.reply_to_message.caption or None
# Per-channel/topic ephemeral prompt
from gateway.platforms.base import resolve_channel_prompt
_chat_id_str = str(chat.id)
_channel_prompt = resolve_channel_prompt(
self.config.extra,
thread_id_str or _chat_id_str,
_chat_id_str if thread_id_str else None,
)
return MessageEvent(
text=message.text or "",
message_type=msg_type,
source=source,
raw_message=message,
message_id=str(message.message_id),
platform_update_id=update_id,
reply_to_message_id=reply_to_id,
reply_to_text=reply_to_text,
auto_skill=topic_skill,
channel_prompt=_channel_prompt,
timestamp=message.date,
)
+1 -1
View File
@@ -46,7 +46,7 @@ _SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]
def _resolve_proxy_url() -> str | None:
# Delegate to shared implementation (env vars + macOS system proxy detection)
from gateway.platforms.base import resolve_proxy_url
return resolve_proxy_url("TELEGRAM_PROXY")
return resolve_proxy_url()
class TelegramFallbackTransport(httpx.AsyncBaseTransport):
+12 -115
View File
@@ -13,10 +13,6 @@ Each route defines:
- skills: optional list of skills to load for the agent
- deliver: where to send the response (github_comment, telegram, etc.)
- deliver_extra: additional delivery config (repo, pr_number, chat_id)
- deliver_only: if true, skip the agent the rendered prompt IS the
message that gets delivered. Use for external push notifications
(Supabase, monitoring alerts, inter-agent pings) where zero LLM cost
and sub-second delivery matter more than agent reasoning.
Security:
- HMAC secret is required per route (validated at startup)
@@ -126,19 +122,6 @@ class WebhookAdapter(BasePlatformAdapter):
f"For testing without auth, set secret to '{_INSECURE_NO_AUTH}'."
)
# deliver_only routes bypass the agent — the POST body becomes a
# direct push notification via the configured delivery target.
# Validate up-front so misconfiguration surfaces at startup rather
# than on the first webhook POST.
if route.get("deliver_only"):
deliver = route.get("deliver", "log")
if not deliver or deliver == "log":
raise ValueError(
f"[webhook] Route '{name}' has deliver_only=true but "
f"deliver is '{deliver}'. Direct delivery requires a "
f"real target (telegram, discord, slack, github_comment, etc.)."
)
app = web.Application()
app.router.add_get("/health", self._handle_health)
app.router.add_post("/webhooks/{route_name}", self._handle_webhook)
@@ -313,14 +296,24 @@ class WebhookAdapter(BasePlatformAdapter):
{"error": "Payload too large"}, status=413
)
# Read body (must be done before any validation)
# ── Rate limiting ────────────────────────────────────────
now = time.time()
window = self._rate_counts.setdefault(route_name, [])
window[:] = [t for t in window if now - t < 60]
if len(window) >= self._rate_limit:
return web.json_response(
{"error": "Rate limit exceeded"}, status=429
)
window.append(now)
# Read body
try:
raw_body = await request.read()
except Exception as e:
logger.error("[webhook] Failed to read body: %s", e)
return web.json_response({"error": "Bad request"}, status=400)
# Validate HMAC signature FIRST (skip for INSECURE_NO_AUTH testing mode)
# Validate HMAC signature (skip for INSECURE_NO_AUTH testing mode)
secret = route_config.get("secret", self._global_secret)
if secret and secret != _INSECURE_NO_AUTH:
if not self._validate_signature(request, raw_body, secret):
@@ -331,16 +324,6 @@ class WebhookAdapter(BasePlatformAdapter):
{"error": "Invalid signature"}, status=401
)
# ── Rate limiting (after auth) ───────────────────────────
now = time.time()
window = self._rate_counts.setdefault(route_name, [])
window[:] = [t for t in window if now - t < 60]
if len(window) >= self._rate_limit:
return web.json_response(
{"error": "Rate limit exceeded"}, status=429
)
window.append(now)
# Parse payload
try:
payload = json.loads(raw_body)
@@ -436,64 +419,6 @@ class WebhookAdapter(BasePlatformAdapter):
)
self._seen_deliveries[delivery_id] = now
# ── Direct delivery mode (deliver_only) ─────────────────
# Skip the agent entirely — the rendered prompt IS the message we
# deliver. Use case: external services (Supabase, monitoring,
# cron jobs, other agents) that need to push a plain notification
# to a user's chat with zero LLM cost. Reuses the same HMAC auth,
# rate limiting, idempotency, and template rendering as agent mode.
if route_config.get("deliver_only"):
delivery = {
"deliver": route_config.get("deliver", "log"),
"deliver_extra": self._render_delivery_extra(
route_config.get("deliver_extra", {}), payload
),
"payload": payload,
}
logger.info(
"[webhook] direct-deliver event=%s route=%s target=%s msg_len=%d delivery=%s",
event_type,
route_name,
delivery["deliver"],
len(prompt),
delivery_id,
)
try:
result = await self._direct_deliver(prompt, delivery)
except Exception:
logger.exception(
"[webhook] direct-deliver failed route=%s delivery=%s",
route_name,
delivery_id,
)
return web.json_response(
{"status": "error", "error": "Delivery failed", "delivery_id": delivery_id},
status=502,
)
if result.success:
return web.json_response(
{
"status": "delivered",
"route": route_name,
"target": delivery["deliver"],
"delivery_id": delivery_id,
},
status=200,
)
# Delivery attempted but target rejected it — surface as 502
# with a generic error (don't leak adapter-level detail).
logger.warning(
"[webhook] direct-deliver target rejected route=%s target=%s error=%s",
route_name,
delivery["deliver"],
result.error,
)
return web.json_response(
{"status": "error", "error": "Delivery failed", "delivery_id": delivery_id},
status=502,
)
# Use delivery_id in session key so concurrent webhooks on the
# same route get independent agent runs (not queued/interrupted).
session_chat_id = f"webhook:{route_name}:{delivery_id}"
@@ -647,34 +572,6 @@ class WebhookAdapter(BasePlatformAdapter):
# Response delivery
# ------------------------------------------------------------------
async def _direct_deliver(
self, content: str, delivery: dict
) -> SendResult:
"""Deliver *content* directly without invoking the agent.
Used by ``deliver_only`` routes: the rendered template becomes the
literal message body, and we dispatch to the same delivery helpers
that the agent-mode ``send()`` flow uses. All target types that
work in agent mode work here Telegram, Discord, Slack, GitHub
PR comments, etc.
"""
deliver_type = delivery.get("deliver", "log")
if deliver_type == "log":
# Shouldn't reach here — startup validation rejects deliver_only
# with deliver=log — but guard defensively.
logger.info("[webhook] direct-deliver log-only: %s", content[:200])
return SendResult(success=True)
if deliver_type == "github_comment":
return await self._deliver_github_comment(content, delivery)
# Fall through to the cross-platform dispatcher, which validates the
# target name and routes via the gateway runner.
return await self._deliver_cross_platform(
deliver_type, content, delivery
)
async def _deliver_github_comment(
self, content: str, delivery: dict
) -> SendResult:
+15 -51
View File
@@ -180,8 +180,6 @@ class WeComAdapter(BasePlatformAdapter):
self._text_batch_split_delay_seconds = float(os.getenv("HERMES_WECOM_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0"))
self._pending_text_batches: Dict[str, MessageEvent] = {}
self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
self._device_id = uuid.uuid4().hex
self._last_chat_req_ids: Dict[str, str] = {}
# ------------------------------------------------------------------
# Connection lifecycle
@@ -279,11 +277,7 @@ class WeComAdapter(BasePlatformAdapter):
{
"cmd": APP_CMD_SUBSCRIBE,
"headers": {"req_id": req_id},
"body": {
"bot_id": self._bot_id,
"secret": self._secret,
"device_id": self._device_id,
},
"body": {"bot_id": self._bot_id, "secret": self._secret},
}
)
@@ -502,11 +496,6 @@ class WeComAdapter(BasePlatformAdapter):
logger.debug("[%s] DM sender %s blocked by policy", self.name, sender_id)
return
# Cache the inbound req_id after policy checks so proactive sends to
# this chat can fall back to APP_CMD_RESPONSE (required for groups —
# WeCom AI Bots cannot initiate APP_CMD_SEND in group chats).
self._remember_chat_req_id(chat_id, self._payload_req_id(payload))
text, reply_text = self._extract_text(body)
media_urls, media_types = await self._extract_media(body)
message_type = self._derive_message_type(body, text, media_types)
@@ -624,16 +613,13 @@ class WeComAdapter(BasePlatformAdapter):
msgtype = str(body.get("msgtype") or "").lower()
if msgtype == "mixed":
_raw_mixed = body.get("mixed")
mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
_raw_items = mixed.get("msg_item")
items = _raw_items if isinstance(_raw_items, list) else []
mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
for item in items:
if not isinstance(item, dict):
continue
if str(item.get("msgtype") or "").lower() == "text":
_raw_text = item.get("text")
text_block = _raw_text if isinstance(_raw_text, dict) else {}
text_block = item.get("text") if isinstance(item.get("text"), dict) else {}
content = str(text_block.get("content") or "").strip()
if content:
text_parts.append(content)
@@ -675,10 +661,8 @@ class WeComAdapter(BasePlatformAdapter):
msgtype = str(body.get("msgtype") or "").lower()
if msgtype == "mixed":
_raw_mixed = body.get("mixed")
mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
_raw_items = mixed.get("msg_item")
items = _raw_items if isinstance(_raw_items, list) else []
mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
for item in items:
if not isinstance(item, dict):
continue
@@ -863,23 +847,6 @@ class WeComAdapter(BasePlatformAdapter):
while len(self._reply_req_ids) > DEDUP_MAX_SIZE:
self._reply_req_ids.pop(next(iter(self._reply_req_ids)))
def _remember_chat_req_id(self, chat_id: str, req_id: str) -> None:
"""Cache the most recent inbound req_id per chat.
Used as a fallback reply target when we need to send into a group
without an explicit ``reply_to`` WeCom AI Bots are blocked from
APP_CMD_SEND in groups and must use APP_CMD_RESPONSE bound to some
prior req_id. Bounded like _reply_req_ids so long-running gateways
don't leak memory across many chats.
"""
normalized_chat_id = str(chat_id or "").strip()
normalized_req_id = str(req_id or "").strip()
if not normalized_chat_id or not normalized_req_id:
return
self._last_chat_req_ids[normalized_chat_id] = normalized_req_id
while len(self._last_chat_req_ids) > DEDUP_MAX_SIZE:
self._last_chat_req_ids.pop(next(iter(self._last_chat_req_ids)))
def _reply_req_id_for_message(self, reply_to: Optional[str]) -> Optional[str]:
normalized = str(reply_to or "").strip()
if not normalized or normalized.startswith("quote:"):
@@ -1196,15 +1163,19 @@ class WeComAdapter(BasePlatformAdapter):
self._raise_for_wecom_error(response, "send media message")
return response
async def _send_reply_markdown(self, reply_req_id: str, content: str) -> Dict[str, Any]:
async def _send_reply_stream(self, reply_req_id: str, content: str) -> Dict[str, Any]:
response = await self._send_reply_request(
reply_req_id,
{
"msgtype": "markdown",
"markdown": {"content": content[:self.MAX_MESSAGE_LENGTH]},
"msgtype": "stream",
"stream": {
"id": self._new_req_id("stream"),
"finish": True,
"content": content[:self.MAX_MESSAGE_LENGTH],
},
},
)
self._raise_for_wecom_error(response, "send reply markdown")
self._raise_for_wecom_error(response, "send reply stream")
return response
async def _send_reply_media_message(
@@ -1264,9 +1235,6 @@ class WeComAdapter(BasePlatformAdapter):
return SendResult(success=False, error=prepared["reject_reason"])
reply_req_id = self._reply_req_id_for_message(reply_to)
if not reply_req_id and chat_id in self._last_chat_req_ids:
reply_req_id = self._last_chat_req_ids[chat_id]
try:
upload_result = await self._upload_media_bytes(
prepared["data"],
@@ -1334,12 +1302,8 @@ class WeComAdapter(BasePlatformAdapter):
try:
reply_req_id = self._reply_req_id_for_message(reply_to)
if not reply_req_id and chat_id in self._last_chat_req_ids:
reply_req_id = self._last_chat_req_ids[chat_id]
if reply_req_id:
response = await self._send_reply_markdown(reply_req_id, content)
response = await self._send_reply_stream(reply_req_id, content)
else:
response = await self._send_request(
APP_CMD_SEND,
-14
View File
@@ -258,20 +258,6 @@ class WecomCallbackAdapter(BasePlatformAdapter):
)
event = self._build_event(app, decrypted)
if event is not None:
# Deduplicate: WeCom retries callbacks on timeout,
# producing duplicate inbound messages (#10305).
if event.message_id:
now = time.time()
if event.message_id in self._seen_messages:
if now - self._seen_messages[event.message_id] < MESSAGE_DEDUP_TTL_SECONDS:
logger.debug("[WecomCallback] Duplicate MsgId %s, skipping", event.message_id)
return web.Response(text="success", content_type="text/plain")
del self._seen_messages[event.message_id]
self._seen_messages[event.message_id] = now
# Prune expired entries when cache grows large
if len(self._seen_messages) > 2000:
cutoff = now - MESSAGE_DEDUP_TTL_SECONDS
self._seen_messages = {k: v for k, v in self._seen_messages.items() if v > cutoff}
# Record which app this user belongs to.
if event.source and event.source.user_id:
map_key = self._user_app_key(
+86 -310
View File
@@ -28,7 +28,7 @@ import uuid
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import quote, urlparse
from urllib.parse import quote
logger = logging.getLogger(__name__)
@@ -96,28 +96,6 @@ MEDIA_VIDEO = 2
MEDIA_FILE = 3
MEDIA_VOICE = 4
_LIVE_ADAPTERS: Dict[str, Any] = {}
def _make_ssl_connector() -> Optional["aiohttp.TCPConnector"]:
"""Return a TCPConnector with a certifi CA bundle, or None if certifi is unavailable.
Tencent's iLink server (``ilinkai.weixin.qq.com``) is not verifiable against
some system CA stores (notably Homebrew's OpenSSL on macOS Apple Silicon).
When ``certifi`` is installed, use its Mozilla CA bundle to guarantee
verification. Otherwise fall back to aiohttp's default (which honors
``SSL_CERT_FILE`` env var via ``trust_env=True``).
"""
try:
import ssl
import certifi
except ImportError:
return None
if not AIOHTTP_AVAILABLE:
return None
ssl_ctx = ssl.create_default_context(cafile=certifi.where())
return aiohttp.TCPConnector(ssl=ssl_ctx)
ITEM_TEXT = 1
ITEM_IMAGE = 2
ITEM_VOICE = 3
@@ -420,12 +398,7 @@ async def _send_message(
text: str,
context_token: Optional[str],
client_id: str,
) -> Dict[str, Any]:
"""Send a text message via iLink sendmessage API.
Returns the raw API response dict (may contain error codes like
``errcode: -14`` for session expiry that the caller can inspect).
"""
) -> None:
if not text or not text.strip():
raise ValueError("_send_message: text must not be empty")
message: Dict[str, Any] = {
@@ -438,7 +411,7 @@ async def _send_message(
}
if context_token:
message["context_token"] = context_token
return await _api_post(
await _api_post(
session,
base_url=base_url,
endpoint=EP_SEND_MESSAGE,
@@ -560,39 +533,6 @@ async def _download_bytes(
return await response.read()
_WEIXIN_CDN_ALLOWLIST: frozenset[str] = frozenset(
{
"novac2c.cdn.weixin.qq.com",
"ilinkai.weixin.qq.com",
"wx.qlogo.cn",
"thirdwx.qlogo.cn",
"res.wx.qq.com",
"mmbiz.qpic.cn",
"mmbiz.qlogo.cn",
}
)
def _assert_weixin_cdn_url(url: str) -> None:
"""Raise ValueError if *url* does not point at a known WeChat CDN host."""
try:
parsed = urlparse(url)
scheme = parsed.scheme.lower()
host = parsed.hostname or ""
except Exception as exc: # noqa: BLE001
raise ValueError(f"Unparseable media URL: {url!r}") from exc
if scheme not in ("http", "https"):
raise ValueError(
f"Media URL has disallowed scheme {scheme!r}; only http/https are permitted."
)
if host not in _WEIXIN_CDN_ALLOWLIST:
raise ValueError(
f"Media URL host {host!r} is not in the WeChat CDN allowlist. "
"Refusing to fetch to prevent SSRF."
)
def _media_reference(item: Dict[str, Any], key: str) -> Dict[str, Any]:
return (item.get(key) or {}).get("media") or {}
@@ -613,7 +553,6 @@ async def _download_and_decrypt_media(
timeout_seconds=timeout_seconds,
)
elif full_url:
_assert_weixin_cdn_url(full_url)
raw = await _download_bytes(session, url=full_url, timeout_seconds=timeout_seconds)
else:
raise RuntimeError("media item had neither encrypt_query_param nor full_url")
@@ -684,31 +623,42 @@ def _rewrite_table_block_for_weixin(lines: List[str]) -> str:
def _normalize_markdown_blocks(content: str) -> str:
lines = content.splitlines()
result: List[str] = []
i = 0
in_code_block = False
blank_run = 0
for raw_line in lines:
line = raw_line.rstrip()
if _FENCE_RE.match(line.strip()):
while i < len(lines):
line = lines[i].rstrip()
fence_match = _FENCE_RE.match(line.strip())
if fence_match:
in_code_block = not in_code_block
result.append(line)
blank_run = 0
i += 1
continue
if in_code_block:
result.append(line)
i += 1
continue
if not line.strip():
blank_run += 1
if blank_run <= 1:
result.append("")
if (
i + 1 < len(lines)
and "|" in lines[i]
and _TABLE_RULE_RE.match(lines[i + 1].rstrip())
):
table_lines = [lines[i].rstrip(), lines[i + 1].rstrip()]
i += 2
while i < len(lines) and "|" in lines[i]:
table_lines.append(lines[i].rstrip())
i += 1
result.append(_rewrite_table_block_for_weixin(table_lines))
continue
blank_run = 0
result.append(line)
result.append(_MARKDOWN_LINK_RE.sub(r"\1 (\2)", _rewrite_headers_for_weixin(line)))
i += 1
return "\n".join(result).strip()
normalized = "\n".join(item.rstrip() for item in result)
normalized = re.sub(r"\n{3,}", "\n\n", normalized)
return normalized.strip()
def _split_markdown_blocks(content: str) -> List[str]:
@@ -754,8 +704,8 @@ def _split_delivery_units_for_weixin(content: str) -> List[str]:
Weixin can render Markdown, but chat readability is better when top-level
line breaks become separate messages. Keep fenced code blocks intact and
attach indented continuation lines to the previous top-level line so nested
list items do not get torn apart.
attach indented continuation lines to the previous top-level line so
transformed tables/lists do not get torn apart.
"""
units: List[str] = []
@@ -797,9 +747,7 @@ def _looks_like_chatty_line_for_weixin(line: str) -> bool:
return False
if line.startswith((" ", "\t")):
return False
if stripped.startswith((">", "-", "*", "", "#", "|")):
return False
if _TABLE_RULE_RE.match(stripped):
if stripped.startswith((">", "-", "*", "")):
return False
if re.match(r"^\*\*[^*]+\*\*$", stripped):
return False
@@ -809,12 +757,10 @@ def _looks_like_chatty_line_for_weixin(line: str) -> bool:
def _looks_like_heading_line_for_weixin(line: str) -> bool:
"""Return True when a short line behaves like a heading."""
"""Return True when a short line behaves like a plain-text heading."""
stripped = line.strip()
if not stripped:
return False
if _HEADER_RE.match(stripped):
return True
return len(stripped) <= 24 and stripped.endswith((":", ""))
@@ -989,7 +935,7 @@ async def qr_login(
if not AIOHTTP_AVAILABLE:
raise RuntimeError("aiohttp is required for Weixin QR login")
async with aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector()) as session:
async with aiohttp.ClientSession(trust_env=True) as session:
try:
qr_resp = await _api_get(
session,
@@ -1007,10 +953,6 @@ async def qr_login(
logger.error("weixin: QR response missing qrcode")
return None
# qrcode_url is the full scannable liteapp URL; qrcode_value is just the hex token
# WeChat needs to scan the full URL, not the raw hex string
qr_scan_data = qrcode_url if qrcode_url else qrcode_value
print("\n请使用微信扫描以下二维码:")
if qrcode_url:
print(qrcode_url)
@@ -1018,11 +960,11 @@ async def qr_login(
import qrcode
qr = qrcode.QRCode()
qr.add_data(qr_scan_data)
qr.add_data(qrcode_url or qrcode_value)
qr.make(fit=True)
qr.print_ascii(invert=True)
except Exception as _qr_exc:
print(f"(终端二维码渲染失败: {_qr_exc},请直接打开上面的二维码链接)")
except Exception:
print("(终端二维码渲染失败,请直接打开上面的二维码链接)")
deadline = time.time() + timeout_seconds
current_base_url = ILINK_BASE_URL
@@ -1068,17 +1010,8 @@ async def qr_login(
)
qrcode_value = str(qr_resp.get("qrcode") or "")
qrcode_url = str(qr_resp.get("qrcode_img_content") or "")
qr_scan_data = qrcode_url if qrcode_url else qrcode_value
if qrcode_url:
print(qrcode_url)
try:
import qrcode as _qrcode
qr = _qrcode.QRCode()
qr.add_data(qr_scan_data)
qr.make(fit=True)
qr.print_ascii(invert=True)
except Exception:
pass
except Exception as exc:
logger.error("weixin: QR refresh failed: %s", exc)
return None
@@ -1126,8 +1059,7 @@ class WeixinAdapter(BasePlatformAdapter):
self._hermes_home = hermes_home
self._token_store = ContextTokenStore(hermes_home)
self._typing_cache = TypingTicketCache()
self._poll_session: Optional[aiohttp.ClientSession] = None
self._send_session: Optional[aiohttp.ClientSession] = None
self._session: Optional[aiohttp.ClientSession] = None
self._poll_task: Optional[asyncio.Task] = None
self._dedup = MessageDeduplicator(ttl_seconds=MESSAGE_DEDUP_TTL_SECONDS)
@@ -1202,17 +1134,14 @@ class WeixinAdapter(BasePlatformAdapter):
except Exception as exc:
logger.debug("[%s] Token lock unavailable (non-fatal): %s", self.name, exc)
self._poll_session = aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector())
self._send_session = aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector())
self._session = aiohttp.ClientSession(trust_env=True)
self._token_store.restore(self._account_id)
self._poll_task = asyncio.create_task(self._poll_loop(), name="weixin-poll")
self._mark_connected()
_LIVE_ADAPTERS[self._token] = self
logger.info("[%s] Connected account=%s base=%s", self.name, _safe_id(self._account_id), self._base_url)
return True
async def disconnect(self) -> None:
_LIVE_ADAPTERS.pop(self._token, None)
self._running = False
if self._poll_task and not self._poll_task.done():
self._poll_task.cancel()
@@ -1221,18 +1150,15 @@ class WeixinAdapter(BasePlatformAdapter):
except asyncio.CancelledError:
pass
self._poll_task = None
if self._poll_session and not self._poll_session.closed:
await self._poll_session.close()
self._poll_session = None
if self._send_session and not self._send_session.closed:
await self._send_session.close()
self._send_session = None
if self._session and not self._session.closed:
await self._session.close()
self._session = None
self._release_platform_lock()
self._mark_disconnected()
logger.info("[%s] Disconnected", self.name)
async def _poll_loop(self) -> None:
assert self._poll_session is not None
assert self._session is not None
sync_buf = _load_sync_buf(self._hermes_home, self._account_id)
timeout_ms = LONG_POLL_TIMEOUT_MS
consecutive_failures = 0
@@ -1240,7 +1166,7 @@ class WeixinAdapter(BasePlatformAdapter):
while self._running:
try:
response = await _get_updates(
self._poll_session,
self._session,
base_url=self._base_url,
token=self._token,
sync_buf=sync_buf,
@@ -1297,7 +1223,7 @@ class WeixinAdapter(BasePlatformAdapter):
logger.error("[%s] unhandled inbound error from=%s: %s", self.name, _safe_id(message.get("from_user_id")), exc, exc_info=True)
async def _process_message(self, message: Dict[str, Any]) -> None:
assert self._poll_session is not None
assert self._session is not None
sender_id = str(message.get("from_user_id") or "").strip()
if not sender_id:
return
@@ -1390,7 +1316,7 @@ class WeixinAdapter(BasePlatformAdapter):
media = _media_reference(item, "image_item")
try:
data = await _download_and_decrypt_media(
self._poll_session,
self._session,
cdn_base_url=self._cdn_base_url,
encrypted_query_param=media.get("encrypt_query_param"),
aes_key_b64=(item.get("image_item") or {}).get("aeskey")
@@ -1408,7 +1334,7 @@ class WeixinAdapter(BasePlatformAdapter):
media = _media_reference(item, "video_item")
try:
data = await _download_and_decrypt_media(
self._poll_session,
self._session,
cdn_base_url=self._cdn_base_url,
encrypted_query_param=media.get("encrypt_query_param"),
aes_key_b64=media.get("aes_key"),
@@ -1427,7 +1353,7 @@ class WeixinAdapter(BasePlatformAdapter):
mime = _mime_from_filename(filename)
try:
data = await _download_and_decrypt_media(
self._poll_session,
self._session,
cdn_base_url=self._cdn_base_url,
encrypted_query_param=media.get("encrypt_query_param"),
aes_key_b64=media.get("aes_key"),
@@ -1446,7 +1372,7 @@ class WeixinAdapter(BasePlatformAdapter):
return None
try:
data = await _download_and_decrypt_media(
self._poll_session,
self._session,
cdn_base_url=self._cdn_base_url,
encrypted_query_param=media.get("encrypt_query_param"),
aes_key_b64=media.get("aes_key"),
@@ -1459,13 +1385,13 @@ class WeixinAdapter(BasePlatformAdapter):
return None
async def _maybe_fetch_typing_ticket(self, user_id: str, context_token: Optional[str]) -> None:
if not self._poll_session or not self._token:
if not self._session or not self._token:
return
if self._typing_cache.get(user_id):
return
try:
response = await _get_config(
self._poll_session,
self._session,
base_url=self._base_url,
token=self._token,
user_id=user_id,
@@ -1490,19 +1416,12 @@ class WeixinAdapter(BasePlatformAdapter):
context_token: Optional[str],
client_id: str,
) -> None:
"""Send a single text chunk with per-chunk retry and backoff.
On session-expired errors (errcode -14), automatically retries
*without* ``context_token`` iLink accepts tokenless sends as a
degraded fallback, which keeps cron-initiated push messages working
even when no user message has refreshed the session recently.
"""
"""Send a single text chunk with per-chunk retry and backoff."""
last_error: Optional[Exception] = None
retried_without_token = False
for attempt in range(self._send_chunk_retries + 1):
try:
resp = await _send_message(
self._send_session,
await _send_message(
self._session,
base_url=self._base_url,
token=self._token,
to=chat_id,
@@ -1510,31 +1429,6 @@ class WeixinAdapter(BasePlatformAdapter):
context_token=context_token,
client_id=client_id,
)
# Check iLink response for session-expired error
if resp and isinstance(resp, dict):
ret = resp.get("ret")
errcode = resp.get("errcode")
if (ret is not None and ret not in (0,)) or (errcode is not None and errcode not in (0,)):
is_session_expired = (
ret == SESSION_EXPIRED_ERRCODE
or errcode == SESSION_EXPIRED_ERRCODE
)
# Session expired — strip token and retry once
if is_session_expired and not retried_without_token and context_token:
retried_without_token = True
context_token = None
self._token_store._cache.pop(
self._token_store._key(self._account_id, chat_id), None
)
logger.warning(
"[%s] session expired for %s; retrying without context_token",
self.name, _safe_id(chat_id),
)
continue
errmsg = resp.get("errmsg") or resp.get("msg") or "unknown error"
raise RuntimeError(
f"iLink sendmessage error: ret={ret} errcode={errcode} errmsg={errmsg}"
)
return
except Exception as exc:
last_error = exc
@@ -1562,48 +1456,12 @@ class WeixinAdapter(BasePlatformAdapter):
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
if not self._send_session or not self._token:
if not self._session or not self._token:
return SendResult(success=False, error="Not connected")
context_token = self._token_store.get(self._account_id, chat_id)
last_message_id: Optional[str] = None
# Extract MEDIA: tags and bare local file paths before text delivery.
media_files, cleaned_content = self.extract_media(content)
_, image_cleaned = self.extract_images(cleaned_content)
local_files, final_content = self.extract_local_files(image_cleaned)
_AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a"}
_VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"}
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
async def _deliver_media(path: str, is_voice: bool = False) -> None:
ext = Path(path).suffix.lower()
if is_voice or ext in _AUDIO_EXTS:
await self.send_voice(chat_id=chat_id, audio_path=path, metadata=metadata)
elif ext in _VIDEO_EXTS:
await self.send_video(chat_id=chat_id, video_path=path, metadata=metadata)
elif ext in _IMAGE_EXTS:
await self.send_image_file(chat_id=chat_id, image_path=path, metadata=metadata)
else:
await self.send_document(chat_id=chat_id, file_path=path, metadata=metadata)
try:
# Deliver extracted MEDIA: attachments first.
for media_path, is_voice in media_files:
try:
await _deliver_media(media_path, is_voice)
except Exception as exc:
logger.warning("[%s] media delivery failed for %s: %s", self.name, media_path, exc)
# Deliver bare local file paths.
for file_path in local_files:
try:
await _deliver_media(file_path, is_voice=False)
except Exception as exc:
logger.warning("[%s] local file delivery failed for %s: %s", self.name, file_path, exc)
# Deliver text content.
chunks = [c for c in self._split_text(self.format_message(final_content)) if c and c.strip()]
chunks = [c for c in self._split_text(self.format_message(content)) if c and c.strip()]
for idx, chunk in enumerate(chunks):
client_id = f"hermes-weixin-{uuid.uuid4().hex}"
await self._send_text_chunk(
@@ -1621,14 +1479,14 @@ class WeixinAdapter(BasePlatformAdapter):
return SendResult(success=False, error=str(exc))
async def send_typing(self, chat_id: str, metadata: Optional[Dict[str, Any]] = None) -> None:
if not self._send_session or not self._token:
if not self._session or not self._token:
return
typing_ticket = self._typing_cache.get(chat_id)
if not typing_ticket:
return
try:
await _send_typing(
self._send_session,
self._session,
base_url=self._base_url,
token=self._token,
to_user_id=chat_id,
@@ -1639,14 +1497,14 @@ class WeixinAdapter(BasePlatformAdapter):
logger.debug("[%s] typing start failed for %s: %s", self.name, _safe_id(chat_id), exc)
async def stop_typing(self, chat_id: str) -> None:
if not self._send_session or not self._token:
if not self._session or not self._token:
return
typing_ticket = self._typing_cache.get(chat_id)
if not typing_ticket:
return
try:
await _send_typing(
self._send_session,
self._session,
base_url=self._base_url,
token=self._token,
to_user_id=chat_id,
@@ -1684,35 +1542,24 @@ class WeixinAdapter(BasePlatformAdapter):
async def send_image_file(
self,
chat_id: str,
image_path: str,
caption: Optional[str] = None,
path: str,
caption: str = "",
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs,
) -> SendResult:
del reply_to, kwargs
return await self.send_document(
chat_id=chat_id,
file_path=image_path,
caption=caption,
metadata=metadata,
)
return await self.send_document(chat_id, file_path=path, caption=caption, metadata=metadata)
async def send_document(
self,
chat_id: str,
file_path: str,
caption: Optional[str] = None,
file_name: Optional[str] = None,
reply_to: Optional[str] = None,
caption: str = "",
metadata: Optional[Dict[str, Any]] = None,
**kwargs,
) -> SendResult:
del file_name, reply_to, metadata, kwargs
if not self._send_session or not self._token:
if not self._session or not self._token:
return SendResult(success=False, error="Not connected")
try:
message_id = await self._send_file(chat_id, file_path, caption or "")
message_id = await self._send_file(chat_id, file_path, caption)
return SendResult(success=True, message_id=message_id)
except Exception as exc:
logger.error("[%s] send_document failed to=%s: %s", self.name, _safe_id(chat_id), exc)
@@ -1726,7 +1573,7 @@ class WeixinAdapter(BasePlatformAdapter):
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
if not self._send_session or not self._token:
if not self._session or not self._token:
return SendResult(success=False, error="Not connected")
try:
message_id = await self._send_file(chat_id, video_path, caption or "")
@@ -1743,24 +1590,7 @@ class WeixinAdapter(BasePlatformAdapter):
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
if not self._send_session or not self._token:
return SendResult(success=False, error="Not connected")
# Native outbound Weixin voice bubbles are not proven-working in the
# upstream reference implementation. Prefer a reliable file attachment
# fallback so users at least receive playable audio, even for .silk.
fallback_caption = caption or "[voice message as attachment]"
try:
message_id = await self._send_file(
chat_id,
audio_path,
fallback_caption,
force_file_attachment=True,
)
return SendResult(success=True, message_id=message_id)
except Exception as exc:
logger.error("[%s] send_voice failed to=%s: %s", self.name, _safe_id(chat_id), exc)
return SendResult(success=False, error=str(exc))
return await self.send_document(chat_id, audio_path, caption=caption or "", metadata=metadata)
async def _download_remote_media(self, url: str) -> str:
from tools.url_safety import is_safe_url
@@ -1768,8 +1598,8 @@ class WeixinAdapter(BasePlatformAdapter):
if not is_safe_url(url):
raise ValueError(f"Blocked unsafe URL (SSRF protection): {url}")
assert self._send_session is not None
async with self._send_session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response:
assert self._session is not None
async with self._session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as response:
response.raise_for_status()
data = await response.read()
suffix = Path(url.split("?", 1)[0]).suffix or ".bin"
@@ -1777,22 +1607,16 @@ class WeixinAdapter(BasePlatformAdapter):
handle.write(data)
return handle.name
async def _send_file(
self,
chat_id: str,
path: str,
caption: str,
force_file_attachment: bool = False,
) -> str:
assert self._send_session is not None and self._token is not None
async def _send_file(self, chat_id: str, path: str, caption: str) -> str:
assert self._session is not None and self._token is not None
plaintext = Path(path).read_bytes()
media_type, item_builder = self._outbound_media_builder(path, force_file_attachment=force_file_attachment)
media_type, item_builder = self._outbound_media_builder(path)
filekey = secrets.token_hex(16)
aes_key = secrets.token_bytes(16)
rawsize = len(plaintext)
rawfilemd5 = hashlib.md5(plaintext).hexdigest()
upload_response = await _get_upload_url(
self._send_session,
self._session,
base_url=self._base_url,
token=self._token,
to_user_id=chat_id,
@@ -1818,34 +1642,30 @@ class WeixinAdapter(BasePlatformAdapter):
raise RuntimeError(f"getUploadUrl returned neither upload_param nor upload_full_url: {upload_response}")
encrypted_query_param = await _upload_ciphertext(
self._send_session,
self._session,
ciphertext=ciphertext,
upload_url=upload_url,
)
context_token = self._token_store.get(self._account_id, chat_id)
# The iLink API expects aes_key as base64(hex_string), not base64(raw_bytes).
# Sending base64(raw_bytes) causes images to show as grey boxes on the
# receiver side because the decryption key doesn't match.
aes_key_for_api = base64.b64encode(aes_key.hex().encode("ascii")).decode("ascii")
item_kwargs = {
"encrypt_query_param": encrypted_query_param,
"aes_key_for_api": aes_key_for_api,
"ciphertext_size": len(ciphertext),
"plaintext_size": rawsize,
"filename": Path(path).name,
"rawfilemd5": rawfilemd5,
}
if media_type == MEDIA_VOICE and path.endswith(".silk"):
item_kwargs["encode_type"] = 6
item_kwargs["sample_rate"] = 24000
item_kwargs["bits_per_sample"] = 16
media_item = item_builder(**item_kwargs)
media_item = item_builder(
encrypt_query_param=encrypted_query_param,
aes_key_for_api=aes_key_for_api,
ciphertext_size=len(ciphertext),
plaintext_size=rawsize,
filename=Path(path).name,
rawfilemd5=rawfilemd5,
)
last_message_id = None
if caption:
last_message_id = f"hermes-weixin-{uuid.uuid4().hex}"
await _send_message(
self._send_session,
self._session,
base_url=self._base_url,
token=self._token,
to=chat_id,
@@ -1856,7 +1676,7 @@ class WeixinAdapter(BasePlatformAdapter):
last_message_id = f"hermes-weixin-{uuid.uuid4().hex}"
await _api_post(
self._send_session,
self._session,
base_url=self._base_url,
endpoint=EP_SEND_MESSAGE,
payload={
@@ -1875,7 +1695,7 @@ class WeixinAdapter(BasePlatformAdapter):
)
return last_message_id
def _outbound_media_builder(self, path: str, force_file_attachment: bool = False):
def _outbound_media_builder(self, path: str):
mime = mimetypes.guess_type(path)[0] or "application/octet-stream"
if mime.startswith("image/"):
return MEDIA_IMAGE, lambda **kw: {
@@ -1903,7 +1723,7 @@ class WeixinAdapter(BasePlatformAdapter):
"video_md5": kw.get("rawfilemd5", ""),
},
}
if path.endswith(".silk") and not force_file_attachment:
if mime.startswith("audio/") or path.endswith(".silk"):
return MEDIA_VOICE, lambda **kw: {
"type": ITEM_VOICE,
"voice_item": {
@@ -1912,25 +1732,9 @@ class WeixinAdapter(BasePlatformAdapter):
"aes_key": kw["aes_key_for_api"],
"encrypt_type": 1,
},
"encode_type": kw.get("encode_type"),
"bits_per_sample": kw.get("bits_per_sample"),
"sample_rate": kw.get("sample_rate"),
"playtime": kw.get("playtime", 0),
},
}
if mime.startswith("audio/"):
return MEDIA_FILE, lambda **kw: {
"type": ITEM_FILE,
"file_item": {
"media": {
"encrypt_query_param": kw["encrypt_query_param"],
"aes_key": kw["aes_key_for_api"],
"encrypt_type": 1,
},
"file_name": kw["filename"],
"len": str(kw["plaintext_size"]),
},
}
return MEDIA_FILE, lambda **kw: {
"type": ITEM_FILE,
"file_item": {
@@ -1980,34 +1784,7 @@ async def send_weixin_direct(
token_store.restore(account_id)
context_token = token_store.get(account_id, chat_id)
live_adapter = _LIVE_ADAPTERS.get(resolved_token)
send_session = getattr(live_adapter, '_send_session', None)
if live_adapter is not None and send_session is not None and not send_session.closed:
last_result: Optional[SendResult] = None
cleaned = live_adapter.format_message(message)
if cleaned:
last_result = await live_adapter.send(chat_id, cleaned)
if not last_result.success:
return {"error": f"Weixin send failed: {last_result.error}"}
for media_path, _is_voice in media_files or []:
ext = Path(media_path).suffix.lower()
if ext in {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"}:
last_result = await live_adapter.send_image_file(chat_id, media_path)
else:
last_result = await live_adapter.send_document(chat_id, media_path)
if not last_result.success:
return {"error": f"Weixin media send failed: {last_result.error}"}
return {
"success": True,
"platform": "weixin",
"chat_id": chat_id,
"message_id": last_result.message_id if last_result else None,
"context_token_used": bool(context_token),
}
async with aiohttp.ClientSession(trust_env=True, connector=_make_ssl_connector()) as session:
async with aiohttp.ClientSession(trust_env=True) as session:
adapter = WeixinAdapter(
PlatformConfig(
enabled=True,
@@ -2020,7 +1797,6 @@ async def send_weixin_direct(
},
)
)
adapter._send_session = session
adapter._session = session
adapter._token = resolved_token
adapter._account_id = account_id

Some files were not shown because too many files have changed in this diff Show More