Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e5492dc002 |
@@ -14,6 +14,3 @@ node_modules
|
||||
.env
|
||||
|
||||
*.md
|
||||
|
||||
# Runtime data (bind-mounted at /opt/data; must not leak into build context)
|
||||
data/
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
name: 'Setup Nix'
|
||||
description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
|
||||
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
|
||||
@@ -3,13 +3,8 @@ name: Docker Build and Publish
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- '**/*.py'
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- 'Dockerfile'
|
||||
- 'docker/**'
|
||||
- '.github/workflows/docker-publish.yml'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
@@ -54,14 +49,6 @@ jobs:
|
||||
|
||||
- name: Test image starts
|
||||
run: |
|
||||
# The image runs as the hermes user (UID 10000). GitHub Actions
|
||||
# creates /tmp/hermes-test root-owned by default, which hermes
|
||||
# can't write to — chown it to match the in-container UID before
|
||||
# bind-mounting. Real users doing `docker run -v ~/.hermes:...`
|
||||
# with their own UID hit the same issue and have their own
|
||||
# remediations (HERMES_UID env var, or chown locally).
|
||||
mkdir -p /tmp/hermes-test
|
||||
sudo chown -R 10000:10000 /tmp/hermes-test
|
||||
docker run --rm \
|
||||
-v /tmp/hermes-test:/opt/data \
|
||||
--entrypoint /opt/hermes/docker/entrypoint.sh \
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
name: Nix Lockfile Check
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
concurrency:
|
||||
group: nix-lockfile-check-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
check:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
|
||||
- name: Resolve head SHA
|
||||
id: sha
|
||||
shell: bash
|
||||
run: |
|
||||
FULL="${{ github.event.pull_request.head.sha || github.sha }}"
|
||||
echo "full=$FULL" >> "$GITHUB_OUTPUT"
|
||||
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Check lockfile hashes
|
||||
id: check
|
||||
continue-on-error: true
|
||||
env:
|
||||
LINK_SHA: ${{ steps.sha.outputs.full }}
|
||||
run: nix run .#fix-lockfiles -- --check
|
||||
|
||||
- name: Post sticky PR comment (stale)
|
||||
if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
message: |
|
||||
### ⚠️ npm lockfile hash out of date
|
||||
|
||||
Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
|
||||
|
||||
The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
|
||||
|
||||
${{ steps.check.outputs.report }}
|
||||
|
||||
#### Apply the fix
|
||||
|
||||
- [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
|
||||
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
|
||||
- Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
|
||||
|
||||
- name: Clear sticky PR comment (resolved)
|
||||
if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
delete: true
|
||||
|
||||
- name: Fail if stale
|
||||
if: steps.check.outputs.stale == 'true'
|
||||
run: exit 1
|
||||
@@ -1,149 +0,0 @@
|
||||
name: Nix Lockfile Fix
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
pr_number:
|
||||
description: 'PR number to fix (leave empty to run on the selected branch)'
|
||||
required: false
|
||||
type: string
|
||||
issue_comment:
|
||||
types: [edited]
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
|
||||
concurrency:
|
||||
group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
fix:
|
||||
# Run on manual dispatch OR when a task-list checkbox in the sticky
|
||||
# lockfile-check comment flips from `[ ]` to `[x]`.
|
||||
if: |
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'issue_comment'
|
||||
&& github.event.issue.pull_request != null
|
||||
&& contains(github.event.comment.body, '[x] **Apply lockfile fix**')
|
||||
&& !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 25
|
||||
steps:
|
||||
- name: Authorize & resolve PR
|
||||
id: resolve
|
||||
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
|
||||
with:
|
||||
script: |
|
||||
// 1. Verify the actor has write access — applies to both checkbox
|
||||
// clicks and manual dispatch.
|
||||
const { data: perm } =
|
||||
await github.rest.repos.getCollaboratorPermissionLevel({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
username: context.actor,
|
||||
});
|
||||
if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
|
||||
core.setFailed(
|
||||
`${context.actor} lacks write access (has: ${perm.permission})`
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// 2. Resolve which ref to check out.
|
||||
let prNumber = '';
|
||||
if (context.eventName === 'issue_comment') {
|
||||
prNumber = String(context.payload.issue.number);
|
||||
} else if (context.eventName === 'workflow_dispatch') {
|
||||
prNumber = context.payload.inputs.pr_number || '';
|
||||
}
|
||||
|
||||
if (!prNumber) {
|
||||
core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
|
||||
core.setOutput('repo', context.repo.repo);
|
||||
core.setOutput('owner', context.repo.owner);
|
||||
core.setOutput('pr', '');
|
||||
return;
|
||||
}
|
||||
|
||||
const { data: pr } = await github.rest.pulls.get({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
pull_number: Number(prNumber),
|
||||
});
|
||||
core.setOutput('ref', pr.head.ref);
|
||||
core.setOutput('repo', pr.head.repo.name);
|
||||
core.setOutput('owner', pr.head.repo.owner.login);
|
||||
core.setOutput('pr', String(pr.number));
|
||||
|
||||
# Wipe the sticky lockfile-check comment to a "running" state as soon
|
||||
# as the job is authorized, so the user sees their click was picked up
|
||||
# before the ~minute of nix build work.
|
||||
- name: Mark sticky as running
|
||||
if: steps.resolve.outputs.pr != ''
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
number: ${{ steps.resolve.outputs.pr }}
|
||||
message: |
|
||||
### 🔄 Applying lockfile fix…
|
||||
|
||||
Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
|
||||
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
|
||||
ref: ${{ steps.resolve.outputs.ref }}
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
|
||||
- name: Apply lockfile hashes
|
||||
id: apply
|
||||
run: nix run .#fix-lockfiles -- --apply
|
||||
|
||||
- name: Commit & push
|
||||
if: steps.apply.outputs.changed == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
git config user.name 'github-actions[bot]'
|
||||
git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
|
||||
git add nix/tui.nix nix/web.nix
|
||||
git commit -m "fix(nix): refresh npm lockfile hashes"
|
||||
git push
|
||||
|
||||
- name: Update sticky (applied)
|
||||
if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
number: ${{ steps.resolve.outputs.pr }}
|
||||
message: |
|
||||
### ✅ Lockfile fix applied
|
||||
|
||||
Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
|
||||
|
||||
- name: Update sticky (already current)
|
||||
if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
number: ${{ steps.resolve.outputs.pr }}
|
||||
message: |
|
||||
### ✅ Lockfile hashes already current
|
||||
|
||||
Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
|
||||
|
||||
- name: Update sticky (failed)
|
||||
if: failure() && steps.resolve.outputs.pr != ''
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
number: ${{ steps.resolve.outputs.pr }}
|
||||
message: |
|
||||
### ❌ Lockfile fix failed
|
||||
|
||||
See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
|
||||
@@ -4,6 +4,15 @@ on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
paths:
|
||||
- 'flake.nix'
|
||||
- 'flake.lock'
|
||||
- 'nix/**'
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
- 'hermes_cli/**'
|
||||
- 'run_agent.py'
|
||||
- 'acp_adapter/**'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -20,8 +29,9 @@ jobs:
|
||||
runs-on: ${{ matrix.os }}
|
||||
timeout-minutes: 30
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: ./.github/actions/nix-setup
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
|
||||
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
|
||||
- name: Check flake
|
||||
if: runner.os == 'Linux'
|
||||
run: nix flake check --print-build-logs
|
||||
|
||||
@@ -3,31 +3,14 @@ name: Supply Chain Audit
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
paths:
|
||||
- '**/*.py'
|
||||
- '**/*.pth'
|
||||
- '**/setup.py'
|
||||
- '**/setup.cfg'
|
||||
- '**/sitecustomize.py'
|
||||
- '**/usercustomize.py'
|
||||
- '**/__init__.pth'
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
contents: read
|
||||
|
||||
# Narrow, high-signal scanner. Only fires on critical indicators of supply
|
||||
# chain attacks (e.g. the litellm-style payloads). Low-signal heuristics
|
||||
# (plain base64, plain exec/eval, dependency/Dockerfile/workflow edits,
|
||||
# Actions version unpinning, outbound POST/PUT) were intentionally
|
||||
# removed — they fired on nearly every PR and trained reviewers to ignore
|
||||
# the scanner. Keep this file's checks ruthlessly narrow: if you find
|
||||
# yourself adding WARNING-tier patterns here again, make a separate
|
||||
# advisory-only workflow instead.
|
||||
|
||||
jobs:
|
||||
scan:
|
||||
name: Scan PR for critical supply chain risks
|
||||
name: Scan PR for supply chain risks
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
@@ -35,7 +18,7 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Scan diff for critical patterns
|
||||
- name: Scan diff for suspicious patterns
|
||||
id: scan
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
@@ -45,19 +28,19 @@ jobs:
|
||||
BASE="${{ github.event.pull_request.base.sha }}"
|
||||
HEAD="${{ github.event.pull_request.head.sha }}"
|
||||
|
||||
# Added lines only, excluding lockfiles.
|
||||
# Get the full diff (added lines only)
|
||||
DIFF=$(git diff "$BASE".."$HEAD" -- . ':!uv.lock' ':!*.lock' ':!package-lock.json' ':!yarn.lock' || true)
|
||||
|
||||
FINDINGS=""
|
||||
CRITICAL=false
|
||||
|
||||
# --- .pth files (auto-execute on Python startup) ---
|
||||
# The exact mechanism used in the litellm supply chain attack:
|
||||
# https://github.com/BerriAI/litellm/issues/24512
|
||||
PTH_FILES=$(git diff --name-only "$BASE".."$HEAD" | grep '\.pth$' || true)
|
||||
if [ -n "$PTH_FILES" ]; then
|
||||
CRITICAL=true
|
||||
FINDINGS="${FINDINGS}
|
||||
### 🚨 CRITICAL: .pth file added or modified
|
||||
Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required.
|
||||
Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required. This is the exact mechanism used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512).
|
||||
|
||||
**Files:**
|
||||
\`\`\`
|
||||
@@ -66,12 +49,13 @@ jobs:
|
||||
"
|
||||
fi
|
||||
|
||||
# --- base64 decode + exec/eval on the same line (the litellm attack pattern) ---
|
||||
# --- base64 + exec/eval combo (the litellm attack pattern) ---
|
||||
B64_EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|decodebytes|urlsafe_b64decode)' | grep -iE 'exec\(|eval\(' | head -10 || true)
|
||||
if [ -n "$B64_EXEC_HITS" ]; then
|
||||
CRITICAL=true
|
||||
FINDINGS="${FINDINGS}
|
||||
### 🚨 CRITICAL: base64 decode + exec/eval combo
|
||||
Base64-decoded strings passed directly to exec/eval — the signature of hidden credential-stealing payloads.
|
||||
This is the exact pattern used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512) — base64-decoded strings passed to exec/eval to hide credential-stealing payloads.
|
||||
|
||||
**Matches:**
|
||||
\`\`\`
|
||||
@@ -80,12 +64,41 @@ jobs:
|
||||
"
|
||||
fi
|
||||
|
||||
# --- subprocess with encoded/obfuscated command argument ---
|
||||
PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|\\x[0-9a-f]{2}|chr\(' | head -10 || true)
|
||||
# --- base64 decode/encode (alone — legitimate uses exist) ---
|
||||
B64_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|b64encode|decodebytes|encodebytes|urlsafe_b64decode)|atob\(|btoa\(|Buffer\.from\(.*base64' | head -20 || true)
|
||||
if [ -n "$B64_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### ⚠️ WARNING: base64 encoding/decoding detected
|
||||
Base64 has legitimate uses (images, JWT, etc.) but is also commonly used to obfuscate malicious payloads. Verify the usage is appropriate.
|
||||
|
||||
**Matches (first 20):**
|
||||
\`\`\`
|
||||
${B64_HITS}
|
||||
\`\`\`
|
||||
"
|
||||
fi
|
||||
|
||||
# --- exec/eval with string arguments ---
|
||||
EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E '(exec|eval)\s*\(' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert\|# ' | head -20 || true)
|
||||
if [ -n "$EXEC_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### ⚠️ WARNING: exec() or eval() usage
|
||||
Dynamic code execution can hide malicious behavior, especially when combined with base64 or network fetches.
|
||||
|
||||
**Matches (first 20):**
|
||||
\`\`\`
|
||||
${EXEC_HITS}
|
||||
\`\`\`
|
||||
"
|
||||
fi
|
||||
|
||||
# --- subprocess with encoded/obfuscated commands ---
|
||||
PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|decode|encode|\\x|chr\(' | head -10 || true)
|
||||
if [ -n "$PROC_HITS" ]; then
|
||||
CRITICAL=true
|
||||
FINDINGS="${FINDINGS}
|
||||
### 🚨 CRITICAL: subprocess with encoded/obfuscated command
|
||||
Subprocess calls whose command strings are base64- or hex-encoded are a strong indicator of payload execution.
|
||||
Subprocess calls with encoded arguments are a strong indicator of payload execution.
|
||||
|
||||
**Matches:**
|
||||
\`\`\`
|
||||
@@ -94,12 +107,25 @@ jobs:
|
||||
"
|
||||
fi
|
||||
|
||||
# --- Install-hook files (setup.py/sitecustomize/usercustomize/__init__.pth) ---
|
||||
# These execute during pip install or interpreter startup.
|
||||
SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(^|/)(setup\.py|setup\.cfg|sitecustomize\.py|usercustomize\.py|__init__\.pth)$' || true)
|
||||
# --- Network calls to non-standard domains ---
|
||||
EXFIL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'requests\.(post|put)\(|httpx\.(post|put)\(|urllib\.request\.urlopen' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert' | head -10 || true)
|
||||
if [ -n "$EXFIL_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### ⚠️ WARNING: Outbound network calls (POST/PUT)
|
||||
Outbound POST/PUT requests in new code could be data exfiltration. Verify the destination URLs are legitimate.
|
||||
|
||||
**Matches (first 10):**
|
||||
\`\`\`
|
||||
${EXFIL_HITS}
|
||||
\`\`\`
|
||||
"
|
||||
fi
|
||||
|
||||
# --- setup.py / setup.cfg install hooks ---
|
||||
SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(setup\.py|setup\.cfg|__init__\.pth|sitecustomize\.py|usercustomize\.py)$' || true)
|
||||
if [ -n "$SETUP_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### 🚨 CRITICAL: Install-hook file added or modified
|
||||
### ⚠️ WARNING: Install hook files modified
|
||||
These files can execute code during package installation or interpreter startup.
|
||||
|
||||
**Files:**
|
||||
@@ -109,31 +135,114 @@ jobs:
|
||||
"
|
||||
fi
|
||||
|
||||
# --- Compile/marshal/pickle (code object injection) ---
|
||||
MARSHAL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'marshal\.loads|pickle\.loads|compile\(' | grep -v '^\+\s*#' | grep -v 'test_\|re\.compile\|ast\.compile' | head -10 || true)
|
||||
if [ -n "$MARSHAL_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### ⚠️ WARNING: marshal/pickle/compile usage
|
||||
These can deserialize or construct executable code objects.
|
||||
|
||||
**Matches:**
|
||||
\`\`\`
|
||||
${MARSHAL_HITS}
|
||||
\`\`\`
|
||||
"
|
||||
fi
|
||||
|
||||
# --- CI/CD workflow files modified ---
|
||||
WORKFLOW_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '\.github/workflows/.*\.ya?ml$' || true)
|
||||
if [ -n "$WORKFLOW_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### ⚠️ WARNING: CI/CD workflow files modified
|
||||
Changes to workflow files can alter build pipelines, inject steps, or modify permissions. Verify no unauthorized actions or secrets access were added.
|
||||
|
||||
**Files:**
|
||||
\`\`\`
|
||||
${WORKFLOW_HITS}
|
||||
\`\`\`
|
||||
"
|
||||
fi
|
||||
|
||||
# --- Dockerfile / container build files modified ---
|
||||
DOCKER_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -iE '(Dockerfile|\.dockerignore|docker-compose)' || true)
|
||||
if [ -n "$DOCKER_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### ⚠️ WARNING: Container build files modified
|
||||
Changes to Dockerfiles or compose files can alter base images, add build steps, or expose ports. Verify base image pins and build commands.
|
||||
|
||||
**Files:**
|
||||
\`\`\`
|
||||
${DOCKER_HITS}
|
||||
\`\`\`
|
||||
"
|
||||
fi
|
||||
|
||||
# --- Dependency manifest files modified ---
|
||||
DEP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(pyproject\.toml|requirements.*\.txt|package\.json|Gemfile|go\.mod|Cargo\.toml)$' || true)
|
||||
if [ -n "$DEP_HITS" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### ⚠️ WARNING: Dependency manifest files modified
|
||||
Changes to dependency files can introduce new packages or change version pins. Verify all dependency changes are intentional and from trusted sources.
|
||||
|
||||
**Files:**
|
||||
\`\`\`
|
||||
${DEP_HITS}
|
||||
\`\`\`
|
||||
"
|
||||
fi
|
||||
|
||||
# --- GitHub Actions version unpinning (mutable tags instead of SHAs) ---
|
||||
ACTIONS_UNPIN=$(echo "$DIFF" | grep -n '^\+' | grep 'uses:' | grep -v '#' | grep -E '@v[0-9]' | head -10 || true)
|
||||
if [ -n "$ACTIONS_UNPIN" ]; then
|
||||
FINDINGS="${FINDINGS}
|
||||
### ⚠️ WARNING: GitHub Actions with mutable version tags
|
||||
Actions should be pinned to full commit SHAs (not \`@v4\`, \`@v5\`). Mutable tags can be retargeted silently if a maintainer account is compromised.
|
||||
|
||||
**Matches:**
|
||||
\`\`\`
|
||||
${ACTIONS_UNPIN}
|
||||
\`\`\`
|
||||
"
|
||||
fi
|
||||
|
||||
# --- Output results ---
|
||||
if [ -n "$FINDINGS" ]; then
|
||||
echo "found=true" >> "$GITHUB_OUTPUT"
|
||||
if [ "$CRITICAL" = true ]; then
|
||||
echo "critical=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "critical=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
# Write findings to a file (multiline env vars are fragile)
|
||||
echo "$FINDINGS" > /tmp/findings.md
|
||||
else
|
||||
echo "found=false" >> "$GITHUB_OUTPUT"
|
||||
echo "critical=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Post critical finding comment
|
||||
- name: Post warning comment
|
||||
if: steps.scan.outputs.found == 'true'
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
BODY="## 🚨 CRITICAL Supply Chain Risk Detected
|
||||
SEVERITY="⚠️ Supply Chain Risk Detected"
|
||||
if [ "${{ steps.scan.outputs.critical }}" = "true" ]; then
|
||||
SEVERITY="🚨 CRITICAL Supply Chain Risk Detected"
|
||||
fi
|
||||
|
||||
This PR contains a pattern that has been used in real supply chain attacks. A maintainer must review the flagged code carefully before merging.
|
||||
BODY="## ${SEVERITY}
|
||||
|
||||
This PR contains patterns commonly associated with supply chain attacks. This does **not** mean the PR is malicious — but these patterns require careful human review before merging.
|
||||
|
||||
$(cat /tmp/findings.md)
|
||||
|
||||
---
|
||||
*Scanner only fires on high-signal indicators: .pth files, base64+exec/eval combos, subprocess with encoded commands, or install-hook files. Low-signal warnings were removed intentionally — if you're seeing this comment, the finding is worth inspecting.*"
|
||||
*Automated scan triggered by [supply-chain-audit](/.github/workflows/supply-chain-audit.yml). If this is a false positive, a maintainer can approve after manual review.*"
|
||||
|
||||
gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs — GITHUB_TOKEN is read-only)"
|
||||
|
||||
- name: Fail on critical findings
|
||||
if: steps.scan.outputs.found == 'true'
|
||||
if: steps.scan.outputs.critical == 'true'
|
||||
run: |
|
||||
echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment for details."
|
||||
exit 1
|
||||
|
||||
@@ -3,14 +3,8 @@ name: Tests
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- '**/*.md'
|
||||
- 'docs/**'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- '**/*.md'
|
||||
- 'docs/**'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -23,7 +17,7 @@ concurrency:
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
.DS_Store
|
||||
/venv/
|
||||
/_pycache/
|
||||
*.pyc*
|
||||
@@ -55,11 +54,6 @@ environments/benchmarks/evals/
|
||||
# Web UI build output
|
||||
hermes_cli/web_dist/
|
||||
|
||||
# Web UI assets — synced from @nous-research/ui at build time via
|
||||
# `npm run sync-assets` (see web/package.json).
|
||||
web/public/fonts/
|
||||
web/public/ds-assets/
|
||||
|
||||
# Release script temp files
|
||||
.release_notes.md
|
||||
mini-swe-agent/
|
||||
|
||||
@@ -69,7 +69,7 @@ hermes-agent/
|
||||
│ ├── server.py # RPC handlers and session logic
|
||||
│ ├── render.py # Optional rich/ANSI bridge
|
||||
│ └── slash_worker.py # Persistent HermesCLI subprocess for slash commands
|
||||
├── hermes_agent/acp/ # ACP server (VS Code / Zed / JetBrains integration)
|
||||
├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration)
|
||||
├── cron/ # Scheduler (jobs.py, scheduler.py)
|
||||
├── environments/ # RL training environments (Atropos)
|
||||
├── tests/ # Pytest suite (~3000 tests)
|
||||
@@ -566,52 +566,3 @@ python -m pytest tests/ -q -n 4
|
||||
Worker count above 4 will surface test-ordering flakes that CI never sees.
|
||||
|
||||
Always run the full suite before pushing changes.
|
||||
|
||||
### Don't write change-detector tests
|
||||
|
||||
A test is a **change-detector** if it fails whenever data that is **expected
|
||||
to change** gets updated — model catalogs, config version numbers,
|
||||
enumeration counts, hardcoded lists of provider models. These tests add no
|
||||
behavioral coverage; they just guarantee that routine source updates break
|
||||
CI and cost engineering time to "fix."
|
||||
|
||||
**Do not write:**
|
||||
|
||||
```python
|
||||
# catalog snapshot — breaks every model release
|
||||
assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"]
|
||||
assert "MiniMax-M2.7" in models
|
||||
|
||||
# config version literal — breaks every schema bump
|
||||
assert DEFAULT_CONFIG["_config_version"] == 21
|
||||
|
||||
# enumeration count — breaks every time a skill/provider is added
|
||||
assert len(_PROVIDER_MODELS["huggingface"]) == 8
|
||||
```
|
||||
|
||||
**Do write:**
|
||||
|
||||
```python
|
||||
# behavior: does the catalog plumbing work at all?
|
||||
assert "gemini" in _PROVIDER_MODELS
|
||||
assert len(_PROVIDER_MODELS["gemini"]) >= 1
|
||||
|
||||
# behavior: does migration bump the user's version to current latest?
|
||||
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
|
||||
|
||||
# invariant: no plan-only model leaks into the legacy list
|
||||
assert not (set(moonshot_models) & coding_plan_only_models)
|
||||
|
||||
# invariant: every model in the catalog has a context-length entry
|
||||
for m in _PROVIDER_MODELS["huggingface"]:
|
||||
assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER
|
||||
```
|
||||
|
||||
The rule: if the test reads like a snapshot of current data, delete it. If
|
||||
it reads like a contract about how two pieces of data must relate, keep it.
|
||||
When a PR adds a new provider/model and you want a test, make the test
|
||||
assert the relationship (e.g. "catalog entries all have context lengths"),
|
||||
not the specific names.
|
||||
|
||||
Reviewers should reject new change-detector tests; authors should convert
|
||||
them into invariants before re-requesting review.
|
||||
|
||||
+3
-3
@@ -55,10 +55,10 @@ If your skill is specialized, community-contributed, or niche, it's better suite
|
||||
|
||||
| Requirement | Notes |
|
||||
|-------------|-------|
|
||||
| **Git** | With `--recurse-submodules` support, and the `git-lfs` extension installed |
|
||||
| **Git** | With `--recurse-submodules` support |
|
||||
| **Python 3.11+** | uv will install it if missing |
|
||||
| **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
|
||||
| **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |
|
||||
| **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge |
|
||||
|
||||
### Clone and install
|
||||
|
||||
@@ -88,7 +88,7 @@ cp cli-config.yaml.example ~/.hermes/config.yaml
|
||||
touch ~/.hermes/.env
|
||||
|
||||
# Add at minimum an LLM provider key:
|
||||
echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
|
||||
echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
|
||||
```
|
||||
|
||||
### Run
|
||||
|
||||
+3
-2
@@ -12,7 +12,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
|
||||
# Install system dependencies in one layer, clear APT cache
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli && \
|
||||
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
|
||||
@@ -27,10 +27,12 @@ WORKDIR /opt/hermes
|
||||
# Copy only package manifests first so npm install + Playwright are cached
|
||||
# unless the lockfiles themselves change.
|
||||
COPY package.json package-lock.json ./
|
||||
COPY scripts/whatsapp-bridge/package.json scripts/whatsapp-bridge/package-lock.json scripts/whatsapp-bridge/
|
||||
COPY web/package.json web/package-lock.json web/
|
||||
|
||||
RUN npm install --prefer-offline --no-audit && \
|
||||
npx playwright install --with-deps chromium --only-shell && \
|
||||
(cd scripts/whatsapp-bridge && npm install --prefer-offline --no-audit) && \
|
||||
(cd web && npm install --prefer-offline --no-audit) && \
|
||||
npm cache clean --force
|
||||
|
||||
@@ -50,6 +52,5 @@ RUN uv venv && \
|
||||
# ---------- Runtime ----------
|
||||
ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
|
||||
ENV HERMES_HOME=/opt/data
|
||||
ENV PATH="/opt/data/.local/bin:${PATH}"
|
||||
VOLUME [ "/opt/data" ]
|
||||
ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
|
||||
|
||||
@@ -173,6 +173,7 @@ python -m pytest tests/ -q
|
||||
- 💬 [Discord](https://discord.gg/NousResearch)
|
||||
- 📚 [Skills Hub](https://agentskills.io)
|
||||
- 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
|
||||
- 💡 [Discussions](https://github.com/NousResearch/hermes-agent/discussions)
|
||||
- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.
|
||||
|
||||
---
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
"""Allow running the ACP adapter as ``python -m acp_adapter``."""
|
||||
|
||||
from .entry import main
|
||||
|
||||
main()
|
||||
@@ -6,7 +6,7 @@ and starts the ACP agent server.
|
||||
|
||||
Usage::
|
||||
|
||||
python -m hermes_agent.acp.entry
|
||||
python -m acp_adapter.entry
|
||||
# or
|
||||
hermes acp
|
||||
# or
|
||||
@@ -16,49 +16,10 @@ Usage::
|
||||
import asyncio
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
# Methods clients send as periodic liveness probes. They are not part of the
|
||||
# ACP schema, so the acp router correctly returns JSON-RPC -32601 to the
|
||||
# caller — but the supervisor task that dispatches the request then surfaces
|
||||
# the raised RequestError via ``logging.exception("Background task failed")``,
|
||||
# which dumps a traceback to stderr every probe interval. Clients like
|
||||
# acp-bridge already treat the -32601 response as "agent alive", so the
|
||||
# traceback is pure noise. We keep the protocol response intact and only
|
||||
# silence the stderr noise for this specific benign case.
|
||||
_BENIGN_PROBE_METHODS = frozenset({"ping", "health", "healthcheck"})
|
||||
|
||||
|
||||
class _BenignProbeMethodFilter(logging.Filter):
|
||||
"""Suppress acp 'Background task failed' tracebacks caused by unknown
|
||||
liveness-probe methods (e.g. ``ping``) while leaving every other
|
||||
background-task error — including method_not_found for any non-probe
|
||||
method — visible in stderr.
|
||||
"""
|
||||
|
||||
def filter(self, record: logging.LogRecord) -> bool:
|
||||
if record.getMessage() != "Background task failed":
|
||||
return True
|
||||
exc_info = record.exc_info
|
||||
if not exc_info:
|
||||
return True
|
||||
exc = exc_info[1]
|
||||
# Imported lazily so this module stays importable when the optional
|
||||
# ``agent-client-protocol`` dependency is not installed.
|
||||
try:
|
||||
from acp.exceptions import RequestError
|
||||
except ImportError:
|
||||
return True
|
||||
if not isinstance(exc, RequestError):
|
||||
return True
|
||||
if getattr(exc, "code", None) != -32601:
|
||||
return True
|
||||
data = getattr(exc, "data", None)
|
||||
method = data.get("method") if isinstance(data, dict) else None
|
||||
return method not in _BENIGN_PROBE_METHODS
|
||||
|
||||
|
||||
def _setup_logging() -> None:
|
||||
"""Route all logging to stderr so stdout stays clean for ACP stdio."""
|
||||
handler = logging.StreamHandler(sys.stderr)
|
||||
@@ -68,7 +29,6 @@ def _setup_logging() -> None:
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
)
|
||||
)
|
||||
handler.addFilter(_BenignProbeMethodFilter())
|
||||
root = logging.getLogger()
|
||||
root.handlers.clear()
|
||||
root.addHandler(handler)
|
||||
@@ -103,8 +63,13 @@ def main() -> None:
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.info("Starting hermes-agent ACP adapter")
|
||||
|
||||
# Ensure the project root is on sys.path so ``from run_agent import AIAgent`` works
|
||||
project_root = str(Path(__file__).resolve().parent.parent)
|
||||
if project_root not in sys.path:
|
||||
sys.path.insert(0, project_root)
|
||||
|
||||
import acp
|
||||
from hermes_agent.acp.server import HermesACPAgent
|
||||
from .server import HermesACPAgent
|
||||
|
||||
agent = HermesACPAgent()
|
||||
try:
|
||||
@@ -15,7 +15,7 @@ from typing import Any, Callable, Deque, Dict
|
||||
|
||||
import acp
|
||||
|
||||
from hermes_agent.acp.tools import (
|
||||
from .tools import (
|
||||
build_tool_complete,
|
||||
build_tool_start,
|
||||
make_tool_call_id,
|
||||
@@ -63,9 +63,6 @@ def make_approval_callback(
|
||||
logger.warning("Permission request timed out or failed: %s", exc)
|
||||
return "deny"
|
||||
|
||||
if response is None:
|
||||
return "deny"
|
||||
|
||||
outcome = response.outcome
|
||||
if isinstance(outcome, AllowedOutcome):
|
||||
option_id = outcome.option_id
|
||||
@@ -4,7 +4,6 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from collections import defaultdict, deque
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Any, Deque, Optional
|
||||
@@ -52,15 +51,15 @@ try:
|
||||
except ImportError:
|
||||
from acp.schema import AuthMethod as AuthMethodAgent # type: ignore[attr-defined]
|
||||
|
||||
from hermes_agent.acp.auth import detect_provider
|
||||
from hermes_agent.acp.events import (
|
||||
from acp_adapter.auth import detect_provider, has_provider
|
||||
from acp_adapter.events import (
|
||||
make_message_cb,
|
||||
make_step_cb,
|
||||
make_thinking_cb,
|
||||
make_tool_progress_cb,
|
||||
)
|
||||
from hermes_agent.acp.permissions import make_approval_callback
|
||||
from hermes_agent.acp.session import SessionManager, SessionState
|
||||
from acp_adapter.permissions import make_approval_callback
|
||||
from acp_adapter.session import SessionManager, SessionState
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -72,11 +71,6 @@ except Exception:
|
||||
# Thread pool for running AIAgent (synchronous) in parallel.
|
||||
_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent")
|
||||
|
||||
# Server-side page size for list_sessions. The ACP ListSessionsRequest schema
|
||||
# does not expose a client-side limit, so this is a fixed cap that clients
|
||||
# paginate against using `cursor` / `next_cursor`.
|
||||
_LIST_SESSIONS_PAGE_SIZE = 50
|
||||
|
||||
|
||||
def _extract_text(
|
||||
prompt: list[
|
||||
@@ -357,18 +351,9 @@ class HermesACPAgent(acp.Agent):
|
||||
)
|
||||
|
||||
async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None:
|
||||
# Only accept authenticate() calls whose method_id matches the
|
||||
# provider we advertised in initialize(). Without this check,
|
||||
# authenticate() would acknowledge any method_id as long as the
|
||||
# server has provider credentials configured — harmless under
|
||||
# Hermes' threat model (ACP is stdio-only, local-trust), but poor
|
||||
# API hygiene and confusing if ACP ever grows multi-method auth.
|
||||
provider = detect_provider()
|
||||
if not provider:
|
||||
return None
|
||||
if not isinstance(method_id, str) or method_id.strip().lower() != provider:
|
||||
return None
|
||||
return AuthenticateResponse()
|
||||
if has_provider():
|
||||
return AuthenticateResponse()
|
||||
return None
|
||||
|
||||
# ---- Session management -------------------------------------------------
|
||||
|
||||
@@ -452,28 +437,7 @@ class HermesACPAgent(acp.Agent):
|
||||
cwd: str | None = None,
|
||||
**kwargs: Any,
|
||||
) -> ListSessionsResponse:
|
||||
"""List ACP sessions with optional ``cwd`` filtering and cursor pagination.
|
||||
|
||||
``cwd`` is passed through to ``SessionManager.list_sessions`` which already
|
||||
normalizes and filters by working directory. ``cursor`` is a ``session_id``
|
||||
previously returned as ``next_cursor``; results resume after that entry.
|
||||
Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more
|
||||
results remain, ``next_cursor`` is set to the last returned ``session_id``.
|
||||
"""
|
||||
infos = self.session_manager.list_sessions(cwd=cwd)
|
||||
|
||||
if cursor:
|
||||
for idx, s in enumerate(infos):
|
||||
if s["session_id"] == cursor:
|
||||
infos = infos[idx + 1:]
|
||||
break
|
||||
else:
|
||||
# Unknown cursor -> empty page (do not fall back to full list).
|
||||
infos = []
|
||||
|
||||
has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE
|
||||
infos = infos[:_LIST_SESSIONS_PAGE_SIZE]
|
||||
|
||||
sessions = []
|
||||
for s in infos:
|
||||
updated_at = s.get("updated_at")
|
||||
@@ -487,9 +451,7 @@ class HermesACPAgent(acp.Agent):
|
||||
updated_at=updated_at,
|
||||
)
|
||||
)
|
||||
|
||||
next_cursor = sessions[-1].session_id if has_more and sessions else None
|
||||
return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor)
|
||||
return ListSessionsResponse(sessions=sessions)
|
||||
|
||||
# ---- Prompt (core) ------------------------------------------------------
|
||||
|
||||
@@ -555,32 +517,15 @@ class HermesACPAgent(acp.Agent):
|
||||
agent.step_callback = step_cb
|
||||
agent.message_callback = message_cb
|
||||
|
||||
# Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
|
||||
# Set it INSIDE _run_agent so the TLS write happens in the executor
|
||||
# thread — setting it here would write to the event-loop thread's TLS,
|
||||
# not the executor's. Also set HERMES_INTERACTIVE so approval.py
|
||||
# takes the CLI-interactive path (which calls the registered
|
||||
# callback via prompt_dangerous_approval) instead of the
|
||||
# non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff).
|
||||
# ACP's conn.request_permission maps cleanly to the interactive
|
||||
# callback shape — not the gateway-queue HERMES_EXEC_ASK path,
|
||||
# which requires a notify_cb registered in _gateway_notify_cbs.
|
||||
previous_approval_cb = None
|
||||
previous_interactive = None
|
||||
if approval_cb:
|
||||
try:
|
||||
from tools import terminal_tool as _terminal_tool
|
||||
previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None)
|
||||
_terminal_tool.set_approval_callback(approval_cb)
|
||||
except Exception:
|
||||
logger.debug("Could not set ACP approval callback", exc_info=True)
|
||||
|
||||
def _run_agent() -> dict:
|
||||
nonlocal previous_approval_cb, previous_interactive
|
||||
if approval_cb:
|
||||
try:
|
||||
from tools import terminal_tool as _terminal_tool
|
||||
previous_approval_cb = _terminal_tool._get_approval_callback()
|
||||
_terminal_tool.set_approval_callback(approval_cb)
|
||||
except Exception:
|
||||
logger.debug("Could not set ACP approval callback", exc_info=True)
|
||||
# Signal to tools.approval that we have an interactive callback
|
||||
# and the non-interactive auto-approve path must not fire.
|
||||
previous_interactive = os.environ.get("HERMES_INTERACTIVE")
|
||||
os.environ["HERMES_INTERACTIVE"] = "1"
|
||||
try:
|
||||
result = agent.run_conversation(
|
||||
user_message=user_text,
|
||||
@@ -592,11 +537,6 @@ class HermesACPAgent(acp.Agent):
|
||||
logger.exception("Agent error in session %s", session_id)
|
||||
return {"final_response": f"Error: {e}", "messages": state.history}
|
||||
finally:
|
||||
# Restore HERMES_INTERACTIVE.
|
||||
if previous_interactive is None:
|
||||
os.environ.pop("HERMES_INTERACTIVE", None)
|
||||
else:
|
||||
os.environ["HERMES_INTERACTIVE"] = previous_interactive
|
||||
if approval_cb:
|
||||
try:
|
||||
from tools import terminal_tool as _terminal_tool
|
||||
@@ -673,8 +613,8 @@ class HermesACPAgent(acp.Agent):
|
||||
await self._conn.session_update(
|
||||
session_id=session_id,
|
||||
update=AvailableCommandsUpdate(
|
||||
session_update="available_commands_update",
|
||||
available_commands=self._available_commands(),
|
||||
sessionUpdate="available_commands_update",
|
||||
availableCommands=self._available_commands(),
|
||||
),
|
||||
)
|
||||
except Exception:
|
||||
@@ -1,326 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token
|
||||
from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
|
||||
|
||||
def _utc_now() -> datetime:
|
||||
return datetime.now(timezone.utc)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AccountUsageWindow:
    """One immutable quota window (for example a session or weekly limit)."""

    # Human-readable name shown at the start of the rendered line.
    label: str
    # Share of the window already consumed, as a percentage; None if unknown.
    used_percent: Optional[float] = None
    # Moment the window resets, when the provider reports one.
    reset_at: Optional[datetime] = None
    # Free-form extra text for the window.
    detail: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AccountUsageSnapshot:
    """Immutable result of one account-usage lookup for a single provider."""

    # Provider identifier, e.g. "anthropic" or "openrouter".
    provider: str
    # Name of the endpoint/API the figures were read from.
    source: str
    # Time the snapshot was built.
    fetched_at: datetime
    # Heading used when the snapshot is rendered.
    title: str = "Account limits"
    # Display name of the subscription plan, if known.
    plan: Optional[str] = None
    # Quota windows, in display order.
    windows: tuple[AccountUsageWindow, ...] = ()
    # Additional pre-formatted lines (balances, totals).
    details: tuple[str, ...] = ()
    # Explanation of why no usage data could be provided, if any.
    unavailable_reason: Optional[str] = None

    @property
    def available(self) -> bool:
        """True when there is something to show and no unavailability reason."""
        if self.unavailable_reason:
            return False
        return bool(self.windows or self.details)
|
||||
|
||||
|
||||
def _title_case_slug(value: Optional[str]) -> Optional[str]:
|
||||
cleaned = str(value or "").strip()
|
||||
if not cleaned:
|
||||
return None
|
||||
return cleaned.replace("_", " ").replace("-", " ").title()
|
||||
|
||||
|
||||
def _parse_dt(value: Any) -> Optional[datetime]:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
if isinstance(value, (int, float)):
|
||||
return datetime.fromtimestamp(float(value), tz=timezone.utc)
|
||||
if isinstance(value, str):
|
||||
text = value.strip()
|
||||
if not text:
|
||||
return None
|
||||
if text.endswith("Z"):
|
||||
text = text[:-1] + "+00:00"
|
||||
try:
|
||||
dt = datetime.fromisoformat(text)
|
||||
return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
|
||||
except ValueError:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _format_reset(dt: Optional[datetime]) -> str:
|
||||
if not dt:
|
||||
return "unknown"
|
||||
local_dt = dt.astimezone()
|
||||
delta = dt - _utc_now()
|
||||
total_seconds = int(delta.total_seconds())
|
||||
if total_seconds <= 0:
|
||||
return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
|
||||
hours, rem = divmod(total_seconds, 3600)
|
||||
minutes = rem // 60
|
||||
if hours >= 24:
|
||||
days, hours = divmod(hours, 24)
|
||||
rel = f"in {days}d {hours}h"
|
||||
elif hours > 0:
|
||||
rel = f"in {hours}h {minutes}m"
|
||||
else:
|
||||
rel = f"in {minutes}m"
|
||||
return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
|
||||
|
||||
|
||||
def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]:
    """Render a usage snapshot as a list of display lines.

    Args:
        snapshot: The snapshot to render; a falsy value yields ``[]``.
        markdown: When true, the title is wrapped in ``**`` for bold.

    Returns:
        The header, a provider line (with the plan in parentheses when
        known), one line per window, every detail line, and finally an
        ``Unavailable: ...`` line if a reason is set.
    """
    if not snapshot:
        return []
    bold = "**" if markdown else ""
    provider_line = f"Provider: {snapshot.provider}"
    if snapshot.plan:
        provider_line = f"Provider: {snapshot.provider} ({snapshot.plan})"
    rendered = [f"📈 {bold}{snapshot.title}{bold}", provider_line]
    for win in snapshot.windows:
        if win.used_percent is None:
            entry = f"{win.label}: unavailable"
        else:
            consumed = float(win.used_percent)
            left = max(0, round(100 - consumed))
            spent = max(0, round(consumed))
            entry = f"{win.label}: {left}% remaining ({spent}% used)"
        # A reset time takes precedence over free-form detail text.
        if win.reset_at:
            entry += f" • resets {_format_reset(win.reset_at)}"
        elif win.detail:
            entry += f" • {win.detail}"
        rendered.append(entry)
    rendered.extend(snapshot.details)
    if snapshot.unavailable_reason:
        rendered.append(f"Unavailable: {snapshot.unavailable_reason}")
    return rendered
|
||||
|
||||
|
||||
def _resolve_codex_usage_url(base_url: str) -> str:
|
||||
normalized = (base_url or "").strip().rstrip("/")
|
||||
if not normalized:
|
||||
normalized = "https://chatgpt.com/backend-api/codex"
|
||||
if normalized.endswith("/codex"):
|
||||
normalized = normalized[: -len("/codex")]
|
||||
if "/backend-api" in normalized:
|
||||
return normalized + "/wham/usage"
|
||||
return normalized + "/api/codex/usage"
|
||||
|
||||
|
||||
def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]:
    """Query the Codex usage endpoint and convert the reply to a snapshot.

    Resolves runtime credentials, issues one authenticated GET, and maps
    the ``rate_limit`` windows plus the ``credits`` section of the payload.
    HTTP and parsing errors propagate to the caller.
    """
    credentials = resolve_codex_runtime_credentials(refresh_if_expiring=True)
    stored_tokens = _read_codex_tokens().get("tokens") or {}
    account_id = str(stored_tokens.get("account_id", "") or "").strip() or None

    request_headers = {
        "Authorization": f"Bearer {credentials['api_key']}",
        "Accept": "application/json",
        "User-Agent": "codex-cli",
    }
    if account_id:
        request_headers["ChatGPT-Account-Id"] = account_id

    with httpx.Client(timeout=15.0) as http:
        reply = http.get(
            _resolve_codex_usage_url(credentials.get("base_url", "")),
            headers=request_headers,
        )
        reply.raise_for_status()
        body = reply.json() or {}

    # Windows that do not report ``used_percent`` are omitted entirely.
    limits = body.get("rate_limit") or {}
    window_labels = {"primary_window": "Session", "secondary_window": "Weekly"}
    usage_windows: list[AccountUsageWindow] = []
    for field, title in window_labels.items():
        entry = limits.get(field) or {}
        percent = entry.get("used_percent")
        if percent is not None:
            usage_windows.append(
                AccountUsageWindow(
                    label=title,
                    used_percent=float(percent),
                    reset_at=_parse_dt(entry.get("reset_at")),
                )
            )

    notes: list[str] = []
    credit_info = body.get("credits") or {}
    if credit_info.get("has_credits"):
        amount = credit_info.get("balance")
        if isinstance(amount, (int, float)):
            notes.append(f"Credits balance: ${float(amount):.2f}")
        elif credit_info.get("unlimited"):
            # NOTE(review): assumes "unlimited" is only consulted when
            # has_credits is set - confirm against the original nesting.
            notes.append("Credits balance: unlimited")

    return AccountUsageSnapshot(
        provider="openai-codex",
        source="usage_api",
        fetched_at=_utc_now(),
        plan=_title_case_slug(body.get("plan_type")),
        windows=tuple(usage_windows),
        details=tuple(notes),
    )
|
||||
|
||||
|
||||
def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]:
    """Query Anthropic's OAuth usage endpoint and build a snapshot.

    Returns ``None`` when no token is available, and an "unavailable"
    snapshot when the token is not an OAuth token. HTTP and parsing errors
    propagate to the caller.
    """
    token = (resolve_anthropic_token() or "").strip()
    if not token:
        return None
    if not _is_oauth_token(token):
        return AccountUsageSnapshot(
            provider="anthropic",
            source="oauth_usage_api",
            fetched_at=_utc_now(),
            unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.",
        )

    request_headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/json",
        "Content-Type": "application/json",
        "anthropic-beta": "oauth-2025-04-20",
        "User-Agent": "claude-code/2.1.0",
    }
    with httpx.Client(timeout=15.0) as http:
        reply = http.get("https://api.anthropic.com/api/oauth/usage", headers=request_headers)
        reply.raise_for_status()
        body = reply.json() or {}

    window_labels = {
        "five_hour": "Current session",
        "seven_day": "Current week",
        "seven_day_opus": "Opus week",
        "seven_day_sonnet": "Sonnet week",
    }
    usage_windows: list[AccountUsageWindow] = []
    for field, title in window_labels.items():
        entry = body.get(field) or {}
        utilization = entry.get("utilization")
        if utilization is None:
            continue
        ratio = float(utilization)
        # Values <= 1 are read as fractions and scaled to percent.
        # NOTE(review): a genuine 1% reported in percent units would be
        # shown as 100% - confirm the API's unit.
        percent = ratio * 100 if ratio <= 1 else ratio
        usage_windows.append(
            AccountUsageWindow(
                label=title,
                used_percent=percent,
                reset_at=_parse_dt(entry.get("resets_at")),
            )
        )

    notes: list[str] = []
    overage = body.get("extra_usage") or {}
    if overage.get("is_enabled"):
        used_credits = overage.get("used_credits")
        monthly_limit = overage.get("monthly_limit")
        currency = overage.get("currency") or "USD"
        both_numeric = isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float))
        if both_numeric:
            notes.append(f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}")

    return AccountUsageSnapshot(
        provider="anthropic",
        source="oauth_usage_api",
        fetched_at=_utc_now(),
        windows=tuple(usage_windows),
        details=tuple(notes),
    )
|
||||
|
||||
|
||||
def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]:
    """Query OpenRouter's ``/credits`` and ``/key`` endpoints for a snapshot.

    Args:
        base_url: Explicit base URL passed through to provider resolution.
        api_key: Explicit API key passed through to provider resolution.

    Returns:
        ``None`` when no API key resolves; otherwise a snapshot with the
        credit balance, an optional key-quota window and key-usage totals.
        A failing ``/credits`` call propagates; a failing ``/key`` call is
        tolerated and simply yields no key data.
    """
    resolved = resolve_runtime_provider(
        requested="openrouter",
        explicit_base_url=base_url,
        explicit_api_key=api_key,
    )
    bearer = str(resolved.get("api_key", "") or "").strip()
    if not bearer:
        return None

    api_root = str(resolved.get("base_url", "") or "").rstrip("/")
    auth_headers = {
        "Authorization": f"Bearer {bearer}",
        "Accept": "application/json",
    }
    with httpx.Client(timeout=10.0) as http:
        credits_reply = http.get(f"{api_root}/credits", headers=auth_headers)
        credits_reply.raise_for_status()
        credit_data = (credits_reply.json() or {}).get("data") or {}
        # The key endpoint is best-effort: any failure means "no key data".
        try:
            key_reply = http.get(f"{api_root}/key", headers=auth_headers)
            key_reply.raise_for_status()
            key_info = (key_reply.json() or {}).get("data") or {}
        except Exception:
            key_info = {}

    granted = float(credit_data.get("total_credits") or 0.0)
    consumed = float(credit_data.get("total_usage") or 0.0)
    notes = [f"Credits balance: ${max(0.0, granted - consumed):.2f}"]

    numeric = (int, float)
    cap = key_info.get("limit")
    left = key_info.get("limit_remaining")
    reset_label = str(key_info.get("limit_reset") or "").strip()
    spent_total = key_info.get("usage")

    quota_windows: list[AccountUsageWindow] = []
    has_valid_quota = (
        isinstance(cap, numeric)
        and float(cap) > 0
        and isinstance(left, numeric)
        and 0 <= float(left) <= float(cap)
    )
    if has_valid_quota:
        cap_value = float(cap)
        left_value = float(left)
        summary = f"${left_value:.2f} of ${cap_value:.2f} remaining"
        if reset_label:
            summary = " • ".join((summary, f"resets {reset_label}"))
        quota_windows.append(
            AccountUsageWindow(
                label="API key quota",
                used_percent=((cap_value - left_value) / cap_value) * 100,
                detail=summary,
            )
        )

    if isinstance(spent_total, numeric):
        segments = [f"API key usage: ${float(spent_total):.2f} total"]
        periods = {
            "usage_daily": "today",
            "usage_weekly": "this week",
            "usage_monthly": "this month",
        }
        for field, period in periods.items():
            amount = key_info.get(field)
            # Zero or missing periods are left out of the summary.
            if isinstance(amount, numeric) and float(amount) > 0:
                segments.append(f"${float(amount):.2f} {period}")
        notes.append(" • ".join(segments))

    return AccountUsageSnapshot(
        provider="openrouter",
        source="credits_api",
        fetched_at=_utc_now(),
        windows=tuple(quota_windows),
        details=tuple(notes),
    )
|
||||
|
||||
|
||||
def fetch_account_usage(
    provider: Optional[str],
    *,
    base_url: Optional[str] = None,
    api_key: Optional[str] = None,
) -> Optional[AccountUsageSnapshot]:
    """Fetch account-usage information for ``provider``, best-effort.

    Args:
        provider: Provider name; matched case-insensitively after trimming.
        base_url: Optional base URL override (used for OpenRouter only).
        api_key: Optional API key override (used for OpenRouter only).

    Returns:
        A snapshot for ``openai-codex``, ``anthropic`` or ``openrouter``;
        ``None`` for an empty/``auto``/``custom``/unknown provider or when
        the lookup fails for any reason. This function never raises for
        lookup failures; they are recorded at debug level instead.
    """
    import logging

    normalized = str(provider or "").strip().lower()
    if normalized in {"", "auto", "custom"}:
        return None
    try:
        if normalized == "openai-codex":
            return _fetch_codex_account_usage()
        if normalized == "anthropic":
            return _fetch_anthropic_account_usage()
        if normalized == "openrouter":
            return _fetch_openrouter_account_usage(base_url, api_key)
    except Exception:
        # Usage display is optional, so failures must not reach the caller,
        # but leave a trace so a broken lookup can be diagnosed.
        logging.getLogger(__name__).debug(
            "Account usage lookup failed for provider %r", normalized, exc_info=True
        )
        return None
    return None
|
||||
+74
-155
@@ -17,8 +17,8 @@ import os
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from utils import normalize_proxy_env_vars
|
||||
|
||||
try:
|
||||
import anthropic as _anthropic_sdk
|
||||
@@ -116,63 +116,6 @@ def _get_anthropic_max_output(model: str) -> int:
|
||||
return best_val
|
||||
|
||||
|
||||
def _resolve_positive_anthropic_max_tokens(value) -> Optional[int]:
|
||||
"""Return ``value`` floored to a positive int, or ``None`` if it is not a
|
||||
finite positive number. Ported from openclaw/openclaw#66664.
|
||||
|
||||
Anthropic's Messages API rejects ``max_tokens`` values that are 0,
|
||||
negative, non-integer, or non-finite with HTTP 400. Python's ``or``
|
||||
idiom (``max_tokens or fallback``) correctly catches ``0`` but lets
|
||||
negative ints and fractional floats (``-1``, ``0.5``) through to the
|
||||
API, producing a user-visible failure instead of a local error.
|
||||
"""
|
||||
# Booleans are a subclass of int — exclude explicitly so ``True`` doesn't
|
||||
# silently become 1 and ``False`` doesn't become 0.
|
||||
if isinstance(value, bool):
|
||||
return None
|
||||
if not isinstance(value, (int, float)):
|
||||
return None
|
||||
try:
|
||||
import math
|
||||
if not math.isfinite(value):
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
floored = int(value) # truncates toward zero for floats
|
||||
return floored if floored > 0 else None
|
||||
|
||||
|
||||
def _resolve_anthropic_messages_max_tokens(
|
||||
requested,
|
||||
model: str,
|
||||
context_length: Optional[int] = None,
|
||||
) -> int:
|
||||
"""Resolve the ``max_tokens`` budget for an Anthropic Messages call.
|
||||
|
||||
Prefers ``requested`` when it is a positive finite number; otherwise
|
||||
falls back to the model's output ceiling. Raises ``ValueError`` if no
|
||||
positive budget can be resolved (should not happen with current model
|
||||
table defaults, but guards against a future regression where
|
||||
``_get_anthropic_max_output`` could return ``0``).
|
||||
|
||||
Separately, callers apply a context-window clamp — this resolver does
|
||||
not, to keep the positive-value contract independent of endpoint
|
||||
specifics.
|
||||
|
||||
Ported from openclaw/openclaw#66664 (resolveAnthropicMessagesMaxTokens).
|
||||
"""
|
||||
resolved = _resolve_positive_anthropic_max_tokens(requested)
|
||||
if resolved is not None:
|
||||
return resolved
|
||||
fallback = _get_anthropic_max_output(model)
|
||||
if fallback > 0:
|
||||
return fallback
|
||||
raise ValueError(
|
||||
f"Anthropic Messages adapter requires a positive max_tokens value for "
|
||||
f"model {model!r}; got {requested!r} and no model default resolved."
|
||||
)
|
||||
|
||||
|
||||
def _supports_adaptive_thinking(model: str) -> bool:
|
||||
"""Return True for Claude 4.6+ models that support adaptive thinking."""
|
||||
return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
|
||||
@@ -322,14 +265,6 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
|
||||
return True # Any other endpoint is a third-party proxy
|
||||
|
||||
|
||||
def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
|
||||
"""Return True for Kimi's /coding endpoint that requires claude-code UA."""
|
||||
normalized = _normalize_base_url_text(base_url)
|
||||
if not normalized:
|
||||
return False
|
||||
return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
|
||||
|
||||
|
||||
def _requires_bearer_auth(base_url: str | None) -> bool:
|
||||
"""Return True for Anthropic-compatible providers that require Bearer auth.
|
||||
|
||||
@@ -357,15 +292,9 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
|
||||
return _COMMON_BETAS
|
||||
|
||||
|
||||
def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
|
||||
def build_anthropic_client(api_key: str, base_url: str = None):
|
||||
"""Create an Anthropic client, auto-detecting setup-tokens vs API keys.
|
||||
|
||||
If *timeout* is provided it overrides the default 900s read timeout. The
|
||||
connect timeout stays at 10s. Callers pass this from the per-provider /
|
||||
per-model ``request_timeout_seconds`` config so Anthropic-native and
|
||||
Anthropic-compatible providers respect the same knob as OpenAI-wire
|
||||
providers.
|
||||
|
||||
Returns an anthropic.Anthropic instance.
|
||||
"""
|
||||
if _anthropic_sdk is None:
|
||||
@@ -373,32 +302,19 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
|
||||
"The 'anthropic' package is required for the Anthropic provider. "
|
||||
"Install it with: pip install 'anthropic>=0.39.0'"
|
||||
)
|
||||
|
||||
normalize_proxy_env_vars()
|
||||
|
||||
from httpx import Timeout
|
||||
|
||||
normalized_base_url = _normalize_base_url_text(base_url)
|
||||
_read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
|
||||
kwargs = {
|
||||
"timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
|
||||
"timeout": Timeout(timeout=900.0, connect=10.0),
|
||||
}
|
||||
if normalized_base_url:
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
common_betas = _common_betas_for_base_url(normalized_base_url)
|
||||
|
||||
if _is_kimi_coding_endpoint(base_url):
|
||||
# Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
|
||||
# to be recognized as a valid Coding Agent. Without it, returns 403.
|
||||
# Check this BEFORE _requires_bearer_auth since both match api.kimi.com/coding.
|
||||
kwargs["api_key"] = api_key
|
||||
kwargs["default_headers"] = {
|
||||
"User-Agent": "claude-code/0.1.0",
|
||||
**( {"anthropic-beta": ",".join(common_betas)} if common_betas else {} )
|
||||
}
|
||||
elif _requires_bearer_auth(normalized_base_url):
|
||||
if _requires_bearer_auth(normalized_base_url):
|
||||
# Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
|
||||
# Authorization: Bearer *** for regular API keys. Route those endpoints
|
||||
# Authorization: Bearer even for regular API keys. Route those endpoints
|
||||
# through auth_token so the SDK sends Bearer auth instead of x-api-key.
|
||||
# Check this before OAuth token shape detection because MiniMax secrets do
|
||||
# not use Anthropic's sk-ant-api prefix and would otherwise be misread as
|
||||
@@ -1139,31 +1055,6 @@ def convert_messages_to_anthropic(
|
||||
"name": fn.get("name", ""),
|
||||
"input": parsed_args,
|
||||
})
|
||||
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
|
||||
# tool-call messages to carry reasoning_content when thinking is
|
||||
# enabled server-side. Preserve it as a thinking block so Kimi
|
||||
# can validate the message history. See hermes-agent#13848.
|
||||
#
|
||||
# Accept empty string "" — _copy_reasoning_content_for_api()
|
||||
# injects "" as a tier-3 fallback for Kimi tool-call messages
|
||||
# that had no reasoning. Kimi requires the field to exist, even
|
||||
# if empty.
|
||||
#
|
||||
# Prepend (not append): Anthropic protocol requires thinking
|
||||
# blocks before text and tool_use blocks.
|
||||
#
|
||||
# Guard: only add when reasoning_details didn't already contribute
|
||||
# thinking blocks. On native Anthropic, reasoning_details produces
|
||||
# signed thinking blocks — adding another unsigned one from
|
||||
# reasoning_content would create a duplicate (same text) that gets
|
||||
# downgraded to a spurious text block on the last assistant message.
|
||||
reasoning_content = m.get("reasoning_content")
|
||||
_already_has_thinking = any(
|
||||
isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")
|
||||
for b in blocks
|
||||
)
|
||||
if isinstance(reasoning_content, str) and not _already_has_thinking:
|
||||
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
|
||||
# Anthropic rejects empty assistant content
|
||||
effective = blocks or content
|
||||
if not effective or effective == "":
|
||||
@@ -1319,7 +1210,6 @@ def convert_messages_to_anthropic(
|
||||
# cache markers can interfere with signature validation.
|
||||
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
|
||||
_is_third_party = _is_third_party_anthropic_endpoint(base_url)
|
||||
_is_kimi = _is_kimi_coding_endpoint(base_url)
|
||||
|
||||
last_assistant_idx = None
|
||||
for i in range(len(result) - 1, -1, -1):
|
||||
@@ -1331,25 +1221,7 @@ def convert_messages_to_anthropic(
|
||||
if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
|
||||
continue
|
||||
|
||||
if _is_kimi:
|
||||
# Kimi's /coding endpoint enables thinking server-side and
|
||||
# requires unsigned thinking blocks on replayed assistant
|
||||
# tool-call messages. Strip signed Anthropic blocks (Kimi
|
||||
# can't validate signatures) but preserve the unsigned ones
|
||||
# we synthesised from reasoning_content above.
|
||||
new_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
new_content.append(b)
|
||||
continue
|
||||
if b.get("signature") or b.get("data"):
|
||||
# Anthropic-signed block — Kimi can't validate, strip
|
||||
continue
|
||||
# Unsigned thinking (synthesised from reasoning_content) —
|
||||
# keep it: Kimi needs it for message-history validation.
|
||||
new_content.append(b)
|
||||
m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
|
||||
elif _is_third_party or idx != last_assistant_idx:
|
||||
if _is_third_party or idx != last_assistant_idx:
|
||||
# Third-party endpoint: strip ALL thinking blocks from every
|
||||
# assistant message — signatures are Anthropic-proprietary.
|
||||
# Direct Anthropic: strip from non-latest assistant messages only.
|
||||
@@ -1447,12 +1319,7 @@ def build_anthropic_kwargs(
|
||||
|
||||
model = normalize_model_name(model, preserve_dots=preserve_dots)
|
||||
# effective_max_tokens = output cap for this call (≠ total context window)
|
||||
# Use the resolver helper so non-positive values (negative ints,
|
||||
# fractional floats, NaN, non-numeric) fail locally with a clear error
|
||||
# rather than 400-ing at the Anthropic API. See openclaw/openclaw#66664.
|
||||
effective_max_tokens = _resolve_anthropic_messages_max_tokens(
|
||||
max_tokens, model, context_length=context_length
|
||||
)
|
||||
effective_max_tokens = max_tokens or _get_anthropic_max_output(model)
|
||||
|
||||
# Clamp output cap to fit inside the total context window.
|
||||
# Only matters for small custom endpoints where context_length < native
|
||||
@@ -1531,25 +1398,11 @@ def build_anthropic_kwargs(
|
||||
# MiniMax Anthropic-compat endpoints support thinking (manual mode only,
|
||||
# not adaptive). Haiku does NOT support extended thinking — skip entirely.
|
||||
#
|
||||
# Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
|
||||
# its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
|
||||
# validates the message history and requires every prior assistant
|
||||
# tool-call message to carry OpenAI-style ``reasoning_content``. The
|
||||
# Anthropic path never populates that field, and
|
||||
# ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
|
||||
# on third-party endpoints — so the request fails with HTTP 400
|
||||
# "thinking is enabled but reasoning_content is missing in assistant
|
||||
# tool call message at index N". Kimi's reasoning is driven server-side
|
||||
# on the /coding route, so skip Anthropic's thinking parameter entirely
|
||||
# for that host. (Kimi on chat_completions enables thinking via
|
||||
# extra_body in the ChatCompletionsTransport — see #13503.)
|
||||
#
|
||||
# On 4.7+ the `thinking.display` field defaults to "omitted", which
|
||||
# silently hides reasoning text that Hermes surfaces in its CLI. We
|
||||
# request "summarized" so the reasoning blocks stay populated — matching
|
||||
# 4.6 behavior and preserving the activity-feed UX during long tool runs.
|
||||
_is_kimi_coding = _is_kimi_coding_endpoint(base_url)
|
||||
if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
|
||||
effort = str(reasoning_config.get("effort", "medium")).lower()
|
||||
budget = THINKING_BUDGET.get(effort, 8000)
|
||||
@@ -1598,4 +1451,70 @@ def build_anthropic_kwargs(
|
||||
return kwargs
|
||||
|
||||
|
||||
def normalize_anthropic_response(
|
||||
response,
|
||||
strip_tool_prefix: bool = False,
|
||||
) -> Tuple[SimpleNamespace, str]:
|
||||
"""Normalize Anthropic response to match the shape expected by AIAgent.
|
||||
|
||||
Returns (assistant_message, finish_reason) where assistant_message has
|
||||
.content, .tool_calls, and .reasoning attributes.
|
||||
|
||||
When *strip_tool_prefix* is True, removes the ``mcp_`` prefix that was
|
||||
added to tool names for OAuth Claude Code compatibility.
|
||||
"""
|
||||
text_parts = []
|
||||
reasoning_parts = []
|
||||
reasoning_details = []
|
||||
tool_calls = []
|
||||
|
||||
for block in response.content:
|
||||
if block.type == "text":
|
||||
text_parts.append(block.text)
|
||||
elif block.type == "thinking":
|
||||
reasoning_parts.append(block.thinking)
|
||||
block_dict = _to_plain_data(block)
|
||||
if isinstance(block_dict, dict):
|
||||
reasoning_details.append(block_dict)
|
||||
elif block.type == "tool_use":
|
||||
name = block.name
|
||||
if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
|
||||
name = name[len(_MCP_TOOL_PREFIX):]
|
||||
tool_calls.append(
|
||||
SimpleNamespace(
|
||||
id=block.id,
|
||||
type="function",
|
||||
function=SimpleNamespace(
|
||||
name=name,
|
||||
arguments=json.dumps(block.input),
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
# Map Anthropic stop_reason to OpenAI finish_reason.
|
||||
# Newer stop reasons added in Claude 4.5+ / 4.7:
|
||||
# - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
|
||||
# - model_context_window_exceeded: hit context limit (not max_tokens)
|
||||
# Both need distinct handling upstream — a refusal should surface to the
|
||||
# user with a clear message, and a context-window overflow should trigger
|
||||
# compression/truncation rather than be treated as normal end-of-turn.
|
||||
stop_reason_map = {
|
||||
"end_turn": "stop",
|
||||
"tool_use": "tool_calls",
|
||||
"max_tokens": "length",
|
||||
"stop_sequence": "stop",
|
||||
"refusal": "content_filter",
|
||||
"model_context_window_exceeded": "length",
|
||||
}
|
||||
finish_reason = stop_reason_map.get(response.stop_reason, "stop")
|
||||
|
||||
return (
|
||||
SimpleNamespace(
|
||||
content="\n".join(text_parts) if text_parts else None,
|
||||
tool_calls=tool_calls or None,
|
||||
reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
|
||||
reasoning_content=None,
|
||||
reasoning_details=reasoning_details or None,
|
||||
),
|
||||
finish_reason,
|
||||
)
|
||||
|
||||
+128
-444
@@ -48,7 +48,6 @@ from openai import OpenAI
|
||||
from agent.credential_pool import load_pool
|
||||
from hermes_cli.config import get_hermes_home
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -96,37 +95,51 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
|
||||
return _PROVIDER_ALIASES.get(normalized, normalized)
|
||||
|
||||
|
||||
# Sentinel: when returned by _fixed_temperature_for_model(), callers must
|
||||
# strip the ``temperature`` key from API kwargs entirely so the provider's
|
||||
# server-side default applies. Kimi/Moonshot models manage temperature
|
||||
# internally — sending *any* value (even the "correct" one) can conflict
|
||||
# with gateway-side mode selection (thinking → 1.0, non-thinking → 0.6).
|
||||
OMIT_TEMPERATURE: object = object()
|
||||
_FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
|
||||
"kimi-for-coding": 0.6,
|
||||
}
|
||||
|
||||
# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents:
|
||||
# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed
|
||||
# value 0.6. Any other value will result in an error." The same lock applies
|
||||
# to the other k2.* models served on that endpoint. Enumerated explicitly so
|
||||
# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on
|
||||
# the standard chat API and third parties) are NOT clamped.
|
||||
# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart
|
||||
_KIMI_INSTANT_MODELS: frozenset = frozenset({
|
||||
"kimi-k2.5",
|
||||
"kimi-k2-turbo-preview",
|
||||
"kimi-k2-0905-preview",
|
||||
})
|
||||
_KIMI_THINKING_MODELS: frozenset = frozenset({
|
||||
"kimi-k2-thinking",
|
||||
"kimi-k2-thinking-turbo",
|
||||
})
|
||||
|
||||
|
||||
def _is_kimi_model(model: Optional[str]) -> bool:
|
||||
"""True for any Kimi / Moonshot model that manages temperature server-side."""
|
||||
bare = (model or "").strip().lower().rsplit("/", 1)[-1]
|
||||
return bare.startswith("kimi-") or bare == "kimi"
|
||||
def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]:
|
||||
"""Return a required temperature override for models with strict contracts.
|
||||
|
||||
Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on
|
||||
the k2.5 family. Non-thinking variants require exactly 0.6; thinking
|
||||
variants require 1.0. An optional ``vendor/`` prefix (e.g.
|
||||
``moonshotai/kimi-k2.5``) is tolerated for aggregator routings.
|
||||
|
||||
def _fixed_temperature_for_model(
|
||||
model: Optional[str],
|
||||
base_url: Optional[str] = None,
|
||||
) -> "Optional[float] | object":
|
||||
"""Return a temperature directive for models with strict contracts.
|
||||
|
||||
Returns:
|
||||
``OMIT_TEMPERATURE`` — caller must remove the ``temperature`` key so the
|
||||
provider chooses its own default. Used for all Kimi / Moonshot
|
||||
models whose gateway selects temperature server-side.
|
||||
``float`` — a specific value the caller must use (reserved for future
|
||||
models with fixed-temperature contracts).
|
||||
``None`` — no override; caller should use its own default.
|
||||
Returns ``None`` for every other model, including ``kimi-k2-instruct*``
|
||||
which is the separate non-coding K2 family with variable temperature.
|
||||
"""
|
||||
if _is_kimi_model(model):
|
||||
logger.debug("Omitting temperature for Kimi model %r (server-managed)", model)
|
||||
return OMIT_TEMPERATURE
|
||||
normalized = (model or "").strip().lower()
|
||||
fixed = _FIXED_TEMPERATURE_MODELS.get(normalized)
|
||||
if fixed is not None:
|
||||
logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model)
|
||||
return fixed
|
||||
bare = normalized.rsplit("/", 1)[-1]
|
||||
if bare in _KIMI_THINKING_MODELS:
|
||||
logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model)
|
||||
return 1.0
|
||||
if bare in _KIMI_INSTANT_MODELS:
|
||||
logger.debug("Forcing temperature=0.6 for kimi instant model %r", model)
|
||||
return 0.6
|
||||
return None
|
||||
|
||||
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
|
||||
@@ -134,7 +147,6 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||
"gemini": "gemini-3-flash-preview",
|
||||
"zai": "glm-4.5-flash",
|
||||
"kimi-coding": "kimi-k2-turbo-preview",
|
||||
"stepfun": "step-3.5-flash",
|
||||
"kimi-coding-cn": "kimi-k2-turbo-preview",
|
||||
"minimax": "MiniMax-M2.7",
|
||||
"minimax-cn": "MiniMax-M2.7",
|
||||
@@ -162,16 +174,6 @@ _OR_HEADERS = {
|
||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||
}
|
||||
|
||||
# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
|
||||
# referrerUrl and X-Title maps to appName in the gateway's analytics.
|
||||
from hermes_cli import __version__ as _HERMES_VERSION
|
||||
|
||||
_AI_GATEWAY_HEADERS = {
|
||||
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
|
||||
"X-Title": "Hermes Agent",
|
||||
"User-Agent": f"HermesAgent/{_HERMES_VERSION}",
|
||||
}
|
||||
|
||||
# Nous Portal extra_body for product attribution.
|
||||
# Callers should pass this as extra_body in chat.completions.create()
|
||||
# when the auxiliary client is backed by Nous Portal.
|
||||
@@ -183,6 +185,8 @@ auxiliary_is_nous: bool = False
|
||||
# Default auxiliary models per provider
|
||||
_OPENROUTER_MODEL = "google/gemini-3-flash-preview"
|
||||
_NOUS_MODEL = "google/gemini-3-flash-preview"
|
||||
_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni"
|
||||
_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro"
|
||||
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
|
||||
_ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
|
||||
_AUTH_JSON_PATH = get_hermes_home() / "auth.json"
|
||||
@@ -196,45 +200,6 @@ _CODEX_AUX_MODEL = "gpt-5.2-codex"
|
||||
_CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"
|
||||
|
||||
|
||||
def _codex_cloudflare_headers(access_token: str) -> Dict[str, str]:
|
||||
"""Headers required to avoid Cloudflare 403s on chatgpt.com/backend-api/codex.
|
||||
|
||||
The Cloudflare layer in front of the Codex endpoint whitelists a small set of
|
||||
first-party originators (``codex_cli_rs``, ``codex_vscode``, ``codex_sdk_ts``,
|
||||
anything starting with ``Codex``). Requests from non-residential IPs (VPS,
|
||||
server-hosted agents) that don't advertise an allowed originator are served
|
||||
a 403 with ``cf-mitigated: challenge`` regardless of auth correctness.
|
||||
|
||||
We pin ``originator: codex_cli_rs`` to match the upstream codex-rs CLI, set
|
||||
``User-Agent`` to a codex_cli_rs-shaped string (beats SDK fingerprinting),
|
||||
and extract ``ChatGPT-Account-ID`` (canonical casing, from codex-rs
|
||||
``auth.rs``) out of the OAuth JWT's ``chatgpt_account_id`` claim.
|
||||
|
||||
Malformed tokens are tolerated — we drop the account-ID header rather than
|
||||
raise, so a bad token still surfaces as an auth error (401) instead of a
|
||||
crash at client construction.
|
||||
"""
|
||||
headers = {
|
||||
"User-Agent": "codex_cli_rs/0.0.0 (Hermes Agent)",
|
||||
"originator": "codex_cli_rs",
|
||||
}
|
||||
if not isinstance(access_token, str) or not access_token.strip():
|
||||
return headers
|
||||
try:
|
||||
import base64
|
||||
parts = access_token.split(".")
|
||||
if len(parts) < 2:
|
||||
return headers
|
||||
payload_b64 = parts[1] + "=" * (-len(parts[1]) % 4)
|
||||
claims = json.loads(base64.urlsafe_b64decode(payload_b64))
|
||||
acct_id = claims.get("https://api.openai.com/auth", {}).get("chatgpt_account_id")
|
||||
if isinstance(acct_id, str) and acct_id:
|
||||
headers["ChatGPT-Account-ID"] = acct_id
|
||||
except Exception:
|
||||
pass
|
||||
return headers
|
||||
|
||||
|
||||
def _to_openai_base_url(base_url: str) -> str:
|
||||
"""Normalize an Anthropic-style base URL to OpenAI-compatible format.
|
||||
|
||||
@@ -573,8 +538,7 @@ class _AnthropicCompletionsAdapter:
|
||||
self._is_oauth = is_oauth
|
||||
|
||||
def create(self, **kwargs) -> Any:
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
from agent.transports import get_transport
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response
|
||||
|
||||
messages = kwargs.get("messages", [])
|
||||
model = kwargs.get("model", self._model)
|
||||
@@ -611,19 +575,7 @@ class _AnthropicCompletionsAdapter:
|
||||
anthropic_kwargs["temperature"] = temperature
|
||||
|
||||
response = self._client.messages.create(**anthropic_kwargs)
|
||||
_transport = get_transport("anthropic_messages")
|
||||
_nr = _transport.normalize_response(
|
||||
response, strip_tool_prefix=self._is_oauth
|
||||
)
|
||||
|
||||
# ToolCall already duck-types as OpenAI shape (.type, .function.name,
|
||||
# .function.arguments) via properties, so no wrapping needed.
|
||||
assistant_message = SimpleNamespace(
|
||||
content=_nr.content,
|
||||
tool_calls=_nr.tool_calls,
|
||||
reasoning=_nr.reasoning,
|
||||
)
|
||||
finish_reason = _nr.finish_reason
|
||||
assistant_message, finish_reason = normalize_anthropic_response(response)
|
||||
|
||||
usage = None
|
||||
if hasattr(response, "usage") and response.usage:
|
||||
@@ -740,33 +692,6 @@ def _nous_base_url() -> str:
|
||||
return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)
|
||||
|
||||
|
||||
def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[str, str]]:
|
||||
"""Return fresh Nous runtime credentials when available.
|
||||
|
||||
This mirrors the main agent's 401 recovery path and keeps auxiliary
|
||||
clients aligned with the singleton auth store + mint flow instead of
|
||||
relying only on whatever raw tokens happen to be sitting in auth.json
|
||||
or the credential pool.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.auth import resolve_nous_runtime_credentials
|
||||
|
||||
creds = resolve_nous_runtime_credentials(
|
||||
min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
|
||||
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
|
||||
force_mint=force_refresh,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc)
|
||||
return None
|
||||
|
||||
api_key = str(creds.get("api_key") or "").strip()
|
||||
base_url = str(creds.get("base_url") or "").strip().rstrip("/")
|
||||
if not api_key or not base_url:
|
||||
return None
|
||||
return api_key, base_url
|
||||
|
||||
|
||||
def _read_codex_access_token() -> Optional[str]:
|
||||
"""Read a valid, non-expired Codex OAuth access token from Hermes auth store.
|
||||
|
||||
@@ -850,15 +775,10 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
if model is None:
|
||||
continue # skip provider if we don't know a valid aux model
|
||||
logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
|
||||
if provider_id == "gemini":
|
||||
from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url
|
||||
|
||||
if is_native_gemini_base_url(base_url):
|
||||
return GeminiNativeClient(api_key=api_key, base_url=base_url), model
|
||||
extra = {}
|
||||
if base_url_host_matches(base_url, "api.kimi.com"):
|
||||
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
|
||||
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
|
||||
if "api.kimi.com" in base_url.lower():
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
|
||||
elif "api.githubcopilot.com" in base_url.lower():
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
@@ -876,15 +796,10 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
if model is None:
|
||||
continue # skip provider if we don't know a valid aux model
|
||||
logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
|
||||
if provider_id == "gemini":
|
||||
from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url
|
||||
|
||||
if is_native_gemini_base_url(base_url):
|
||||
return GeminiNativeClient(api_key=api_key, base_url=base_url), model
|
||||
extra = {}
|
||||
if base_url_host_matches(base_url, "api.kimi.com"):
|
||||
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
|
||||
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
|
||||
if "api.kimi.com" in base_url.lower():
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
|
||||
elif "api.githubcopilot.com" in base_url.lower():
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
@@ -933,50 +848,29 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
pass
|
||||
|
||||
nous = _read_nous_auth()
|
||||
runtime = _resolve_nous_runtime_api(force_refresh=False)
|
||||
if runtime is None and not nous:
|
||||
if not nous:
|
||||
return None, None
|
||||
global auxiliary_is_nous
|
||||
auxiliary_is_nous = True
|
||||
logger.debug("Auxiliary client: Nous Portal")
|
||||
|
||||
# Ask the Portal which model it currently recommends for this task type.
|
||||
# The /api/nous/recommended-models endpoint is the authoritative source:
|
||||
# it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model
|
||||
# auto-detects the caller's tier via check_nous_free_tier(). Fall back to
|
||||
# _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable
|
||||
# or returns a null recommendation for this task type.
|
||||
model = _NOUS_MODEL
|
||||
try:
|
||||
from hermes_cli.models import get_nous_recommended_aux_model
|
||||
recommended = get_nous_recommended_aux_model(vision=vision)
|
||||
if recommended:
|
||||
model = recommended
|
||||
logger.debug(
|
||||
"Auxiliary/%s: using Portal-recommended model %s",
|
||||
"vision" if vision else "text", model,
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
"Auxiliary/%s: no Portal recommendation, falling back to %s",
|
||||
"vision" if vision else "text", model,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"Auxiliary/%s: recommended-models lookup failed (%s); "
|
||||
"falling back to %s",
|
||||
"vision" if vision else "text", exc, model,
|
||||
)
|
||||
|
||||
if runtime is not None:
|
||||
api_key, base_url = runtime
|
||||
if nous.get("source") == "pool":
|
||||
model = "gemini-3-flash"
|
||||
else:
|
||||
api_key = _nous_api_key(nous or {})
|
||||
base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
|
||||
model = _NOUS_MODEL
|
||||
# Free-tier users can't use paid auxiliary models — use the free
|
||||
# models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks.
|
||||
try:
|
||||
from hermes_cli.models import check_nous_free_tier
|
||||
if check_nous_free_tier():
|
||||
model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL
|
||||
logger.debug("Free-tier Nous account — using %s for auxiliary/%s",
|
||||
model, "vision" if vision else "text")
|
||||
except Exception:
|
||||
pass
|
||||
return (
|
||||
OpenAI(
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
api_key=_nous_api_key(nous),
|
||||
base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"),
|
||||
),
|
||||
model,
|
||||
)
|
||||
@@ -1054,7 +948,7 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[st
|
||||
return None, None, None
|
||||
|
||||
custom_base = custom_base.strip().rstrip("/")
|
||||
if base_url_host_matches(custom_base, "openrouter.ai"):
|
||||
if "openrouter.ai" in custom_base.lower():
|
||||
# requested='custom' falls back to OpenRouter when no custom endpoint is
|
||||
# configured. Treat that as "no custom endpoint" for auxiliary routing.
|
||||
return None, None, None
|
||||
@@ -1088,8 +982,6 @@ def _validate_proxy_env_urls() -> None:
|
||||
"""
|
||||
from urllib.parse import urlparse
|
||||
|
||||
normalize_proxy_env_vars()
|
||||
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy"):
|
||||
value = str(os.environ.get(key) or "").strip()
|
||||
@@ -1124,7 +1016,7 @@ def _validate_base_url(base_url: str) -> None:
|
||||
) from exc
|
||||
|
||||
|
||||
def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
||||
def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
runtime = _resolve_custom_runtime()
|
||||
if len(runtime) == 2:
|
||||
custom_base, custom_key = runtime
|
||||
@@ -1140,23 +1032,6 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
||||
if custom_mode == "codex_responses":
|
||||
real_client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
return CodexAuxiliaryClient(real_client, model), model
|
||||
if custom_mode == "anthropic_messages":
|
||||
# Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
|
||||
# LiteLLM proxies, etc.). Must NEVER be treated as OAuth —
|
||||
# Anthropic OAuth claims only apply to api.anthropic.com.
|
||||
try:
|
||||
from agent.anthropic_adapter import build_anthropic_client
|
||||
real_client = build_anthropic_client(custom_key, custom_base)
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"Custom endpoint declares api_mode=anthropic_messages but the "
|
||||
"anthropic SDK is not installed — falling back to OpenAI-wire."
|
||||
)
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
return (
|
||||
AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
|
||||
model,
|
||||
)
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
|
||||
|
||||
@@ -1177,11 +1052,7 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
|
||||
return None, None
|
||||
base_url = _CODEX_AUX_BASE_URL
|
||||
logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
|
||||
real_client = OpenAI(
|
||||
api_key=codex_token,
|
||||
base_url=base_url,
|
||||
default_headers=_codex_cloudflare_headers(codex_token),
|
||||
)
|
||||
real_client = OpenAI(api_key=codex_token, base_url=base_url)
|
||||
return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
|
||||
|
||||
|
||||
@@ -1320,15 +1191,6 @@ def _is_connection_error(exc: Exception) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _is_auth_error(exc: Exception) -> bool:
|
||||
"""Detect auth failures that should trigger provider-specific refresh."""
|
||||
status = getattr(exc, "status_code", None)
|
||||
if status == 401:
|
||||
return True
|
||||
err_lower = str(exc).lower()
|
||||
return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
|
||||
|
||||
|
||||
def _try_payment_fallback(
|
||||
failed_provider: str,
|
||||
task: str = None,
|
||||
@@ -1486,13 +1348,6 @@ def _to_async_client(sync_client, model: str):
|
||||
return AsyncCodexAuxiliaryClient(sync_client), model
|
||||
if isinstance(sync_client, AnthropicAuxiliaryClient):
|
||||
return AsyncAnthropicAuxiliaryClient(sync_client), model
|
||||
try:
|
||||
from agent.gemini_native_adapter import GeminiNativeClient, AsyncGeminiNativeClient
|
||||
|
||||
if isinstance(sync_client, GeminiNativeClient):
|
||||
return AsyncGeminiNativeClient(sync_client), model
|
||||
except ImportError:
|
||||
pass
|
||||
try:
|
||||
from agent.copilot_acp_client import CopilotACPClient
|
||||
if isinstance(sync_client, CopilotACPClient):
|
||||
@@ -1504,15 +1359,15 @@ def _to_async_client(sync_client, model: str):
|
||||
"api_key": sync_client.api_key,
|
||||
"base_url": str(sync_client.base_url),
|
||||
}
|
||||
sync_base_url = str(sync_client.base_url)
|
||||
if base_url_host_matches(sync_base_url, "openrouter.ai"):
|
||||
base_lower = str(sync_client.base_url).lower()
|
||||
if "openrouter" in base_lower:
|
||||
async_kwargs["default_headers"] = dict(_OR_HEADERS)
|
||||
elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
|
||||
elif "api.githubcopilot.com" in base_lower:
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
async_kwargs["default_headers"] = copilot_default_headers()
|
||||
elif base_url_host_matches(sync_base_url, "api.kimi.com"):
|
||||
async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
|
||||
elif "api.kimi.com" in base_lower:
|
||||
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
|
||||
return AsyncOpenAI(**async_kwargs), model
|
||||
|
||||
|
||||
@@ -1588,7 +1443,8 @@ def resolve_provider_client(
|
||||
# Auto-detect: api.openai.com + codex model name pattern
|
||||
if api_mode and api_mode != "codex_responses":
|
||||
return False # explicit non-codex mode
|
||||
if base_url_hostname(base_url_str) == "api.openai.com":
|
||||
normalized_base = (base_url_str or "").strip().lower()
|
||||
if "api.openai.com" in normalized_base and "openrouter" not in normalized_base:
|
||||
model_lower = (model_str or "").lower()
|
||||
if "codex" in model_lower:
|
||||
return True
|
||||
@@ -1636,13 +1492,7 @@ def resolve_provider_client(
|
||||
|
||||
# ── Nous Portal (OAuth) ──────────────────────────────────────────
|
||||
if provider == "nous":
|
||||
# Detect vision tasks: either explicit model override from
|
||||
# _PROVIDER_VISION_MODELS, or caller passed a known vision model.
|
||||
_is_vision = (
|
||||
model in _PROVIDER_VISION_MODELS.values()
|
||||
or (model or "").strip().lower() == "mimo-v2-omni"
|
||||
)
|
||||
client, default = _try_nous(vision=_is_vision)
|
||||
client, default = _try_nous()
|
||||
if client is None:
|
||||
logger.warning("resolve_provider_client: nous requested "
|
||||
"but Nous Portal not configured (run: hermes auth)")
|
||||
@@ -1662,11 +1512,7 @@ def resolve_provider_client(
|
||||
"but no Codex OAuth token found (run: hermes model)")
|
||||
return None, None
|
||||
final_model = _normalize_resolved_model(model or _CODEX_AUX_MODEL, provider)
|
||||
raw_client = OpenAI(
|
||||
api_key=codex_token,
|
||||
base_url=_CODEX_AUX_BASE_URL,
|
||||
default_headers=_codex_cloudflare_headers(codex_token),
|
||||
)
|
||||
raw_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
|
||||
return (raw_client, final_model)
|
||||
# Standard path: wrap in CodexAuxiliaryClient adapter
|
||||
client, default = _try_codex()
|
||||
@@ -1698,9 +1544,9 @@ def resolve_provider_client(
|
||||
provider,
|
||||
)
|
||||
extra = {}
|
||||
if base_url_host_matches(custom_base, "api.kimi.com"):
|
||||
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
|
||||
elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
|
||||
if "api.kimi.com" in custom_base.lower():
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
|
||||
elif "api.githubcopilot.com" in custom_base.lower():
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
|
||||
@@ -1794,20 +1640,11 @@ def resolve_provider_client(
|
||||
default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
|
||||
final_model = _normalize_resolved_model(model or default_model, provider)
|
||||
|
||||
if provider == "gemini":
|
||||
from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url
|
||||
|
||||
if is_native_gemini_base_url(base_url):
|
||||
client = GeminiNativeClient(api_key=api_key, base_url=base_url)
|
||||
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
# Provider-specific headers
|
||||
headers = {}
|
||||
if base_url_host_matches(base_url, "api.kimi.com"):
|
||||
headers["User-Agent"] = "claude-code/0.1.0"
|
||||
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
|
||||
if "api.kimi.com" in base_url.lower():
|
||||
headers["User-Agent"] = "KimiCLI/1.30.0"
|
||||
elif "api.githubcopilot.com" in base_url.lower():
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
headers.update(copilot_default_headers())
|
||||
@@ -2038,35 +1875,24 @@ def resolve_vision_provider_client(
|
||||
# _PROVIDER_VISION_MODELS provides per-provider vision model
|
||||
# overrides when the provider has a dedicated multimodal model
|
||||
# that differs from the chat model (e.g. xiaomi → mimo-v2-omni,
|
||||
# zai → glm-5v-turbo). Nous is the exception: it has a dedicated
|
||||
# strict vision backend with tier-aware defaults, so it must not
|
||||
# fall through to the user's text chat model here.
|
||||
# zai → glm-5v-turbo).
|
||||
# 2. OpenRouter (vision-capable aggregator fallback)
|
||||
# 3. Nous Portal (vision-capable aggregator fallback)
|
||||
# 4. Stop
|
||||
main_provider = _read_main_provider()
|
||||
main_model = _read_main_model()
|
||||
if main_provider and main_provider not in ("auto", ""):
|
||||
if main_provider == "nous":
|
||||
sync_client, default_model = _resolve_strict_vision_backend(main_provider)
|
||||
if sync_client is not None:
|
||||
logger.info(
|
||||
"Vision auto-detect: using main provider %s (%s)",
|
||||
main_provider, default_model or resolved_model or main_model,
|
||||
)
|
||||
return _finalize(main_provider, sync_client, default_model)
|
||||
else:
|
||||
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
|
||||
rpc_client, rpc_model = resolve_provider_client(
|
||||
main_provider, vision_model,
|
||||
api_mode=resolved_api_mode)
|
||||
if rpc_client is not None:
|
||||
logger.info(
|
||||
"Vision auto-detect: using main provider %s (%s)",
|
||||
main_provider, rpc_model or vision_model,
|
||||
)
|
||||
return _finalize(
|
||||
main_provider, rpc_client, rpc_model or vision_model)
|
||||
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
|
||||
rpc_client, rpc_model = resolve_provider_client(
|
||||
main_provider, vision_model,
|
||||
api_mode=resolved_api_mode)
|
||||
if rpc_client is not None:
|
||||
logger.info(
|
||||
"Vision auto-detect: using main provider %s (%s)",
|
||||
main_provider, rpc_model or vision_model,
|
||||
)
|
||||
return _finalize(
|
||||
main_provider, rpc_client, rpc_model or vision_model)
|
||||
|
||||
# Fall back through aggregators (uses their dedicated vision model,
|
||||
# not the user's main model) when main provider has no client.
|
||||
@@ -2113,7 +1939,7 @@ def auxiliary_max_tokens_param(value: int) -> dict:
|
||||
# Only use max_completion_tokens for direct OpenAI custom endpoints
|
||||
if (not or_key
|
||||
and _read_nous_auth() is None
|
||||
and base_url_hostname(custom_base) == "api.openai.com"):
|
||||
and "api.openai.com" in custom_base.lower()):
|
||||
return {"max_completion_tokens": value}
|
||||
return {"max_tokens": value}
|
||||
|
||||
@@ -2141,76 +1967,6 @@ _client_cache_lock = threading.Lock()
|
||||
_CLIENT_CACHE_MAX_SIZE = 64 # safety belt — evict oldest when exceeded
|
||||
|
||||
|
||||
def _client_cache_key(
|
||||
provider: str,
|
||||
*,
|
||||
async_mode: bool,
|
||||
base_url: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
api_mode: Optional[str] = None,
|
||||
main_runtime: Optional[Dict[str, Any]] = None,
|
||||
) -> tuple:
|
||||
runtime = _normalize_main_runtime(main_runtime)
|
||||
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
|
||||
return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
|
||||
|
||||
|
||||
def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
    """Insert ``client`` into the client cache, retiring any entry it replaces.

    Runs under ``_client_cache_lock``.  When ``cache_key`` already maps to a
    *different* client object, that client is passed to
    ``_force_close_async_httpx`` and its ``close()`` (if callable) is invoked
    on a best-effort basis before the slot is overwritten with
    ``(client, default_model, bound_loop)``.
    """
    with _client_cache_lock:
        previous = _client_cache.get(cache_key)
        if previous is not None:
            stale_client = previous[0]
            if stale_client is not client:
                _force_close_async_httpx(stale_client)
                try:
                    closer = getattr(stale_client, "close", None)
                    if callable(closer):
                        closer()
                except Exception:
                    # Best effort only: a failing close must not block the replacement.
                    pass
        _client_cache[cache_key] = (client, default_model, bound_loop)
|
||||
|
||||
|
||||
def _refresh_nous_auxiliary_client(
    *,
    cache_provider: str,
    model: Optional[str],
    async_mode: bool,
    base_url: Optional[str] = None,
    api_key: Optional[str] = None,
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
) -> Tuple[Optional[Any], Optional[str]]:
    """Refresh Nous runtime creds, rebuild the client, and replace the cache entry.

    Returns ``(client, model)``.  When ``_resolve_nous_runtime_api`` yields
    nothing, ``(None, model)`` is returned and the cache is left untouched.
    In async mode the freshly built sync client is converted with
    ``_to_async_client`` (which may also adjust the model name), and the
    current event loop, if one can be obtained, is recorded alongside the
    cached entry.
    """
    refreshed = _resolve_nous_runtime_api(force_refresh=True)
    if refreshed is None:
        return None, model

    new_key, new_base_url = refreshed
    rebuilt = OpenAI(api_key=new_key, base_url=new_base_url)
    effective_model = model
    loop_for_cache = None

    if not async_mode:
        active_client = rebuilt
    else:
        try:
            import asyncio as _aio
            loop_for_cache = _aio.get_event_loop()
        except RuntimeError:
            # No usable loop in this thread; cache the client without one.
            pass
        active_client, effective_model = _to_async_client(rebuilt, effective_model or "")

    # The key is built from the caller's original routing parameters (not the
    # refreshed credentials) so the existing cache slot is the one replaced.
    slot = _client_cache_key(
        cache_provider,
        async_mode=async_mode,
        base_url=base_url,
        api_key=api_key,
        api_mode=api_mode,
        main_runtime=main_runtime,
    )
    _store_cached_client(slot, active_client, effective_model, bound_loop=loop_for_cache)
    return active_client, effective_model
|
||||
|
||||
|
||||
def neuter_async_httpx_del() -> None:
|
||||
"""Monkey-patch ``AsyncHttpxClientWrapper.__del__`` to be a no-op.
|
||||
|
||||
@@ -2312,7 +2068,7 @@ def cleanup_stale_async_clients() -> None:
|
||||
|
||||
def _is_openrouter_client(client: Any) -> bool:
|
||||
for obj in (client, getattr(client, "_client", None), getattr(client, "client", None)):
|
||||
if obj and base_url_host_matches(str(getattr(obj, "base_url", "") or ""), "openrouter.ai"):
|
||||
if obj and "openrouter" in str(getattr(obj, "base_url", "") or "").lower():
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -2364,14 +2120,8 @@ def _get_cached_client(
|
||||
except RuntimeError:
|
||||
pass
|
||||
runtime = _normalize_main_runtime(main_runtime)
|
||||
cache_key = _client_cache_key(
|
||||
provider,
|
||||
async_mode=async_mode,
|
||||
base_url=base_url,
|
||||
api_key=api_key,
|
||||
api_mode=api_mode,
|
||||
main_runtime=main_runtime,
|
||||
)
|
||||
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
|
||||
cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
|
||||
with _client_cache_lock:
|
||||
if cache_key in _client_cache:
|
||||
cached_client, cached_default, cached_loop = _client_cache[cache_key]
|
||||
@@ -2440,6 +2190,7 @@ def _resolve_task_provider_model(
|
||||
to "custom" and the task uses that direct endpoint. api_mode is one of
|
||||
"chat_completions", "codex_responses", or None (auto-detect).
|
||||
"""
|
||||
config = {}
|
||||
cfg_provider = None
|
||||
cfg_model = None
|
||||
cfg_base_url = None
|
||||
@@ -2447,7 +2198,16 @@ def _resolve_task_provider_model(
|
||||
cfg_api_mode = None
|
||||
|
||||
if task:
|
||||
task_config = _get_auxiliary_task_config(task)
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
except ImportError:
|
||||
config = {}
|
||||
|
||||
aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
|
||||
task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
|
||||
if not isinstance(task_config, dict):
|
||||
task_config = {}
|
||||
cfg_provider = str(task_config.get("provider", "")).strip() or None
|
||||
cfg_model = str(task_config.get("model", "")).strip() or None
|
||||
cfg_base_url = str(task_config.get("base_url", "")).strip() or None
|
||||
@@ -2477,25 +2237,17 @@ def _resolve_task_provider_model(
|
||||
_DEFAULT_AUX_TIMEOUT = 30.0
|
||||
|
||||
|
||||
def _get_auxiliary_task_config(task: str) -> Dict[str, Any]:
|
||||
"""Return the config dict for auxiliary.<task>, or {} when unavailable."""
|
||||
if not task:
|
||||
return {}
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
except ImportError:
|
||||
return {}
|
||||
aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
|
||||
task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
|
||||
return task_config if isinstance(task_config, dict) else {}
|
||||
|
||||
|
||||
def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float:
|
||||
"""Read timeout from auxiliary.{task}.timeout in config, falling back to *default*."""
|
||||
if not task:
|
||||
return default
|
||||
task_config = _get_auxiliary_task_config(task)
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
except ImportError:
|
||||
return default
|
||||
aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
|
||||
task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
|
||||
raw = task_config.get("timeout")
|
||||
if raw is not None:
|
||||
try:
|
||||
@@ -2505,15 +2257,6 @@ def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float
|
||||
return default
|
||||
|
||||
|
||||
def _get_task_extra_body(task: str) -> Dict[str, Any]:
    """Return a shallow copy of ``auxiliary.<task>.extra_body``.

    Yields an empty dict when the setting is absent or is not a dict, so the
    caller can always mutate the result without touching the config.
    """
    candidate = _get_auxiliary_task_config(task).get("extra_body")
    return dict(candidate) if isinstance(candidate, dict) else {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Anthropic-compatible endpoint detection + image block conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -2601,10 +2344,8 @@ def _build_call_kwargs(
|
||||
"timeout": timeout,
|
||||
}
|
||||
|
||||
fixed_temperature = _fixed_temperature_for_model(model, base_url)
|
||||
if fixed_temperature is OMIT_TEMPERATURE:
|
||||
temperature = None # strip — let server choose
|
||||
elif fixed_temperature is not None:
|
||||
fixed_temperature = _fixed_temperature_for_model(model)
|
||||
if fixed_temperature is not None:
|
||||
temperature = fixed_temperature
|
||||
|
||||
# Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
|
||||
@@ -2624,7 +2365,7 @@ def _build_call_kwargs(
|
||||
# Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
|
||||
if provider == "custom":
|
||||
custom_base = base_url or _current_custom_base_url()
|
||||
if base_url_hostname(custom_base) == "api.openai.com":
|
||||
if "api.openai.com" in custom_base.lower():
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
else:
|
||||
kwargs["max_tokens"] = max_tokens
|
||||
@@ -2716,8 +2457,6 @@ def call_llm(
|
||||
"""
|
||||
resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
|
||||
task, provider, model, base_url, api_key)
|
||||
effective_extra_body = _get_task_extra_body(task)
|
||||
effective_extra_body.update(extra_body or {})
|
||||
|
||||
if task == "vision":
|
||||
effective_provider, client, final_model = resolve_vision_provider_client(
|
||||
@@ -2786,14 +2525,11 @@ def call_llm(
|
||||
task, resolved_provider or "auto", final_model or "default",
|
||||
f" at {_base_info}" if _base_info and "openrouter" not in _base_info else "")
|
||||
|
||||
# Pass the client's actual base_url (not just resolved_base_url) so
|
||||
# endpoint-specific temperature overrides can distinguish
|
||||
# api.moonshot.ai vs api.kimi.com/coding even on auto-detected routes.
|
||||
kwargs = _build_call_kwargs(
|
||||
resolved_provider, final_model, messages,
|
||||
temperature=temperature, max_tokens=max_tokens,
|
||||
tools=tools, timeout=effective_timeout, extra_body=effective_extra_body,
|
||||
base_url=_base_info or resolved_base_url)
|
||||
tools=tools, timeout=effective_timeout, extra_body=extra_body,
|
||||
base_url=resolved_base_url)
|
||||
|
||||
# Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
|
||||
_client_base = str(getattr(client, "base_url", "") or "")
|
||||
@@ -2819,29 +2555,6 @@ def call_llm(
|
||||
raise
|
||||
first_err = retry_err
|
||||
|
||||
# ── Nous auth refresh parity with main agent ──────────────────
|
||||
client_is_nous = (
|
||||
resolved_provider == "nous"
|
||||
or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
|
||||
)
|
||||
if _is_auth_error(first_err) and client_is_nous:
|
||||
refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
|
||||
cache_provider=resolved_provider or "nous",
|
||||
model=final_model,
|
||||
async_mode=False,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
api_mode=resolved_api_mode,
|
||||
main_runtime=main_runtime,
|
||||
)
|
||||
if refreshed_client is not None:
|
||||
logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
|
||||
task or "call")
|
||||
if refreshed_model and refreshed_model != kwargs.get("model"):
|
||||
kwargs["model"] = refreshed_model
|
||||
return _validate_llm_response(
|
||||
refreshed_client.chat.completions.create(**kwargs), task)
|
||||
|
||||
# ── Payment / credit exhaustion fallback ──────────────────────
|
||||
# When the resolved provider returns 402 or a credit-related error,
|
||||
# try alternative providers instead of giving up. This handles the
|
||||
@@ -2870,8 +2583,7 @@ def call_llm(
|
||||
fb_label, fb_model, messages,
|
||||
temperature=temperature, max_tokens=max_tokens,
|
||||
tools=tools, timeout=effective_timeout,
|
||||
extra_body=effective_extra_body,
|
||||
base_url=str(getattr(fb_client, "base_url", "") or ""))
|
||||
extra_body=extra_body)
|
||||
return _validate_llm_response(
|
||||
fb_client.chat.completions.create(**fb_kwargs), task)
|
||||
raise
|
||||
@@ -2953,8 +2665,6 @@ async def async_call_llm(
|
||||
"""
|
||||
resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
|
||||
task, provider, model, base_url, api_key)
|
||||
effective_extra_body = _get_task_extra_body(task)
|
||||
effective_extra_body.update(extra_body or {})
|
||||
|
||||
if task == "vision":
|
||||
effective_provider, client, final_model = resolve_vision_provider_client(
|
||||
@@ -3008,17 +2718,14 @@ async def async_call_llm(
|
||||
|
||||
effective_timeout = timeout if timeout is not None else _get_task_timeout(task)
|
||||
|
||||
# Pass the client's actual base_url (not just resolved_base_url) so
|
||||
# endpoint-specific temperature overrides can distinguish
|
||||
# api.moonshot.ai vs api.kimi.com/coding even on auto-detected routes.
|
||||
_client_base = str(getattr(client, "base_url", "") or "")
|
||||
kwargs = _build_call_kwargs(
|
||||
resolved_provider, final_model, messages,
|
||||
temperature=temperature, max_tokens=max_tokens,
|
||||
tools=tools, timeout=effective_timeout, extra_body=effective_extra_body,
|
||||
base_url=_client_base or resolved_base_url)
|
||||
tools=tools, timeout=effective_timeout, extra_body=extra_body,
|
||||
base_url=resolved_base_url)
|
||||
|
||||
# Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
|
||||
_client_base = str(getattr(client, "base_url", "") or "")
|
||||
if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
|
||||
kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
|
||||
|
||||
@@ -3040,28 +2747,6 @@ async def async_call_llm(
|
||||
raise
|
||||
first_err = retry_err
|
||||
|
||||
# ── Nous auth refresh parity with main agent ──────────────────
|
||||
client_is_nous = (
|
||||
resolved_provider == "nous"
|
||||
or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
|
||||
)
|
||||
if _is_auth_error(first_err) and client_is_nous:
|
||||
refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
|
||||
cache_provider=resolved_provider or "nous",
|
||||
model=final_model,
|
||||
async_mode=True,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
api_mode=resolved_api_mode,
|
||||
)
|
||||
if refreshed_client is not None:
|
||||
logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
|
||||
task or "call")
|
||||
if refreshed_model and refreshed_model != kwargs.get("model"):
|
||||
kwargs["model"] = refreshed_model
|
||||
return _validate_llm_response(
|
||||
await refreshed_client.chat.completions.create(**kwargs), task)
|
||||
|
||||
# ── Payment / connection fallback (mirrors sync call_llm) ─────
|
||||
should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
|
||||
is_auto = resolved_provider in ("auto", "", None)
|
||||
@@ -3076,8 +2761,7 @@ async def async_call_llm(
|
||||
fb_label, fb_model, messages,
|
||||
temperature=temperature, max_tokens=max_tokens,
|
||||
tools=tools, timeout=effective_timeout,
|
||||
extra_body=effective_extra_body,
|
||||
base_url=str(getattr(fb_client, "base_url", "") or ""))
|
||||
extra_body=extra_body)
|
||||
# Convert sync fallback client to async
|
||||
async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "")
|
||||
if async_fb_model and async_fb_model != fb_kwargs.get("model"):
|
||||
|
||||
@@ -1,813 +0,0 @@
|
||||
"""Codex Responses API adapter.
|
||||
|
||||
Pure format-conversion and normalization logic for the OpenAI Responses API
|
||||
(used by OpenAI Codex, xAI, GitHub Models, and other Responses-compatible endpoints).
|
||||
|
||||
Extracted from run_agent.py to isolate Responses API-specific logic from the
|
||||
core agent loop. All functions are stateless — they operate on the data passed
|
||||
in and return transformed results.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import uuid
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Multimodal content helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
|
||||
"""Convert chat-style multimodal content to Responses API input parts.
|
||||
|
||||
Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
|
||||
Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
|
||||
|
||||
Returns an empty list when ``content`` is not a list or contains no
|
||||
recognized parts — callers fall back to the string path.
|
||||
"""
|
||||
if not isinstance(content, list):
|
||||
return []
|
||||
converted: List[Dict[str, Any]] = []
|
||||
for part in content:
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
converted.append({"type": "input_text", "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
ptype = str(part.get("type") or "").strip().lower()
|
||||
if ptype in {"text", "input_text", "output_text"}:
|
||||
text = part.get("text")
|
||||
if isinstance(text, str) and text:
|
||||
converted.append({"type": "input_text", "text": text})
|
||||
continue
|
||||
if ptype in {"image_url", "input_image"}:
|
||||
image_ref = part.get("image_url")
|
||||
detail = part.get("detail")
|
||||
if isinstance(image_ref, dict):
|
||||
url = image_ref.get("url")
|
||||
detail = image_ref.get("detail", detail)
|
||||
else:
|
||||
url = image_ref
|
||||
if not isinstance(url, str) or not url:
|
||||
continue
|
||||
image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
|
||||
if isinstance(detail, str) and detail.strip():
|
||||
image_part["detail"] = detail.strip()
|
||||
converted.append(image_part)
|
||||
return converted
|
||||
|
||||
|
||||
def _summarize_user_message_for_log(content: Any) -> str:
|
||||
"""Return a short text summary of a user message for logging/trajectory.
|
||||
|
||||
Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}``
|
||||
parts from the API server. Logging, spinner previews, and trajectory
|
||||
files all want a plain string — this helper extracts the first chunk of
|
||||
text and notes any attached images. Returns an empty string for empty
|
||||
lists and ``str(content)`` for unexpected scalar types.
|
||||
"""
|
||||
if content is None:
|
||||
return ""
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
text_bits: List[str] = []
|
||||
image_count = 0
|
||||
for part in content:
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
text_bits.append(part)
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
ptype = str(part.get("type") or "").strip().lower()
|
||||
if ptype in {"text", "input_text", "output_text"}:
|
||||
text = part.get("text")
|
||||
if isinstance(text, str) and text:
|
||||
text_bits.append(text)
|
||||
elif ptype in {"image_url", "input_image"}:
|
||||
image_count += 1
|
||||
summary = " ".join(text_bits).strip()
|
||||
if image_count:
|
||||
note = f"[{image_count} image{'s' if image_count != 1 else ''}]"
|
||||
summary = f"{note} {summary}" if summary else note
|
||||
return summary
|
||||
try:
|
||||
return str(content)
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ID helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str:
|
||||
"""Generate a deterministic call_id from tool call content.
|
||||
|
||||
Used as a fallback when the API doesn't provide a call_id.
|
||||
Deterministic IDs prevent cache invalidation — random UUIDs would
|
||||
make every API call's prefix unique, breaking OpenAI's prompt cache.
|
||||
"""
|
||||
seed = f"{fn_name}:{arguments}:{index}"
|
||||
digest = hashlib.sha256(seed.encode("utf-8", errors="replace")).hexdigest()[:12]
|
||||
return f"call_{digest}"
|
||||
|
||||
|
||||
def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]:
|
||||
"""Split a stored tool id into (call_id, response_item_id)."""
|
||||
if not isinstance(raw_id, str):
|
||||
return None, None
|
||||
value = raw_id.strip()
|
||||
if not value:
|
||||
return None, None
|
||||
if "|" in value:
|
||||
call_id, response_item_id = value.split("|", 1)
|
||||
call_id = call_id.strip() or None
|
||||
response_item_id = response_item_id.strip() or None
|
||||
return call_id, response_item_id
|
||||
if value.startswith("fc_"):
|
||||
return None, value
|
||||
return value, None
|
||||
|
||||
|
||||
def _derive_responses_function_call_id(
|
||||
call_id: str,
|
||||
response_item_id: Optional[str] = None,
|
||||
) -> str:
|
||||
"""Build a valid Responses `function_call.id` (must start with `fc_`)."""
|
||||
if isinstance(response_item_id, str):
|
||||
candidate = response_item_id.strip()
|
||||
if candidate.startswith("fc_"):
|
||||
return candidate
|
||||
|
||||
source = (call_id or "").strip()
|
||||
if source.startswith("fc_"):
|
||||
return source
|
||||
if source.startswith("call_") and len(source) > len("call_"):
|
||||
return f"fc_{source[len('call_'):]}"
|
||||
|
||||
sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source)
|
||||
if sanitized.startswith("fc_"):
|
||||
return sanitized
|
||||
if sanitized.startswith("call_") and len(sanitized) > len("call_"):
|
||||
return f"fc_{sanitized[len('call_'):]}"
|
||||
if sanitized:
|
||||
return f"fc_{sanitized[:48]}"
|
||||
|
||||
seed = source or str(response_item_id or "") or uuid.uuid4().hex
|
||||
digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24]
|
||||
return f"fc_{digest}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Schema conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
|
||||
"""Convert chat-completions tool schemas to Responses function-tool schemas."""
|
||||
if not tools:
|
||||
return None
|
||||
|
||||
converted: List[Dict[str, Any]] = []
|
||||
for item in tools:
|
||||
fn = item.get("function", {}) if isinstance(item, dict) else {}
|
||||
name = fn.get("name")
|
||||
if not isinstance(name, str) or not name.strip():
|
||||
continue
|
||||
converted.append({
|
||||
"type": "function",
|
||||
"name": name,
|
||||
"description": fn.get("description", ""),
|
||||
"strict": False,
|
||||
"parameters": fn.get("parameters", {"type": "object", "properties": {}}),
|
||||
})
|
||||
return converted or None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Message format conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Convert internal chat-style messages to Responses input items."""
|
||||
items: List[Dict[str, Any]] = []
|
||||
seen_item_ids: set = set()
|
||||
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
role = msg.get("role")
|
||||
if role == "system":
|
||||
continue
|
||||
|
||||
if role in {"user", "assistant"}:
|
||||
content = msg.get("content", "")
|
||||
if isinstance(content, list):
|
||||
content_parts = _chat_content_to_responses_parts(content)
|
||||
content_text = "".join(
|
||||
p.get("text", "") for p in content_parts if p.get("type") == "input_text"
|
||||
)
|
||||
else:
|
||||
content_parts = []
|
||||
content_text = str(content) if content is not None else ""
|
||||
|
||||
if role == "assistant":
|
||||
# Replay encrypted reasoning items from previous turns
|
||||
# so the API can maintain coherent reasoning chains.
|
||||
codex_reasoning = msg.get("codex_reasoning_items")
|
||||
has_codex_reasoning = False
|
||||
if isinstance(codex_reasoning, list):
|
||||
for ri in codex_reasoning:
|
||||
if isinstance(ri, dict) and ri.get("encrypted_content"):
|
||||
item_id = ri.get("id")
|
||||
if item_id and item_id in seen_item_ids:
|
||||
continue
|
||||
# Strip the "id" field — with store=False the
|
||||
# Responses API cannot look up items by ID and
|
||||
# returns 404. The encrypted_content blob is
|
||||
# self-contained for reasoning chain continuity.
|
||||
replay_item = {k: v for k, v in ri.items() if k != "id"}
|
||||
items.append(replay_item)
|
||||
if item_id:
|
||||
seen_item_ids.add(item_id)
|
||||
has_codex_reasoning = True
|
||||
|
||||
if content_parts:
|
||||
items.append({"role": "assistant", "content": content_parts})
|
||||
elif content_text.strip():
|
||||
items.append({"role": "assistant", "content": content_text})
|
||||
elif has_codex_reasoning:
|
||||
# The Responses API requires a following item after each
|
||||
# reasoning item (otherwise: missing_following_item error).
|
||||
# When the assistant produced only reasoning with no visible
|
||||
# content, emit an empty assistant message as the required
|
||||
# following item.
|
||||
items.append({"role": "assistant", "content": ""})
|
||||
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
if not isinstance(tc, dict):
|
||||
continue
|
||||
fn = tc.get("function", {})
|
||||
fn_name = fn.get("name")
|
||||
if not isinstance(fn_name, str) or not fn_name.strip():
|
||||
continue
|
||||
|
||||
embedded_call_id, embedded_response_item_id = _split_responses_tool_id(
|
||||
tc.get("id")
|
||||
)
|
||||
call_id = tc.get("call_id")
|
||||
if not isinstance(call_id, str) or not call_id.strip():
|
||||
call_id = embedded_call_id
|
||||
if not isinstance(call_id, str) or not call_id.strip():
|
||||
if (
|
||||
isinstance(embedded_response_item_id, str)
|
||||
and embedded_response_item_id.startswith("fc_")
|
||||
and len(embedded_response_item_id) > len("fc_")
|
||||
):
|
||||
call_id = f"call_{embedded_response_item_id[len('fc_'):]}"
|
||||
else:
|
||||
_raw_args = str(fn.get("arguments", "{}"))
|
||||
call_id = _deterministic_call_id(fn_name, _raw_args, len(items))
|
||||
call_id = call_id.strip()
|
||||
|
||||
arguments = fn.get("arguments", "{}")
|
||||
if isinstance(arguments, dict):
|
||||
arguments = json.dumps(arguments, ensure_ascii=False)
|
||||
elif not isinstance(arguments, str):
|
||||
arguments = str(arguments)
|
||||
arguments = arguments.strip() or "{}"
|
||||
|
||||
items.append({
|
||||
"type": "function_call",
|
||||
"call_id": call_id,
|
||||
"name": fn_name,
|
||||
"arguments": arguments,
|
||||
})
|
||||
continue
|
||||
|
||||
# Non-assistant (user) role: emit multimodal parts when present,
|
||||
# otherwise fall back to the text payload.
|
||||
if content_parts:
|
||||
items.append({"role": role, "content": content_parts})
|
||||
else:
|
||||
items.append({"role": role, "content": content_text})
|
||||
continue
|
||||
|
||||
if role == "tool":
|
||||
raw_tool_call_id = msg.get("tool_call_id")
|
||||
call_id, _ = _split_responses_tool_id(raw_tool_call_id)
|
||||
if not isinstance(call_id, str) or not call_id.strip():
|
||||
if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip():
|
||||
call_id = raw_tool_call_id.strip()
|
||||
if not isinstance(call_id, str) or not call_id.strip():
|
||||
continue
|
||||
items.append({
|
||||
"type": "function_call_output",
|
||||
"call_id": call_id,
|
||||
"output": str(msg.get("content", "") or ""),
|
||||
})
|
||||
|
||||
return items
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Input preflight / validation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
if not isinstance(raw_items, list):
|
||||
raise ValueError("Codex Responses input must be a list of input items.")
|
||||
|
||||
normalized: List[Dict[str, Any]] = []
|
||||
seen_ids: set = set()
|
||||
for idx, item in enumerate(raw_items):
|
||||
if not isinstance(item, dict):
|
||||
raise ValueError(f"Codex Responses input[{idx}] must be an object.")
|
||||
|
||||
item_type = item.get("type")
|
||||
if item_type == "function_call":
|
||||
call_id = item.get("call_id")
|
||||
name = item.get("name")
|
||||
if not isinstance(call_id, str) or not call_id.strip():
|
||||
raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.")
|
||||
if not isinstance(name, str) or not name.strip():
|
||||
raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.")
|
||||
|
||||
arguments = item.get("arguments", "{}")
|
||||
if isinstance(arguments, dict):
|
||||
arguments = json.dumps(arguments, ensure_ascii=False)
|
||||
elif not isinstance(arguments, str):
|
||||
arguments = str(arguments)
|
||||
arguments = arguments.strip() or "{}"
|
||||
|
||||
normalized.append(
|
||||
{
|
||||
"type": "function_call",
|
||||
"call_id": call_id.strip(),
|
||||
"name": name.strip(),
|
||||
"arguments": arguments,
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
if item_type == "function_call_output":
|
||||
call_id = item.get("call_id")
|
||||
if not isinstance(call_id, str) or not call_id.strip():
|
||||
raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.")
|
||||
output = item.get("output", "")
|
||||
if output is None:
|
||||
output = ""
|
||||
if not isinstance(output, str):
|
||||
output = str(output)
|
||||
|
||||
normalized.append(
|
||||
{
|
||||
"type": "function_call_output",
|
||||
"call_id": call_id.strip(),
|
||||
"output": output,
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
if item_type == "reasoning":
|
||||
encrypted = item.get("encrypted_content")
|
||||
if isinstance(encrypted, str) and encrypted:
|
||||
item_id = item.get("id")
|
||||
if isinstance(item_id, str) and item_id:
|
||||
if item_id in seen_ids:
|
||||
continue
|
||||
seen_ids.add(item_id)
|
||||
reasoning_item = {"type": "reasoning", "encrypted_content": encrypted}
|
||||
# Do NOT include the "id" in the outgoing item — with
|
||||
# store=False (our default) the API tries to resolve the
|
||||
# id server-side and returns 404. The id is still used
|
||||
# above for local deduplication via seen_ids.
|
||||
summary = item.get("summary")
|
||||
if isinstance(summary, list):
|
||||
reasoning_item["summary"] = summary
|
||||
else:
|
||||
reasoning_item["summary"] = []
|
||||
normalized.append(reasoning_item)
|
||||
continue
|
||||
|
||||
role = item.get("role")
|
||||
if role in {"user", "assistant"}:
|
||||
content = item.get("content", "")
|
||||
if content is None:
|
||||
content = ""
|
||||
if isinstance(content, list):
|
||||
# Multimodal content from ``_chat_messages_to_responses_input``
|
||||
# is already in Responses format (``input_text`` / ``input_image``).
|
||||
# Validate each part and pass through.
|
||||
validated: List[Dict[str, Any]] = []
|
||||
for part_idx, part in enumerate(content):
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
validated.append({"type": "input_text", "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
raise ValueError(
|
||||
f"Codex Responses input[{idx}].content[{part_idx}] must be an object or string."
|
||||
)
|
||||
ptype = str(part.get("type") or "").strip().lower()
|
||||
if ptype in {"input_text", "text", "output_text"}:
|
||||
text = part.get("text", "")
|
||||
if not isinstance(text, str):
|
||||
text = str(text or "")
|
||||
validated.append({"type": "input_text", "text": text})
|
||||
elif ptype in {"input_image", "image_url"}:
|
||||
image_ref = part.get("image_url", "")
|
||||
detail = part.get("detail")
|
||||
if isinstance(image_ref, dict):
|
||||
url = image_ref.get("url", "")
|
||||
detail = image_ref.get("detail", detail)
|
||||
else:
|
||||
url = image_ref
|
||||
if not isinstance(url, str):
|
||||
url = str(url or "")
|
||||
image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
|
||||
if isinstance(detail, str) and detail.strip():
|
||||
image_part["detail"] = detail.strip()
|
||||
validated.append(image_part)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Codex Responses input[{idx}].content[{part_idx}] has unsupported type {part.get('type')!r}."
|
||||
)
|
||||
normalized.append({"role": role, "content": validated})
|
||||
continue
|
||||
if not isinstance(content, str):
|
||||
content = str(content)
|
||||
|
||||
normalized.append({"role": role, "content": content})
|
||||
continue
|
||||
|
||||
raise ValueError(
|
||||
f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})."
|
||||
)
|
||||
|
||||
return normalized
|
||||
|
||||
|
||||
def _preflight_codex_api_kwargs(
    api_kwargs: Any,
    *,
    allow_stream: bool = False,
) -> Dict[str, Any]:
    """Validate a Codex Responses request and return a normalized copy.

    The input dict is not mutated. Required fields are checked, optional
    fields are either copied through, coerced, or silently dropped when they
    have an unusable type, and any key outside the allow-list is rejected.

    Args:
        api_kwargs: Candidate request payload; must be a dict containing at
            least ``model``, ``instructions`` and ``input``.
        allow_stream: When true, a ``stream`` key is accepted (and must be
            ``True`` or ``None``). When false, the mere presence of a
            ``stream`` key is an error.

    Returns:
        A new dict holding only the validated/normalized fields. ``store`` is
        always ``False``.

    Raises:
        ValueError: If the payload is not a dict, a required field is missing,
            ``model`` is blank, ``tools``/``extra_headers`` are malformed,
            ``store`` is not ``False``, ``stream`` is misused, or an
            unsupported key is present. Errors raised by
            ``_preflight_codex_input_items`` propagate unchanged.
    """
    if not isinstance(api_kwargs, dict):
        raise ValueError("Codex Responses request must be a dict.")

    required = {"model", "instructions", "input"}
    missing = [key for key in required if key not in api_kwargs]
    if missing:
        raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.")

    model = api_kwargs.get("model")
    if not isinstance(model, str) or not model.strip():
        raise ValueError("Codex Responses request 'model' must be a non-empty string.")
    model = model.strip()

    # Blank/None instructions fall back to the module's default identity.
    instructions = api_kwargs.get("instructions")
    if instructions is None:
        instructions = ""
    if not isinstance(instructions, str):
        instructions = str(instructions)
    instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY

    normalized_input = _preflight_codex_input_items(api_kwargs.get("input"))

    tools = api_kwargs.get("tools")
    normalized_tools = None
    if tools is not None:
        if not isinstance(tools, list):
            raise ValueError("Codex Responses request 'tools' must be a list when provided.")
        normalized_tools = []
        for idx, tool in enumerate(tools):
            if not isinstance(tool, dict):
                raise ValueError(f"Codex Responses tools[{idx}] must be an object.")
            if tool.get("type") != "function":
                raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.")

            name = tool.get("name")
            parameters = tool.get("parameters")
            if not isinstance(name, str) or not name.strip():
                raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.")
            if not isinstance(parameters, dict):
                raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.")

            description = tool.get("description", "")
            if description is None:
                description = ""
            if not isinstance(description, str):
                description = str(description)

            strict = tool.get("strict", False)
            if not isinstance(strict, bool):
                strict = bool(strict)

            normalized_tools.append(
                {
                    "type": "function",
                    "name": name.strip(),
                    "description": description,
                    "strict": strict,
                    "parameters": parameters,
                }
            )

    # Identity check on purpose: only the literal False (or absence) passes.
    store = api_kwargs.get("store", False)
    if store is not False:
        raise ValueError("Codex Responses contract requires 'store' to be false.")

    allowed_keys = {
        "model", "instructions", "input", "tools", "store",
        "reasoning", "include", "max_output_tokens", "temperature",
        "tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
        "extra_headers",
    }
    normalized: Dict[str, Any] = {
        "model": model,
        "instructions": instructions,
        "input": normalized_input,
        "store": False,
    }
    if normalized_tools is not None:
        normalized["tools"] = normalized_tools

    # Pass through reasoning config
    reasoning = api_kwargs.get("reasoning")
    if isinstance(reasoning, dict):
        normalized["reasoning"] = reasoning
    include = api_kwargs.get("include")
    if isinstance(include, list):
        normalized["include"] = include
    service_tier = api_kwargs.get("service_tier")
    if isinstance(service_tier, str) and service_tier.strip():
        normalized["service_tier"] = service_tier.strip()

    # Pass through max_output_tokens and temperature.
    # bool is a subclass of int, so it is excluded explicitly; otherwise
    # ``True`` would be forwarded as a limit of 1 / a temperature of 1.0.
    # The chained comparisons also reject NaN and +/-inf (int(inf) would
    # raise OverflowError rather than the ValueError callers expect).
    max_output_tokens = api_kwargs.get("max_output_tokens")
    if (
        isinstance(max_output_tokens, (int, float))
        and not isinstance(max_output_tokens, bool)
        and 0 < max_output_tokens < float("inf")
    ):
        token_limit = int(max_output_tokens)
        # Fractions in (0, 1) truncate to 0, which is not a usable limit.
        if token_limit >= 1:
            normalized["max_output_tokens"] = token_limit
    temperature = api_kwargs.get("temperature")
    if (
        isinstance(temperature, (int, float))
        and not isinstance(temperature, bool)
        and float("-inf") < temperature < float("inf")
    ):
        normalized["temperature"] = float(temperature)

    # Pass through tool_choice, parallel_tool_calls, prompt_cache_key
    for passthrough_key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"):
        val = api_kwargs.get(passthrough_key)
        if val is not None:
            normalized[passthrough_key] = val

    extra_headers = api_kwargs.get("extra_headers")
    if extra_headers is not None:
        if not isinstance(extra_headers, dict):
            raise ValueError("Codex Responses request 'extra_headers' must be an object.")
        normalized_headers: Dict[str, str] = {}
        for key, value in extra_headers.items():
            if not isinstance(key, str) or not key.strip():
                raise ValueError("Codex Responses request 'extra_headers' keys must be non-empty strings.")
            if value is None:
                # None-valued headers are dropped rather than sent as "None".
                continue
            normalized_headers[key.strip()] = str(value)
        if normalized_headers:
            normalized["extra_headers"] = normalized_headers

    if allow_stream:
        stream = api_kwargs.get("stream")
        if stream is not None and stream is not True:
            raise ValueError("Codex Responses 'stream' must be true when set.")
        if stream is True:
            normalized["stream"] = True
        # The key is tolerated (even as None) whenever streaming is allowed.
        allowed_keys.add("stream")
    elif "stream" in api_kwargs:
        raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.")

    unexpected = sorted(key for key in api_kwargs if key not in allowed_keys)
    if unexpected:
        raise ValueError(
            f"Codex Responses request has unsupported field(s): {', '.join(unexpected)}."
        )

    return normalized
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Response extraction helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _extract_responses_message_text(item: Any) -> str:
|
||||
"""Extract assistant text from a Responses message output item."""
|
||||
content = getattr(item, "content", None)
|
||||
if not isinstance(content, list):
|
||||
return ""
|
||||
|
||||
chunks: List[str] = []
|
||||
for part in content:
|
||||
ptype = getattr(part, "type", None)
|
||||
if ptype not in {"output_text", "text"}:
|
||||
continue
|
||||
text = getattr(part, "text", None)
|
||||
if isinstance(text, str) and text:
|
||||
chunks.append(text)
|
||||
return "".join(chunks).strip()
|
||||
|
||||
|
||||
def _extract_responses_reasoning_text(item: Any) -> str:
|
||||
"""Extract a compact reasoning text from a Responses reasoning item."""
|
||||
summary = getattr(item, "summary", None)
|
||||
if isinstance(summary, list):
|
||||
chunks: List[str] = []
|
||||
for part in summary:
|
||||
text = getattr(part, "text", None)
|
||||
if isinstance(text, str) and text:
|
||||
chunks.append(text)
|
||||
if chunks:
|
||||
return "\n".join(chunks).strip()
|
||||
text = getattr(item, "text", None)
|
||||
if isinstance(text, str) and text:
|
||||
return text.strip()
|
||||
return ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Full response normalization
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
"""Normalize a Responses API object to an assistant_message-like object."""
|
||||
output = getattr(response, "output", None)
|
||||
if not isinstance(output, list) or not output:
|
||||
# The Codex backend can return empty output when the answer was
|
||||
# delivered entirely via stream events. Check output_text as a
|
||||
# last-resort fallback before raising.
|
||||
out_text = getattr(response, "output_text", None)
|
||||
if isinstance(out_text, str) and out_text.strip():
|
||||
logger.debug(
|
||||
"Codex response has empty output but output_text is present (%d chars); "
|
||||
"synthesizing output item.", len(out_text.strip()),
|
||||
)
|
||||
output = [SimpleNamespace(
|
||||
type="message", role="assistant", status="completed",
|
||||
content=[SimpleNamespace(type="output_text", text=out_text.strip())],
|
||||
)]
|
||||
response.output = output
|
||||
else:
|
||||
raise RuntimeError("Responses API returned no output items")
|
||||
|
||||
response_status = getattr(response, "status", None)
|
||||
if isinstance(response_status, str):
|
||||
response_status = response_status.strip().lower()
|
||||
else:
|
||||
response_status = None
|
||||
|
||||
if response_status in {"failed", "cancelled"}:
|
||||
error_obj = getattr(response, "error", None)
|
||||
if isinstance(error_obj, dict):
|
||||
error_msg = error_obj.get("message") or str(error_obj)
|
||||
else:
|
||||
error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'"
|
||||
raise RuntimeError(error_msg)
|
||||
|
||||
content_parts: List[str] = []
|
||||
reasoning_parts: List[str] = []
|
||||
reasoning_items_raw: List[Dict[str, Any]] = []
|
||||
tool_calls: List[Any] = []
|
||||
has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
|
||||
saw_commentary_phase = False
|
||||
saw_final_answer_phase = False
|
||||
|
||||
for item in output:
|
||||
item_type = getattr(item, "type", None)
|
||||
item_status = getattr(item, "status", None)
|
||||
if isinstance(item_status, str):
|
||||
item_status = item_status.strip().lower()
|
||||
else:
|
||||
item_status = None
|
||||
|
||||
if item_status in {"queued", "in_progress", "incomplete"}:
|
||||
has_incomplete_items = True
|
||||
|
||||
if item_type == "message":
|
||||
item_phase = getattr(item, "phase", None)
|
||||
if isinstance(item_phase, str):
|
||||
normalized_phase = item_phase.strip().lower()
|
||||
if normalized_phase in {"commentary", "analysis"}:
|
||||
saw_commentary_phase = True
|
||||
elif normalized_phase in {"final_answer", "final"}:
|
||||
saw_final_answer_phase = True
|
||||
message_text = _extract_responses_message_text(item)
|
||||
if message_text:
|
||||
content_parts.append(message_text)
|
||||
elif item_type == "reasoning":
|
||||
reasoning_text = _extract_responses_reasoning_text(item)
|
||||
if reasoning_text:
|
||||
reasoning_parts.append(reasoning_text)
|
||||
# Capture the full reasoning item for multi-turn continuity.
|
||||
# encrypted_content is an opaque blob the API needs back on
|
||||
# subsequent turns to maintain coherent reasoning chains.
|
||||
encrypted = getattr(item, "encrypted_content", None)
|
||||
if isinstance(encrypted, str) and encrypted:
|
||||
raw_item = {"type": "reasoning", "encrypted_content": encrypted}
|
||||
item_id = getattr(item, "id", None)
|
||||
if isinstance(item_id, str) and item_id:
|
||||
raw_item["id"] = item_id
|
||||
# Capture summary — required by the API when replaying reasoning items
|
||||
summary = getattr(item, "summary", None)
|
||||
if isinstance(summary, list):
|
||||
raw_summary = []
|
||||
for part in summary:
|
||||
text = getattr(part, "text", None)
|
||||
if isinstance(text, str):
|
||||
raw_summary.append({"type": "summary_text", "text": text})
|
||||
raw_item["summary"] = raw_summary
|
||||
reasoning_items_raw.append(raw_item)
|
||||
elif item_type == "function_call":
|
||||
if item_status in {"queued", "in_progress", "incomplete"}:
|
||||
continue
|
||||
fn_name = getattr(item, "name", "") or ""
|
||||
arguments = getattr(item, "arguments", "{}")
|
||||
if not isinstance(arguments, str):
|
||||
arguments = json.dumps(arguments, ensure_ascii=False)
|
||||
raw_call_id = getattr(item, "call_id", None)
|
||||
raw_item_id = getattr(item, "id", None)
|
||||
embedded_call_id, _ = _split_responses_tool_id(raw_item_id)
|
||||
call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
|
||||
if not isinstance(call_id, str) or not call_id.strip():
|
||||
call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls))
|
||||
call_id = call_id.strip()
|
||||
response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
|
||||
response_item_id = _derive_responses_function_call_id(call_id, response_item_id)
|
||||
tool_calls.append(SimpleNamespace(
|
||||
id=call_id,
|
||||
call_id=call_id,
|
||||
response_item_id=response_item_id,
|
||||
type="function",
|
||||
function=SimpleNamespace(name=fn_name, arguments=arguments),
|
||||
))
|
||||
elif item_type == "custom_tool_call":
|
||||
fn_name = getattr(item, "name", "") or ""
|
||||
arguments = getattr(item, "input", "{}")
|
||||
if not isinstance(arguments, str):
|
||||
arguments = json.dumps(arguments, ensure_ascii=False)
|
||||
raw_call_id = getattr(item, "call_id", None)
|
||||
raw_item_id = getattr(item, "id", None)
|
||||
embedded_call_id, _ = _split_responses_tool_id(raw_item_id)
|
||||
call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
|
||||
if not isinstance(call_id, str) or not call_id.strip():
|
||||
call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls))
|
||||
call_id = call_id.strip()
|
||||
response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
|
||||
response_item_id = _derive_responses_function_call_id(call_id, response_item_id)
|
||||
tool_calls.append(SimpleNamespace(
|
||||
id=call_id,
|
||||
call_id=call_id,
|
||||
response_item_id=response_item_id,
|
||||
type="function",
|
||||
function=SimpleNamespace(name=fn_name, arguments=arguments),
|
||||
))
|
||||
|
||||
final_text = "\n".join([p for p in content_parts if p]).strip()
|
||||
if not final_text and hasattr(response, "output_text"):
|
||||
out_text = getattr(response, "output_text", "")
|
||||
if isinstance(out_text, str):
|
||||
final_text = out_text.strip()
|
||||
|
||||
assistant_message = SimpleNamespace(
|
||||
content=final_text,
|
||||
tool_calls=tool_calls,
|
||||
reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None,
|
||||
reasoning_content=None,
|
||||
reasoning_details=None,
|
||||
codex_reasoning_items=reasoning_items_raw or None,
|
||||
)
|
||||
|
||||
if tool_calls:
|
||||
finish_reason = "tool_calls"
|
||||
elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
|
||||
finish_reason = "incomplete"
|
||||
elif reasoning_items_raw and not final_text:
|
||||
# Response contains only reasoning (encrypted thinking state) with
|
||||
# no visible content or tool calls. The model is still thinking and
|
||||
# needs another turn to produce the actual answer. Marking this as
|
||||
# "stop" would send it into the empty-content retry loop which burns
|
||||
# 3 retries then fails — treat it as incomplete instead so the Codex
|
||||
# continuation path handles it correctly.
|
||||
finish_reason = "incomplete"
|
||||
else:
|
||||
finish_reason = "stop"
|
||||
return assistant_message, finish_reason
|
||||
+13
-73
@@ -31,7 +31,6 @@ from agent.model_metadata import (
|
||||
get_model_context_length,
|
||||
estimate_messages_tokens_rough,
|
||||
)
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -64,47 +63,6 @@ _CHARS_PER_TOKEN = 4
|
||||
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
|
||||
|
||||
|
||||
def _content_text_for_contains(content: Any) -> str:
|
||||
"""Return a best-effort text view of message content.
|
||||
|
||||
Used only for substring checks when we need to know whether we've already
|
||||
appended a note to a message. Keeps multimodal lists intact elsewhere.
|
||||
"""
|
||||
if content is None:
|
||||
return ""
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
parts: list[str] = []
|
||||
for item in content:
|
||||
if isinstance(item, str):
|
||||
parts.append(item)
|
||||
elif isinstance(item, dict):
|
||||
text = item.get("text")
|
||||
if isinstance(text, str):
|
||||
parts.append(text)
|
||||
return "\n".join(part for part in parts if part)
|
||||
return str(content)
|
||||
|
||||
|
||||
def _append_text_to_content(content: Any, text: str, *, prepend: bool = False) -> Any:
|
||||
"""Append or prepend plain text to message content safely.
|
||||
|
||||
Compression sometimes needs to add a note or merge a summary into an
|
||||
existing message. Message content may be plain text or a multimodal list of
|
||||
blocks, so direct string concatenation is not always safe.
|
||||
"""
|
||||
if content is None:
|
||||
return text
|
||||
if isinstance(content, str):
|
||||
return text + content if prepend else content + text
|
||||
if isinstance(content, list):
|
||||
text_block = {"type": "text", "text": text}
|
||||
return [text_block, *content] if prepend else [*content, text_block]
|
||||
rendered = str(content)
|
||||
return text + rendered if prepend else rendered + text
|
||||
|
||||
|
||||
def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
|
||||
"""Shrink long string values inside a tool-call arguments JSON blob while
|
||||
preserving JSON validity.
|
||||
@@ -592,15 +550,11 @@ class ContextCompressor(ContextEngine):
|
||||
Includes tool call arguments and result content (up to
|
||||
``_CONTENT_MAX`` chars per message) so the summarizer can preserve
|
||||
specific details like file paths, commands, and outputs.
|
||||
|
||||
All content is redacted before serialization to prevent secrets
|
||||
(API keys, tokens, passwords) from leaking into the summary that
|
||||
gets sent to the auxiliary model and persisted across compactions.
|
||||
"""
|
||||
parts = []
|
||||
for msg in turns:
|
||||
role = msg.get("role", "unknown")
|
||||
content = redact_sensitive_text(msg.get("content") or "")
|
||||
content = msg.get("content") or ""
|
||||
|
||||
# Tool results: keep enough content for the summarizer
|
||||
if role == "tool":
|
||||
@@ -621,7 +575,7 @@ class ContextCompressor(ContextEngine):
|
||||
if isinstance(tc, dict):
|
||||
fn = tc.get("function", {})
|
||||
name = fn.get("name", "?")
|
||||
args = redact_sensitive_text(fn.get("arguments", ""))
|
||||
args = fn.get("arguments", "")
|
||||
# Truncate long arguments but keep enough for context
|
||||
if len(args) > self._TOOL_ARGS_MAX:
|
||||
args = args[:self._TOOL_ARGS_HEAD] + "..."
|
||||
@@ -679,13 +633,7 @@ class ContextCompressor(ContextEngine):
|
||||
"assistant that continues the conversation. "
|
||||
"Do NOT respond to any questions or requests in the conversation — "
|
||||
"only output the structured summary. "
|
||||
"Do NOT include any preamble, greeting, or prefix. "
|
||||
"Write the summary in the same language the user was using in the "
|
||||
"conversation — do not translate or switch to English. "
|
||||
"NEVER include API keys, tokens, passwords, secrets, credentials, "
|
||||
"or connection strings in the summary — replace any that appear "
|
||||
"with [REDACTED]. Note that the user had credentials present, but "
|
||||
"do not preserve their values."
|
||||
"Do NOT include any preamble, greeting, or prefix."
|
||||
)
|
||||
|
||||
# Shared structured template (used by both paths).
|
||||
@@ -742,7 +690,7 @@ Be specific with file paths, commands, line numbers, and results.]
|
||||
[What remains to be done — framed as context, not instructions]
|
||||
|
||||
## Critical Context
|
||||
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation. NEVER include API keys, tokens, passwords, or credentials — write [REDACTED] instead.]
|
||||
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
|
||||
|
||||
Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.
|
||||
|
||||
@@ -782,7 +730,7 @@ Use this exact structure:
|
||||
prompt += f"""
|
||||
|
||||
FOCUS TOPIC: "{focus_topic}"
|
||||
The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED]."""
|
||||
The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget."""
|
||||
|
||||
try:
|
||||
call_kwargs = {
|
||||
@@ -805,9 +753,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
# Handle cases where content is not a string (e.g., dict from llama.cpp)
|
||||
if not isinstance(content, str):
|
||||
content = str(content) if content else ""
|
||||
# Redact the summary output as well — the summarizer LLM may
|
||||
# ignore prompt instructions and echo back secrets verbatim.
|
||||
summary = redact_sensitive_text(content.strip())
|
||||
summary = content.strip()
|
||||
# Store for iterative updates on next compaction
|
||||
self._previous_summary = summary
|
||||
self._summary_failure_cooldown_until = 0.0
|
||||
@@ -848,7 +794,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
)
|
||||
self.summary_model = "" # empty = use main model
|
||||
self._summary_failure_cooldown_until = 0.0 # no cooldown
|
||||
return self._generate_summary(turns_to_summarize, focus_topic=focus_topic) # retry immediately
|
||||
return self._generate_summary(messages, summary_budget) # retry immediately
|
||||
|
||||
# Transient errors (timeout, rate limit, network) — shorter cooldown
|
||||
_transient_cooldown = 60
|
||||
@@ -1185,13 +1131,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
for i in range(compress_start):
|
||||
msg = messages[i].copy()
|
||||
if i == 0 and msg.get("role") == "system":
|
||||
existing = msg.get("content")
|
||||
existing = msg.get("content") or ""
|
||||
_compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
|
||||
if _compression_note not in _content_text_for_contains(existing):
|
||||
msg["content"] = _append_text_to_content(
|
||||
existing,
|
||||
"\n\n" + _compression_note if isinstance(existing, str) and existing else _compression_note,
|
||||
)
|
||||
if _compression_note not in existing:
|
||||
msg["content"] = existing + "\n\n" + _compression_note
|
||||
compressed.append(msg)
|
||||
|
||||
# If LLM summary failed, insert a static fallback so the model
|
||||
@@ -1235,15 +1178,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
for i in range(compress_end, n_messages):
|
||||
msg = messages[i].copy()
|
||||
if _merge_summary_into_tail and i == compress_end:
|
||||
merged_prefix = (
|
||||
original = msg.get("content") or ""
|
||||
msg["content"] = (
|
||||
summary
|
||||
+ "\n\n--- END OF CONTEXT SUMMARY — "
|
||||
"respond to the message below, not the summary above ---\n\n"
|
||||
)
|
||||
msg["content"] = _append_text_to_content(
|
||||
msg.get("content"),
|
||||
merged_prefix,
|
||||
prepend=True,
|
||||
+ original
|
||||
)
|
||||
_merge_summary_into_tail = False
|
||||
compressed.append(msg)
|
||||
|
||||
@@ -483,7 +483,9 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
|
||||
text=True,
|
||||
timeout=10,
|
||||
)
|
||||
except (FileNotFoundError, OSError, subprocess.TimeoutExpired):
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
except subprocess.TimeoutExpired:
|
||||
return None
|
||||
if result.returncode != 0:
|
||||
return None
|
||||
|
||||
@@ -21,9 +21,6 @@ from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
from agent.file_safety import get_read_block_error, is_write_denied
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
ACP_MARKER_BASE_URL = "acp://copilot"
|
||||
_DEFAULT_TIMEOUT_SECONDS = 900.0
|
||||
|
||||
@@ -57,18 +54,6 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
|
||||
}
|
||||
|
||||
|
||||
def _permission_denied(message_id: Any) -> dict[str, Any]:
|
||||
return {
|
||||
"jsonrpc": "2.0",
|
||||
"id": message_id,
|
||||
"result": {
|
||||
"outcome": {
|
||||
"outcome": "cancelled",
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _format_messages_as_prompt(
|
||||
messages: list[dict[str, Any]],
|
||||
model: str | None = None,
|
||||
@@ -401,8 +386,6 @@ class CopilotACPClient:
|
||||
stderr_tail: deque[str] = deque(maxlen=40)
|
||||
|
||||
def _stdout_reader() -> None:
|
||||
if proc.stdout is None:
|
||||
return
|
||||
for line in proc.stdout:
|
||||
try:
|
||||
inbox.put(json.loads(line))
|
||||
@@ -550,13 +533,18 @@ class CopilotACPClient:
|
||||
params = msg.get("params") or {}
|
||||
|
||||
if method == "session/request_permission":
|
||||
response = _permission_denied(message_id)
|
||||
response = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": message_id,
|
||||
"result": {
|
||||
"outcome": {
|
||||
"outcome": "allow_once",
|
||||
}
|
||||
},
|
||||
}
|
||||
elif method == "fs/read_text_file":
|
||||
try:
|
||||
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
|
||||
block_error = get_read_block_error(str(path))
|
||||
if block_error:
|
||||
raise PermissionError(block_error)
|
||||
content = path.read_text() if path.exists() else ""
|
||||
line = params.get("line")
|
||||
limit = params.get("limit")
|
||||
@@ -565,8 +553,6 @@ class CopilotACPClient:
|
||||
start = line - 1
|
||||
end = start + limit if isinstance(limit, int) and limit > 0 else None
|
||||
content = "".join(lines[start:end])
|
||||
if content:
|
||||
content = redact_sensitive_text(content)
|
||||
response = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": message_id,
|
||||
@@ -579,10 +565,6 @@ class CopilotACPClient:
|
||||
elif method == "fs/write_text_file":
|
||||
try:
|
||||
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
|
||||
if is_write_denied(str(path)):
|
||||
raise PermissionError(
|
||||
f"Write denied: '{path}' is a protected system/credential file."
|
||||
)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(str(params.get("content") or ""))
|
||||
response = {
|
||||
|
||||
+69
-91
@@ -983,14 +983,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
active_sources: Set[str] = set()
|
||||
auth_store = _load_auth_store()
|
||||
|
||||
# Shared suppression gate — used at every upsert site so
|
||||
# `hermes auth remove <provider> <N>` is stable across all source types.
|
||||
try:
|
||||
from hermes_cli.auth import is_source_suppressed as _is_suppressed
|
||||
except ImportError:
|
||||
def _is_suppressed(_p, _s): # type: ignore[misc]
|
||||
return False
|
||||
|
||||
if provider == "anthropic":
|
||||
# Only auto-discover external credentials (Claude Code, Hermes PKCE)
|
||||
# when the user has explicitly configured anthropic as their provider.
|
||||
@@ -1010,8 +1002,13 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
("claude_code", read_claude_code_credentials()),
|
||||
):
|
||||
if creds and creds.get("accessToken"):
|
||||
if _is_suppressed(provider, source_name):
|
||||
continue
|
||||
# Check if user explicitly removed this source
|
||||
try:
|
||||
from hermes_cli.auth import is_source_suppressed
|
||||
if is_source_suppressed(provider, source_name):
|
||||
continue
|
||||
except ImportError:
|
||||
pass
|
||||
active_sources.add(source_name)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
@@ -1029,7 +1026,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
|
||||
elif provider == "nous":
|
||||
state = _load_provider_state(auth_store, "nous")
|
||||
if state and not _is_suppressed(provider, "device_code"):
|
||||
if state:
|
||||
active_sources.add("device_code")
|
||||
# Prefer a user-supplied label embedded in the singleton state
|
||||
# (set by persist_nous_credentials(label=...) when the user ran
|
||||
@@ -1070,21 +1067,20 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
token, source = resolve_copilot_token()
|
||||
if token:
|
||||
source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
|
||||
if not _is_suppressed(provider, source_name):
|
||||
active_sources.add(source_name)
|
||||
pconfig = PROVIDER_REGISTRY.get(provider)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
provider,
|
||||
source_name,
|
||||
{
|
||||
"source": source_name,
|
||||
"auth_type": AUTH_TYPE_API_KEY,
|
||||
"access_token": token,
|
||||
"base_url": pconfig.inference_base_url if pconfig else "",
|
||||
"label": source,
|
||||
},
|
||||
)
|
||||
active_sources.add(source_name)
|
||||
pconfig = PROVIDER_REGISTRY.get(provider)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
provider,
|
||||
source_name,
|
||||
{
|
||||
"source": source_name,
|
||||
"auth_type": AUTH_TYPE_API_KEY,
|
||||
"access_token": token,
|
||||
"base_url": pconfig.inference_base_url if pconfig else "",
|
||||
"label": source,
|
||||
},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("Copilot token seed failed: %s", exc)
|
||||
|
||||
@@ -1100,21 +1096,20 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
token = creds.get("api_key", "")
|
||||
if token:
|
||||
source_name = creds.get("source", "qwen-cli")
|
||||
if not _is_suppressed(provider, source_name):
|
||||
active_sources.add(source_name)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
provider,
|
||||
source_name,
|
||||
{
|
||||
"source": source_name,
|
||||
"auth_type": AUTH_TYPE_OAUTH,
|
||||
"access_token": token,
|
||||
"expires_at_ms": creds.get("expires_at_ms"),
|
||||
"base_url": creds.get("base_url", ""),
|
||||
"label": creds.get("auth_file", source_name),
|
||||
},
|
||||
)
|
||||
active_sources.add(source_name)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
provider,
|
||||
source_name,
|
||||
{
|
||||
"source": source_name,
|
||||
"auth_type": AUTH_TYPE_OAUTH,
|
||||
"access_token": token,
|
||||
"expires_at_ms": creds.get("expires_at_ms"),
|
||||
"base_url": creds.get("base_url", ""),
|
||||
"label": creds.get("auth_file", source_name),
|
||||
},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("Qwen OAuth token seed failed: %s", exc)
|
||||
|
||||
@@ -1123,7 +1118,13 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
# the device_code source as suppressed so it won't be re-seeded from
|
||||
# the Hermes auth store. Without this gate the removal is instantly
|
||||
# undone on the next load_pool() call.
|
||||
if _is_suppressed(provider, "device_code"):
|
||||
codex_suppressed = False
|
||||
try:
|
||||
from hermes_cli.auth import is_source_suppressed
|
||||
codex_suppressed = is_source_suppressed(provider, "device_code")
|
||||
except ImportError:
|
||||
pass
|
||||
if codex_suppressed:
|
||||
return changed, active_sources
|
||||
|
||||
state = _load_provider_state(auth_store, "openai-codex")
|
||||
@@ -1157,22 +1158,10 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
|
||||
changed = False
|
||||
active_sources: Set[str] = set()
|
||||
# Honour user suppression — `hermes auth remove <provider> <N>` for an
|
||||
# env-seeded credential marks the env:<VAR> source as suppressed so it
|
||||
# won't be re-seeded from the user's shell environment or ~/.hermes/.env.
|
||||
# Without this gate the removal is silently undone on the next
|
||||
# load_pool() call whenever the var is still exported by the shell.
|
||||
try:
|
||||
from hermes_cli.auth import is_source_suppressed as _is_source_suppressed
|
||||
except ImportError:
|
||||
def _is_source_suppressed(_p, _s): # type: ignore[misc]
|
||||
return False
|
||||
if provider == "openrouter":
|
||||
token = os.getenv("OPENROUTER_API_KEY", "").strip()
|
||||
if token:
|
||||
source = "env:OPENROUTER_API_KEY"
|
||||
if _is_source_suppressed(provider, source):
|
||||
return changed, active_sources
|
||||
active_sources.add(source)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
@@ -1209,8 +1198,6 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
if not token:
|
||||
continue
|
||||
source = f"env:{env_var}"
|
||||
if _is_source_suppressed(provider, source):
|
||||
continue
|
||||
active_sources.add(source)
|
||||
auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
|
||||
base_url = env_url or pconfig.inference_base_url
|
||||
@@ -1255,13 +1242,6 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
|
||||
changed = False
|
||||
active_sources: Set[str] = set()
|
||||
|
||||
# Shared suppression gate — same pattern as _seed_from_env/_seed_from_singletons.
|
||||
try:
|
||||
from hermes_cli.auth import is_source_suppressed as _is_suppressed
|
||||
except ImportError:
|
||||
def _is_suppressed(_p, _s): # type: ignore[misc]
|
||||
return False
|
||||
|
||||
# Seed from the custom_providers config entry's api_key field
|
||||
cp_config = _get_custom_provider_config(pool_key)
|
||||
if cp_config:
|
||||
@@ -1270,20 +1250,19 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
|
||||
name = str(cp_config.get("name") or "").strip()
|
||||
if api_key:
|
||||
source = f"config:{name}"
|
||||
if not _is_suppressed(pool_key, source):
|
||||
active_sources.add(source)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
pool_key,
|
||||
source,
|
||||
{
|
||||
"source": source,
|
||||
"auth_type": AUTH_TYPE_API_KEY,
|
||||
"access_token": api_key,
|
||||
"base_url": base_url,
|
||||
"label": name or source,
|
||||
},
|
||||
)
|
||||
active_sources.add(source)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
pool_key,
|
||||
source,
|
||||
{
|
||||
"source": source,
|
||||
"auth_type": AUTH_TYPE_API_KEY,
|
||||
"access_token": api_key,
|
||||
"base_url": base_url,
|
||||
"label": name or source,
|
||||
},
|
||||
)
|
||||
|
||||
# Seed from model.api_key if model.provider=='custom' and model.base_url matches
|
||||
try:
|
||||
@@ -1303,20 +1282,19 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
|
||||
matched_key = get_custom_provider_pool_key(model_base_url)
|
||||
if matched_key == pool_key:
|
||||
source = "model_config"
|
||||
if not _is_suppressed(pool_key, source):
|
||||
active_sources.add(source)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
pool_key,
|
||||
source,
|
||||
{
|
||||
"source": source,
|
||||
"auth_type": AUTH_TYPE_API_KEY,
|
||||
"access_token": model_api_key,
|
||||
"base_url": model_base_url,
|
||||
"label": "model_config",
|
||||
},
|
||||
)
|
||||
active_sources.add(source)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
pool_key,
|
||||
source,
|
||||
{
|
||||
"source": source,
|
||||
"auth_type": AUTH_TYPE_API_KEY,
|
||||
"access_token": model_api_key,
|
||||
"base_url": model_base_url,
|
||||
"label": "model_config",
|
||||
},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@@ -1,401 +0,0 @@
|
||||
"""Unified removal contract for every credential source Hermes reads from.
|
||||
|
||||
Hermes seeds its credential pool from many places:
|
||||
|
||||
env:<VAR> — os.environ / ~/.hermes/.env
|
||||
claude_code — ~/.claude/.credentials.json
|
||||
hermes_pkce — ~/.hermes/.anthropic_oauth.json
|
||||
device_code — auth.json providers.<provider> (nous, openai-codex, ...)
|
||||
qwen-cli — ~/.qwen/oauth_creds.json
|
||||
gh_cli — gh auth token
|
||||
config:<name> — custom_providers config entry
|
||||
model_config — model.api_key when model.provider == "custom"
|
||||
manual — user ran `hermes auth add`
|
||||
|
||||
Each source has its own reader inside ``agent.credential_pool._seed_from_*``
|
||||
(which keep their existing shape — we haven't restructured them). What we
|
||||
unify here is **removal**:
|
||||
|
||||
``hermes auth remove <provider> <N>`` must make the pool entry stay gone.
|
||||
|
||||
Before this module, every source had an ad-hoc removal branch in
|
||||
``auth_remove_command``, and several sources had no branch at all — so
|
||||
``auth remove`` silently reverted on the next ``load_pool()`` call for
|
||||
qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and
|
||||
custom-config sources.
|
||||
|
||||
Now every source registers a ``RemovalStep`` that does exactly three things
|
||||
in the same shape:
|
||||
|
||||
1. Clean up whatever externally-readable state the source reads from
|
||||
(.env line, auth.json block, OAuth file, etc.)
|
||||
2. Suppress the ``(provider, source_id)`` in auth.json so the
|
||||
corresponding ``_seed_from_*`` branch skips the upsert on re-load
|
||||
3. Return ``RemovalResult`` describing what was cleaned and any
|
||||
diagnostic hints the user should see (shell-exported env vars,
|
||||
external credential files we deliberately don't delete, etc.)
|
||||
|
||||
Adding a new credential source is:
|
||||
- wire up a reader branch in ``_seed_from_*`` (existing pattern)
|
||||
- gate that reader behind ``is_source_suppressed(provider, source_id)``
|
||||
- register a ``RemovalStep`` here
|
||||
|
||||
No more per-source if/elif chain in ``auth_remove_command``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class RemovalResult:
|
||||
"""Outcome of removing a credential source.
|
||||
|
||||
Attributes:
|
||||
cleaned: Short strings describing external state that was actually
|
||||
mutated (``"Cleared XAI_API_KEY from .env"``,
|
||||
``"Cleared openai-codex OAuth tokens from auth store"``).
|
||||
Printed as plain lines to the user.
|
||||
hints: Diagnostic lines ABOUT state the user may need to clean up
|
||||
themselves or is deliberately left intact (shell-exported env
|
||||
var, Claude Code credential file we don't delete, etc.).
|
||||
Printed as plain lines to the user. Always non-destructive.
|
||||
suppress: Whether to call ``suppress_credential_source`` after
|
||||
cleanup so future ``load_pool`` calls skip this source.
|
||||
Default True — almost every source needs this to stay sticky.
|
||||
The only legitimate False is ``manual`` entries, which aren't
|
||||
seeded from anywhere external.
|
||||
"""
|
||||
|
||||
cleaned: List[str] = field(default_factory=list)
|
||||
hints: List[str] = field(default_factory=list)
|
||||
suppress: bool = True
|
||||
|
||||
|
||||
@dataclass
|
||||
class RemovalStep:
|
||||
"""How to remove one specific credential source cleanly.
|
||||
|
||||
Attributes:
|
||||
provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...).
|
||||
Special value ``"*"`` means "matches any provider" — used for
|
||||
sources like ``manual`` that aren't provider-specific.
|
||||
source_id: Source identifier as it appears in
|
||||
``PooledCredential.source``. May be a literal (``"claude_code"``)
|
||||
or a prefix pattern matched via ``match_fn``.
|
||||
match_fn: Optional predicate overriding literal ``source_id``
|
||||
matching. Gets the removed entry's source string. Used for
|
||||
``env:*`` (any env-seeded key), ``config:*`` (any custom
|
||||
pool), and ``manual:*`` (any manual-source variant).
|
||||
remove_fn: ``(provider, removed_entry) -> RemovalResult``. Does the
|
||||
actual cleanup and returns what happened for the user.
|
||||
description: One-line human-readable description for docs / tests.
|
||||
"""
|
||||
|
||||
provider: str
|
||||
source_id: str
|
||||
remove_fn: Callable[..., RemovalResult]
|
||||
match_fn: Optional[Callable[[str], bool]] = None
|
||||
description: str = ""
|
||||
|
||||
def matches(self, provider: str, source: str) -> bool:
|
||||
if self.provider != "*" and self.provider != provider:
|
||||
return False
|
||||
if self.match_fn is not None:
|
||||
return self.match_fn(source)
|
||||
return source == self.source_id
|
||||
|
||||
|
||||
_REGISTRY: List[RemovalStep] = []
|
||||
|
||||
|
||||
def register(step: RemovalStep) -> RemovalStep:
|
||||
_REGISTRY.append(step)
|
||||
return step
|
||||
|
||||
|
||||
def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]:
|
||||
"""Return the first matching RemovalStep, or None if unregistered.
|
||||
|
||||
Unregistered sources fall through to the default remove path in
|
||||
``auth_remove_command``: the pool entry is already gone (that happens
|
||||
before dispatch), no external cleanup, no suppression. This is the
|
||||
correct behaviour for ``manual`` entries — they were only ever stored
|
||||
in the pool, nothing external to clean up.
|
||||
"""
|
||||
for step in _REGISTRY:
|
||||
if step.matches(provider, source):
|
||||
return step
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Individual RemovalStep implementations — one per source.
|
||||
# ---------------------------------------------------------------------------
|
||||
# Each remove_fn is intentionally small and single-purpose. Adding a new
|
||||
# credential source means adding ONE entry here — no other changes to
|
||||
# auth_remove_command.
|
||||
|
||||
|
||||
def _remove_env_source(provider: str, removed) -> RemovalResult:
|
||||
"""env:<VAR> — the most common case.
|
||||
|
||||
Handles three user situations:
|
||||
1. Var lives only in ~/.hermes/.env → clear it
|
||||
2. Var lives only in the user's shell (shell profile, systemd
|
||||
EnvironmentFile, launchd plist) → hint them where to unset it
|
||||
3. Var lives in both → clear from .env, hint about shell
|
||||
"""
|
||||
from hermes_cli.config import get_env_path, remove_env_value
|
||||
|
||||
result = RemovalResult()
|
||||
env_var = removed.source[len("env:"):]
|
||||
if not env_var:
|
||||
return result
|
||||
|
||||
# Detect shell vs .env BEFORE remove_env_value pops os.environ.
|
||||
env_in_process = bool(os.getenv(env_var))
|
||||
env_in_dotenv = False
|
||||
try:
|
||||
env_path = get_env_path()
|
||||
if env_path.exists():
|
||||
env_in_dotenv = any(
|
||||
line.strip().startswith(f"{env_var}=")
|
||||
for line in env_path.read_text(errors="replace").splitlines()
|
||||
)
|
||||
except OSError:
|
||||
pass
|
||||
shell_exported = env_in_process and not env_in_dotenv
|
||||
|
||||
cleared = remove_env_value(env_var)
|
||||
if cleared:
|
||||
result.cleaned.append(f"Cleared {env_var} from .env")
|
||||
|
||||
if shell_exported:
|
||||
result.hints.extend([
|
||||
f"Note: {env_var} is still set in your shell environment "
|
||||
f"(not in ~/.hermes/.env).",
|
||||
" Unset it there (shell profile, systemd EnvironmentFile, "
|
||||
"launchd plist, etc.) or it will keep being visible to Hermes.",
|
||||
f" The pool entry is now suppressed — Hermes will ignore "
|
||||
f"{env_var} until you run `hermes auth add {provider}`.",
|
||||
])
|
||||
else:
|
||||
result.hints.append(
|
||||
f"Suppressed env:{env_var} — it will not be re-seeded even "
|
||||
f"if the variable is re-exported later."
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def _remove_claude_code(provider: str, removed) -> RemovalResult:
|
||||
"""~/.claude/.credentials.json is owned by Claude Code itself.
|
||||
|
||||
We don't delete it — the user's Claude Code install still needs to
|
||||
work. We just suppress it so Hermes stops reading it.
|
||||
"""
|
||||
return RemovalResult(hints=[
|
||||
"Suppressed claude_code credential — it will not be re-seeded.",
|
||||
"Note: Claude Code credentials still live in ~/.claude/.credentials.json",
|
||||
"Run `hermes auth add anthropic` to re-enable if needed.",
|
||||
])
|
||||
|
||||
|
||||
def _remove_hermes_pkce(provider: str, removed) -> RemovalResult:
|
||||
"""~/.hermes/.anthropic_oauth.json is ours — delete it outright."""
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
result = RemovalResult()
|
||||
oauth_file = get_hermes_home() / ".anthropic_oauth.json"
|
||||
if oauth_file.exists():
|
||||
try:
|
||||
oauth_file.unlink()
|
||||
result.cleaned.append("Cleared Hermes Anthropic OAuth credentials")
|
||||
except OSError as exc:
|
||||
result.hints.append(f"Could not delete {oauth_file}: {exc}")
|
||||
return result
|
||||
|
||||
|
||||
def _clear_auth_store_provider(provider: str) -> bool:
|
||||
"""Delete auth_store.providers[provider]. Returns True if deleted."""
|
||||
from hermes_cli.auth import (
|
||||
_auth_store_lock,
|
||||
_load_auth_store,
|
||||
_save_auth_store,
|
||||
)
|
||||
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
providers_dict = auth_store.get("providers")
|
||||
if isinstance(providers_dict, dict) and provider in providers_dict:
|
||||
del providers_dict[provider]
|
||||
_save_auth_store(auth_store)
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
|
||||
"""Nous OAuth lives in auth.json providers.nous — clear it and suppress.
|
||||
|
||||
We suppress in addition to clearing because nothing else stops the
|
||||
user's next `hermes login` run from writing providers.nous again
|
||||
before they decide to. Suppression forces them to go through
|
||||
`hermes auth add nous` to re-engage, which is the documented re-add
|
||||
path and clears the suppression atomically.
|
||||
"""
|
||||
result = RemovalResult()
|
||||
if _clear_auth_store_provider(provider):
|
||||
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
|
||||
return result
|
||||
|
||||
|
||||
def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
|
||||
"""Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
|
||||
|
||||
refresh_codex_oauth_pure() writes both every time, so clearing only
|
||||
the Hermes auth store is not enough — _seed_from_singletons() would
|
||||
re-import from ~/.codex/auth.json on the next load_pool() call and
|
||||
the removal would be instantly undone. We suppress instead of
|
||||
deleting Codex CLI's file, so the Codex CLI itself keeps working.
|
||||
|
||||
The canonical source name in ``_seed_from_singletons`` is
|
||||
``"device_code"`` (no prefix). Entries may show up in the pool as
|
||||
either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added
|
||||
via ``hermes auth add openai-codex``), but in both cases the re-seed
|
||||
gate lives at the ``"device_code"`` suppression key. We suppress
|
||||
that canonical key here; the central dispatcher also suppresses
|
||||
``removed.source`` which is fine — belt-and-suspenders, idempotent.
|
||||
"""
|
||||
from hermes_cli.auth import suppress_credential_source
|
||||
|
||||
result = RemovalResult()
|
||||
if _clear_auth_store_provider(provider):
|
||||
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
|
||||
# Suppress the canonical re-seed source, not just whatever source the
|
||||
# removed entry had. Otherwise `manual:device_code` removals wouldn't
|
||||
# block the `device_code` re-seed path.
|
||||
suppress_credential_source(provider, "device_code")
|
||||
result.hints.extend([
|
||||
"Suppressed openai-codex device_code source — it will not be re-seeded.",
|
||||
"Note: Codex CLI credentials still live in ~/.codex/auth.json",
|
||||
"Run `hermes auth add openai-codex` to re-enable if needed.",
|
||||
])
|
||||
return result
|
||||
|
||||
|
||||
def _remove_qwen_cli(provider: str, removed) -> RemovalResult:
|
||||
"""~/.qwen/oauth_creds.json is owned by the Qwen CLI.
|
||||
|
||||
Same pattern as claude_code — suppress, don't delete. The user's
|
||||
Qwen CLI install still reads from that file.
|
||||
"""
|
||||
return RemovalResult(hints=[
|
||||
"Suppressed qwen-cli credential — it will not be re-seeded.",
|
||||
"Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json",
|
||||
"Run `hermes auth add qwen-oauth` to re-enable if needed.",
|
||||
])
|
||||
|
||||
|
||||
def _remove_copilot_gh(provider: str, removed) -> RemovalResult:
|
||||
"""Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN.
|
||||
|
||||
Copilot is special: the same token can be seeded as multiple source
|
||||
entries (gh_cli from ``_seed_from_singletons`` plus env:<VAR> from
|
||||
``_seed_from_env``), so removing one entry without suppressing the
|
||||
others lets the duplicates resurrect. We suppress ALL known copilot
|
||||
sources here so removal is stable regardless of which entry the
|
||||
user clicked.
|
||||
|
||||
We don't touch the user's gh CLI or shell state — just suppress so
|
||||
Hermes stops picking the token up.
|
||||
"""
|
||||
# Suppress ALL copilot source variants up-front so no path resurrects
|
||||
# the pool entry. The central dispatcher in auth_remove_command will
|
||||
# ALSO suppress removed.source, but it's idempotent so double-calling
|
||||
# is harmless.
|
||||
from hermes_cli.auth import suppress_credential_source
|
||||
suppress_credential_source(provider, "gh_cli")
|
||||
for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"):
|
||||
suppress_credential_source(provider, f"env:{env_var}")
|
||||
|
||||
return RemovalResult(hints=[
|
||||
"Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.",
|
||||
"Note: Your gh CLI / shell environment is unchanged.",
|
||||
"Run `hermes auth add copilot` to re-enable if needed.",
|
||||
])
|
||||
|
||||
|
||||
def _remove_custom_config(provider: str, removed) -> RemovalResult:
|
||||
"""Custom provider pools are seeded from custom_providers config or
|
||||
model.api_key. Both are in config.yaml — modifying that from here
|
||||
is more invasive than suppression. We suppress; the user can edit
|
||||
config.yaml if they want to remove the key from disk entirely.
|
||||
"""
|
||||
source_label = removed.source
|
||||
return RemovalResult(hints=[
|
||||
f"Suppressed {source_label} — it will not be re-seeded.",
|
||||
"Note: The underlying value in config.yaml is unchanged. Edit it "
|
||||
"directly if you want to remove the credential from disk.",
|
||||
])
|
||||
|
||||
|
||||
def _register_all_sources() -> None:
|
||||
"""Called once on module import.
|
||||
|
||||
ORDER MATTERS — ``find_removal_step`` returns the first match. Put
|
||||
provider-specific steps before the generic ``env:*`` step so that e.g.
|
||||
copilot's ``env:GH_TOKEN`` goes through the copilot removal (which
|
||||
doesn't touch the user's shell), not the generic env-var removal
|
||||
(which would try to clear .env).
|
||||
"""
|
||||
register(RemovalStep(
|
||||
provider="copilot", source_id="gh_cli",
|
||||
match_fn=lambda src: src == "gh_cli" or src.startswith("env:"),
|
||||
remove_fn=_remove_copilot_gh,
|
||||
description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="*", source_id="env:",
|
||||
match_fn=lambda src: src.startswith("env:"),
|
||||
remove_fn=_remove_env_source,
|
||||
description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="anthropic", source_id="claude_code",
|
||||
remove_fn=_remove_claude_code,
|
||||
description="~/.claude/.credentials.json",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="anthropic", source_id="hermes_pkce",
|
||||
remove_fn=_remove_hermes_pkce,
|
||||
description="~/.hermes/.anthropic_oauth.json",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="nous", source_id="device_code",
|
||||
remove_fn=_remove_nous_device_code,
|
||||
description="auth.json providers.nous",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="openai-codex", source_id="device_code",
|
||||
match_fn=lambda src: src == "device_code" or src.endswith(":device_code"),
|
||||
remove_fn=_remove_codex_device_code,
|
||||
description="auth.json providers.openai-codex + ~/.codex/auth.json",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="qwen-oauth", source_id="qwen-cli",
|
||||
remove_fn=_remove_qwen_cli,
|
||||
description="~/.qwen/oauth_creds.json",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="*", source_id="config:",
|
||||
match_fn=lambda src: src.startswith("config:") or src == "model_config",
|
||||
remove_fn=_remove_custom_config,
|
||||
description="Custom provider config.yaml api_key field",
|
||||
))
|
||||
|
||||
|
||||
_register_all_sources()
|
||||
+4
-10
@@ -225,11 +225,9 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
|
||||
content = _oneline(args.get("content", ""))
|
||||
return f"+{target}: \"{content[:25]}{'...' if len(content) > 25 else ''}\""
|
||||
elif action == "replace":
|
||||
old = _oneline(args.get("old_text") or "") or "<missing old_text>"
|
||||
return f"~{target}: \"{old[:20]}\""
|
||||
return f"~{target}: \"{_oneline(args.get('old_text', '')[:20])}\""
|
||||
elif action == "remove":
|
||||
old = _oneline(args.get("old_text") or "") or "<missing old_text>"
|
||||
return f"-{target}: \"{old[:20]}\""
|
||||
return f"-{target}: \"{_oneline(args.get('old_text', '')[:20])}\""
|
||||
return action
|
||||
|
||||
if tool_name == "send_message":
|
||||
@@ -941,13 +939,9 @@ def get_cute_tool_message(
|
||||
if action == "add":
|
||||
return _wrap(f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}")
|
||||
elif action == "replace":
|
||||
old = args.get("old_text") or ""
|
||||
old = old if old else "<missing old_text>"
|
||||
return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(old, 20)}\" {dur}")
|
||||
return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
|
||||
elif action == "remove":
|
||||
old = args.get("old_text") or ""
|
||||
old = old if old else "<missing old_text>"
|
||||
return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(old, 20)}\" {dur}")
|
||||
return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
|
||||
return _wrap(f"┊ 🧠 memory {action} {dur}")
|
||||
if tool_name == "skills_list":
|
||||
return _wrap(f"┊ 📚 skills list {args.get('category', 'all')} {dur}")
|
||||
|
||||
+15
-79
@@ -220,25 +220,12 @@ _TRANSPORT_ERROR_TYPES = frozenset({
|
||||
"ConnectionAbortedError", "BrokenPipeError",
|
||||
"TimeoutError", "ReadError",
|
||||
"ServerDisconnectedError",
|
||||
# SSL/TLS transport errors — transient mid-stream handshake/record
|
||||
# failures that should retry rather than surface as a stalled session.
|
||||
# ssl.SSLError subclasses OSError (caught by isinstance) but we list
|
||||
# the type names here so provider-wrapped SSL errors (e.g. when the
|
||||
# SDK re-raises without preserving the exception chain) still classify
|
||||
# as transport rather than falling through to the unknown bucket.
|
||||
"SSLError", "SSLZeroReturnError", "SSLWantReadError",
|
||||
"SSLWantWriteError", "SSLEOFError", "SSLSyscallError",
|
||||
# OpenAI SDK errors (not subclasses of Python builtins)
|
||||
"APIConnectionError",
|
||||
"APITimeoutError",
|
||||
})
|
||||
|
||||
# Server disconnect patterns (no status code, but transport-level).
|
||||
# These are the "ambiguous" patterns — a plain connection close could be
|
||||
# transient transport hiccup OR server-side context overflow rejection
|
||||
# (common when the API gateway disconnects instead of returning an HTTP
|
||||
# error for oversized requests). A large session + one of these patterns
|
||||
# triggers the context-overflow-with-compression recovery path.
|
||||
# Server disconnect patterns (no status code, but transport-level)
|
||||
_SERVER_DISCONNECT_PATTERNS = [
|
||||
"server disconnected",
|
||||
"peer closed connection",
|
||||
@@ -249,40 +236,6 @@ _SERVER_DISCONNECT_PATTERNS = [
|
||||
"incomplete chunked read",
|
||||
]
|
||||
|
||||
# SSL/TLS transient failure patterns — intentionally distinct from
|
||||
# _SERVER_DISCONNECT_PATTERNS above.
|
||||
#
|
||||
# An SSL alert mid-stream is almost always a transport-layer hiccup
|
||||
# (flaky network, mid-session TLS renegotiation failure, load balancer
|
||||
# dropping the connection) — NOT a server-side context overflow signal.
|
||||
# So we want the retry path but NOT the compression path; lumping these
|
||||
# into _SERVER_DISCONNECT_PATTERNS would trigger unnecessary (and
|
||||
# expensive) context compression on any large-session SSL hiccup.
|
||||
#
|
||||
# The OpenSSL library constructs error codes by prepending a format string
|
||||
# to the uppercased alert reason; OpenSSL 3.x changed the separator
|
||||
# (e.g. `SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
|
||||
# which silently stopped matching anything explicit. Matching on the
|
||||
# stable substrings (`bad record mac`, `ssl alert`, `tls alert`, etc.)
|
||||
# survives future OpenSSL format churn without code changes.
|
||||
_SSL_TRANSIENT_PATTERNS = [
|
||||
# Space-separated (human-readable form, Python ssl module, most SDKs)
|
||||
"bad record mac",
|
||||
"ssl alert",
|
||||
"tls alert",
|
||||
"ssl handshake failure",
|
||||
"tlsv1 alert",
|
||||
"sslv3 alert",
|
||||
# Underscore-separated (OpenSSL error code tokens, e.g.
|
||||
# `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC`, `SSLV3_ALERT_BAD_RECORD_MAC`)
|
||||
"bad_record_mac",
|
||||
"ssl_alert",
|
||||
"tls_alert",
|
||||
"tls_alert_internal_error",
|
||||
# Python ssl module prefix, e.g. "[SSL: BAD_RECORD_MAC]"
|
||||
"[ssl:",
|
||||
]
|
||||
|
||||
|
||||
# ── Classification pipeline ─────────────────────────────────────────────
|
||||
|
||||
@@ -302,10 +255,9 @@ def classify_api_error(
|
||||
2. HTTP status code + message-aware refinement
|
||||
3. Error code classification (from body)
|
||||
4. Message pattern matching (billing vs rate_limit vs context vs auth)
|
||||
5. SSL/TLS transient alert patterns → retry as timeout
|
||||
5. Transport error heuristics
|
||||
6. Server disconnect + large session → context overflow
|
||||
7. Transport error heuristics
|
||||
8. Fallback: unknown (retryable with backoff)
|
||||
7. Fallback: unknown (retryable with backoff)
|
||||
|
||||
Args:
|
||||
error: The exception from the API call.
|
||||
@@ -338,7 +290,7 @@ def classify_api_error(
|
||||
if isinstance(body, dict):
|
||||
_err_obj = body.get("error", {})
|
||||
if isinstance(_err_obj, dict):
|
||||
_body_msg = str(_err_obj.get("message") or "").lower()
|
||||
_body_msg = (_err_obj.get("message") or "").lower()
|
||||
# Parse metadata.raw for wrapped provider errors
|
||||
_metadata = _err_obj.get("metadata", {})
|
||||
if isinstance(_metadata, dict):
|
||||
@@ -350,11 +302,11 @@ def classify_api_error(
|
||||
if isinstance(_inner, dict):
|
||||
_inner_err = _inner.get("error", {})
|
||||
if isinstance(_inner_err, dict):
|
||||
_metadata_msg = str(_inner_err.get("message") or "").lower()
|
||||
_metadata_msg = (_inner_err.get("message") or "").lower()
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
if not _body_msg:
|
||||
_body_msg = str(body.get("message") or "").lower()
|
||||
_body_msg = (body.get("message") or "").lower()
|
||||
# Combine all message sources for pattern matching
|
||||
parts = [_raw_msg]
|
||||
if _body_msg and _body_msg not in _raw_msg:
|
||||
@@ -436,18 +388,7 @@ def classify_api_error(
|
||||
if classified is not None:
|
||||
return classified
|
||||
|
||||
# ── 5. SSL/TLS transient errors → retry as timeout (not compression) ──
|
||||
# SSL alerts mid-stream are transport hiccups, not server-side context
|
||||
# overflow signals. Classify before the disconnect check so a large
|
||||
# session doesn't incorrectly trigger context compression when the real
|
||||
# cause is a flaky TLS handshake. Also matches when the error is
|
||||
# wrapped in a generic exception whose message string carries the SSL
|
||||
# alert text but the type isn't ssl.SSLError (happens with some SDKs
|
||||
# that re-raise without chaining).
|
||||
if any(p in error_msg for p in _SSL_TRANSIENT_PATTERNS):
|
||||
return _result(FailoverReason.timeout, retryable=True)
|
||||
|
||||
# ── 6. Server disconnect + large session → context overflow ─────
|
||||
# ── 5. Server disconnect + large session → context overflow ─────
|
||||
# Must come BEFORE generic transport error catch — a disconnect on
|
||||
# a large session is more likely context overflow than a transient
|
||||
# transport hiccup. Without this ordering, RemoteProtocolError
|
||||
@@ -464,12 +405,12 @@ def classify_api_error(
|
||||
)
|
||||
return _result(FailoverReason.timeout, retryable=True)
|
||||
|
||||
# ── 7. Transport / timeout heuristics ───────────────────────────
|
||||
# ── 6. Transport / timeout heuristics ───────────────────────────
|
||||
|
||||
if error_type in _TRANSPORT_ERROR_TYPES or isinstance(error, (TimeoutError, ConnectionError, OSError)):
|
||||
return _result(FailoverReason.timeout, retryable=True)
|
||||
|
||||
# ── 8. Fallback: unknown ────────────────────────────────────────
|
||||
# ── 7. Fallback: unknown ────────────────────────────────────────
|
||||
|
||||
return _result(FailoverReason.unknown, retryable=True)
|
||||
|
||||
@@ -529,16 +470,11 @@ def _classify_by_status(
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
# Generic 404 with no "model not found" signal — could be a wrong
|
||||
# endpoint path (common with local llama.cpp / Ollama / vLLM when
|
||||
# the URL is slightly misconfigured), a proxy routing glitch, or
|
||||
# a transient backend issue. Classifying these as model_not_found
|
||||
# silently falls back to a different provider and tells the model
|
||||
# the model is missing, which is wrong and wastes a turn. Treat
|
||||
# as unknown so the retry loop surfaces the real error instead.
|
||||
# Generic 404 — could be model or endpoint
|
||||
return result_fn(
|
||||
FailoverReason.unknown,
|
||||
retryable=True,
|
||||
FailoverReason.model_not_found,
|
||||
retryable=False,
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
if status_code == 413:
|
||||
@@ -670,10 +606,10 @@ def _classify_400(
|
||||
if isinstance(body, dict):
|
||||
err_obj = body.get("error", {})
|
||||
if isinstance(err_obj, dict):
|
||||
err_body_msg = str(err_obj.get("message") or "").strip().lower()
|
||||
err_body_msg = (err_obj.get("message") or "").strip().lower()
|
||||
# Responses API (and some providers) use flat body: {"message": "..."}
|
||||
if not err_body_msg:
|
||||
err_body_msg = str(body.get("message") or "").strip().lower()
|
||||
err_body_msg = (body.get("message") or "").strip().lower()
|
||||
is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "")
|
||||
is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80
|
||||
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
"""Shared file safety rules used by both tools and ACP shims."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def _hermes_home_path() -> Path:
|
||||
"""Resolve the active HERMES_HOME (profile-aware) without circular imports."""
|
||||
try:
|
||||
from hermes_constants import get_hermes_home # local import to avoid cycles
|
||||
return get_hermes_home()
|
||||
except Exception:
|
||||
return Path(os.path.expanduser("~/.hermes"))
|
||||
|
||||
|
||||
def build_write_denied_paths(home: str) -> set[str]:
|
||||
"""Return exact sensitive paths that must never be written."""
|
||||
hermes_home = _hermes_home_path()
|
||||
return {
|
||||
os.path.realpath(p)
|
||||
for p in [
|
||||
os.path.join(home, ".ssh", "authorized_keys"),
|
||||
os.path.join(home, ".ssh", "id_rsa"),
|
||||
os.path.join(home, ".ssh", "id_ed25519"),
|
||||
os.path.join(home, ".ssh", "config"),
|
||||
str(hermes_home / ".env"),
|
||||
os.path.join(home, ".bashrc"),
|
||||
os.path.join(home, ".zshrc"),
|
||||
os.path.join(home, ".profile"),
|
||||
os.path.join(home, ".bash_profile"),
|
||||
os.path.join(home, ".zprofile"),
|
||||
os.path.join(home, ".netrc"),
|
||||
os.path.join(home, ".pgpass"),
|
||||
os.path.join(home, ".npmrc"),
|
||||
os.path.join(home, ".pypirc"),
|
||||
"/etc/sudoers",
|
||||
"/etc/passwd",
|
||||
"/etc/shadow",
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def build_write_denied_prefixes(home: str) -> list[str]:
|
||||
"""Return sensitive directory prefixes that must never be written."""
|
||||
return [
|
||||
os.path.realpath(p) + os.sep
|
||||
for p in [
|
||||
os.path.join(home, ".ssh"),
|
||||
os.path.join(home, ".aws"),
|
||||
os.path.join(home, ".gnupg"),
|
||||
os.path.join(home, ".kube"),
|
||||
"/etc/sudoers.d",
|
||||
"/etc/systemd",
|
||||
os.path.join(home, ".docker"),
|
||||
os.path.join(home, ".azure"),
|
||||
os.path.join(home, ".config", "gh"),
|
||||
]
|
||||
]
|
||||
|
||||
|
||||
def get_safe_write_root() -> Optional[str]:
|
||||
"""Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset."""
|
||||
root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
|
||||
if not root:
|
||||
return None
|
||||
try:
|
||||
return os.path.realpath(os.path.expanduser(root))
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def is_write_denied(path: str) -> bool:
|
||||
"""Return True if path is blocked by the write denylist or safe root."""
|
||||
home = os.path.realpath(os.path.expanduser("~"))
|
||||
resolved = os.path.realpath(os.path.expanduser(str(path)))
|
||||
|
||||
if resolved in build_write_denied_paths(home):
|
||||
return True
|
||||
for prefix in build_write_denied_prefixes(home):
|
||||
if resolved.startswith(prefix):
|
||||
return True
|
||||
|
||||
safe_root = get_safe_write_root()
|
||||
if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def get_read_block_error(path: str) -> Optional[str]:
|
||||
"""Return an error message when a read targets internal Hermes cache files."""
|
||||
resolved = Path(path).expanduser().resolve()
|
||||
hermes_home = _hermes_home_path().resolve()
|
||||
blocked_dirs = [
|
||||
hermes_home / "skills" / ".hub" / "index-cache",
|
||||
hermes_home / "skills" / ".hub",
|
||||
]
|
||||
for blocked in blocked_dirs:
|
||||
try:
|
||||
resolved.relative_to(blocked)
|
||||
except ValueError:
|
||||
continue
|
||||
return (
|
||||
f"Access denied: {path} is an internal Hermes cache file "
|
||||
"and cannot be read directly to prevent prompt injection. "
|
||||
"Use the skills_list or skill_view tools instead."
|
||||
)
|
||||
return None
|
||||
@@ -39,7 +39,6 @@ from typing import Any, Dict, Iterator, List, Optional
|
||||
import httpx
|
||||
|
||||
from agent import google_oauth
|
||||
from agent.gemini_schema import sanitize_gemini_tool_parameters
|
||||
from agent.google_code_assist import (
|
||||
CODE_ASSIST_ENDPOINT,
|
||||
FREE_TIER_ID,
|
||||
@@ -206,7 +205,7 @@ def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]:
|
||||
decl["description"] = str(fn["description"])
|
||||
params = fn.get("parameters")
|
||||
if isinstance(params, dict):
|
||||
decl["parameters"] = sanitize_gemini_tool_parameters(params)
|
||||
decl["parameters"] = params
|
||||
declarations.append(decl)
|
||||
if not declarations:
|
||||
return []
|
||||
@@ -505,16 +504,9 @@ def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]:
|
||||
def _translate_stream_event(
|
||||
event: Dict[str, Any],
|
||||
model: str,
|
||||
tool_call_counter: List[int],
|
||||
tool_call_indices: Dict[str, int],
|
||||
) -> List[_GeminiStreamChunk]:
|
||||
"""Unwrap Code Assist envelope and emit OpenAI-shaped chunk(s).
|
||||
|
||||
``tool_call_counter`` is a single-element list used as a mutable counter
|
||||
across events in the same stream. Each ``functionCall`` part gets a
|
||||
fresh, unique OpenAI ``index`` — keying by function name would collide
|
||||
whenever the model issues parallel calls to the same tool (e.g. reading
|
||||
three files in one turn).
|
||||
"""
|
||||
"""Unwrap Code Assist envelope and emit OpenAI-shaped chunk(s)."""
|
||||
inner = event.get("response") if isinstance(event.get("response"), dict) else event
|
||||
candidates = inner.get("candidates") or []
|
||||
if not candidates:
|
||||
@@ -540,8 +532,7 @@ def _translate_stream_event(
|
||||
fc = part.get("functionCall")
|
||||
if isinstance(fc, dict) and fc.get("name"):
|
||||
name = str(fc["name"])
|
||||
idx = tool_call_counter[0]
|
||||
tool_call_counter[0] += 1
|
||||
idx = tool_call_indices.setdefault(name, len(tool_call_indices))
|
||||
try:
|
||||
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
|
||||
except (TypeError, ValueError):
|
||||
@@ -558,7 +549,7 @@ def _translate_stream_event(
|
||||
finish_reason_raw = str(cand.get("finishReason") or "")
|
||||
if finish_reason_raw:
|
||||
mapped = _map_gemini_finish_reason(finish_reason_raw)
|
||||
if tool_call_counter[0] > 0:
|
||||
if tool_call_indices:
|
||||
mapped = "tool_calls"
|
||||
chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
|
||||
return chunks
|
||||
@@ -742,9 +733,9 @@ class GeminiCloudCodeClient:
|
||||
# Materialize error body for better diagnostics
|
||||
response.read()
|
||||
raise _gemini_http_error(response)
|
||||
tool_call_counter: List[int] = [0]
|
||||
tool_call_indices: Dict[str, int] = {}
|
||||
for event in _iter_sse_events(response):
|
||||
for chunk in _translate_stream_event(event, model, tool_call_counter):
|
||||
for chunk in _translate_stream_event(event, model, tool_call_indices):
|
||||
yield chunk
|
||||
except httpx.HTTPError as exc:
|
||||
raise CodeAssistError(
|
||||
@@ -799,8 +790,7 @@ def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
|
||||
err_obj = {}
|
||||
err_status = str(err_obj.get("status") or "").strip()
|
||||
err_message = str(err_obj.get("message") or "").strip()
|
||||
_raw_details = err_obj.get("details")
|
||||
err_details_list = _raw_details if isinstance(_raw_details, list) else []
|
||||
err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []
|
||||
|
||||
# Extract google.rpc.ErrorInfo reason + metadata. There may be more
|
||||
# than one ErrorInfo (rare), so we pick the first one with a reason.
|
||||
|
||||
@@ -1,847 +0,0 @@
|
||||
"""OpenAI-compatible facade over Google AI Studio's native Gemini API.
|
||||
|
||||
Hermes keeps ``api_mode='chat_completions'`` for the ``gemini`` provider so the
|
||||
main agent loop can keep using its existing OpenAI-shaped message flow.
|
||||
This adapter is the transport shim that converts those OpenAI-style
|
||||
``messages[]`` / ``tools[]`` requests into Gemini's native
|
||||
``models/{model}:generateContent`` schema and converts the responses back.
|
||||
|
||||
Why this exists
|
||||
---------------
|
||||
Google's OpenAI-compatible endpoint has been brittle for Hermes's multi-turn
|
||||
agent/tool loop (auth churn, tool-call replay quirks, thought-signature
|
||||
requirements). The native Gemini API is the canonical path and avoids the
|
||||
OpenAI-compat layer entirely.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from agent.gemini_schema import sanitize_gemini_tool_parameters
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
|
||||
|
||||
|
||||
def is_native_gemini_base_url(base_url: str) -> bool:
|
||||
"""Return True when the endpoint speaks Gemini's native REST API."""
|
||||
normalized = str(base_url or "").strip().rstrip("/").lower()
|
||||
if not normalized:
|
||||
return False
|
||||
if "generativelanguage.googleapis.com" not in normalized:
|
||||
return False
|
||||
return not normalized.endswith("/openai")
|
||||
|
||||
|
||||
class GeminiAPIError(Exception):
|
||||
"""Error shape compatible with Hermes retry/error classification."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
message: str,
|
||||
*,
|
||||
code: str = "gemini_api_error",
|
||||
status_code: Optional[int] = None,
|
||||
response: Optional[httpx.Response] = None,
|
||||
retry_after: Optional[float] = None,
|
||||
details: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
super().__init__(message)
|
||||
self.code = code
|
||||
self.status_code = status_code
|
||||
self.response = response
|
||||
self.retry_after = retry_after
|
||||
self.details = details or {}
|
||||
|
||||
|
||||
def _coerce_content_to_text(content: Any) -> str:
|
||||
if content is None:
|
||||
return ""
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
pieces: List[str] = []
|
||||
for part in content:
|
||||
if isinstance(part, str):
|
||||
pieces.append(part)
|
||||
elif isinstance(part, dict) and part.get("type") == "text":
|
||||
text = part.get("text")
|
||||
if isinstance(text, str):
|
||||
pieces.append(text)
|
||||
return "\n".join(pieces)
|
||||
return str(content)
|
||||
|
||||
|
||||
def _extract_multimodal_parts(content: Any) -> List[Dict[str, Any]]:
|
||||
if not isinstance(content, list):
|
||||
text = _coerce_content_to_text(content)
|
||||
return [{"text": text}] if text else []
|
||||
|
||||
parts: List[Dict[str, Any]] = []
|
||||
for item in content:
|
||||
if isinstance(item, str):
|
||||
parts.append({"text": item})
|
||||
continue
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
ptype = item.get("type")
|
||||
if ptype == "text":
|
||||
text = item.get("text")
|
||||
if isinstance(text, str) and text:
|
||||
parts.append({"text": text})
|
||||
elif ptype == "image_url":
|
||||
url = ((item.get("image_url") or {}).get("url") or "")
|
||||
if not isinstance(url, str) or not url.startswith("data:"):
|
||||
continue
|
||||
try:
|
||||
header, encoded = url.split(",", 1)
|
||||
mime = header.split(":", 1)[1].split(";", 1)[0]
|
||||
raw = base64.b64decode(encoded)
|
||||
except Exception:
|
||||
continue
|
||||
parts.append(
|
||||
{
|
||||
"inlineData": {
|
||||
"mimeType": mime,
|
||||
"data": base64.b64encode(raw).decode("ascii"),
|
||||
}
|
||||
}
|
||||
)
|
||||
return parts
|
||||
|
||||
|
||||
def _tool_call_extra_signature(tool_call: Dict[str, Any]) -> Optional[str]:
|
||||
extra = tool_call.get("extra_content") or {}
|
||||
if not isinstance(extra, dict):
|
||||
return None
|
||||
google = extra.get("google") or extra.get("thought_signature")
|
||||
if isinstance(google, dict):
|
||||
sig = google.get("thought_signature") or google.get("thoughtSignature")
|
||||
return str(sig) if isinstance(sig, str) and sig else None
|
||||
if isinstance(google, str) and google:
|
||||
return google
|
||||
return None
|
||||
|
||||
|
||||
def _translate_tool_call_to_gemini(tool_call: Dict[str, Any]) -> Dict[str, Any]:
|
||||
fn = tool_call.get("function") or {}
|
||||
args_raw = fn.get("arguments", "")
|
||||
try:
|
||||
args = json.loads(args_raw) if isinstance(args_raw, str) and args_raw else {}
|
||||
except json.JSONDecodeError:
|
||||
args = {"_raw": args_raw}
|
||||
if not isinstance(args, dict):
|
||||
args = {"_value": args}
|
||||
|
||||
part: Dict[str, Any] = {
|
||||
"functionCall": {
|
||||
"name": str(fn.get("name") or ""),
|
||||
"args": args,
|
||||
}
|
||||
}
|
||||
thought_signature = _tool_call_extra_signature(tool_call)
|
||||
if thought_signature:
|
||||
part["thoughtSignature"] = thought_signature
|
||||
return part
|
||||
|
||||
|
||||
def _translate_tool_result_to_gemini(
|
||||
message: Dict[str, Any],
|
||||
tool_name_by_call_id: Optional[Dict[str, str]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
tool_name_by_call_id = tool_name_by_call_id or {}
|
||||
tool_call_id = str(message.get("tool_call_id") or "")
|
||||
name = str(
|
||||
message.get("name")
|
||||
or tool_name_by_call_id.get(tool_call_id)
|
||||
or tool_call_id
|
||||
or "tool"
|
||||
)
|
||||
content = _coerce_content_to_text(message.get("content"))
|
||||
try:
|
||||
parsed = json.loads(content) if content.strip().startswith(("{", "[")) else None
|
||||
except json.JSONDecodeError:
|
||||
parsed = None
|
||||
response = parsed if isinstance(parsed, dict) else {"output": content}
|
||||
return {
|
||||
"functionResponse": {
|
||||
"name": name,
|
||||
"response": response,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def _build_gemini_contents(messages: List[Dict[str, Any]]) -> tuple[List[Dict[str, Any]], Optional[Dict[str, Any]]]:
|
||||
system_text_parts: List[str] = []
|
||||
contents: List[Dict[str, Any]] = []
|
||||
tool_name_by_call_id: Dict[str, str] = {}
|
||||
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
role = str(msg.get("role") or "user")
|
||||
|
||||
if role == "system":
|
||||
system_text_parts.append(_coerce_content_to_text(msg.get("content")))
|
||||
continue
|
||||
|
||||
if role in {"tool", "function"}:
|
||||
contents.append(
|
||||
{
|
||||
"role": "user",
|
||||
"parts": [
|
||||
_translate_tool_result_to_gemini(
|
||||
msg,
|
||||
tool_name_by_call_id=tool_name_by_call_id,
|
||||
)
|
||||
],
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
gemini_role = "model" if role == "assistant" else "user"
|
||||
parts: List[Dict[str, Any]] = []
|
||||
|
||||
content_parts = _extract_multimodal_parts(msg.get("content"))
|
||||
parts.extend(content_parts)
|
||||
|
||||
tool_calls = msg.get("tool_calls") or []
|
||||
if isinstance(tool_calls, list):
|
||||
for tool_call in tool_calls:
|
||||
if isinstance(tool_call, dict):
|
||||
tool_call_id = str(tool_call.get("id") or tool_call.get("call_id") or "")
|
||||
tool_name = str(((tool_call.get("function") or {}).get("name") or ""))
|
||||
if tool_call_id and tool_name:
|
||||
tool_name_by_call_id[tool_call_id] = tool_name
|
||||
parts.append(_translate_tool_call_to_gemini(tool_call))
|
||||
|
||||
if parts:
|
||||
contents.append({"role": gemini_role, "parts": parts})
|
||||
|
||||
system_instruction = None
|
||||
joined_system = "\n".join(part for part in system_text_parts if part).strip()
|
||||
if joined_system:
|
||||
system_instruction = {"parts": [{"text": joined_system}]}
|
||||
return contents, system_instruction
|
||||
|
||||
|
||||
def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]:
|
||||
if not isinstance(tools, list):
|
||||
return []
|
||||
declarations: List[Dict[str, Any]] = []
|
||||
for tool in tools:
|
||||
if not isinstance(tool, dict):
|
||||
continue
|
||||
fn = tool.get("function") or {}
|
||||
if not isinstance(fn, dict):
|
||||
continue
|
||||
name = fn.get("name")
|
||||
if not isinstance(name, str) or not name:
|
||||
continue
|
||||
decl: Dict[str, Any] = {"name": name}
|
||||
description = fn.get("description")
|
||||
if isinstance(description, str) and description:
|
||||
decl["description"] = description
|
||||
parameters = fn.get("parameters")
|
||||
if isinstance(parameters, dict):
|
||||
decl["parameters"] = sanitize_gemini_tool_parameters(parameters)
|
||||
declarations.append(decl)
|
||||
return [{"functionDeclarations": declarations}] if declarations else []
|
||||
|
||||
|
||||
def _translate_tool_choice_to_gemini(tool_choice: Any) -> Optional[Dict[str, Any]]:
|
||||
if tool_choice is None:
|
||||
return None
|
||||
if isinstance(tool_choice, str):
|
||||
if tool_choice == "auto":
|
||||
return {"functionCallingConfig": {"mode": "AUTO"}}
|
||||
if tool_choice == "required":
|
||||
return {"functionCallingConfig": {"mode": "ANY"}}
|
||||
if tool_choice == "none":
|
||||
return {"functionCallingConfig": {"mode": "NONE"}}
|
||||
if isinstance(tool_choice, dict):
|
||||
fn = tool_choice.get("function") or {}
|
||||
name = fn.get("name")
|
||||
if isinstance(name, str) and name:
|
||||
return {"functionCallingConfig": {"mode": "ANY", "allowedFunctionNames": [name]}}
|
||||
return None
|
||||
|
||||
|
||||
def _normalize_thinking_config(config: Any) -> Optional[Dict[str, Any]]:
|
||||
if not isinstance(config, dict) or not config:
|
||||
return None
|
||||
budget = config.get("thinkingBudget", config.get("thinking_budget"))
|
||||
include = config.get("includeThoughts", config.get("include_thoughts"))
|
||||
level = config.get("thinkingLevel", config.get("thinking_level"))
|
||||
normalized: Dict[str, Any] = {}
|
||||
if isinstance(budget, (int, float)):
|
||||
normalized["thinkingBudget"] = int(budget)
|
||||
if isinstance(include, bool):
|
||||
normalized["includeThoughts"] = include
|
||||
if isinstance(level, str) and level.strip():
|
||||
normalized["thinkingLevel"] = level.strip().lower()
|
||||
return normalized or None
|
||||
|
||||
|
||||
def build_gemini_request(
|
||||
*,
|
||||
messages: List[Dict[str, Any]],
|
||||
tools: Any = None,
|
||||
tool_choice: Any = None,
|
||||
temperature: Optional[float] = None,
|
||||
max_tokens: Optional[int] = None,
|
||||
top_p: Optional[float] = None,
|
||||
stop: Any = None,
|
||||
thinking_config: Any = None,
|
||||
) -> Dict[str, Any]:
|
||||
contents, system_instruction = _build_gemini_contents(messages)
|
||||
request: Dict[str, Any] = {"contents": contents}
|
||||
if system_instruction:
|
||||
request["systemInstruction"] = system_instruction
|
||||
|
||||
gemini_tools = _translate_tools_to_gemini(tools)
|
||||
if gemini_tools:
|
||||
request["tools"] = gemini_tools
|
||||
|
||||
tool_config = _translate_tool_choice_to_gemini(tool_choice)
|
||||
if tool_config:
|
||||
request["toolConfig"] = tool_config
|
||||
|
||||
generation_config: Dict[str, Any] = {}
|
||||
if temperature is not None:
|
||||
generation_config["temperature"] = temperature
|
||||
if max_tokens is not None:
|
||||
generation_config["maxOutputTokens"] = max_tokens
|
||||
if top_p is not None:
|
||||
generation_config["topP"] = top_p
|
||||
if stop:
|
||||
generation_config["stopSequences"] = stop if isinstance(stop, list) else [str(stop)]
|
||||
normalized_thinking = _normalize_thinking_config(thinking_config)
|
||||
if normalized_thinking:
|
||||
generation_config["thinkingConfig"] = normalized_thinking
|
||||
if generation_config:
|
||||
request["generationConfig"] = generation_config
|
||||
|
||||
return request
|
||||
|
||||
|
||||
def _map_gemini_finish_reason(reason: str) -> str:
|
||||
mapping = {
|
||||
"STOP": "stop",
|
||||
"MAX_TOKENS": "length",
|
||||
"SAFETY": "content_filter",
|
||||
"RECITATION": "content_filter",
|
||||
"OTHER": "stop",
|
||||
}
|
||||
return mapping.get(str(reason or "").upper(), "stop")
|
||||
|
||||
|
||||
def _tool_call_extra_from_part(part: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
sig = part.get("thoughtSignature")
|
||||
if isinstance(sig, str) and sig:
|
||||
return {"google": {"thought_signature": sig}}
|
||||
return None
|
||||
|
||||
|
||||
def _empty_response(model: str) -> SimpleNamespace:
|
||||
message = SimpleNamespace(
|
||||
role="assistant",
|
||||
content="",
|
||||
tool_calls=None,
|
||||
reasoning=None,
|
||||
reasoning_content=None,
|
||||
reasoning_details=None,
|
||||
)
|
||||
choice = SimpleNamespace(index=0, message=message, finish_reason="stop")
|
||||
usage = SimpleNamespace(
|
||||
prompt_tokens=0,
|
||||
completion_tokens=0,
|
||||
total_tokens=0,
|
||||
prompt_tokens_details=SimpleNamespace(cached_tokens=0),
|
||||
)
|
||||
return SimpleNamespace(
|
||||
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
|
||||
object="chat.completion",
|
||||
created=int(time.time()),
|
||||
model=model,
|
||||
choices=[choice],
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
|
||||
def translate_gemini_response(resp: Dict[str, Any], model: str) -> SimpleNamespace:
|
||||
candidates = resp.get("candidates") or []
|
||||
if not isinstance(candidates, list) or not candidates:
|
||||
return _empty_response(model)
|
||||
|
||||
cand = candidates[0] if isinstance(candidates[0], dict) else {}
|
||||
content_obj = cand.get("content") if isinstance(cand, dict) else {}
|
||||
parts = content_obj.get("parts") if isinstance(content_obj, dict) else []
|
||||
|
||||
text_pieces: List[str] = []
|
||||
reasoning_pieces: List[str] = []
|
||||
tool_calls: List[SimpleNamespace] = []
|
||||
|
||||
for index, part in enumerate(parts or []):
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
if part.get("thought") is True and isinstance(part.get("text"), str):
|
||||
reasoning_pieces.append(part["text"])
|
||||
continue
|
||||
if isinstance(part.get("text"), str):
|
||||
text_pieces.append(part["text"])
|
||||
continue
|
||||
fc = part.get("functionCall")
|
||||
if isinstance(fc, dict) and fc.get("name"):
|
||||
try:
|
||||
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
|
||||
except (TypeError, ValueError):
|
||||
args_str = "{}"
|
||||
tool_call = SimpleNamespace(
|
||||
id=f"call_{uuid.uuid4().hex[:12]}",
|
||||
type="function",
|
||||
index=index,
|
||||
function=SimpleNamespace(name=str(fc["name"]), arguments=args_str),
|
||||
)
|
||||
extra_content = _tool_call_extra_from_part(part)
|
||||
if extra_content:
|
||||
tool_call.extra_content = extra_content
|
||||
tool_calls.append(tool_call)
|
||||
|
||||
finish_reason = "tool_calls" if tool_calls else _map_gemini_finish_reason(str(cand.get("finishReason") or ""))
|
||||
usage_meta = resp.get("usageMetadata") or {}
|
||||
usage = SimpleNamespace(
|
||||
prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
|
||||
completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
|
||||
total_tokens=int(usage_meta.get("totalTokenCount") or 0),
|
||||
prompt_tokens_details=SimpleNamespace(
|
||||
cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
|
||||
),
|
||||
)
|
||||
reasoning = "".join(reasoning_pieces) or None
|
||||
message = SimpleNamespace(
|
||||
role="assistant",
|
||||
content="".join(text_pieces) if text_pieces else None,
|
||||
tool_calls=tool_calls or None,
|
||||
reasoning=reasoning,
|
||||
reasoning_content=reasoning,
|
||||
reasoning_details=None,
|
||||
)
|
||||
choice = SimpleNamespace(index=0, message=message, finish_reason=finish_reason)
|
||||
return SimpleNamespace(
|
||||
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
|
||||
object="chat.completion",
|
||||
created=int(time.time()),
|
||||
model=model,
|
||||
choices=[choice],
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
|
||||
class _GeminiStreamChunk(SimpleNamespace):
|
||||
pass
|
||||
|
||||
|
||||
def _make_stream_chunk(
|
||||
*,
|
||||
model: str,
|
||||
content: str = "",
|
||||
tool_call_delta: Optional[Dict[str, Any]] = None,
|
||||
finish_reason: Optional[str] = None,
|
||||
reasoning: str = "",
|
||||
) -> _GeminiStreamChunk:
|
||||
delta_kwargs: Dict[str, Any] = {
|
||||
"role": "assistant",
|
||||
"content": None,
|
||||
"tool_calls": None,
|
||||
"reasoning": None,
|
||||
"reasoning_content": None,
|
||||
}
|
||||
if content:
|
||||
delta_kwargs["content"] = content
|
||||
if tool_call_delta is not None:
|
||||
tool_delta = SimpleNamespace(
|
||||
index=tool_call_delta.get("index", 0),
|
||||
id=tool_call_delta.get("id") or f"call_{uuid.uuid4().hex[:12]}",
|
||||
type="function",
|
||||
function=SimpleNamespace(
|
||||
name=tool_call_delta.get("name") or "",
|
||||
arguments=tool_call_delta.get("arguments") or "",
|
||||
),
|
||||
)
|
||||
extra_content = tool_call_delta.get("extra_content")
|
||||
if isinstance(extra_content, dict):
|
||||
tool_delta.extra_content = extra_content
|
||||
delta_kwargs["tool_calls"] = [tool_delta]
|
||||
if reasoning:
|
||||
delta_kwargs["reasoning"] = reasoning
|
||||
delta_kwargs["reasoning_content"] = reasoning
|
||||
delta = SimpleNamespace(**delta_kwargs)
|
||||
choice = SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason)
|
||||
return _GeminiStreamChunk(
|
||||
id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
|
||||
object="chat.completion.chunk",
|
||||
created=int(time.time()),
|
||||
model=model,
|
||||
choices=[choice],
|
||||
usage=None,
|
||||
)
|
||||
|
||||
|
||||
def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]:
|
||||
buffer = ""
|
||||
for chunk in response.iter_text():
|
||||
if not chunk:
|
||||
continue
|
||||
buffer += chunk
|
||||
while "\n" in buffer:
|
||||
line, buffer = buffer.split("\n", 1)
|
||||
line = line.rstrip("\r")
|
||||
if not line:
|
||||
continue
|
||||
if not line.startswith("data: "):
|
||||
continue
|
||||
data = line[6:]
|
||||
if data == "[DONE]":
|
||||
return
|
||||
try:
|
||||
payload = json.loads(data)
|
||||
except json.JSONDecodeError:
|
||||
logger.debug("Non-JSON Gemini SSE line: %s", data[:200])
|
||||
continue
|
||||
if isinstance(payload, dict):
|
||||
yield payload
|
||||
|
||||
|
||||
def translate_stream_event(event: Dict[str, Any], model: str, tool_call_indices: Dict[str, Dict[str, Any]]) -> List[_GeminiStreamChunk]:
|
||||
candidates = event.get("candidates") or []
|
||||
if not candidates:
|
||||
return []
|
||||
cand = candidates[0] if isinstance(candidates[0], dict) else {}
|
||||
parts = ((cand.get("content") or {}).get("parts") or []) if isinstance(cand, dict) else []
|
||||
chunks: List[_GeminiStreamChunk] = []
|
||||
|
||||
for part_index, part in enumerate(parts):
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
if part.get("thought") is True and isinstance(part.get("text"), str):
|
||||
chunks.append(_make_stream_chunk(model=model, reasoning=part["text"]))
|
||||
continue
|
||||
if isinstance(part.get("text"), str) and part["text"]:
|
||||
chunks.append(_make_stream_chunk(model=model, content=part["text"]))
|
||||
fc = part.get("functionCall")
|
||||
if isinstance(fc, dict) and fc.get("name"):
|
||||
name = str(fc["name"])
|
||||
try:
|
||||
args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False, sort_keys=True)
|
||||
except (TypeError, ValueError):
|
||||
args_str = "{}"
|
||||
thought_signature = part.get("thoughtSignature") if isinstance(part.get("thoughtSignature"), str) else ""
|
||||
call_key = json.dumps(
|
||||
{
|
||||
"part_index": part_index,
|
||||
"name": name,
|
||||
"thought_signature": thought_signature,
|
||||
},
|
||||
sort_keys=True,
|
||||
)
|
||||
slot = tool_call_indices.get(call_key)
|
||||
if slot is None:
|
||||
slot = {
|
||||
"index": len(tool_call_indices),
|
||||
"id": f"call_{uuid.uuid4().hex[:12]}",
|
||||
"last_arguments": "",
|
||||
}
|
||||
tool_call_indices[call_key] = slot
|
||||
emitted_arguments = args_str
|
||||
last_arguments = str(slot.get("last_arguments") or "")
|
||||
if last_arguments:
|
||||
if args_str == last_arguments:
|
||||
emitted_arguments = ""
|
||||
elif args_str.startswith(last_arguments):
|
||||
emitted_arguments = args_str[len(last_arguments):]
|
||||
slot["last_arguments"] = args_str
|
||||
chunks.append(
|
||||
_make_stream_chunk(
|
||||
model=model,
|
||||
tool_call_delta={
|
||||
"index": slot["index"],
|
||||
"id": slot["id"],
|
||||
"name": name,
|
||||
"arguments": emitted_arguments,
|
||||
"extra_content": _tool_call_extra_from_part(part),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
finish_reason_raw = str(cand.get("finishReason") or "")
|
||||
if finish_reason_raw:
|
||||
mapped = "tool_calls" if tool_call_indices else _map_gemini_finish_reason(finish_reason_raw)
|
||||
chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
|
||||
return chunks
|
||||
|
||||
|
||||
def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
|
||||
status = response.status_code
|
||||
body_text = ""
|
||||
body_json: Dict[str, Any] = {}
|
||||
try:
|
||||
body_text = response.text
|
||||
except Exception:
|
||||
body_text = ""
|
||||
if body_text:
|
||||
try:
|
||||
parsed = json.loads(body_text)
|
||||
if isinstance(parsed, dict):
|
||||
body_json = parsed
|
||||
except (ValueError, TypeError):
|
||||
body_json = {}
|
||||
|
||||
err_obj = body_json.get("error") if isinstance(body_json, dict) else None
|
||||
if not isinstance(err_obj, dict):
|
||||
err_obj = {}
|
||||
err_status = str(err_obj.get("status") or "").strip()
|
||||
err_message = str(err_obj.get("message") or "").strip()
|
||||
_raw_details = err_obj.get("details")
|
||||
details_list = _raw_details if isinstance(_raw_details, list) else []
|
||||
|
||||
reason = ""
|
||||
retry_after: Optional[float] = None
|
||||
metadata: Dict[str, Any] = {}
|
||||
for detail in details_list:
|
||||
if not isinstance(detail, dict):
|
||||
continue
|
||||
type_url = str(detail.get("@type") or "")
|
||||
if not reason and type_url.endswith("/google.rpc.ErrorInfo"):
|
||||
reason_value = detail.get("reason")
|
||||
if isinstance(reason_value, str):
|
||||
reason = reason_value
|
||||
md = detail.get("metadata")
|
||||
if isinstance(md, dict):
|
||||
metadata = md
|
||||
header_retry = response.headers.get("Retry-After") or response.headers.get("retry-after")
|
||||
if header_retry:
|
||||
try:
|
||||
retry_after = float(header_retry)
|
||||
except (TypeError, ValueError):
|
||||
retry_after = None
|
||||
|
||||
code = f"gemini_http_{status}"
|
||||
if status == 401:
|
||||
code = "gemini_unauthorized"
|
||||
elif status == 429:
|
||||
code = "gemini_rate_limited"
|
||||
elif status == 404:
|
||||
code = "gemini_model_not_found"
|
||||
|
||||
if err_message:
|
||||
message = f"Gemini HTTP {status} ({err_status or 'error'}): {err_message}"
|
||||
else:
|
||||
message = f"Gemini returned HTTP {status}: {body_text[:500]}"
|
||||
|
||||
return GeminiAPIError(
|
||||
message,
|
||||
code=code,
|
||||
status_code=status,
|
||||
response=response,
|
||||
retry_after=retry_after,
|
||||
details={
|
||||
"status": err_status,
|
||||
"reason": reason,
|
||||
"metadata": metadata,
|
||||
"message": err_message,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class _GeminiChatCompletions:
|
||||
def __init__(self, client: "GeminiNativeClient"):
|
||||
self._client = client
|
||||
|
||||
def create(self, **kwargs: Any) -> Any:
|
||||
return self._client._create_chat_completion(**kwargs)
|
||||
|
||||
|
||||
class _AsyncGeminiChatCompletions:
|
||||
def __init__(self, client: "AsyncGeminiNativeClient"):
|
||||
self._client = client
|
||||
|
||||
async def create(self, **kwargs: Any) -> Any:
|
||||
return await self._client._create_chat_completion(**kwargs)
|
||||
|
||||
|
||||
class _GeminiChatNamespace:
|
||||
def __init__(self, client: "GeminiNativeClient"):
|
||||
self.completions = _GeminiChatCompletions(client)
|
||||
|
||||
|
||||
class _AsyncGeminiChatNamespace:
|
||||
def __init__(self, client: "AsyncGeminiNativeClient"):
|
||||
self.completions = _AsyncGeminiChatCompletions(client)
|
||||
|
||||
|
||||
class GeminiNativeClient:
    """Minimal OpenAI-SDK-compatible facade over Gemini's native REST API.

    Exposes ``client.chat.completions.create(...)`` and turns each call into a
    ``generateContent`` (blocking) or ``streamGenerateContent`` (SSE) request.
    """

    def __init__(
        self,
        *,
        api_key: str,
        base_url: Optional[str] = None,
        default_headers: Optional[Dict[str, str]] = None,
        timeout: Any = None,
        http_client: Optional[httpx.Client] = None,
        **_: Any,
    ) -> None:
        """Configure the client.

        Args:
            api_key: Sent as the ``x-goog-api-key`` header on every request.
            base_url: API root; defaults to ``DEFAULT_GEMINI_BASE_URL``.  A
                trailing ``/`` and a trailing ``/openai`` segment are removed.
            default_headers: Extra headers merged over the built-in ones.
            timeout: Timeout for the internally created ``httpx.Client``;
                ignored when ``http_client`` is supplied.
            http_client: Pre-built ``httpx.Client`` to use instead of creating
                one.  Note that :meth:`close` closes it as well.
            **_: Unrecognized keyword arguments are accepted and ignored.
        """
        self.api_key = api_key
        normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/")
        # Callers may pass the OpenAI-compatibility URL; the native endpoints
        # used below are addressed without the "/openai" suffix.
        if normalized_base.endswith("/openai"):
            normalized_base = normalized_base[: -len("/openai")]
        self.base_url = normalized_base
        self._default_headers = dict(default_headers or {})
        self.chat = _GeminiChatNamespace(self)
        self.is_closed = False
        self._http = http_client or httpx.Client(
            timeout=timeout or httpx.Timeout(connect=15.0, read=600.0, write=30.0, pool=30.0)
        )

    def close(self) -> None:
        """Mark the client closed and close the underlying HTTP client (best effort)."""
        self.is_closed = True
        try:
            self._http.close()
        except Exception:
            # Deliberate best-effort shutdown: a failing close must not raise.
            pass

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def _headers(self) -> Dict[str, str]:
        """Return the request headers; ``default_headers`` override the built-ins."""
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "x-goog-api-key": self.api_key,
            "User-Agent": "hermes-agent (gemini-native)",
        }
        headers.update(self._default_headers)
        return headers

    @staticmethod
    def _timeout_kwargs(timeout: Any) -> Dict[str, Any]:
        """Return the per-request ``timeout`` keyword only when one was given.

        httpx interprets an explicit ``timeout=None`` on a request as "no
        timeout at all", which would discard the limits configured on the
        client.  Omitting the keyword lets the client-level timeout apply.
        """
        return {} if timeout is None else {"timeout": timeout}

    @staticmethod
    def _advance_stream_iterator(iterator: Iterator[_GeminiStreamChunk]) -> tuple[bool, Optional[_GeminiStreamChunk]]:
        """Pull one chunk from ``iterator``.

        Returns ``(False, chunk)`` for a chunk and ``(True, None)`` once the
        iterator is exhausted, so ``StopIteration`` never has to cross a
        thread boundary.
        """
        try:
            return False, next(iterator)
        except StopIteration:
            return True, None

    def _create_chat_completion(
        self,
        *,
        model: str = "gemini-2.5-flash",
        messages: Optional[List[Dict[str, Any]]] = None,
        stream: bool = False,
        tools: Any = None,
        tool_choice: Any = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        top_p: Optional[float] = None,
        stop: Any = None,
        extra_body: Optional[Dict[str, Any]] = None,
        timeout: Any = None,
        **_: Any,
    ) -> Any:
        """Run one chat completion against the native Gemini API.

        The OpenAI-style arguments are converted by ``build_gemini_request``.
        ``extra_body`` may carry ``thinking_config`` / ``thinkingConfig``.
        ``timeout=None`` means "use the HTTP client's configured timeout".

        Returns:
            The translated response, or an iterator of stream chunks when
            ``stream`` is true.

        Raises:
            GeminiAPIError: On a 200 response whose body is not valid JSON.
            Exception: Whatever ``gemini_http_error`` builds for a non-200 status.
        """
        thinking_config = None
        if isinstance(extra_body, dict):
            thinking_config = extra_body.get("thinking_config") or extra_body.get("thinkingConfig")

        request = build_gemini_request(
            messages=messages or [],
            tools=tools,
            tool_choice=tool_choice,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            stop=stop,
            thinking_config=thinking_config,
        )

        if stream:
            return self._stream_completion(model=model, request=request, timeout=timeout)

        url = f"{self.base_url}/models/{model}:generateContent"
        response = self._http.post(
            url,
            json=request,
            headers=self._headers(),
            # Forward a timeout only when the caller supplied one; see
            # _timeout_kwargs for why ``None`` must not be passed through.
            **self._timeout_kwargs(timeout),
        )
        if response.status_code != 200:
            raise gemini_http_error(response)
        try:
            payload = response.json()
        except ValueError as exc:
            raise GeminiAPIError(
                f"Invalid JSON from Gemini native API: {exc}",
                code="gemini_invalid_json",
                status_code=response.status_code,
                response=response,
            ) from exc
        return translate_gemini_response(payload, model=model)

    def _stream_completion(self, *, model: str, request: Dict[str, Any], timeout: Any = None) -> Iterator[_GeminiStreamChunk]:
        """Return a lazy iterator of chunks from ``streamGenerateContent``.

        The HTTP request is only issued when the returned generator is first
        advanced.  Transport failures (``httpx.HTTPError``) are re-raised as
        ``GeminiAPIError`` with code ``gemini_stream_error``.
        """
        url = f"{self.base_url}/models/{model}:streamGenerateContent?alt=sse"
        stream_headers = dict(self._headers())
        stream_headers["Accept"] = "text/event-stream"
        request_options = self._timeout_kwargs(timeout)

        def _generator() -> Iterator[_GeminiStreamChunk]:
            try:
                with self._http.stream(
                    "POST", url, json=request, headers=stream_headers, **request_options
                ) as response:
                    if response.status_code != 200:
                        # Load the body of the streamed response before it is
                        # handed to the error builder.
                        response.read()
                        raise gemini_http_error(response)
                    # State shared across events of this one stream.
                    tool_call_indices: Dict[str, Dict[str, Any]] = {}
                    for event in _iter_sse_events(response):
                        for chunk in translate_stream_event(event, model, tool_call_indices):
                            yield chunk
            except httpx.HTTPError as exc:
                raise GeminiAPIError(
                    f"Gemini streaming request failed: {exc}",
                    code="gemini_stream_error",
                ) from exc

        return _generator()
|
||||
|
||||
|
||||
class AsyncGeminiNativeClient:
    """Async wrapper used by auxiliary_client for native Gemini calls.

    Every blocking operation of the wrapped synchronous client is executed
    through ``asyncio.to_thread``.
    """

    def __init__(self, sync_client: GeminiNativeClient):
        self._sync = sync_client
        # Mirror the attributes of the wrapped client.
        self.api_key = sync_client.api_key
        self.base_url = sync_client.base_url
        self.chat = _AsyncGeminiChatNamespace(self)

    async def _create_chat_completion(self, **kwargs: Any) -> Any:
        """Run the synchronous ``create`` in a worker thread.

        Returns the result directly for non-streaming calls; for streaming
        calls returns an async generator that fetches each chunk of the
        synchronous iterator in a worker thread.
        """
        wants_stream = bool(kwargs.get("stream"))
        outcome = await asyncio.to_thread(self._sync.chat.completions.create, **kwargs)
        if wants_stream:

            async def _async_stream() -> Any:
                while True:
                    exhausted, piece = await asyncio.to_thread(
                        self._sync._advance_stream_iterator, outcome
                    )
                    if exhausted:
                        return
                    yield piece

            return _async_stream()

        return outcome

    async def close(self) -> None:
        """Close the wrapped synchronous client in a worker thread."""
        await asyncio.to_thread(self._sync.close)
|
||||
@@ -1,85 +0,0 @@
|
||||
"""Helpers for translating OpenAI-style tool schemas to Gemini's schema subset."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema``
|
||||
# object, which is only a subset of OpenAPI 3.0 / JSON Schema. Strip fields
|
||||
# outside that subset before sending Hermes tool schemas to Google.
|
||||
_GEMINI_SCHEMA_ALLOWED_KEYS = {
    "type",
    "format",
    "title",
    "description",
    "nullable",
    "enum",
    "maxItems",
    "minItems",
    "properties",
    "required",
    "minProperties",
    "maxProperties",
    "minLength",
    "maxLength",
    "pattern",
    "example",
    "anyOf",
    "propertyOrdering",
    "default",
    "items",
    "minimum",
    "maximum",
}


def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]:
    """Return a Gemini-compatible copy of a tool parameter schema.

    Hermes tool schemas are OpenAI-flavored JSON Schema and may contain keys
    such as ``$schema`` or ``additionalProperties`` that Google's Gemini
    ``Schema`` object rejects.  Only keys in ``_GEMINI_SCHEMA_ALLOWED_KEYS``
    are kept, and nested ``properties`` / ``items`` / ``anyOf`` definitions
    are sanitized recursively.

    A JSON-Schema union ``type`` that names exactly one non-null type (for
    example ``["string", "null"]``) is collapsed to that single type, with
    ``nullable`` set to ``True`` when ``"null"`` was listed and the schema
    does not set ``nullable`` itself.  Other ``type`` lists are passed
    through unchanged.

    Args:
        schema: The schema to clean; anything that is not a ``dict`` yields ``{}``.

    Returns:
        A new dict; the input is not modified.
    """
    if not isinstance(schema, dict):
        return {}

    cleaned: Dict[str, Any] = {}
    type_allows_null = False
    for key, value in schema.items():
        if key not in _GEMINI_SCHEMA_ALLOWED_KEYS:
            continue
        if key == "type" and isinstance(value, list):
            non_null = [
                entry
                for entry in value
                if not (isinstance(entry, str) and entry.lower() == "null")
            ]
            if len(non_null) == 1:
                cleaned[key] = non_null[0]
                type_allows_null = len(non_null) != len(value)
            else:
                # Ambiguous union (or only "null"): leave it as given.
                cleaned[key] = value
            continue
        if key == "properties":
            if not isinstance(value, dict):
                continue
            # Property names must be strings; other keys are dropped.
            cleaned[key] = {
                prop_name: sanitize_gemini_schema(prop_schema)
                for prop_name, prop_schema in value.items()
                if isinstance(prop_name, str)
            }
            continue
        if key == "items":
            cleaned[key] = sanitize_gemini_schema(value)
            continue
        if key == "anyOf":
            if not isinstance(value, list):
                continue
            cleaned[key] = [
                sanitize_gemini_schema(item)
                for item in value
                if isinstance(item, dict)
            ]
            continue
        cleaned[key] = value

    # Applied after the loop so the result does not depend on key order; an
    # explicit ``nullable`` in the input always wins.
    if type_allows_null:
        cleaned.setdefault("nullable", True)
    return cleaned
|
||||
|
||||
|
||||
def sanitize_gemini_tool_parameters(parameters: Any) -> Dict[str, Any]:
    """Sanitize tool parameters, substituting an empty object schema when nothing survives.

    Runs ``sanitize_gemini_schema`` on ``parameters`` and returns its result;
    if that result is empty, returns ``{"type": "object", "properties": {}}``.
    """
    sanitized = sanitize_gemini_schema(parameters)
    return sanitized if sanitized else {"type": "object", "properties": {}}
|
||||
@@ -1,242 +0,0 @@
|
||||
"""
|
||||
Image Generation Provider ABC
|
||||
=============================
|
||||
|
||||
Defines the pluggable-backend interface for image generation. Providers register
|
||||
instances via ``PluginContext.register_image_gen_provider()``; the active one
|
||||
(selected via ``image_gen.provider`` in ``config.yaml``) services every
|
||||
``image_generate`` tool call.
|
||||
|
||||
Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
|
||||
as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
|
||||
via ``plugins.enabled``).
|
||||
|
||||
Response shape
|
||||
--------------
|
||||
All providers return a dict that :func:`success_response` / :func:`error_response`
|
||||
produce. The tool wrapper JSON-serializes it. Keys:
|
||||
|
||||
success bool
|
||||
image str | None URL or absolute file path
|
||||
model str provider-specific model identifier
|
||||
prompt str echoed prompt
|
||||
aspect_ratio str "landscape" | "square" | "portrait"
|
||||
provider str provider name (for diagnostics)
|
||||
error str only when success=False
|
||||
error_type str only when success=False
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
import base64
|
||||
import datetime
|
||||
import logging
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Aspect-ratio names accepted by resolve_aspect_ratio().
VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait")
|
||||
# Value resolve_aspect_ratio() falls back to for missing or unrecognized input.
DEFAULT_ASPECT_RATIO = "landscape"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ABC
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class ImageGenProvider(abc.ABC):
    """Contract that every image generation backend implements.

    Only :attr:`name` and :meth:`generate` are abstract; the remaining
    members ship with defaults that a provider overrides as needed.
    """

    @property
    @abc.abstractmethod
    def name(self) -> str:
        """Short, stable identifier matched against ``image_gen.provider`` in config.

        Lowercase without spaces, e.g. ``fal``, ``openai``, ``replicate``.
        """

    @property
    def display_name(self) -> str:
        """Label shown in ``hermes tools``; by default the title-cased ``name``."""
        label = self.name.title()
        return label

    def is_available(self) -> bool:
        """Tell whether the provider can currently service calls.

        Overrides typically check for a required API key.  The default is
        ``True`` (a provider without external dependencies is always usable).
        """
        return True

    def list_models(self) -> List[Dict[str, Any]]:
        """Return the model catalog for the ``hermes tools`` model picker.

        Entry shape::

            {
                "id": "gpt-image-1.5",        # required
                "display": "GPT Image 1.5",   # optional; defaults to id
                "speed": "~10s",              # optional
                "strengths": "...",           # optional
                "price": "$...",              # optional
            }

        The default is an empty list: no user-selectable models.
        """
        catalog: List[Dict[str, Any]] = []
        return catalog

    def get_setup_schema(self) -> Dict[str, Any]:
        """Return the metadata describing this provider in the ``hermes tools`` picker.

        ``tools_config.py`` uses it to add the provider as a row of the Image
        Generation provider list.  Shape::

            {
                "name": "OpenAI",                        # picker label
                "badge": "paid",                         # optional short tag
                "tag": "One-line description...",        # optional subtitle
                "env_vars": [                            # keys to prompt for
                    {"key": "OPENAI_API_KEY",
                     "prompt": "OpenAI API key",
                     "url": "https://platform.openai.com/api-keys"},
                ],
            }

        The default carries only ``display_name`` with empty badge, tag and
        env vars; override it to expose API key prompts and custom badges.
        """
        schema: Dict[str, Any] = {}
        schema["name"] = self.display_name
        schema["badge"] = ""
        schema["tag"] = ""
        schema["env_vars"] = []
        return schema

    def default_model(self) -> Optional[str]:
        """Return the ``id`` of the first catalog entry, or ``None`` without a catalog."""
        catalog = self.list_models()
        return catalog[0].get("id") if catalog else None

    @abc.abstractmethod
    def generate(
        self,
        prompt: str,
        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Generate an image for ``prompt``.

        Implementations should return the dict built by
        :func:`success_response` or :func:`error_response`.  ``kwargs`` may
        carry forward-compatible parameters that later schema versions
        expose; unknown keys should be ignored.
        """
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def resolve_aspect_ratio(value: Optional[str]) -> str:
    """Map ``value`` onto a valid aspect ratio, falling back to the default.

    Strings are trimmed and lower-cased before the lookup.  Anything that is
    not a string, or not in ``VALID_ASPECT_RATIOS``, is coerced to
    ``DEFAULT_ASPECT_RATIO`` rather than rejected, so the tool surface stays
    forgiving of agent mistakes.
    """
    if isinstance(value, str):
        candidate = value.strip().lower()
        if candidate in VALID_ASPECT_RATIOS:
            return candidate
    return DEFAULT_ASPECT_RATIO
|
||||
|
||||
|
||||
def _images_cache_dir() -> Path:
    """Return ``$HERMES_HOME/cache/images/``, creating the directory tree if missing."""
    # Imported here rather than at module level, as in the original.
    from hermes_constants import get_hermes_home

    cache_dir = get_hermes_home() / "cache" / "images"
    cache_dir.mkdir(parents=True, exist_ok=True)
    return cache_dir
|
||||
|
||||
|
||||
def save_b64_image(
    b64_data: str,
    *,
    prefix: str = "image",
    extension: str = "png",
) -> Path:
    """Decode base64 image data and store it under ``$HERMES_HOME/cache/images/``.

    The file is named ``<prefix>_<YYYYMMDD_HHMMSS>_<short-uuid>.<ext>``, where
    the timestamp is the current local time and the short uuid is the first
    eight hex digits of a random UUID.

    Returns:
        The :class:`Path` of the written file.
    """
    decoded = base64.b64decode(b64_data)
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    token = uuid.uuid4().hex[:8]
    filename = f"{prefix}_{stamp}_{token}.{extension}"
    target = _images_cache_dir() / filename
    target.write_bytes(decoded)
    return target
|
||||
|
||||
|
||||
def success_response(
    *,
    image: str,
    model: str,
    prompt: str,
    aspect_ratio: str,
    provider: str,
    extra: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Build the uniform dict returned for a successful generation.

    ``image`` is either an HTTP URL or an absolute filesystem path (the
    latter for b64 providers like OpenAI).  Backend-specific fields can be
    passed through ``extra``; they are appended after the standard keys and
    never replace one of them.
    """
    payload: Dict[str, Any] = dict(
        success=True,
        image=image,
        model=model,
        prompt=prompt,
        aspect_ratio=aspect_ratio,
        provider=provider,
    )
    if extra:
        for key, value in extra.items():
            # Standard keys (and earlier extras) take precedence.
            if key not in payload:
                payload[key] = value
    return payload
|
||||
|
||||
|
||||
def error_response(
    *,
    error: str,
    error_type: str = "provider_error",
    provider: str = "",
    model: str = "",
    prompt: str = "",
    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
) -> Dict[str, Any]:
    """Build the uniform dict returned for a failed generation.

    ``success`` is always ``False`` and ``image`` always ``None``; the other
    keys echo the arguments.
    """
    payload: Dict[str, Any] = dict(
        success=False,
        image=None,
        error=error,
        error_type=error_type,
        model=model,
        prompt=prompt,
        aspect_ratio=aspect_ratio,
        provider=provider,
    )
    return payload
|
||||
@@ -1,120 +0,0 @@
|
||||
"""
|
||||
Image Generation Provider Registry
|
||||
==================================
|
||||
|
||||
Central map of registered providers. Populated by plugins at import-time via
|
||||
``PluginContext.register_image_gen_provider()``; consumed by the
|
||||
``image_generate`` tool to dispatch each call to the active backend.
|
||||
|
||||
Active selection
|
||||
----------------
|
||||
The active provider is chosen by ``image_gen.provider`` in ``config.yaml``.
|
||||
If unset, :func:`get_active_provider` applies fallback logic:
|
||||
|
||||
1. If exactly one provider is registered, use it.
|
||||
2. Otherwise if a provider named ``fal`` is registered, use it (legacy
|
||||
default — matches pre-plugin behavior).
|
||||
3. Otherwise return ``None`` (the tool surfaces a helpful error pointing
|
||||
the user at ``hermes tools``).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from agent.image_gen_provider import ImageGenProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Registered providers, keyed by the name they were registered under.
_providers: Dict[str, ImageGenProvider] = {}
|
||||
# Held by the functions in this module whenever they read or modify _providers.
_lock = threading.Lock()
|
||||
|
||||
|
||||
def register_provider(provider: ImageGenProvider) -> None:
    """Register an image generation provider.

    The provider is stored under its ``name`` with surrounding whitespace
    removed, because ``get_provider`` and ``get_active_provider`` strip the
    name they look up; storing the raw name would make a provider whose name
    carries stray whitespace impossible to retrieve.

    Re-registration (same name) overwrites the previous entry and logs a
    debug message, which keeps hot-reload scenarios (tests, dev loops)
    predictable.

    Raises:
        TypeError: ``provider`` is not an :class:`ImageGenProvider` instance.
        ValueError: ``provider.name`` is not a non-empty string.
    """
    if not isinstance(provider, ImageGenProvider):
        raise TypeError(
            f"register_provider() expects an ImageGenProvider instance, "
            f"got {type(provider).__name__}"
        )
    name = provider.name
    if not isinstance(name, str) or not name.strip():
        raise ValueError("Image gen provider .name must be a non-empty string")
    key = name.strip()
    with _lock:
        existing = _providers.get(key)
        _providers[key] = provider
    # Logging happens outside the lock to keep the critical section minimal.
    if existing is not None:
        logger.debug("Image gen provider '%s' re-registered (was %r)", key, type(existing).__name__)
    else:
        logger.debug("Registered image gen provider '%s' (%s)", key, type(provider).__name__)
|
||||
|
||||
|
||||
def list_providers() -> List[ImageGenProvider]:
    """Return every registered provider, ordered by ``name``."""
    with _lock:
        registered = list(_providers.values())
    # Sort the private copy outside the lock.
    registered.sort(key=lambda entry: entry.name)
    return registered
|
||||
|
||||
|
||||
def get_provider(name: str) -> Optional[ImageGenProvider]:
    """Look up the provider registered under *name* (whitespace-trimmed).

    Returns ``None`` when *name* is not a string or nothing is registered
    under it.
    """
    if isinstance(name, str):
        with _lock:
            return _providers.get(name.strip())
    return None
|
||||
|
||||
|
||||
def get_active_provider() -> Optional[ImageGenProvider]:
    """Resolve the currently-active provider.

    Resolution order:

    1. The provider named by ``image_gen.provider`` in config.yaml, if it is
       registered.
    2. The sole registered provider, when exactly one exists.
    3. The provider named ``fal`` (legacy default).
    4. ``None``.

    Any error while reading the config is logged at debug level and treated
    as "nothing configured".
    """
    configured: Optional[str] = None
    try:
        from hermes_cli.config import load_config

        cfg = load_config()
        section = cfg.get("image_gen") if isinstance(cfg, dict) else None
        raw = section.get("provider") if isinstance(section, dict) else None
        if isinstance(raw, str) and raw.strip():
            configured = raw.strip()
    except Exception as exc:
        logger.debug("Could not read image_gen.provider from config: %s", exc)

    # Work on a copy so the lock is not held during the fallback logic.
    with _lock:
        snapshot = dict(_providers)

    if configured:
        chosen = snapshot.get(configured)
        if chosen is not None:
            return chosen
        logger.debug(
            "image_gen.provider='%s' configured but not registered; falling back",
            configured,
        )

    # Fallback: single-provider case
    if len(snapshot) == 1:
        (only_provider,) = snapshot.values()
        return only_provider

    # Fallback: prefer legacy FAL for backward compat, else nothing.
    return snapshot.get("fal")
|
||||
|
||||
|
||||
def _reset_for_tests() -> None:
    """Empty the provider registry while holding the lock. **Test-only.**"""
    _lock.acquire()
    try:
        _providers.clear()
    finally:
        _lock.release()
|
||||
@@ -124,7 +124,6 @@ class InsightsEngine:
|
||||
# Gather raw data
|
||||
sessions = self._get_sessions(cutoff, source)
|
||||
tool_usage = self._get_tool_usage(cutoff, source)
|
||||
skill_usage = self._get_skill_usage(cutoff, source)
|
||||
message_stats = self._get_message_stats(cutoff, source)
|
||||
|
||||
if not sessions:
|
||||
@@ -136,15 +135,6 @@ class InsightsEngine:
|
||||
"models": [],
|
||||
"platforms": [],
|
||||
"tools": [],
|
||||
"skills": {
|
||||
"summary": {
|
||||
"total_skill_loads": 0,
|
||||
"total_skill_edits": 0,
|
||||
"total_skill_actions": 0,
|
||||
"distinct_skills_used": 0,
|
||||
},
|
||||
"top_skills": [],
|
||||
},
|
||||
"activity": {},
|
||||
"top_sessions": [],
|
||||
}
|
||||
@@ -154,7 +144,6 @@ class InsightsEngine:
|
||||
models = self._compute_model_breakdown(sessions)
|
||||
platforms = self._compute_platform_breakdown(sessions)
|
||||
tools = self._compute_tool_breakdown(tool_usage)
|
||||
skills = self._compute_skill_breakdown(skill_usage)
|
||||
activity = self._compute_activity_patterns(sessions)
|
||||
top_sessions = self._compute_top_sessions(sessions)
|
||||
|
||||
@@ -167,7 +156,6 @@ class InsightsEngine:
|
||||
"models": models,
|
||||
"platforms": platforms,
|
||||
"tools": tools,
|
||||
"skills": skills,
|
||||
"activity": activity,
|
||||
"top_sessions": top_sessions,
|
||||
}
|
||||
@@ -296,82 +284,6 @@ class InsightsEngine:
|
||||
for name, count in tool_counts.most_common()
|
||||
]
|
||||
|
||||
def _get_skill_usage(self, cutoff: float, source: str = None) -> List[Dict]:
|
||||
"""Extract per-skill usage from assistant tool calls."""
|
||||
skill_counts: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
if source:
|
||||
cursor = self._conn.execute(
|
||||
"""SELECT m.tool_calls, m.timestamp
|
||||
FROM messages m
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE s.started_at >= ? AND s.source = ?
|
||||
AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
|
||||
(cutoff, source),
|
||||
)
|
||||
else:
|
||||
cursor = self._conn.execute(
|
||||
"""SELECT m.tool_calls, m.timestamp
|
||||
FROM messages m
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE s.started_at >= ?
|
||||
AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
|
||||
(cutoff,),
|
||||
)
|
||||
|
||||
for row in cursor.fetchall():
|
||||
try:
|
||||
calls = row["tool_calls"]
|
||||
if isinstance(calls, str):
|
||||
calls = json.loads(calls)
|
||||
if not isinstance(calls, list):
|
||||
continue
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
continue
|
||||
|
||||
timestamp = row["timestamp"]
|
||||
for call in calls:
|
||||
if not isinstance(call, dict):
|
||||
continue
|
||||
func = call.get("function", {})
|
||||
tool_name = func.get("name")
|
||||
if tool_name not in {"skill_view", "skill_manage"}:
|
||||
continue
|
||||
|
||||
args = func.get("arguments")
|
||||
if isinstance(args, str):
|
||||
try:
|
||||
args = json.loads(args)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
continue
|
||||
if not isinstance(args, dict):
|
||||
continue
|
||||
|
||||
skill_name = args.get("name")
|
||||
if not isinstance(skill_name, str) or not skill_name.strip():
|
||||
continue
|
||||
|
||||
entry = skill_counts.setdefault(
|
||||
skill_name,
|
||||
{
|
||||
"skill": skill_name,
|
||||
"view_count": 0,
|
||||
"manage_count": 0,
|
||||
"last_used_at": None,
|
||||
},
|
||||
)
|
||||
if tool_name == "skill_view":
|
||||
entry["view_count"] += 1
|
||||
else:
|
||||
entry["manage_count"] += 1
|
||||
|
||||
if timestamp is not None and (
|
||||
entry["last_used_at"] is None or timestamp > entry["last_used_at"]
|
||||
):
|
||||
entry["last_used_at"] = timestamp
|
||||
|
||||
return list(skill_counts.values())
|
||||
|
||||
def _get_message_stats(self, cutoff: float, source: str = None) -> Dict:
|
||||
"""Get aggregate message statistics."""
|
||||
if source:
|
||||
@@ -563,46 +475,6 @@ class InsightsEngine:
|
||||
})
|
||||
return result
|
||||
|
||||
def _compute_skill_breakdown(self, skill_usage: List[Dict]) -> Dict[str, Any]:
|
||||
"""Process per-skill usage into summary + ranked list."""
|
||||
total_skill_loads = sum(s["view_count"] for s in skill_usage) if skill_usage else 0
|
||||
total_skill_edits = sum(s["manage_count"] for s in skill_usage) if skill_usage else 0
|
||||
total_skill_actions = total_skill_loads + total_skill_edits
|
||||
|
||||
top_skills = []
|
||||
for skill in skill_usage:
|
||||
total_count = skill["view_count"] + skill["manage_count"]
|
||||
percentage = (total_count / total_skill_actions * 100) if total_skill_actions else 0
|
||||
top_skills.append({
|
||||
"skill": skill["skill"],
|
||||
"view_count": skill["view_count"],
|
||||
"manage_count": skill["manage_count"],
|
||||
"total_count": total_count,
|
||||
"percentage": percentage,
|
||||
"last_used_at": skill.get("last_used_at"),
|
||||
})
|
||||
|
||||
top_skills.sort(
|
||||
key=lambda s: (
|
||||
s["total_count"],
|
||||
s["view_count"],
|
||||
s["manage_count"],
|
||||
s["last_used_at"] or 0,
|
||||
s["skill"],
|
||||
),
|
||||
reverse=True,
|
||||
)
|
||||
|
||||
return {
|
||||
"summary": {
|
||||
"total_skill_loads": total_skill_loads,
|
||||
"total_skill_edits": total_skill_edits,
|
||||
"total_skill_actions": total_skill_actions,
|
||||
"distinct_skills_used": len(skill_usage),
|
||||
},
|
||||
"top_skills": top_skills,
|
||||
}
|
||||
|
||||
def _compute_activity_patterns(self, sessions: List[Dict]) -> Dict:
|
||||
"""Analyze activity patterns by day of week and hour."""
|
||||
day_counts = Counter() # 0=Monday ... 6=Sunday
|
||||
@@ -798,28 +670,6 @@ class InsightsEngine:
|
||||
lines.append(f" ... and {len(report['tools']) - 15} more tools")
|
||||
lines.append("")
|
||||
|
||||
# Skill usage
|
||||
skills = report.get("skills", {})
|
||||
top_skills = skills.get("top_skills", [])
|
||||
if top_skills:
|
||||
lines.append(" 🧠 Top Skills")
|
||||
lines.append(" " + "─" * 56)
|
||||
lines.append(f" {'Skill':<28} {'Loads':>7} {'Edits':>7} {'Last used':>11}")
|
||||
for skill in top_skills[:10]:
|
||||
last_used = "—"
|
||||
if skill.get("last_used_at"):
|
||||
last_used = datetime.fromtimestamp(skill["last_used_at"]).strftime("%b %d")
|
||||
lines.append(
|
||||
f" {skill['skill'][:28]:<28} {skill['view_count']:>7,} {skill['manage_count']:>7,} {last_used:>11}"
|
||||
)
|
||||
summary = skills.get("summary", {})
|
||||
lines.append(
|
||||
f" Distinct skills: {summary.get('distinct_skills_used', 0)} "
|
||||
f"Loads: {summary.get('total_skill_loads', 0):,} "
|
||||
f"Edits: {summary.get('total_skill_edits', 0):,}"
|
||||
)
|
||||
lines.append("")
|
||||
|
||||
# Activity patterns
|
||||
act = report.get("activity", {})
|
||||
if act.get("by_day"):
|
||||
@@ -903,18 +753,6 @@ class InsightsEngine:
|
||||
lines.append(f" {t['tool']} — {t['count']:,} calls ({t['percentage']:.1f}%)")
|
||||
lines.append("")
|
||||
|
||||
skills = report.get("skills", {})
|
||||
if skills.get("top_skills"):
|
||||
lines.append("**🧠 Top Skills:**")
|
||||
for skill in skills["top_skills"][:5]:
|
||||
suffix = ""
|
||||
if skill.get("last_used_at"):
|
||||
suffix = f", last used {datetime.fromtimestamp(skill['last_used_at']).strftime('%b %d')}"
|
||||
lines.append(
|
||||
f" {skill['skill']} — {skill['view_count']:,} loads, {skill['manage_count']:,} edits{suffix}"
|
||||
)
|
||||
lines.append("")
|
||||
|
||||
# Activity summary
|
||||
act = report.get("activity", {})
|
||||
if act.get("busiest_day") and act.get("busiest_hour"):
|
||||
|
||||
+20
-121
@@ -4,7 +4,6 @@ Pure utility functions with no AIAgent dependency. Used by ContextCompressor
|
||||
and run_agent.py for pre-flight context checks.
|
||||
"""
|
||||
|
||||
import ipaddress
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
@@ -15,8 +14,6 @@ from urllib.parse import urlparse
|
||||
import requests
|
||||
import yaml
|
||||
|
||||
from utils import base_url_host_matches, base_url_hostname
|
||||
|
||||
from hermes_constants import OPENROUTER_MODELS_URL
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -26,7 +23,7 @@ logger = logging.getLogger(__name__)
|
||||
# are preserved so the full model name reaches cache lookups and server queries.
|
||||
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
|
||||
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
|
||||
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
|
||||
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
|
||||
"qwen-oauth",
|
||||
"xiaomi",
|
||||
@@ -37,7 +34,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
|
||||
"github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
|
||||
"ollama",
|
||||
"stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
|
||||
"opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
|
||||
"mimo", "xiaomi-mimo",
|
||||
"arcee-ai", "arceeai",
|
||||
"xai", "x-ai", "x.ai", "grok",
|
||||
@@ -52,13 +49,6 @@ _OLLAMA_TAG_PATTERN = re.compile(
|
||||
)
|
||||
|
||||
|
||||
# Tailscale's CGNAT range (RFC 6598). `ipaddress.is_private` excludes this
|
||||
# block, so without an explicit check Ollama reached over Tailscale (e.g.
|
||||
# `http://100.77.243.5:11434`) wouldn't be treated as local and its stream
|
||||
# read / stale timeouts wouldn't get auto-bumped. Built once at import time.
|
||||
_TAILSCALE_CGNAT = ipaddress.IPv4Network("100.64.0.0/10")
|
||||
|
||||
|
||||
def _strip_provider_prefix(model: str) -> str:
|
||||
"""Strip a recognised provider prefix from a model string.
|
||||
|
||||
@@ -126,6 +116,7 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
|
||||
"gpt-5.3-codex-spark": 128000, # Spark variant has reduced 128k context
|
||||
"gpt-5.1-chat": 128000, # Chat variant has 128k context
|
||||
"gpt-5": 400000, # GPT-5.x base, mini, codex variants (400k)
|
||||
"gpt-4.1": 1047576,
|
||||
@@ -133,8 +124,6 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
# Google
|
||||
"gemini": 1048576,
|
||||
# Gemma (open models served via AI Studio)
|
||||
"gemma-4": 256000, # Gemma 4 family
|
||||
"gemma4": 256000, # Ollama-style naming (e.g. gemma4:31b-cloud)
|
||||
"gemma-4-31b": 256000,
|
||||
"gemma-3": 131072,
|
||||
"gemma": 8192, # fallback for older gemma models
|
||||
@@ -180,15 +169,12 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"Qwen/Qwen3.5-35B-A3B": 131072,
|
||||
"deepseek-ai/DeepSeek-V3.2": 65536,
|
||||
"moonshotai/Kimi-K2.5": 262144,
|
||||
"moonshotai/Kimi-K2.6": 262144,
|
||||
"moonshotai/Kimi-K2-Thinking": 262144,
|
||||
"MiniMaxAI/MiniMax-M2.5": 204800,
|
||||
"XiaomiMiMo/MiMo-V2-Flash": 256000,
|
||||
"mimo-v2-pro": 1000000,
|
||||
"mimo-v2-omni": 256000,
|
||||
"mimo-v2-flash": 256000,
|
||||
"mimo-v2.5-pro": 1000000,
|
||||
"mimo-v2.5": 1000000,
|
||||
"zai-org/GLM-5": 202752,
|
||||
}
|
||||
|
||||
@@ -203,7 +189,6 @@ _CONTEXT_LENGTH_KEYS = (
|
||||
"max_seq_len",
|
||||
"n_ctx_train",
|
||||
"n_ctx",
|
||||
"ctx_size",
|
||||
)
|
||||
|
||||
_MAX_COMPLETION_KEYS = (
|
||||
@@ -226,15 +211,8 @@ def _normalize_base_url(base_url: str) -> str:
|
||||
return (base_url or "").strip().rstrip("/")
|
||||
|
||||
|
||||
def _auth_headers(api_key: str = "") -> Dict[str, str]:
|
||||
token = str(api_key or "").strip()
|
||||
if not token:
|
||||
return {}
|
||||
return {"Authorization": f"Bearer {token}"}
|
||||
|
||||
|
||||
def _is_openrouter_base_url(base_url: str) -> bool:
|
||||
return base_url_host_matches(base_url, "openrouter.ai")
|
||||
return "openrouter.ai" in _normalize_base_url(base_url).lower()
|
||||
|
||||
|
||||
def _is_custom_endpoint(base_url: str) -> bool:
|
||||
@@ -247,12 +225,9 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
||||
"chatgpt.com": "openai",
|
||||
"api.anthropic.com": "anthropic",
|
||||
"api.z.ai": "zai",
|
||||
"open.bigmodel.cn": "zai",
|
||||
"api.moonshot.ai": "kimi-coding",
|
||||
"api.moonshot.cn": "kimi-coding-cn",
|
||||
"api.kimi.com": "kimi-coding",
|
||||
"api.stepfun.ai": "stepfun",
|
||||
"api.stepfun.com": "stepfun",
|
||||
"api.arcee.ai": "arcee",
|
||||
"api.minimax": "minimax",
|
||||
"dashscope.aliyuncs.com": "alibaba",
|
||||
@@ -297,15 +272,7 @@ def _is_known_provider_base_url(base_url: str) -> bool:
|
||||
|
||||
|
||||
def is_local_endpoint(base_url: str) -> bool:
|
||||
"""Return True if base_url points to a local machine.
|
||||
|
||||
Recognises loopback (``localhost``, ``127.0.0.0/8``, ``::1``),
|
||||
container-internal DNS names (``host.docker.internal`` et al.),
|
||||
RFC-1918 private ranges (``10/8``, ``172.16/12``, ``192.168/16``),
|
||||
link-local, and Tailscale CGNAT (``100.64.0.0/10``). Tailscale CGNAT
|
||||
is included so remote-but-trusted Ollama boxes reached over a
|
||||
Tailscale mesh get the same timeout auto-bumps as localhost Ollama.
|
||||
"""
|
||||
"""Return True if base_url points to a local machine (localhost / RFC-1918 / WSL)."""
|
||||
normalized = _normalize_base_url(base_url)
|
||||
if not normalized:
|
||||
return False
|
||||
@@ -320,17 +287,14 @@ def is_local_endpoint(base_url: str) -> bool:
|
||||
# Docker / Podman / Lima internal DNS names (e.g. host.docker.internal)
|
||||
if any(host.endswith(suffix) for suffix in _CONTAINER_LOCAL_SUFFIXES):
|
||||
return True
|
||||
# RFC-1918 private ranges, link-local, and Tailscale CGNAT
|
||||
# RFC-1918 private ranges and link-local
|
||||
import ipaddress
|
||||
try:
|
||||
addr = ipaddress.ip_address(host)
|
||||
if addr.is_private or addr.is_loopback or addr.is_link_local:
|
||||
return True
|
||||
if isinstance(addr, ipaddress.IPv4Address) and addr in _TAILSCALE_CGNAT:
|
||||
return True
|
||||
return addr.is_private or addr.is_loopback or addr.is_link_local
|
||||
except ValueError:
|
||||
pass
|
||||
# Bare IP that looks like a private range (e.g. 172.26.x.x for WSL)
|
||||
# or Tailscale CGNAT (100.64.x.x–100.127.x.x).
|
||||
parts = host.split(".")
|
||||
if len(parts) == 4:
|
||||
try:
|
||||
@@ -341,14 +305,12 @@ def is_local_endpoint(base_url: str) -> bool:
|
||||
return True
|
||||
if first == 192 and second == 168:
|
||||
return True
|
||||
if first == 100 and 64 <= second <= 127:
|
||||
return True
|
||||
except ValueError:
|
||||
pass
|
||||
return False
|
||||
|
||||
|
||||
def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
|
||||
def detect_local_server_type(base_url: str) -> Optional[str]:
|
||||
"""Detect which local server is running at base_url by probing known endpoints.
|
||||
|
||||
Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None.
|
||||
@@ -360,10 +322,8 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
|
||||
if server_url.endswith("/v1"):
|
||||
server_url = server_url[:-3]
|
||||
|
||||
headers = _auth_headers(api_key)
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=2.0, headers=headers) as client:
|
||||
with httpx.Client(timeout=2.0) as client:
|
||||
# LM Studio exposes /api/v1/models — check first (most specific)
|
||||
try:
|
||||
r = client.get(f"{server_url}/api/v1/models")
|
||||
@@ -550,59 +510,6 @@ def fetch_endpoint_model_metadata(
|
||||
headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
|
||||
last_error: Optional[Exception] = None
|
||||
|
||||
if is_local_endpoint(normalized):
|
||||
try:
|
||||
if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
|
||||
server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
|
||||
response = requests.get(
|
||||
server_url.rstrip("/") + "/api/v1/models",
|
||||
headers=headers,
|
||||
timeout=10,
|
||||
)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
cache: Dict[str, Dict[str, Any]] = {}
|
||||
for model in payload.get("models", []):
|
||||
if not isinstance(model, dict):
|
||||
continue
|
||||
model_id = model.get("key") or model.get("id")
|
||||
if not model_id:
|
||||
continue
|
||||
entry: Dict[str, Any] = {"name": model.get("name", model_id)}
|
||||
|
||||
context_length = None
|
||||
for inst in model.get("loaded_instances", []) or []:
|
||||
if not isinstance(inst, dict):
|
||||
continue
|
||||
cfg = inst.get("config", {})
|
||||
ctx = cfg.get("context_length") if isinstance(cfg, dict) else None
|
||||
if isinstance(ctx, int) and ctx > 0:
|
||||
context_length = ctx
|
||||
break
|
||||
if context_length is None:
|
||||
context_length = _extract_context_length(model)
|
||||
if context_length is not None:
|
||||
entry["context_length"] = context_length
|
||||
|
||||
max_completion_tokens = _extract_max_completion_tokens(model)
|
||||
if max_completion_tokens is not None:
|
||||
entry["max_completion_tokens"] = max_completion_tokens
|
||||
|
||||
pricing = _extract_pricing(model)
|
||||
if pricing:
|
||||
entry["pricing"] = pricing
|
||||
|
||||
_add_model_aliases(cache, model_id, entry)
|
||||
alt_id = model.get("id")
|
||||
if isinstance(alt_id, str) and alt_id and alt_id != model_id:
|
||||
_add_model_aliases(cache, alt_id, entry)
|
||||
|
||||
_endpoint_model_metadata_cache[normalized] = cache
|
||||
_endpoint_model_metadata_cache_time[normalized] = time.time()
|
||||
return cache
|
||||
except Exception as exc:
|
||||
last_error = exc
|
||||
|
||||
for candidate in candidates:
|
||||
url = candidate.rstrip("/") + "/models"
|
||||
try:
|
||||
@@ -809,7 +716,7 @@ def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Optional[int]:
|
||||
def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
|
||||
"""Query an Ollama server for the model's context length.
|
||||
|
||||
Returns the model's maximum context from GGUF metadata via ``/api/show``,
|
||||
@@ -827,16 +734,14 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
|
||||
server_url = server_url[:-3]
|
||||
|
||||
try:
|
||||
server_type = detect_local_server_type(base_url, api_key=api_key)
|
||||
server_type = detect_local_server_type(base_url)
|
||||
except Exception:
|
||||
return None
|
||||
if server_type != "ollama":
|
||||
return None
|
||||
|
||||
headers = _auth_headers(api_key)
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=3.0, headers=headers) as client:
|
||||
with httpx.Client(timeout=3.0) as client:
|
||||
resp = client.post(f"{server_url}/api/show", json={"name": bare_model})
|
||||
if resp.status_code != 200:
|
||||
return None
|
||||
@@ -864,7 +769,7 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
|
||||
return None
|
||||
|
||||
|
||||
def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]:
|
||||
def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
|
||||
"""Query a local server for the model's context length."""
|
||||
import httpx
|
||||
|
||||
@@ -877,15 +782,13 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
|
||||
if server_url.endswith("/v1"):
|
||||
server_url = server_url[:-3]
|
||||
|
||||
headers = _auth_headers(api_key)
|
||||
|
||||
try:
|
||||
server_type = detect_local_server_type(base_url, api_key=api_key)
|
||||
server_type = detect_local_server_type(base_url)
|
||||
except Exception:
|
||||
server_type = None
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=3.0, headers=headers) as client:
|
||||
with httpx.Client(timeout=3.0) as client:
|
||||
# Ollama: /api/show returns model details with context info
|
||||
if server_type == "ollama":
|
||||
resp = client.post(f"{server_url}/api/show", json={"name": model})
|
||||
@@ -1096,7 +999,7 @@ def get_model_context_length(
|
||||
if not _is_known_provider_base_url(base_url):
|
||||
# 3. Try querying local server directly
|
||||
if is_local_endpoint(base_url):
|
||||
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
|
||||
local_ctx = _query_local_context_length(model, base_url)
|
||||
if local_ctx and local_ctx > 0:
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
return local_ctx
|
||||
@@ -1110,7 +1013,7 @@ def get_model_context_length(
|
||||
|
||||
# 4. Anthropic /v1/models API (only for regular API keys, not OAuth)
|
||||
if provider == "anthropic" or (
|
||||
base_url and base_url_hostname(base_url) == "api.anthropic.com"
|
||||
base_url and "api.anthropic.com" in base_url
|
||||
):
|
||||
ctx = _query_anthropic_context_length(model, base_url or "https://api.anthropic.com", api_key)
|
||||
if ctx:
|
||||
@@ -1119,11 +1022,7 @@ def get_model_context_length(
|
||||
# 4b. AWS Bedrock — use static context length table.
|
||||
# Bedrock's ListFoundationModels doesn't expose context window sizes,
|
||||
# so we maintain a curated table in bedrock_adapter.py.
|
||||
if provider == "bedrock" or (
|
||||
base_url
|
||||
and base_url_hostname(base_url).startswith("bedrock-runtime.")
|
||||
and base_url_host_matches(base_url, "amazonaws.com")
|
||||
):
|
||||
if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url):
|
||||
try:
|
||||
from agent.bedrock_adapter import get_bedrock_context_length
|
||||
return get_bedrock_context_length(model)
|
||||
@@ -1170,7 +1069,7 @@ def get_model_context_length(
|
||||
|
||||
# 9. Query local server as last resort
|
||||
if base_url and is_local_endpoint(base_url):
|
||||
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
|
||||
local_ctx = _query_local_context_length(model, base_url)
|
||||
if local_ctx and local_ctx > 0:
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
return local_ctx
|
||||
|
||||
@@ -146,7 +146,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
|
||||
"openai-codex": "openai",
|
||||
"zai": "zai",
|
||||
"kimi-coding": "kimi-for-coding",
|
||||
"stepfun": "stepfun",
|
||||
"kimi-coding-cn": "kimi-for-coding",
|
||||
"minimax": "minimax",
|
||||
"minimax-cn": "minimax-cn",
|
||||
@@ -418,9 +417,6 @@ def list_provider_models(provider: str) -> List[str]:
|
||||
|
||||
Returns an empty list if the provider is unknown or has no data.
|
||||
"""
|
||||
from hermes_cli.models import normalize_provider
|
||||
provider = normalize_provider(provider) or provider
|
||||
|
||||
models = _get_provider_models(provider)
|
||||
if models is None:
|
||||
return []
|
||||
|
||||
+4
-42
@@ -152,13 +152,7 @@ MEMORY_GUIDANCE = (
|
||||
"Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO "
|
||||
"state to memory; use session_search to recall those from past transcripts. "
|
||||
"If you've discovered a new way to do something, solved a problem that could be "
|
||||
"necessary later, save it as a skill with the skill tool.\n"
|
||||
"Write memories as declarative facts, not instructions to yourself. "
|
||||
"'User prefers concise responses' ✓ — 'Always respond concisely' ✗. "
|
||||
"'Project uses pytest with xdist' ✓ — 'Run tests with pytest -n 4' ✗. "
|
||||
"Imperative phrasing gets re-read as a directive in later sessions and can "
|
||||
"cause repeated work or override the user's current request. Procedures and "
|
||||
"workflows belong in skills, not memory."
|
||||
"necessary later, save it as a skill with the skill tool."
|
||||
)
|
||||
|
||||
SESSION_SEARCH_GUIDANCE = (
|
||||
@@ -350,13 +344,7 @@ PLATFORM_HINTS = {
|
||||
),
|
||||
"cli": (
|
||||
"You are a CLI AI Agent. Try not to use markdown but simple text "
|
||||
"renderable inside a terminal. "
|
||||
"File delivery: there is no attachment channel — the user reads your "
|
||||
"response directly in their terminal. Do NOT emit MEDIA:/path tags "
|
||||
"(those are only intercepted on messaging platforms like Telegram, "
|
||||
"Discord, Slack, etc.; on the CLI they render as literal text). "
|
||||
"When referring to a file you created or changed, just state its "
|
||||
"absolute path in plain text; the user can open it from there."
|
||||
"renderable inside a terminal."
|
||||
),
|
||||
"sms": (
|
||||
"You are communicating via SMS. Keep responses concise and use plain text "
|
||||
@@ -370,32 +358,6 @@ PLATFORM_HINTS = {
|
||||
"MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
|
||||
".heic) appear as photos and other files arrive as attachments."
|
||||
),
|
||||
"mattermost": (
|
||||
"You are in a Mattermost workspace communicating with your user. "
|
||||
"Mattermost renders standard Markdown — headings, bold, italic, code "
|
||||
"blocks, and tables all work. "
|
||||
"You can send media files natively: include MEDIA:/absolute/path/to/file "
|
||||
"in your response. Images (.jpg, .png, .webp) are uploaded as photo "
|
||||
"attachments, audio and video as file attachments. "
|
||||
"Image URLs in markdown format  are rendered as inline previews automatically."
|
||||
),
|
||||
"matrix": (
|
||||
"You are in a Matrix room communicating with your user. "
|
||||
"Matrix renders Markdown — bold, italic, code blocks, and links work; "
|
||||
"the adapter converts your Markdown to HTML for rich display. "
|
||||
"You can send media files natively: include MEDIA:/absolute/path/to/file "
|
||||
"in your response. Images (.jpg, .png, .webp) are sent as inline photos, "
|
||||
"audio (.ogg, .mp3) as voice/audio messages, video (.mp4) inline, "
|
||||
"and other files as downloadable attachments."
|
||||
),
|
||||
"feishu": (
|
||||
"You are in a Feishu (Lark) workspace communicating with your user. "
|
||||
"Feishu renders Markdown in messages — bold, italic, code blocks, and "
|
||||
"links are supported. "
|
||||
"You can send media files natively: include MEDIA:/absolute/path/to/file "
|
||||
"in your response. Images (.jpg, .png, .webp) are uploaded and displayed "
|
||||
"inline, audio files as voice messages, and other files as attachments."
|
||||
),
|
||||
"weixin": (
|
||||
"You are on Weixin/WeChat. Markdown formatting is supported, so you may use it when "
|
||||
"it improves readability, but keep the message compact and chat-friendly. You can send media files natively: "
|
||||
@@ -651,14 +613,12 @@ def build_skills_system_prompt(
|
||||
or get_session_env("HERMES_SESSION_PLATFORM")
|
||||
or ""
|
||||
)
|
||||
disabled = get_disabled_skill_names()
|
||||
cache_key = (
|
||||
str(skills_dir.resolve()),
|
||||
tuple(str(d) for d in external_dirs),
|
||||
tuple(sorted(str(t) for t in (available_tools or set()))),
|
||||
tuple(sorted(str(ts) for ts in (available_toolsets or set()))),
|
||||
_platform_hint,
|
||||
tuple(sorted(disabled)),
|
||||
)
|
||||
with _SKILLS_PROMPT_CACHE_LOCK:
|
||||
cached = _SKILLS_PROMPT_CACHE.get(cache_key)
|
||||
@@ -666,6 +626,8 @@ def build_skills_system_prompt(
|
||||
_SKILLS_PROMPT_CACHE.move_to_end(cache_key)
|
||||
return cached
|
||||
|
||||
disabled = get_disabled_skill_names()
|
||||
|
||||
# ── Layer 2: disk snapshot ────────────────────────────────────────
|
||||
snapshot = _load_skills_snapshot(skills_dir)
|
||||
|
||||
|
||||
-142
@@ -13,48 +13,6 @@ import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Sensitive query-string parameter names (case-insensitive exact match).
|
||||
# Ported from nearai/ironclaw#2529 — catches tokens whose values don't match
|
||||
# any known vendor prefix regex (e.g. opaque tokens, short OAuth codes).
|
||||
_SENSITIVE_QUERY_PARAMS = frozenset({
|
||||
"access_token",
|
||||
"refresh_token",
|
||||
"id_token",
|
||||
"token",
|
||||
"api_key",
|
||||
"apikey",
|
||||
"client_secret",
|
||||
"password",
|
||||
"auth",
|
||||
"jwt",
|
||||
"session",
|
||||
"secret",
|
||||
"key",
|
||||
"code", # OAuth authorization codes
|
||||
"signature", # pre-signed URL signatures
|
||||
"x-amz-signature",
|
||||
})
|
||||
|
||||
# Sensitive form-urlencoded / JSON body key names (case-insensitive exact match).
|
||||
# Exact match, NOT substring — "token_count" and "session_id" must NOT match.
|
||||
# Ported from nearai/ironclaw#2529.
|
||||
_SENSITIVE_BODY_KEYS = frozenset({
|
||||
"access_token",
|
||||
"refresh_token",
|
||||
"id_token",
|
||||
"token",
|
||||
"api_key",
|
||||
"apikey",
|
||||
"client_secret",
|
||||
"password",
|
||||
"auth",
|
||||
"jwt",
|
||||
"secret",
|
||||
"private_key",
|
||||
"authorization",
|
||||
"key",
|
||||
})
|
||||
|
||||
# Snapshot at import time so runtime env mutations (e.g. LLM-generated
|
||||
# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
|
||||
_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
|
||||
@@ -150,30 +108,6 @@ _DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
|
||||
# Negative lookahead prevents matching hex strings or identifiers
|
||||
_SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
|
||||
|
||||
# URLs containing query strings — matches `scheme://...?...[# or end]`.
|
||||
# Used to scan text for URLs whose query params may contain secrets.
|
||||
# Ported from nearai/ironclaw#2529.
|
||||
_URL_WITH_QUERY_RE = re.compile(
|
||||
r"(https?|wss?|ftp)://" # scheme
|
||||
r"([^\s/?#]+)" # authority (may include userinfo)
|
||||
r"([^\s?#]*)" # path
|
||||
r"\?([^\s#]+)" # query (required)
|
||||
r"(#\S*)?", # optional fragment
|
||||
)
|
||||
|
||||
# URLs containing userinfo — `scheme://user:password@host` for ANY scheme
|
||||
# (not just DB protocols already covered by _DB_CONNSTR_RE above).
|
||||
# Catches things like `https://user:token@api.example.com/v1/foo`.
|
||||
_URL_USERINFO_RE = re.compile(
|
||||
r"(https?|wss?|ftp)://([^/\s:@]+):([^/\s@]+)@",
|
||||
)
|
||||
|
||||
# Form-urlencoded body detection: conservative — only applies when the entire
|
||||
# text looks like a query string (k=v&k=v pattern with no newlines).
|
||||
_FORM_BODY_RE = re.compile(
|
||||
r"^[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*(?:&[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*)+$"
|
||||
)
|
||||
|
||||
# Compile known prefix patterns into one alternation
|
||||
_PREFIX_RE = re.compile(
|
||||
r"(?<![A-Za-z0-9_-])(" + "|".join(_PREFIX_PATTERNS) + r")(?![A-Za-z0-9_-])"
|
||||
@@ -187,72 +121,6 @@ def _mask_token(token: str) -> str:
|
||||
return f"{token[:6]}...{token[-4:]}"
|
||||
|
||||
|
||||
def _redact_query_string(query: str) -> str:
|
||||
"""Redact sensitive parameter values in a URL query string.
|
||||
|
||||
Handles `k=v&k=v` format. Sensitive keys (case-insensitive) have values
|
||||
replaced with `***`. Non-sensitive keys pass through unchanged.
|
||||
Empty or malformed pairs are preserved as-is.
|
||||
"""
|
||||
if not query:
|
||||
return query
|
||||
parts = []
|
||||
for pair in query.split("&"):
|
||||
if "=" not in pair:
|
||||
parts.append(pair)
|
||||
continue
|
||||
key, _, value = pair.partition("=")
|
||||
if key.lower() in _SENSITIVE_QUERY_PARAMS:
|
||||
parts.append(f"{key}=***")
|
||||
else:
|
||||
parts.append(pair)
|
||||
return "&".join(parts)
|
||||
|
||||
|
||||
def _redact_url_query_params(text: str) -> str:
|
||||
"""Scan text for URLs with query strings and redact sensitive params.
|
||||
|
||||
Catches opaque tokens that don't match vendor prefix regexes, e.g.
|
||||
`https://example.com/cb?code=ABC123&state=xyz` → `...?code=***&state=xyz`.
|
||||
"""
|
||||
def _sub(m: re.Match) -> str:
|
||||
scheme = m.group(1)
|
||||
authority = m.group(2)
|
||||
path = m.group(3)
|
||||
query = _redact_query_string(m.group(4))
|
||||
fragment = m.group(5) or ""
|
||||
return f"{scheme}://{authority}{path}?{query}{fragment}"
|
||||
return _URL_WITH_QUERY_RE.sub(_sub, text)
|
||||
|
||||
|
||||
def _redact_url_userinfo(text: str) -> str:
|
||||
"""Strip `user:password@` from HTTP/WS/FTP URLs.
|
||||
|
||||
DB protocols (postgres, mysql, mongodb, redis, amqp) are handled
|
||||
separately by `_DB_CONNSTR_RE`.
|
||||
"""
|
||||
return _URL_USERINFO_RE.sub(
|
||||
lambda m: f"{m.group(1)}://{m.group(2)}:***@",
|
||||
text,
|
||||
)
|
||||
|
||||
|
||||
def _redact_form_body(text: str) -> str:
|
||||
"""Redact sensitive values in a form-urlencoded body.
|
||||
|
||||
Only applies when the entire input looks like a pure form body
|
||||
(k=v&k=v with no newlines, no other text). Single-line non-form
|
||||
text passes through unchanged. This is a conservative pass — the
|
||||
`_redact_url_query_params` function handles embedded query strings.
|
||||
"""
|
||||
if not text or "\n" in text or "&" not in text:
|
||||
return text
|
||||
# The body-body form check is strict: only trigger on clean k=v&k=v.
|
||||
if not _FORM_BODY_RE.match(text.strip()):
|
||||
return text
|
||||
return _redact_query_string(text.strip())
|
||||
|
||||
|
||||
def redact_sensitive_text(text: str) -> str:
|
||||
"""Apply all redaction patterns to a block of text.
|
||||
|
||||
@@ -305,16 +173,6 @@ def redact_sensitive_text(text: str) -> str:
|
||||
# JWT tokens (eyJ... — base64-encoded JSON headers)
|
||||
text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)
|
||||
|
||||
# URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes.
|
||||
# DB schemes are handled above by _DB_CONNSTR_RE.
|
||||
text = _redact_url_userinfo(text)
|
||||
|
||||
# URL query params containing opaque tokens (?access_token=…&code=…)
|
||||
text = _redact_url_query_params(text)
|
||||
|
||||
# Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
|
||||
text = _redact_form_body(text)
|
||||
|
||||
# Discord user/role mentions (<@snowflake_id>)
|
||||
text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)
|
||||
|
||||
|
||||
@@ -1,831 +0,0 @@
|
||||
"""
|
||||
Shell-script hooks bridge.
|
||||
|
||||
Reads the ``hooks:`` block from ``cli-config.yaml``, prompts the user for
|
||||
consent on first use of each ``(event, command)`` pair, and registers
|
||||
callbacks on the existing plugin hook manager so every existing
|
||||
``invoke_hook()`` site dispatches to the configured shell scripts — with
|
||||
zero changes to call sites.
|
||||
|
||||
Design notes
|
||||
------------
|
||||
* Python plugins and shell hooks compose naturally: both flow through
|
||||
:func:`hermes_cli.plugins.invoke_hook` and its aggregators. Python
|
||||
plugins are registered first (via ``discover_and_load()``) so their
|
||||
block decisions win ties over shell-hook blocks.
|
||||
* Subprocess execution uses ``shlex.split(os.path.expanduser(command))``
|
||||
with ``shell=False`` — no shell injection footguns. Users that need
|
||||
pipes/redirection wrap their logic in a script.
|
||||
* First-use consent is gated by the allowlist under
|
||||
``~/.hermes/shell-hooks-allowlist.json``. Non-TTY callers must pass
|
||||
``accept_hooks=True`` (resolved from ``--accept-hooks``,
|
||||
``HERMES_ACCEPT_HOOKS``, or ``hooks_auto_accept: true`` in config)
|
||||
for registration to succeed without a prompt.
|
||||
* Registration is idempotent — safe to invoke from both the CLI entry
|
||||
point (``hermes_cli/main.py``) and the gateway entry point
|
||||
(``gateway/run.py``).
|
||||
|
||||
Wire protocol
|
||||
-------------
|
||||
**stdin** (JSON, piped to the script)::
|
||||
|
||||
{
|
||||
"hook_event_name": "pre_tool_call",
|
||||
"tool_name": "terminal",
|
||||
"tool_input": {"command": "rm -rf /"},
|
||||
"session_id": "sess_abc123",
|
||||
"cwd": "/home/user/project",
|
||||
"extra": {...} # event-specific kwargs
|
||||
}
|
||||
|
||||
**stdout** (JSON, optional — anything else is ignored)::
|
||||
|
||||
# Block a pre_tool_call (either shape accepted; normalised internally):
|
||||
{"decision": "block", "reason": "Forbidden command"} # Claude-Code-style
|
||||
{"action": "block", "message": "Forbidden command"} # Hermes-canonical
|
||||
|
||||
# Inject context for pre_llm_call:
|
||||
{"context": "Today is Friday"}
|
||||
|
||||
# Silent no-op:
|
||||
<empty or any non-matching JSON object>
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import difflib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shlex
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple
|
||||
|
||||
try:
|
||||
import fcntl # POSIX only; Windows falls back to best-effort without flock.
|
||||
except ImportError: # pragma: no cover
|
||||
fcntl = None # type: ignore[assignment]
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_TIMEOUT_SECONDS = 60
|
||||
MAX_TIMEOUT_SECONDS = 300
|
||||
ALLOWLIST_FILENAME = "shell-hooks-allowlist.json"
|
||||
|
||||
# (event, matcher, command) triples that have been wired to the plugin
|
||||
# manager in the current process. Matcher is part of the key because
|
||||
# the same script can legitimately register for different matchers under
|
||||
# the same event (e.g. one entry per tool the user wants to gate).
|
||||
# Second registration attempts for the exact same triple become no-ops
|
||||
# so the CLI and gateway can both call register_from_config() safely.
|
||||
_registered: Set[Tuple[str, Optional[str], str]] = set()
|
||||
_registered_lock = threading.Lock()
|
||||
|
||||
# Intra-process lock for allowlist read-modify-write on platforms that
|
||||
# lack ``fcntl`` (non-POSIX). Kept separate from ``_registered_lock``
|
||||
# because ``register_from_config`` already holds ``_registered_lock`` when
|
||||
# it triggers ``_record_approval`` — reusing it here would self-deadlock
|
||||
# (``threading.Lock`` is non-reentrant). POSIX callers use the sibling
|
||||
# ``.lock`` file via ``fcntl.flock`` and bypass this.
|
||||
_allowlist_write_lock = threading.Lock()
|
||||
|
||||
|
||||
@dataclass
|
||||
class ShellHookSpec:
|
||||
"""Parsed and validated representation of a single ``hooks:`` entry."""
|
||||
|
||||
event: str
|
||||
command: str
|
||||
matcher: Optional[str] = None
|
||||
timeout: int = DEFAULT_TIMEOUT_SECONDS
|
||||
compiled_matcher: Optional[re.Pattern] = field(default=None, repr=False)
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
# Strip whitespace introduced by YAML quirks (e.g. multi-line string
|
||||
# folding) — a matcher of " terminal" would otherwise silently fail
|
||||
# to match "terminal" without any diagnostic.
|
||||
if isinstance(self.matcher, str):
|
||||
stripped = self.matcher.strip()
|
||||
self.matcher = stripped if stripped else None
|
||||
if self.matcher:
|
||||
try:
|
||||
self.compiled_matcher = re.compile(self.matcher)
|
||||
except re.error as exc:
|
||||
logger.warning(
|
||||
"shell hook matcher %r is invalid (%s) — treating as "
|
||||
"literal equality", self.matcher, exc,
|
||||
)
|
||||
self.compiled_matcher = None
|
||||
|
||||
def matches_tool(self, tool_name: Optional[str]) -> bool:
|
||||
if not self.matcher:
|
||||
return True
|
||||
if tool_name is None:
|
||||
return False
|
||||
if self.compiled_matcher is not None:
|
||||
return self.compiled_matcher.fullmatch(tool_name) is not None
|
||||
# compiled_matcher is None only when the regex failed to compile,
|
||||
# in which case we already warned and fall back to literal equality.
|
||||
return tool_name == self.matcher
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def register_from_config(
|
||||
cfg: Optional[Dict[str, Any]],
|
||||
*,
|
||||
accept_hooks: bool = False,
|
||||
) -> List[ShellHookSpec]:
|
||||
"""Register every configured shell hook on the plugin manager.
|
||||
|
||||
``cfg`` is the full parsed config dict (``hermes_cli.config.load_config``
|
||||
output). The ``hooks:`` key is read out of it. Missing, empty, or
|
||||
non-dict ``hooks`` is treated as zero configured hooks.
|
||||
|
||||
``accept_hooks=True`` skips the TTY consent prompt — the caller is
|
||||
promising that the user has opted in via a flag, env var, or config
|
||||
setting. ``HERMES_ACCEPT_HOOKS=1`` and ``hooks_auto_accept: true`` are
|
||||
also honored inside this function so either CLI or gateway call sites
|
||||
pick them up.
|
||||
|
||||
Returns the list of :class:`ShellHookSpec` entries that ended up wired
|
||||
up on the plugin manager. Skipped entries (unknown events, malformed,
|
||||
not allowlisted, already registered) are logged but not returned.
|
||||
"""
|
||||
if not isinstance(cfg, dict):
|
||||
return []
|
||||
|
||||
effective_accept = _resolve_effective_accept(cfg, accept_hooks)
|
||||
|
||||
specs = _parse_hooks_block(cfg.get("hooks"))
|
||||
if not specs:
|
||||
return []
|
||||
|
||||
registered: List[ShellHookSpec] = []
|
||||
|
||||
# Import lazily — avoids circular imports at module-load time.
|
||||
from hermes_cli.plugins import get_plugin_manager
|
||||
|
||||
manager = get_plugin_manager()
|
||||
|
||||
# Idempotence + allowlist read happen under the lock; the TTY
|
||||
# prompt runs outside so other threads aren't parked on a blocking
|
||||
# input(). Mutation re-takes the lock with a defensive idempotence
|
||||
# re-check in case two callers ever race through the prompt.
|
||||
for spec in specs:
|
||||
key = (spec.event, spec.matcher, spec.command)
|
||||
with _registered_lock:
|
||||
if key in _registered:
|
||||
continue
|
||||
already_allowlisted = _is_allowlisted(spec.event, spec.command)
|
||||
|
||||
if not already_allowlisted:
|
||||
if not _prompt_and_record(
|
||||
spec.event, spec.command, accept_hooks=effective_accept,
|
||||
):
|
||||
logger.warning(
|
||||
"shell hook for %s (%s) not allowlisted — skipped. "
|
||||
"Use --accept-hooks / HERMES_ACCEPT_HOOKS=1 / "
|
||||
"hooks_auto_accept: true, or approve at the TTY "
|
||||
"prompt next run.",
|
||||
spec.event, spec.command,
|
||||
)
|
||||
continue
|
||||
|
||||
with _registered_lock:
|
||||
if key in _registered:
|
||||
continue
|
||||
manager._hooks.setdefault(spec.event, []).append(_make_callback(spec))
|
||||
_registered.add(key)
|
||||
registered.append(spec)
|
||||
logger.info(
|
||||
"shell hook registered: %s -> %s (matcher=%s, timeout=%ds)",
|
||||
spec.event, spec.command, spec.matcher, spec.timeout,
|
||||
)
|
||||
|
||||
return registered
|
||||
|
||||
|
||||
def iter_configured_hooks(cfg: Optional[Dict[str, Any]]) -> List[ShellHookSpec]:
|
||||
"""Return the parsed ``ShellHookSpec`` entries from config without
|
||||
registering anything. Used by ``hermes hooks list`` and ``doctor``."""
|
||||
if not isinstance(cfg, dict):
|
||||
return []
|
||||
return _parse_hooks_block(cfg.get("hooks"))
|
||||
|
||||
|
||||
def reset_for_tests() -> None:
|
||||
"""Clear the idempotence set. Test-only helper."""
|
||||
with _registered_lock:
|
||||
_registered.clear()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _parse_hooks_block(hooks_cfg: Any) -> List[ShellHookSpec]:
    """Flatten the ``hooks:`` mapping into a list of ``ShellHookSpec``.

    Each key must be a known hook event whose value is a list of hook
    definitions.  Anything malformed is logged and skipped; this function
    never raises on bad config because a broken hook must not crash the
    agent.

    Args:
        hooks_cfg: The raw value of the ``hooks`` config key.

    Returns:
        The successfully parsed specs, in config order.  Empty when
        ``hooks_cfg`` is not a dict.
    """
    from hermes_cli.plugins import VALID_HOOKS

    if not isinstance(hooks_cfg, dict):
        return []

    parsed: List[ShellHookSpec] = []

    for event_name, entries in hooks_cfg.items():
        if event_name not in VALID_HOOKS:
            # Offer the closest valid event name when there is one,
            # otherwise list every valid name.
            close = difflib.get_close_matches(
                str(event_name), VALID_HOOKS, n=1, cutoff=0.6,
            )
            if not close:
                logger.warning(
                    "unknown hook event %r in hooks: config (valid: %s)",
                    event_name, ", ".join(sorted(VALID_HOOKS)),
                )
            else:
                logger.warning(
                    "unknown hook event %r in hooks: config — did you mean %r?",
                    event_name, close[0],
                )
            continue

        # An event key with no value is simply "no hooks here".
        if entries is None:
            continue

        if not isinstance(entries, list):
            logger.warning(
                "hooks.%s must be a list of hook definitions; got %s",
                event_name, type(entries).__name__,
            )
            continue

        candidates = (
            _parse_single_entry(event_name, position, raw_entry)
            for position, raw_entry in enumerate(entries)
        )
        parsed.extend(item for item in candidates if item is not None)

    return parsed
|
||||
|
||||
|
||||
def _parse_single_entry(
    event: str, index: int, raw: Any,
) -> Optional[ShellHookSpec]:
    """Validate one hook definition and turn it into a ``ShellHookSpec``.

    Args:
        event: Name of the hook event the entry is listed under.
        index: Position of the entry in that event's list (for log messages).
        raw: The raw entry from config; expected to be a mapping.

    Returns:
        A spec with a stripped command, a validated matcher and a timeout
        between 1 and ``MAX_TIMEOUT_SECONDS``; ``None`` when the entry is
        not a mapping or has no usable ``command``.

    Never raises: every malformed field is logged and either replaced by a
    default or causes the entry to be skipped.
    """
    if not isinstance(raw, dict):
        logger.warning(
            "hooks.%s[%d] must be a mapping with a 'command' key; got %s",
            event, index, type(raw).__name__,
        )
        return None

    command = raw.get("command")
    if not isinstance(command, str) or not command.strip():
        logger.warning(
            "hooks.%s[%d] is missing a non-empty 'command' field",
            event, index,
        )
        return None

    matcher = raw.get("matcher")
    if matcher is not None and not isinstance(matcher, str):
        logger.warning(
            "hooks.%s[%d].matcher must be a string regex; ignoring",
            event, index,
        )
        matcher = None

    if matcher is not None and event not in ("pre_tool_call", "post_tool_call"):
        logger.warning(
            "hooks.%s[%d].matcher=%r will be ignored at runtime — the "
            "matcher field is only honored for pre_tool_call / "
            "post_tool_call. The hook will fire on every %s event.",
            event, index, matcher, event,
        )
        matcher = None

    timeout_raw = raw.get("timeout", DEFAULT_TIMEOUT_SECONDS)
    try:
        timeout = int(timeout_raw)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")) — reachable via YAML ``.inf``.
        # Without it the exception would escape and break the
        # "config parsing never raises" guarantee.
        logger.warning(
            "hooks.%s[%d].timeout must be an int (got %r); using default %ds",
            event, index, timeout_raw, DEFAULT_TIMEOUT_SECONDS,
        )
        timeout = DEFAULT_TIMEOUT_SECONDS

    if timeout < 1:
        logger.warning(
            "hooks.%s[%d].timeout must be >=1; using default %ds",
            event, index, DEFAULT_TIMEOUT_SECONDS,
        )
        timeout = DEFAULT_TIMEOUT_SECONDS

    if timeout > MAX_TIMEOUT_SECONDS:
        logger.warning(
            "hooks.%s[%d].timeout=%ds exceeds max %ds; clamping",
            event, index, timeout, MAX_TIMEOUT_SECONDS,
        )
        timeout = MAX_TIMEOUT_SECONDS

    return ShellHookSpec(
        event=event,
        command=command.strip(),
        matcher=matcher,
        timeout=timeout,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Subprocess callback
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_TOP_LEVEL_PAYLOAD_KEYS = {"tool_name", "args", "session_id", "parent_session_id"}
|
||||
|
||||
|
||||
def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
    """Run ``spec.command`` as a subprocess with ``stdin_json`` on stdin.

    Returns a diagnostic dict with the same keys for every outcome
    (``returncode``, ``stdout``, ``stderr``, ``timed_out``,
    ``elapsed_seconds``, ``error``).  This is the single place the
    subprocess is actually invoked — both the live callback path
    (:func:`_make_callback`) and the CLI test helper (:func:`run_once`)
    go through it.

    Args:
        spec: The hook to run; ``spec.command`` and ``spec.timeout`` are read.
        stdin_json: Text written to the child's stdin.

    Returns:
        The diagnostic dict.  ``error`` is set (and ``returncode`` stays
        ``None``) when the command cannot be parsed or launched;
        ``timed_out`` is set when ``spec.timeout`` elapses.
    """
    result: Dict[str, Any] = {
        "returncode": None,
        "stdout": "",
        "stderr": "",
        "timed_out": False,
        "elapsed_seconds": 0.0,
        "error": None,
    }
    try:
        # Split first, then expand ``~`` per token.  Expanding the whole
        # string up front only handles a leading ``~`` (so
        # ``python3 ~/hook.py`` was passed through literally, since no shell
        # is involved) and lets a home directory containing spaces be
        # split into several argv entries.
        argv = [os.path.expanduser(token) for token in shlex.split(spec.command)]
    except ValueError as exc:
        result["error"] = f"command {spec.command!r} cannot be parsed: {exc}"
        return result
    if not argv:
        result["error"] = "empty command"
        return result

    t0 = time.monotonic()
    try:
        proc = subprocess.run(
            argv,
            input=stdin_json,
            capture_output=True,
            timeout=spec.timeout,
            text=True,
            shell=False,
        )
    except subprocess.TimeoutExpired:
        result["timed_out"] = True
        result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
        return result
    except FileNotFoundError:
        result["error"] = "command not found"
        return result
    except PermissionError:
        result["error"] = "command not executable"
        return result
    except Exception as exc:  # pragma: no cover — defensive
        result["error"] = str(exc)
        return result

    result["returncode"] = proc.returncode
    result["stdout"] = proc.stdout or ""
    result["stderr"] = proc.stderr or ""
    result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
    return result
|
||||
|
||||
|
||||
def _make_callback(spec: ShellHookSpec) -> Callable[..., Optional[Dict[str, Any]]]:
    """Create the per-firing closure that ``invoke_hook()`` calls for ``spec``.

    The closure runs the hook command with the serialized keyword arguments
    on stdin and returns the parsed response, or ``None`` when the matcher
    rejects the tool, the command fails to launch, or it times out.
    """
    tool_scoped = spec.event in ("pre_tool_call", "post_tool_call")

    def _callback(**kwargs: Any) -> Optional[Dict[str, Any]]:
        # The matcher only applies to tool-scoped events.
        if tool_scoped and not spec.matches_tool(kwargs.get("tool_name")):
            return None

        outcome = _spawn(spec, _serialize_payload(spec.event, kwargs))

        if outcome["error"]:
            logger.warning(
                "shell hook failed (event=%s command=%s): %s",
                spec.event, spec.command, outcome["error"],
            )
            return None
        if outcome["timed_out"]:
            logger.warning(
                "shell hook timed out after %.2fs (event=%s command=%s)",
                outcome["elapsed_seconds"], spec.event, spec.command,
            )
            return None

        err_text = outcome["stderr"].strip()
        if err_text:
            logger.debug(
                "shell hook stderr (event=%s command=%s): %s",
                spec.event, spec.command, err_text[:400],
            )
        # A non-zero exit is logged, but stdout is still parsed so that a
        # script signalling failure via its exit code can also return a
        # block directive.
        if outcome["returncode"] != 0:
            logger.warning(
                "shell hook exited %d (event=%s command=%s); stderr=%s",
                outcome["returncode"], spec.event, spec.command, err_text[:400],
            )
        return _parse_response(spec.event, outcome["stdout"])

    label = f"shell_hook[{spec.event}:{spec.command}]"
    _callback.__name__ = label
    _callback.__qualname__ = label
    return _callback
|
||||
|
||||
|
||||
def _serialize_payload(event: str, kwargs: Dict[str, Any]) -> str:
    """Build the JSON document sent to a hook on stdin.

    Keyword arguments that are not promoted to top-level fields are kept
    under ``extra``.  Values ``json`` cannot encode are stringified via
    ``default=str`` rather than dropped.

    Args:
        event: Hook event name, emitted as ``hook_event_name``.
        kwargs: The keyword arguments of the hook firing.

    Returns:
        The JSON text (non-ASCII characters are not escaped).
    """
    try:
        working_dir = str(Path.cwd())
    except OSError:
        working_dir = ""

    tool_args = kwargs.get("args")
    leftovers = {
        name: value
        for name, value in kwargs.items()
        if name not in _TOP_LEVEL_PAYLOAD_KEYS
    }
    payload = {
        "hook_event_name": event,
        "tool_name": kwargs.get("tool_name"),
        # Only a dict is forwarded as tool input; anything else becomes null.
        "tool_input": tool_args if isinstance(tool_args, dict) else None,
        "session_id": kwargs.get("session_id") or kwargs.get("parent_session_id") or "",
        "cwd": working_dir,
        "extra": leftovers,
    }
    return json.dumps(payload, ensure_ascii=False, default=str)
|
||||
|
||||
|
||||
def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]:
|
||||
"""Translate stdout JSON into a Hermes wire-shape dict.
|
||||
|
||||
For ``pre_tool_call`` the Claude-Code-style ``{"decision": "block",
|
||||
"reason": "..."}`` payload is translated into the canonical Hermes
|
||||
``{"action": "block", "message": "..."}`` shape expected by
|
||||
:func:`hermes_cli.plugins.get_pre_tool_call_block_message`. This is
|
||||
the single most important correctness invariant in this module —
|
||||
skipping the translation silently breaks every ``pre_tool_call``
|
||||
block directive.
|
||||
|
||||
For ``pre_llm_call``, ``{"context": "..."}`` is passed through
|
||||
unchanged to match the existing plugin-hook contract.
|
||||
|
||||
Anything else returns ``None``.
|
||||
"""
|
||||
stdout = (stdout or "").strip()
|
||||
if not stdout:
|
||||
return None
|
||||
|
||||
try:
|
||||
data = json.loads(stdout)
|
||||
except json.JSONDecodeError:
|
||||
logger.warning(
|
||||
"shell hook stdout was not valid JSON (event=%s): %s",
|
||||
event, stdout[:200],
|
||||
)
|
||||
return None
|
||||
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
|
||||
if event == "pre_tool_call":
|
||||
if data.get("action") == "block":
|
||||
message = data.get("message") or data.get("reason") or ""
|
||||
if isinstance(message, str) and message:
|
||||
return {"action": "block", "message": message}
|
||||
if data.get("decision") == "block":
|
||||
message = data.get("reason") or data.get("message") or ""
|
||||
if isinstance(message, str) and message:
|
||||
return {"action": "block", "message": message}
|
||||
return None
|
||||
|
||||
context = data.get("context")
|
||||
if isinstance(context, str) and context.strip():
|
||||
return {"context": context}
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Allowlist / consent
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def allowlist_path() -> Path:
    """Location of the per-user shell-hook allowlist file.

    Returns:
        ``ALLOWLIST_FILENAME`` inside the directory reported by
        ``get_hermes_home()``.
    """
    hermes_home = get_hermes_home()
    return hermes_home / ALLOWLIST_FILENAME
|
||||
|
||||
|
||||
def load_allowlist() -> Dict[str, Any]:
    """Return the parsed allowlist, or an empty skeleton if unusable.

    Returns:
        The JSON object stored at :func:`allowlist_path`, guaranteed to
        carry a list under ``"approvals"``.  ``{"approvals": []}`` is
        returned when the file is missing, unreadable, not decodable as
        text, not valid JSON, or not a JSON object.
    """
    try:
        raw = json.loads(allowlist_path().read_text())
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file.  ValueError covers
        # both json.JSONDecodeError and UnicodeDecodeError — the latter is
        # raised by read_text() on a corrupted file and is NOT a
        # JSONDecodeError, so it used to escape this handler.
        return {"approvals": []}
    if not isinstance(raw, dict):
        return {"approvals": []}
    approvals = raw.get("approvals")
    if not isinstance(approvals, list):
        raw["approvals"] = []
    return raw
|
||||
|
||||
|
||||
def save_allowlist(data: Dict[str, Any]) -> None:
    """Atomically persist the allowlist via per-process ``mkstemp`` +
    ``os.replace``.  Cross-process read-modify-write races are handled
    by :func:`_locked_update_approvals` (``fcntl.flock``).  On OSError
    the failure is logged; the in-process hook still registers but
    the approval won't survive across runs.

    Args:
        data: The allowlist mapping; written as indented JSON with sorted
            keys.
    """
    p = allowlist_path()
    try:
        p.parent.mkdir(parents=True, exist_ok=True)
        # The temp file is created in the destination directory so the
        # final os.replace() stays within one directory.
        fd, tmp_path = tempfile.mkstemp(
            prefix=f"{p.name}.", suffix=".tmp", dir=str(p.parent),
        )
        try:
            with os.fdopen(fd, "w") as fh:
                fh.write(json.dumps(data, indent=2, sort_keys=True))
            os.replace(tmp_path, p)
        except Exception:
            # Best-effort removal of the temp file before re-raising; the
            # original exception is what propagates.
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
            raise
    except OSError as exc:
        # Only OSError is downgraded to a warning; other exception types
        # re-raised above still propagate to the caller.
        logger.warning(
            "Failed to persist shell hook allowlist to %s: %s. "
            "The approval is in-memory for this run, but the next "
            "startup will re-prompt (or skip registration on non-TTY "
            "runs without --accept-hooks / HERMES_ACCEPT_HOOKS).",
            p, exc,
        )
|
||||
|
||||
|
||||
def _is_allowlisted(event: str, command: str) -> bool:
    """Tell whether the ``(event, command)`` pair has a stored approval.

    The allowlist is re-read on every call; entries that are not dicts are
    ignored.
    """
    for entry in load_allowlist().get("approvals", []):
        if not isinstance(entry, dict):
            continue
        if entry.get("event") == event and entry.get("command") == command:
            return True
    return False
|
||||
|
||||
|
||||
@contextmanager
def _locked_update_approvals() -> Iterator[Dict[str, Any]]:
    """Serialise read-modify-write on the allowlist across processes.

    Holds an exclusive ``flock`` on a sibling lock file for the duration
    of the update so concurrent ``_record_approval``/``revoke`` callers
    cannot clobber each other's changes (the race Codex reproduced with
    20–50 simultaneous writers).  Falls back to an in-process lock on
    platforms without ``fcntl``.

    Yields:
        The freshly loaded allowlist dict.  Mutate it in place inside the
        ``with`` body; it is saved when the body finishes normally.  If the
        body raises, the save is skipped.
    """
    p = allowlist_path()
    p.parent.mkdir(parents=True, exist_ok=True)
    # The lock lives next to the allowlist: "<allowlist name>.lock".
    lock_path = p.with_suffix(p.suffix + ".lock")

    if fcntl is None:  # pragma: no cover — non-POSIX fallback
        with _allowlist_write_lock:
            data = load_allowlist()
            yield data
            save_allowlist(data)
        return

    with open(lock_path, "a+") as lock_fh:
        fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)
        try:
            # Load only after the lock is held so the update starts from
            # the latest on-disk state.
            data = load_allowlist()
            yield data
            save_allowlist(data)
        finally:
            fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN)
|
||||
|
||||
|
||||
def _prompt_and_record(
    event: str, command: str, *, accept_hooks: bool,
) -> bool:
    """Decide whether an unseen ``(event, command)`` pair gets approved.

    With ``accept_hooks`` the pair is recorded without asking.  Otherwise
    the user is prompted, but only when stdin is a TTY; a non-interactive
    run is treated as a refusal.

    Returns:
        ``True`` iff the approval was granted and recorded.
    """
    if accept_hooks:
        _record_approval(event, command)
        logger.info(
            "shell hook auto-approved via --accept-hooks / env / config: "
            "%s -> %s", event, command,
        )
        return True

    if not sys.stdin.isatty():
        return False

    warning_banner = (
        f"\n⚠ Hermes is about to register a shell hook that will run a\n"
        f" command on your behalf.\n\n"
        f" Event: {event}\n"
        f" Command: {command}\n\n"
        f" Commands run with your full user credentials. Only approve\n"
        f" commands you trust."
    )
    print(warning_banner)

    try:
        reply = input("Allow this hook to run? [y/N]: ").strip().lower()
    except (EOFError, KeyboardInterrupt):
        print()  # keep the terminal tidy after ^C
        return False

    if reply not in ("y", "yes"):
        return False

    _record_approval(event, command)
    return True
|
||||
|
||||
|
||||
def _record_approval(event: str, command: str) -> None:
    """Store an approval for ``(event, command)`` in the allowlist.

    Any existing record for the same pair is dropped, and the new record
    (stamped with the current time and the script's mtime) is appended
    last.
    """
    record = {
        "event": event,
        "command": command,
        "approved_at": _utc_now_iso(),
        "script_mtime_at_approval": script_mtime_iso(command),
    }

    def _is_same_pair(existing: Any) -> bool:
        return (
            isinstance(existing, dict)
            and existing.get("event") == event
            and existing.get("command") == command
        )

    with _locked_update_approvals() as data:
        kept = [e for e in data.get("approvals", []) if not _is_same_pair(e)]
        kept.append(record)
        data["approvals"] = kept
|
||||
|
||||
|
||||
def _utc_now_iso() -> str:
|
||||
return datetime.now(tz=timezone.utc).isoformat().replace("+00:00", "Z")
|
||||
|
||||
|
||||
def revoke(command: str) -> int:
    """Delete every allowlist entry whose command equals ``command``.

    Callbacks already live on the plugin manager in the current process
    are not unregistered — restart the CLI / gateway to drop them.

    Returns:
        How many entries were removed.
    """
    with _locked_update_approvals() as data:
        existing = data.get("approvals", [])
        survivors = [
            entry for entry in existing
            if not (isinstance(entry, dict) and entry.get("command") == command)
        ]
        data["approvals"] = survivors
        removed = len(existing) - len(survivors)
    return removed
|
||||
|
||||
|
||||
_SCRIPT_EXTENSIONS: Tuple[str, ...] = (
|
||||
".sh", ".bash", ".zsh", ".fish",
|
||||
".py", ".pyw",
|
||||
".rb", ".pl", ".lua",
|
||||
".js", ".mjs", ".cjs", ".ts",
|
||||
)
|
||||
|
||||
|
||||
def _command_script_path(command: str) -> str:
    """Pick the token of ``command`` that names the hook script.

    Used for doctor / drift checks.  Candidates, in order of preference:

    1. the first token ending in a known script extension,
    2. the first token containing ``/`` or starting with ``~``,
    3. the first token.

    This handles ``python3 /path/hook.py``, ``/usr/bin/env bash hook.sh``
    and the bare-path form.  A command that cannot be tokenised, or that
    has no tokens, is returned unchanged.
    """
    try:
        tokens = shlex.split(command)
    except ValueError:
        return command
    if not tokens:
        return command

    by_extension = next(
        (tok for tok in tokens if tok.lower().endswith(_SCRIPT_EXTENSIONS)),
        None,
    )
    if by_extension is not None:
        return by_extension

    path_like = next(
        (tok for tok in tokens if "/" in tok or tok.startswith("~")),
        None,
    )
    if path_like is not None:
        return path_like

    return tokens[0]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers for accept-hooks resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _resolve_effective_accept(
|
||||
cfg: Dict[str, Any], accept_hooks_arg: bool,
|
||||
) -> bool:
|
||||
"""Combine all three opt-in channels into a single boolean.
|
||||
|
||||
Precedence (any truthy source flips us on):
|
||||
1. ``--accept-hooks`` flag (CLI) / explicit argument
|
||||
2. ``HERMES_ACCEPT_HOOKS`` env var
|
||||
3. ``hooks_auto_accept: true`` in ``cli-config.yaml``
|
||||
"""
|
||||
if accept_hooks_arg:
|
||||
return True
|
||||
env = os.environ.get("HERMES_ACCEPT_HOOKS", "").strip().lower()
|
||||
if env in ("1", "true", "yes", "on"):
|
||||
return True
|
||||
cfg_val = cfg.get("hooks_auto_accept", False)
|
||||
return bool(cfg_val)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Introspection (used by `hermes hooks` CLI)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def allowlist_entry_for(event: str, command: str) -> Optional[Dict[str, Any]]:
    """Look up the stored approval record for ``(event, command)``.

    Returns:
        The first matching dict entry from the allowlist, or ``None`` when
        the pair has not been approved.
    """
    matching = (
        entry
        for entry in load_allowlist().get("approvals", [])
        if isinstance(entry, dict)
        and entry.get("event") == event
        and entry.get("command") == command
    )
    return next(matching, None)
|
||||
|
||||
|
||||
def script_mtime_iso(command: str) -> Optional[str]:
    """Modification time of the command's script as an ISO-8601 UTC string.

    The script is the token chosen by ``_command_script_path``, with ``~``
    expanded.

    Returns:
        The mtime with a trailing ``Z``, or ``None`` when no script path
        can be derived or reading the mtime fails with ``OSError`` (e.g.
        the script is missing).
    """
    script = _command_script_path(command)
    if not script:
        return None
    try:
        resolved = os.path.expanduser(script)
        modified = datetime.fromtimestamp(
            os.path.getmtime(resolved), tz=timezone.utc,
        )
        return modified.isoformat().replace("+00:00", "Z")
    except OSError:
        return None
|
||||
|
||||
|
||||
def script_is_executable(command: str) -> bool:
    """Report whether ``command`` is runnable as configured.

    When the script is itself the first token (``/path/hook.sh``) it must
    be executable.  When an interpreter comes first (``python3
    /path/hook.py``, ``/usr/bin/env bash hook.sh``) the script only has to
    be readable, since the interpreter does not need the ``X_OK`` bit.

    Returns:
        ``False`` when no script path can be derived, the path is not a
        regular file, the command cannot be tokenised, or the required
        access check fails; ``True`` otherwise.
    """
    script = _command_script_path(command)
    if not script:
        return False

    resolved = os.path.expanduser(script)
    if not os.path.isfile(resolved):
        return False

    try:
        argv = shlex.split(command)
    except ValueError:
        return False

    if argv and argv[0] == script:
        needed = os.X_OK  # bare invocation: the OS executes the script itself
    else:
        needed = os.R_OK  # interpreter-prefixed: the script is only read
    return os.access(resolved, needed)
|
||||
|
||||
|
||||
def run_once(
    spec: ShellHookSpec, kwargs: Dict[str, Any],
) -> Dict[str, Any]:
    """Run one hook invocation with a synthetic payload.

    Used by ``hermes hooks test`` and ``hermes hooks doctor``.

    ``kwargs`` is the same dict that :func:`hermes_cli.plugins.invoke_hook`
    would pass at runtime.  It goes through :func:`_serialize_payload` so
    the synthetic stdin matches what a real firing produces — otherwise
    scripts checked via ``hermes hooks test`` could silently diverge from
    production behaviour.

    Returns:
        The :func:`_spawn` diagnostic dict with an added ``parsed`` field
        holding the canonical Hermes-wire-shape response.
    """
    outcome = _spawn(spec, _serialize_payload(spec.event, kwargs))
    outcome["parsed"] = _parse_response(spec.event, outcome["stdout"])
    return outcome
|
||||
+3
-134
@@ -8,7 +8,6 @@ can invoke skills via /skill-name commands and prompt-only built-ins like
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
@@ -23,110 +22,6 @@ _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
|
||||
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
|
||||
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
|
||||
|
||||
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
|
||||
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
|
||||
# left as-is so the user can debug them.
|
||||
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
|
||||
|
||||
# Matches inline shell snippets like: !`date +%Y-%m-%d`
|
||||
# Non-greedy, single-line only — no newlines inside the backticks.
|
||||
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
|
||||
|
||||
# Cap inline-shell output so a runaway command can't blow out the context.
|
||||
_INLINE_SHELL_MAX_OUTPUT = 4000
|
||||
|
||||
|
||||
def _load_skills_config() -> dict:
    """Best-effort read of the ``skills`` section of config.yaml.

    Returns:
        The section when it is a dict; otherwise ``{}`` — including when
        loading the config raises, which is logged at debug level.
    """
    try:
        from hermes_cli.config import load_config

        section = (load_config() or {}).get("skills")
        if isinstance(section, dict):
            return section
        return {}
    except Exception:
        logger.debug("Could not read skills config", exc_info=True)
        return {}
|
||||
|
||||
|
||||
def _substitute_template_vars(
    content: str,
    skill_dir: Path | None,
    session_id: str | None,
) -> str:
    """Expand ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in ``content``.

    A token is replaced only when a concrete value for it is available;
    otherwise it is left untouched so the skill author can spot it.

    Args:
        content: Skill text; returned as-is when empty.
        skill_dir: Directory substituted for ``${HERMES_SKILL_DIR}``.
        session_id: Value substituted for ``${HERMES_SESSION_ID}``.
    """
    if not content:
        return content

    dir_text = str(skill_dir) if skill_dir else None

    def _resolve(match: re.Match) -> str:
        name = match.group(1)
        if name == "HERMES_SKILL_DIR" and dir_text:
            return dir_text
        if name == "HERMES_SESSION_ID" and session_id:
            return str(session_id)
        # No value available: keep the original token text.
        return match.group(0)

    return _SKILL_TEMPLATE_RE.sub(_resolve, content)
|
||||
|
||||
|
||||
def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
    """Run one inline-shell snippet through ``bash -c`` and return its output.

    Stdout is preferred (trailing newlines removed); stderr is used only
    when stdout is empty.  Output longer than ``_INLINE_SHELL_MAX_OUTPUT``
    characters is cut and marked as truncated.

    Failures never raise: a short ``[inline-shell ...]`` marker is returned
    instead, so one bad snippet can't wreck the whole skill message.

    Args:
        command: The snippet to execute.
        cwd: Working directory for the snippet, if any.
        timeout: Timeout in seconds; values below 1 are raised to 1.
    """
    workdir = str(cwd) if cwd else None
    try:
        proc = subprocess.run(
            ["bash", "-c", command],
            cwd=workdir,
            capture_output=True,
            text=True,
            timeout=max(1, int(timeout)),
            check=False,
        )
    except subprocess.TimeoutExpired:
        return f"[inline-shell timeout after {timeout}s: {command}]"
    except FileNotFoundError:
        return "[inline-shell error: bash not found]"
    except Exception as exc:
        return f"[inline-shell error: {exc}]"

    text = (proc.stdout or "").rstrip("\n")
    if not text and proc.stderr:
        text = proc.stderr.rstrip("\n")
    if len(text) > _INLINE_SHELL_MAX_OUTPUT:
        text = text[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
    return text
|
||||
|
||||
|
||||
def _expand_inline_shell(
    content: str,
    skill_dir: Path | None,
    timeout: int,
) -> str:
    """Substitute each !`cmd` snippet in ``content`` with the command's output.

    Snippets run with the skill directory as their working directory, so
    relative paths behave the way the skill author expects.  A snippet that
    is blank after stripping is replaced by an empty string without running
    anything.
    """
    if "!`" not in content:
        return content

    def _run_match(match: re.Match) -> str:
        snippet = match.group(1).strip()
        return _run_inline_shell(snippet, skill_dir, timeout) if snippet else ""

    return _INLINE_SHELL_RE.sub(_run_match, content)
|
||||
|
||||
|
||||
def build_plan_path(
|
||||
user_instruction: str = "",
|
||||
@@ -238,36 +133,14 @@ def _build_skill_message(
|
||||
activation_note: str,
|
||||
user_instruction: str = "",
|
||||
runtime_note: str = "",
|
||||
session_id: str | None = None,
|
||||
) -> str:
|
||||
"""Format a loaded skill into a user/system message payload."""
|
||||
from tools.skills_tool import SKILLS_DIR
|
||||
|
||||
content = str(loaded_skill.get("content") or "")
|
||||
|
||||
# ── Template substitution and inline-shell expansion ──
|
||||
# Done before anything else so downstream blocks (setup notes,
|
||||
# supporting-file hints) see the expanded content.
|
||||
skills_cfg = _load_skills_config()
|
||||
if skills_cfg.get("template_vars", True):
|
||||
content = _substitute_template_vars(content, skill_dir, session_id)
|
||||
if skills_cfg.get("inline_shell", False):
|
||||
timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10)
|
||||
content = _expand_inline_shell(content, skill_dir, timeout)
|
||||
|
||||
parts = [activation_note, "", content.strip()]
|
||||
|
||||
# ── Inject the absolute skill directory so the agent can reference
|
||||
# bundled scripts without an extra skill_view() round-trip. ──
|
||||
if skill_dir:
|
||||
parts.append("")
|
||||
parts.append(f"[Skill directory: {skill_dir}]")
|
||||
parts.append(
|
||||
"Resolve any relative paths in this skill (e.g. `scripts/foo.js`, "
|
||||
"`templates/config.yaml`) against that directory, then run them "
|
||||
"with the terminal tool using the absolute path."
|
||||
)
|
||||
|
||||
# ── Inject resolved skill config values ──
|
||||
_inject_skill_config(loaded_skill, parts)
|
||||
|
||||
@@ -315,13 +188,11 @@ def _build_skill_message(
|
||||
# Skill is from an external dir — use the skill name instead
|
||||
skill_view_target = skill_dir.name
|
||||
parts.append("")
|
||||
parts.append("[This skill has supporting files:]")
|
||||
parts.append("[This skill has supporting files you can load with the skill_view tool:]")
|
||||
for sf in supporting:
|
||||
parts.append(f"- {sf} -> {skill_dir / sf}")
|
||||
parts.append(f"- {sf}")
|
||||
parts.append(
|
||||
f'\nLoad any of these with skill_view(name="{skill_view_target}", '
|
||||
f'file_path="<path>"), or run scripts directly by absolute path '
|
||||
f"(e.g. `node {skill_dir}/scripts/foo.js`)."
|
||||
f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="<path>")'
|
||||
)
|
||||
|
||||
if user_instruction:
|
||||
@@ -461,7 +332,6 @@ def build_skill_invocation_message(
|
||||
activation_note,
|
||||
user_instruction=user_instruction,
|
||||
runtime_note=runtime_note,
|
||||
session_id=task_id,
|
||||
)
|
||||
|
||||
|
||||
@@ -500,7 +370,6 @@ def build_preloaded_skills_prompt(
|
||||
loaded_skill,
|
||||
skill_dir,
|
||||
activation_note,
|
||||
session_id=task_id,
|
||||
)
|
||||
)
|
||||
loaded_names.append(skill_name)
|
||||
|
||||
@@ -435,7 +435,7 @@ def iter_skill_index_files(skills_dir: Path, filename: str):
|
||||
Excludes ``.git``, ``.github``, ``.hub`` directories.
|
||||
"""
|
||||
matches = []
|
||||
for root, dirs, files in os.walk(skills_dir, followlinks=True):
|
||||
for root, dirs, files in os.walk(skills_dir):
|
||||
dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS]
|
||||
if filename in files:
|
||||
matches.append(Path(root) / filename)
|
||||
|
||||
@@ -0,0 +1,195 @@
|
||||
"""Helpers for optional cheap-vs-strong model routing."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from utils import is_truthy_value
|
||||
|
||||
_COMPLEX_KEYWORDS = {
|
||||
"debug",
|
||||
"debugging",
|
||||
"implement",
|
||||
"implementation",
|
||||
"refactor",
|
||||
"patch",
|
||||
"traceback",
|
||||
"stacktrace",
|
||||
"exception",
|
||||
"error",
|
||||
"analyze",
|
||||
"analysis",
|
||||
"investigate",
|
||||
"architecture",
|
||||
"design",
|
||||
"compare",
|
||||
"benchmark",
|
||||
"optimize",
|
||||
"optimise",
|
||||
"review",
|
||||
"terminal",
|
||||
"shell",
|
||||
"tool",
|
||||
"tools",
|
||||
"pytest",
|
||||
"test",
|
||||
"tests",
|
||||
"plan",
|
||||
"planning",
|
||||
"delegate",
|
||||
"subagent",
|
||||
"cron",
|
||||
"docker",
|
||||
"kubernetes",
|
||||
}
|
||||
|
||||
_URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE)
|
||||
|
||||
|
||||
def _coerce_bool(value: Any, default: bool = False) -> bool:
    """Interpret ``value`` as a boolean by delegating to ``is_truthy_value``.

    ``default`` is forwarded unchanged to ``is_truthy_value``.
    """
    return is_truthy_value(value, default=default)
|
||||
|
||||
|
||||
def _coerce_int(value: Any, default: int) -> int:
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def choose_cheap_model_route(user_message: str, routing_config: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """Pick the configured cheap-model route for a message that looks simple.

    Deliberately conservative: any sign of code, tooling, debugging or
    long-form work keeps the primary model.

    Args:
        user_message: The user's text for this turn.
        routing_config: Routing settings; ``enabled``, ``cheap_model``,
            ``max_simple_chars`` and ``max_simple_words`` are read.

    Returns:
        A copy of the ``cheap_model`` mapping with normalised ``provider``
        and ``model`` plus ``routing_reason="simple_turn"``; ``None`` when
        routing is disabled or misconfigured, or the message is empty or
        does not look simple.
    """
    cfg = routing_config or {}
    if not _coerce_bool(cfg.get("enabled"), False):
        return None

    cheap_model = cfg.get("cheap_model") or {}
    if not isinstance(cheap_model, dict):
        return None
    provider = str(cheap_model.get("provider") or "").strip().lower()
    model = str(cheap_model.get("model") or "").strip()
    if not (provider and model):
        return None

    text = (user_message or "").strip()
    if not text:
        return None

    char_limit = _coerce_int(cfg.get("max_simple_chars"), 160)
    word_limit = _coerce_int(cfg.get("max_simple_words"), 28)

    # Structural signs of a non-trivial request: length, several lines,
    # inline/fenced code, or a link.
    looks_complex = (
        len(text) > char_limit
        or len(text.split()) > word_limit
        or text.count("\n") > 1
        or "```" in text
        or "`" in text
        or bool(_URL_RE.search(text))
    )
    if looks_complex:
        return None

    # Keyword check on lower-cased words with surrounding punctuation removed.
    tokens = {piece.strip(".,:;!?()[]{}\"'`") for piece in text.lower().split()}
    if not tokens.isdisjoint(_COMPLEX_KEYWORDS):
        return None

    return {
        **cheap_model,
        "provider": provider,
        "model": model,
        "routing_reason": "simple_turn",
    }
|
||||
|
||||
|
||||
def _primary_turn_route(primary: Dict[str, Any]) -> Dict[str, Any]:
    """Build the un-routed result: the primary model/runtime with no label."""
    return {
        "model": primary.get("model"),
        "runtime": {
            "api_key": primary.get("api_key"),
            "base_url": primary.get("base_url"),
            "provider": primary.get("provider"),
            "api_mode": primary.get("api_mode"),
            "command": primary.get("command"),
            "args": list(primary.get("args") or []),
            "credential_pool": primary.get("credential_pool"),
        },
        "label": None,
        "signature": (
            primary.get("model"),
            primary.get("provider"),
            primary.get("base_url"),
            primary.get("api_mode"),
            primary.get("command"),
            tuple(primary.get("args") or ()),
        ),
    }


def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any]], primary: Dict[str, Any]) -> Dict[str, Any]:
    """Resolve the effective model/runtime for one turn.

    Args:
        user_message: The user's text for this turn.
        routing_config: Cheap-model routing settings (may be ``None``).
        primary: The primary model/runtime settings used when no cheap
            route applies.

    Returns:
        A dict with ``model``, ``runtime``, ``label`` and ``signature``
        fields.  The primary settings (with ``label=None``) are returned
        when no cheap route is chosen or when resolving the cheap route's
        runtime provider raises.
    """
    route = choose_cheap_model_route(user_message, routing_config)
    if not route:
        return _primary_turn_route(primary)

    from hermes_cli.runtime_provider import resolve_runtime_provider

    # An API key is passed explicitly only when the route names an env var
    # that is actually set to a non-empty value.
    explicit_api_key = None
    api_key_env = str(route.get("api_key_env") or "").strip()
    if api_key_env:
        explicit_api_key = os.getenv(api_key_env) or None

    try:
        runtime = resolve_runtime_provider(
            requested=route.get("provider"),
            explicit_api_key=explicit_api_key,
            explicit_base_url=route.get("base_url"),
        )
    except Exception:
        # Deliberate best-effort: any failure resolving the cheap route
        # falls back to the primary model.
        return _primary_turn_route(primary)

    return {
        "model": route.get("model"),
        "runtime": {
            "api_key": runtime.get("api_key"),
            "base_url": runtime.get("base_url"),
            "provider": runtime.get("provider"),
            "api_mode": runtime.get("api_mode"),
            "command": runtime.get("command"),
            "args": list(runtime.get("args") or []),
            "credential_pool": runtime.get("credential_pool"),
        },
        "label": f"smart route → {route.get('model')} ({runtime.get('provider')})",
        "signature": (
            route.get("model"),
            runtime.get("provider"),
            runtime.get("base_url"),
            runtime.get("api_mode"),
            runtime.get("command"),
            tuple(runtime.get("args") or ()),
        ),
    }
|
||||
@@ -38,7 +38,7 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =
|
||||
response = call_llm(
|
||||
task="title_generation",
|
||||
messages=messages,
|
||||
max_tokens=500,
|
||||
max_tokens=30,
|
||||
temperature=0.3,
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
"""Transport layer types and registry for provider response normalization.
|
||||
|
||||
Usage:
|
||||
from agent.transports import get_transport
|
||||
transport = get_transport("anthropic_messages")
|
||||
result = transport.normalize_response(raw_response)
|
||||
"""
|
||||
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401
|
||||
|
||||
# api_mode string -> transport class.
_REGISTRY: dict = {}

# Flipped the first time discovery runs, so repeated lookups of an unknown
# api_mode do not keep re-attempting the imports.
_DISCOVERY_ATTEMPTED = False

# Modules imported by _discover_transports() to trigger auto-registration.
_TRANSPORT_MODULES = (
    "agent.transports.anthropic",
    "agent.transports.codex",
    "agent.transports.chat_completions",
    "agent.transports.bedrock",
)


def register_transport(api_mode: str, transport_cls: type) -> None:
    """Register a transport class for an api_mode string.

    A later registration for the same ``api_mode`` replaces the earlier one.
    """
    _REGISTRY[api_mode] = transport_cls


def get_transport(api_mode: str):
    """Get a transport instance for the given api_mode.

    Returns None if no transport is registered for this api_mode.
    This allows gradual migration — call sites can check for None
    and fall back to the legacy code path.

    Discovery runs (once) on the first lookup miss rather than only when the
    registry is empty: if a single transport module was imported directly,
    the registry is already non-empty, and gating on emptiness would leave
    every other transport undiscovered.
    """
    global _DISCOVERY_ATTEMPTED

    cls = _REGISTRY.get(api_mode)
    if cls is None and not _DISCOVERY_ATTEMPTED:
        # Set the flag first so a re-entrant lookup during import cannot recurse.
        _DISCOVERY_ATTEMPTED = True
        _discover_transports()
        cls = _REGISTRY.get(api_mode)
    if cls is None:
        return None
    return cls()


def _discover_transports() -> None:
    """Import all transport modules to trigger auto-registration.

    A module that fails to import is skipped, so one unavailable transport
    does not prevent the others from registering.
    """
    import importlib

    for module_name in _TRANSPORT_MODULES:
        try:
            importlib.import_module(module_name)
        except ImportError:
            pass
|
||||
@@ -1,177 +0,0 @@
|
||||
"""Anthropic Messages API transport.
|
||||
|
||||
Delegates to the existing adapter functions in agent/anthropic_adapter.py.
|
||||
This transport owns format conversion and normalization — NOT client lifecycle.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse
|
||||
|
||||
|
||||
class AnthropicTransport(ProviderTransport):
    """Transport for api_mode='anthropic_messages'.

    Wraps the existing functions in anthropic_adapter.py behind the
    ProviderTransport ABC.  Conversion and kwargs building delegate to the
    adapter; response normalization is implemented here.
    """

    # Anthropic stop_reason -> OpenAI finish_reason.  Class-level so that
    # normalize_response() and map_finish_reason() share a single table.
    _STOP_REASON_MAP = {
        "end_turn": "stop",
        "tool_use": "tool_calls",
        "max_tokens": "length",
        "stop_sequence": "stop",
        "refusal": "content_filter",
        "model_context_window_exceeded": "length",
    }

    @property
    def api_mode(self) -> str:
        return "anthropic_messages"

    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
        """Convert OpenAI messages to Anthropic (system, messages) tuple.

        kwargs:
            base_url: Optional[str] — affects thinking signature handling.
        """
        from agent.anthropic_adapter import convert_messages_to_anthropic

        base_url = kwargs.get("base_url")
        return convert_messages_to_anthropic(messages, base_url=base_url)

    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
        """Convert OpenAI tool schemas to Anthropic input_schema format."""
        from agent.anthropic_adapter import convert_tools_to_anthropic

        return convert_tools_to_anthropic(tools)

    def build_kwargs(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        **params,
    ) -> Dict[str, Any]:
        """Build Anthropic messages.create() kwargs.

        Delegates to ``build_anthropic_kwargs`` in the adapter.

        params (all optional):
            max_tokens: int (default 16384)
            reasoning_config: dict | None
            tool_choice: str | None
            is_oauth: bool (default False)
            preserve_dots: bool (default False)
            context_length: int | None
            base_url: str | None
            fast_mode: bool (default False)
        """
        from agent.anthropic_adapter import build_anthropic_kwargs

        return build_anthropic_kwargs(
            model=model,
            messages=messages,
            tools=tools,
            max_tokens=params.get("max_tokens", 16384),
            reasoning_config=params.get("reasoning_config"),
            tool_choice=params.get("tool_choice"),
            is_oauth=params.get("is_oauth", False),
            preserve_dots=params.get("preserve_dots", False),
            context_length=params.get("context_length"),
            base_url=params.get("base_url"),
            fast_mode=params.get("fast_mode", False),
        )

    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
        """Normalize Anthropic response to NormalizedResponse.

        Parses content blocks (text, thinking, tool_use), maps stop_reason
        to OpenAI finish_reason, and collects reasoning_details in provider_data.

        kwargs:
            strip_tool_prefix: bool — when true, a leading "mcp_" is removed
                from tool names (default False).
        """
        import json
        from agent.anthropic_adapter import _to_plain_data
        from agent.transports.types import ToolCall

        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
        _MCP_PREFIX = "mcp_"

        text_parts = []
        reasoning_parts = []
        reasoning_details = []
        tool_calls = []

        for block in response.content:
            if block.type == "text":
                text_parts.append(block.text)
            elif block.type == "thinking":
                reasoning_parts.append(block.thinking)
                # Keep the full block (only when it converts to a dict) so it
                # can be surfaced via provider_data["reasoning_details"].
                block_dict = _to_plain_data(block)
                if isinstance(block_dict, dict):
                    reasoning_details.append(block_dict)
            elif block.type == "tool_use":
                name = block.name
                if strip_tool_prefix and name.startswith(_MCP_PREFIX):
                    name = name[len(_MCP_PREFIX):]
                tool_calls.append(
                    ToolCall(
                        id=block.id,
                        name=name,
                        # Arguments are carried as a JSON string.
                        arguments=json.dumps(block.input),
                    )
                )

        finish_reason = self.map_finish_reason(response.stop_reason)

        provider_data = {}
        if reasoning_details:
            provider_data["reasoning_details"] = reasoning_details

        return NormalizedResponse(
            content="\n".join(text_parts) if text_parts else None,
            tool_calls=tool_calls or None,
            finish_reason=finish_reason,
            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
            usage=None,
            provider_data=provider_data or None,
        )

    def validate_response(self, response: Any) -> bool:
        """Check Anthropic response structure is valid.

        An empty content list is legitimate when ``stop_reason == "end_turn"``
        — the model's canonical way of signalling "nothing more to add" after
        a tool turn that already delivered the user-facing text.  Treating it
        as invalid falsely retries a completed response.
        """
        if response is None:
            return False
        content_blocks = getattr(response, "content", None)
        if not isinstance(content_blocks, list):
            return False
        if not content_blocks:
            return getattr(response, "stop_reason", None) == "end_turn"
        return True

    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
        """Extract Anthropic cache_read and cache_creation token counts.

        Returns None when the response has no usage or both counts are zero.
        """
        usage = getattr(response, "usage", None)
        if usage is None:
            return None
        cached = getattr(usage, "cache_read_input_tokens", 0) or 0
        written = getattr(usage, "cache_creation_input_tokens", 0) or 0
        if cached or written:
            return {"cached_tokens": cached, "creation_tokens": written}
        return None

    def map_finish_reason(self, raw_reason: str) -> str:
        """Map Anthropic stop_reason to OpenAI finish_reason.

        Unknown (or None) reasons map to "stop".
        """
        return self._STOP_REASON_MAP.get(raw_reason, "stop")
|
||||
|
||||
|
||||
# Auto-register on import
|
||||
from agent.transports import register_transport # noqa: E402
|
||||
|
||||
register_transport("anthropic_messages", AnthropicTransport)
|
||||
@@ -1,89 +0,0 @@
|
||||
"""Abstract base for provider transports.
|
||||
|
||||
A transport owns the data path for one api_mode:
|
||||
convert_messages → convert_tools → build_kwargs → normalize_response
|
||||
|
||||
It does NOT own: client construction, streaming, credential refresh,
|
||||
prompt caching, interrupt handling, or retry logic. Those stay on AIAgent.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.transports.types import NormalizedResponse
|
||||
|
||||
|
||||
class ProviderTransport(ABC):
    """Base class for provider-specific format conversion and normalization.

    Subclasses must implement ``api_mode``, ``convert_messages``,
    ``convert_tools``, ``build_kwargs`` and ``normalize_response``.  The
    remaining methods are optional hooks with pass-through defaults.
    """

    @property
    @abstractmethod
    def api_mode(self) -> str:
        """The api_mode string this transport handles (e.g. 'anthropic_messages')."""
        ...

    @abstractmethod
    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
        """Convert OpenAI-format messages to provider-native format.

        Returns provider-specific structure (e.g. (system, messages) for Anthropic,
        or the messages list unchanged for chat_completions).
        """
        ...

    @abstractmethod
    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
        """Convert OpenAI-format tool definitions to provider-native format.

        Returns provider-specific tool list (e.g. Anthropic input_schema format).
        """
        ...

    @abstractmethod
    def build_kwargs(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        **params,
    ) -> Dict[str, Any]:
        """Build the complete API call kwargs dict.

        This is the primary entry point — it typically calls convert_messages()
        and convert_tools() internally, then adds model-specific config.

        Returns a dict ready to be passed to the provider's SDK client.
        """
        ...

    @abstractmethod
    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
        """Normalize a raw provider response to the shared NormalizedResponse type.

        This is the only method that returns a transport-layer type.
        """
        ...

    def validate_response(self, response: Any) -> bool:
        """Optional: check if the raw response is structurally valid.

        Returns True if valid, False if the response should be treated as invalid.
        Default implementation always returns True.
        """
        return True

    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
        """Optional: extract provider-specific cache hit/creation stats.

        Returns dict with 'cached_tokens' and 'creation_tokens', or None.
        Default returns None.
        """
        return None

    def map_finish_reason(self, raw_reason: str) -> str:
        """Optional: map provider-specific stop reason to OpenAI equivalent.

        Default returns the raw reason unchanged.  Override for providers
        with different stop reason vocabularies.
        """
        return raw_reason
|
||||
@@ -1,154 +0,0 @@
|
||||
"""AWS Bedrock Converse API transport.
|
||||
|
||||
Delegates to the existing adapter functions in agent/bedrock_adapter.py.
|
||||
Bedrock uses its own boto3 client (not the OpenAI SDK), so the transport
|
||||
owns format conversion and normalization, while client construction and
|
||||
boto3 calls stay on AIAgent.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
|
||||
|
||||
class BedrockTransport(ProviderTransport):
    """Transport for api_mode='bedrock_converse'."""

    # Bedrock stop reason -> OpenAI finish_reason.  Class-level so the table
    # is built once instead of on every map_finish_reason() call.
    _STOP_REASON_MAP = {
        "end_turn": "stop",
        "tool_use": "tool_calls",
        "max_tokens": "length",
        "stop_sequence": "stop",
        "guardrail_intervened": "content_filter",
        "content_filtered": "content_filter",
    }

    @property
    def api_mode(self) -> str:
        return "bedrock_converse"

    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
        """Convert OpenAI messages to Bedrock Converse format."""
        from agent.bedrock_adapter import convert_messages_to_converse

        return convert_messages_to_converse(messages)

    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
        """Convert OpenAI tool schemas to Bedrock Converse toolConfig."""
        from agent.bedrock_adapter import convert_tools_to_converse

        return convert_tools_to_converse(tools)

    def build_kwargs(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        **params,
    ) -> Dict[str, Any]:
        """Build Bedrock converse() kwargs.

        Delegates to ``build_converse_kwargs`` in the adapter, then adds two
        sentinel keys used for dispatch.

        params:
            max_tokens: int — output token limit (default 4096)
            temperature: float | None
            guardrail_config: dict | None — Bedrock guardrails
            region: str — AWS region (default 'us-east-1')
        """
        from agent.bedrock_adapter import build_converse_kwargs

        region = params.get("region", "us-east-1")
        guardrail = params.get("guardrail_config")

        kwargs = build_converse_kwargs(
            model=model,
            messages=messages,
            tools=tools,
            max_tokens=params.get("max_tokens", 4096),
            temperature=params.get("temperature"),
            guardrail_config=guardrail,
        )
        # Sentinel keys for dispatch — agent pops these before the boto3 call
        kwargs["__bedrock_converse__"] = True
        kwargs["__bedrock_region__"] = region
        return kwargs

    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
        """Normalize Bedrock response to NormalizedResponse.

        Handles two shapes:
          1. Raw boto3 dict (from direct converse() calls)
          2. Already-normalized SimpleNamespace with .choices (from dispatch site)
        """
        from agent.bedrock_adapter import normalize_converse_response

        # Normalize to OpenAI-compatible SimpleNamespace
        if hasattr(response, "choices") and response.choices:
            # Already normalized at dispatch site
            ns = response
        else:
            # Raw boto3 dict
            ns = normalize_converse_response(response)

        choice = ns.choices[0]
        msg = choice.message
        finish_reason = choice.finish_reason or "stop"

        tool_calls = None
        if msg.tool_calls:
            tool_calls = [
                ToolCall(
                    id=tc.id,
                    name=tc.function.name,
                    arguments=tc.function.arguments,
                )
                for tc in msg.tool_calls
            ]

        usage = None
        if hasattr(ns, "usage") and ns.usage:
            u = ns.usage
            # Missing or None counters are reported as 0.
            usage = Usage(
                prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
                completion_tokens=getattr(u, "completion_tokens", 0) or 0,
                total_tokens=getattr(u, "total_tokens", 0) or 0,
            )

        # Prefer ``reasoning``; fall back to ``reasoning_content`` if unset/empty.
        reasoning = getattr(msg, "reasoning", None) or getattr(msg, "reasoning_content", None)

        return NormalizedResponse(
            content=msg.content,
            tool_calls=tool_calls,
            finish_reason=finish_reason,
            reasoning=reasoning,
            usage=usage,
        )

    def validate_response(self, response: Any) -> bool:
        """Check Bedrock response structure.

        A raw dict is valid when it has an 'output' key; an object with
        ``.choices`` is valid when that list is non-empty.  Anything else
        (including None) is invalid.
        """
        if response is None:
            return False
        # Raw Bedrock dict response — check for 'output' key
        if isinstance(response, dict):
            return "output" in response
        # Already-normalized SimpleNamespace
        if hasattr(response, "choices"):
            return bool(response.choices)
        return False

    def map_finish_reason(self, raw_reason: str) -> str:
        """Map Bedrock stop reason to OpenAI finish_reason.

        The adapter already does this mapping inside normalize_converse_response,
        so this is only used for direct access to raw responses.  Unknown
        reasons map to "stop".
        """
        return self._STOP_REASON_MAP.get(raw_reason, "stop")
|
||||
|
||||
|
||||
# Auto-register on import
|
||||
from agent.transports import register_transport # noqa: E402
|
||||
|
||||
register_transport("bedrock_converse", BedrockTransport)
|
||||
@@ -1,387 +0,0 @@
|
||||
"""OpenAI Chat Completions transport.
|
||||
|
||||
Handles the default api_mode ('chat_completions') used by ~16 OpenAI-compatible
|
||||
providers (OpenRouter, Nous, NVIDIA, Qwen, Ollama, DeepSeek, xAI, Kimi, etc.).
|
||||
|
||||
Messages and tools are already in OpenAI format — convert_messages and
|
||||
convert_tools are near-identity. The complexity lives in build_kwargs
|
||||
which has provider-specific conditionals for max_tokens defaults,
|
||||
reasoning configuration, temperature handling, and extra_body assembly.
|
||||
"""
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.prompt_builder import DEVELOPER_ROLE_MODELS
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
|
||||
|
||||
class ChatCompletionsTransport(ProviderTransport):
|
||||
"""Transport for api_mode='chat_completions'.
|
||||
|
||||
The default path for OpenAI-compatible providers.
|
||||
"""
|
||||
|
||||
@property
|
||||
def api_mode(self) -> str:
|
||||
return "chat_completions"
|
||||
|
||||
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
|
||||
"""Messages are already in OpenAI format — sanitize Codex leaks only.
|
||||
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` on the
|
||||
message, ``call_id``/``response_item_id`` on tool_calls) that strict
|
||||
chat-completions providers reject with 400/422.
|
||||
"""
|
||||
needs_sanitize = False
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
if "codex_reasoning_items" in msg:
|
||||
needs_sanitize = True
|
||||
break
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
|
||||
needs_sanitize = True
|
||||
break
|
||||
if needs_sanitize:
|
||||
break
|
||||
|
||||
if not needs_sanitize:
|
||||
return messages
|
||||
|
||||
sanitized = copy.deepcopy(messages)
|
||||
for msg in sanitized:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
msg.pop("codex_reasoning_items", None)
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
if isinstance(tc, dict):
|
||||
tc.pop("call_id", None)
|
||||
tc.pop("response_item_id", None)
|
||||
return sanitized
|
||||
|
||||
def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Tools are already in OpenAI format — identity."""
|
||||
return tools
|
||||
|
||||
def build_kwargs(
|
||||
self,
|
||||
model: str,
|
||||
messages: List[Dict[str, Any]],
|
||||
tools: Optional[List[Dict[str, Any]]] = None,
|
||||
**params,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build chat.completions.create() kwargs.
|
||||
|
||||
This is the most complex transport method — it handles ~16 providers
|
||||
via params rather than subclasses.
|
||||
|
||||
params:
|
||||
timeout: float — API call timeout
|
||||
max_tokens: int | None — user-configured max tokens
|
||||
ephemeral_max_output_tokens: int | None — one-shot override (error recovery)
|
||||
max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
|
||||
reasoning_config: dict | None
|
||||
request_overrides: dict | None
|
||||
session_id: str | None
|
||||
qwen_session_metadata: dict | None — {sessionId, promptId} precomputed
|
||||
model_lower: str — lowercase model name for pattern matching
|
||||
# Provider detection flags (all optional, default False)
|
||||
is_openrouter: bool
|
||||
is_nous: bool
|
||||
is_qwen_portal: bool
|
||||
is_github_models: bool
|
||||
is_nvidia_nim: bool
|
||||
is_kimi: bool
|
||||
is_custom_provider: bool
|
||||
ollama_num_ctx: int | None
|
||||
# Provider routing
|
||||
provider_preferences: dict | None
|
||||
# Qwen-specific
|
||||
qwen_prepare_fn: callable | None — runs AFTER codex sanitization
|
||||
qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
|
||||
# Temperature
|
||||
fixed_temperature: Any — from _fixed_temperature_for_model()
|
||||
omit_temperature: bool
|
||||
# Reasoning
|
||||
supports_reasoning: bool
|
||||
github_reasoning_extra: dict | None
|
||||
# Claude on OpenRouter/Nous max output
|
||||
anthropic_max_output: int | None
|
||||
# Extra
|
||||
extra_body_additions: dict | None — pre-built extra_body entries
|
||||
"""
|
||||
# Codex sanitization: drop reasoning_items / call_id / response_item_id
|
||||
sanitized = self.convert_messages(messages)
|
||||
|
||||
# Qwen portal prep AFTER codex sanitization. If sanitize already
|
||||
# deepcopied, reuse that copy via the in-place variant to avoid a
|
||||
# second deepcopy.
|
||||
is_qwen = params.get("is_qwen_portal", False)
|
||||
if is_qwen:
|
||||
qwen_prep = params.get("qwen_prepare_fn")
|
||||
qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
|
||||
if sanitized is messages:
|
||||
if qwen_prep is not None:
|
||||
sanitized = qwen_prep(sanitized)
|
||||
else:
|
||||
# Already deepcopied — transform in place
|
||||
if qwen_prep_inplace is not None:
|
||||
qwen_prep_inplace(sanitized)
|
||||
elif qwen_prep is not None:
|
||||
sanitized = qwen_prep(sanitized)
|
||||
|
||||
# Developer role swap for GPT-5/Codex models
|
||||
model_lower = params.get("model_lower", (model or "").lower())
|
||||
if (
|
||||
sanitized
|
||||
and isinstance(sanitized[0], dict)
|
||||
and sanitized[0].get("role") == "system"
|
||||
and any(p in model_lower for p in DEVELOPER_ROLE_MODELS)
|
||||
):
|
||||
sanitized = list(sanitized)
|
||||
sanitized[0] = {**sanitized[0], "role": "developer"}
|
||||
|
||||
api_kwargs: Dict[str, Any] = {
|
||||
"model": model,
|
||||
"messages": sanitized,
|
||||
}
|
||||
|
||||
timeout = params.get("timeout")
|
||||
if timeout is not None:
|
||||
api_kwargs["timeout"] = timeout
|
||||
|
||||
# Temperature
|
||||
fixed_temp = params.get("fixed_temperature")
|
||||
omit_temp = params.get("omit_temperature", False)
|
||||
if omit_temp:
|
||||
api_kwargs.pop("temperature", None)
|
||||
elif fixed_temp is not None:
|
||||
api_kwargs["temperature"] = fixed_temp
|
||||
|
||||
# Qwen metadata (caller precomputes {sessionId, promptId})
|
||||
qwen_meta = params.get("qwen_session_metadata")
|
||||
if qwen_meta and is_qwen:
|
||||
api_kwargs["metadata"] = qwen_meta
|
||||
|
||||
# Tools
|
||||
if tools:
|
||||
api_kwargs["tools"] = tools
|
||||
|
||||
# max_tokens resolution — priority: ephemeral > user > provider default
|
||||
max_tokens_fn = params.get("max_tokens_param_fn")
|
||||
ephemeral = params.get("ephemeral_max_output_tokens")
|
||||
max_tokens = params.get("max_tokens")
|
||||
anthropic_max_out = params.get("anthropic_max_output")
|
||||
is_nvidia_nim = params.get("is_nvidia_nim", False)
|
||||
is_kimi = params.get("is_kimi", False)
|
||||
reasoning_config = params.get("reasoning_config")
|
||||
|
||||
if ephemeral is not None and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(ephemeral))
|
||||
elif max_tokens is not None and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(max_tokens))
|
||||
elif is_nvidia_nim and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(16384))
|
||||
elif is_qwen and max_tokens_fn:
|
||||
api_kwargs.update(max_tokens_fn(65536))
|
||||
elif is_kimi and max_tokens_fn:
|
||||
# Kimi/Moonshot: 32000 matches Kimi CLI's default
|
||||
api_kwargs.update(max_tokens_fn(32000))
|
||||
elif anthropic_max_out is not None:
|
||||
api_kwargs["max_tokens"] = anthropic_max_out
|
||||
|
||||
# Kimi: top-level reasoning_effort (unless thinking disabled)
|
||||
if is_kimi:
|
||||
_kimi_thinking_off = bool(
|
||||
reasoning_config
|
||||
and isinstance(reasoning_config, dict)
|
||||
and reasoning_config.get("enabled") is False
|
||||
)
|
||||
if not _kimi_thinking_off:
|
||||
_kimi_effort = "medium"
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
_e = (reasoning_config.get("effort") or "").strip().lower()
|
||||
if _e in ("low", "medium", "high"):
|
||||
_kimi_effort = _e
|
||||
api_kwargs["reasoning_effort"] = _kimi_effort
|
||||
|
||||
# extra_body assembly
|
||||
extra_body: Dict[str, Any] = {}
|
||||
|
||||
is_openrouter = params.get("is_openrouter", False)
|
||||
is_nous = params.get("is_nous", False)
|
||||
is_github_models = params.get("is_github_models", False)
|
||||
|
||||
provider_prefs = params.get("provider_preferences")
|
||||
if provider_prefs and is_openrouter:
|
||||
extra_body["provider"] = provider_prefs
|
||||
|
||||
# Kimi extra_body.thinking
|
||||
if is_kimi:
|
||||
_kimi_thinking_enabled = True
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
if reasoning_config.get("enabled") is False:
|
||||
_kimi_thinking_enabled = False
|
||||
extra_body["thinking"] = {
|
||||
"type": "enabled" if _kimi_thinking_enabled else "disabled",
|
||||
}
|
||||
|
||||
# Reasoning
|
||||
if params.get("supports_reasoning", False):
|
||||
if is_github_models:
|
||||
gh_reasoning = params.get("github_reasoning_extra")
|
||||
if gh_reasoning is not None:
|
||||
extra_body["reasoning"] = gh_reasoning
|
||||
else:
|
||||
if reasoning_config is not None:
|
||||
rc = dict(reasoning_config)
|
||||
if is_nous and rc.get("enabled") is False:
|
||||
pass # omit for Nous when disabled
|
||||
else:
|
||||
extra_body["reasoning"] = rc
|
||||
else:
|
||||
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
|
||||
|
||||
if is_nous:
|
||||
extra_body["tags"] = ["product=hermes-agent"]
|
||||
|
||||
# Ollama num_ctx
|
||||
ollama_ctx = params.get("ollama_num_ctx")
|
||||
if ollama_ctx:
|
||||
options = extra_body.get("options", {})
|
||||
options["num_ctx"] = ollama_ctx
|
||||
extra_body["options"] = options
|
||||
|
||||
# Ollama/custom think=false
|
||||
if params.get("is_custom_provider", False):
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
_effort = (reasoning_config.get("effort") or "").strip().lower()
|
||||
_enabled = reasoning_config.get("enabled", True)
|
||||
if _effort == "none" or _enabled is False:
|
||||
extra_body["think"] = False
|
||||
|
||||
if is_qwen:
|
||||
extra_body["vl_high_resolution_images"] = True
|
||||
|
||||
# Merge any pre-built extra_body additions
|
||||
additions = params.get("extra_body_additions")
|
||||
if additions:
|
||||
extra_body.update(additions)
|
||||
|
||||
if extra_body:
|
||||
api_kwargs["extra_body"] = extra_body
|
||||
|
||||
# Request overrides last (service_tier etc.)
|
||||
overrides = params.get("request_overrides")
|
||||
if overrides:
|
||||
api_kwargs.update(overrides)
|
||||
|
||||
return api_kwargs
|
||||
|
||||
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
|
||||
"""Normalize OpenAI ChatCompletion to NormalizedResponse.
|
||||
|
||||
For chat_completions, this is near-identity — the response is already
|
||||
in OpenAI format. extra_content on tool_calls (Gemini thought_signature)
|
||||
is preserved via ToolCall.provider_data. reasoning_details (OpenRouter
|
||||
unified format) and reasoning_content (DeepSeek/Moonshot) are also
|
||||
preserved for downstream replay.
|
||||
"""
|
||||
choice = response.choices[0]
|
||||
msg = choice.message
|
||||
finish_reason = choice.finish_reason or "stop"
|
||||
|
||||
tool_calls = None
|
||||
if msg.tool_calls:
|
||||
tool_calls = []
|
||||
for tc in msg.tool_calls:
|
||||
# Preserve provider-specific extras on the tool call.
|
||||
# Gemini 3 thinking models attach extra_content with
|
||||
# thought_signature — without replay on the next turn the API
|
||||
# rejects the request with 400.
|
||||
tc_provider_data: Dict[str, Any] = {}
|
||||
extra = getattr(tc, "extra_content", None)
|
||||
if extra is None and hasattr(tc, "model_extra"):
|
||||
extra = (tc.model_extra or {}).get("extra_content")
|
||||
if extra is not None:
|
||||
if hasattr(extra, "model_dump"):
|
||||
try:
|
||||
extra = extra.model_dump()
|
||||
except Exception:
|
||||
pass
|
||||
tc_provider_data["extra_content"] = extra
|
||||
tool_calls.append(ToolCall(
|
||||
id=tc.id,
|
||||
name=tc.function.name,
|
||||
arguments=tc.function.arguments,
|
||||
provider_data=tc_provider_data or None,
|
||||
))
|
||||
|
||||
usage = None
|
||||
if hasattr(response, "usage") and response.usage:
|
||||
u = response.usage
|
||||
usage = Usage(
|
||||
prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
|
||||
completion_tokens=getattr(u, "completion_tokens", 0) or 0,
|
||||
total_tokens=getattr(u, "total_tokens", 0) or 0,
|
||||
)
|
||||
|
||||
# Preserve reasoning fields separately. DeepSeek/Moonshot use
|
||||
# ``reasoning_content``; others use ``reasoning``. Downstream code
|
||||
# (_extract_reasoning, thinking-prefill retry) reads both distinctly,
|
||||
# so keep them apart in provider_data rather than merging.
|
||||
reasoning = getattr(msg, "reasoning", None)
|
||||
reasoning_content = getattr(msg, "reasoning_content", None)
|
||||
|
||||
provider_data: Dict[str, Any] = {}
|
||||
if reasoning_content:
|
||||
provider_data["reasoning_content"] = reasoning_content
|
||||
rd = getattr(msg, "reasoning_details", None)
|
||||
if rd:
|
||||
provider_data["reasoning_details"] = rd
|
||||
|
||||
return NormalizedResponse(
|
||||
content=msg.content,
|
||||
tool_calls=tool_calls,
|
||||
finish_reason=finish_reason,
|
||||
reasoning=reasoning,
|
||||
usage=usage,
|
||||
provider_data=provider_data or None,
|
||||
)
|
||||
|
||||
def validate_response(self, response: Any) -> bool:
    """Report whether ``response`` carries at least one choice.

    A ``None`` response, a missing or ``None`` ``choices`` attribute, and an
    empty ``choices`` collection are all treated as invalid.
    """
    # getattr with a default folds the None-response, missing-attribute and
    # None-valued cases together; truthiness then rejects an empty collection.
    return bool(getattr(response, "choices", None))
|
||||
|
||||
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
    """Read prompt-cache counters from ``response.usage.prompt_tokens_details``.

    Returns ``{"cached_tokens": ..., "creation_tokens": ...}`` when at least
    one counter is non-zero; ``creation_tokens`` is taken from the
    ``cache_write_tokens`` attribute. Returns ``None`` when the response has
    no usage, the usage has no prompt-token details, or both counters are
    zero or missing.
    """
    # A missing ``usage`` collapses to None here, which in turn yields None
    # for the details lookup.
    token_details = getattr(
        getattr(response, "usage", None), "prompt_tokens_details", None
    )
    if token_details is None:
        return None

    # ``or 0`` normalises attributes that are present but set to None.
    read_count = getattr(token_details, "cached_tokens", 0) or 0
    write_count = getattr(token_details, "cache_write_tokens", 0) or 0
    if not (read_count or write_count):
        return None
    return {"cached_tokens": read_count, "creation_tokens": write_count}
|
||||
|
||||
|
||||
# Auto-register on import
|
||||
from agent.transports import register_transport # noqa: E402
|
||||
|
||||
register_transport("chat_completions", ChatCompletionsTransport)
|
||||
@@ -1,217 +0,0 @@
|
||||
"""OpenAI Responses API (Codex) transport.
|
||||
|
||||
Delegates to the existing adapter functions in agent/codex_responses_adapter.py.
|
||||
This transport owns format conversion and normalization — NOT client lifecycle,
|
||||
streaming, or the _run_codex_stream() call path.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
|
||||
|
||||
class ResponsesApiTransport(ProviderTransport):
    """Transport for api_mode='codex_responses'.

    Thin wrapper over the helpers in ``agent/codex_responses_adapter.py``:
    it converts chat-style requests into Responses API kwargs and normalizes
    Responses API results into ``NormalizedResponse``.
    """

    @property
    def api_mode(self) -> str:
        """Identifier under which this transport is registered."""
        return "codex_responses"

    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
        """Convert OpenAI chat messages to Responses API input items."""
        from agent.codex_responses_adapter import _chat_messages_to_responses_input

        return _chat_messages_to_responses_input(messages)

    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
        """Convert OpenAI tool schemas to Responses API function definitions."""
        from agent.codex_responses_adapter import _responses_tools

        return _responses_tools(tools)

    def build_kwargs(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        **params,
    ) -> Dict[str, Any]:
        """Build Responses API kwargs.

        Messages and tools are converted with the same adapter helpers that
        back ``convert_messages`` and ``convert_tools``.

        params:
            instructions: str — system prompt (extracted from messages[0] if not given)
            reasoning_config: dict | None — {effort, enabled}
            session_id: str | None — used for prompt_cache_key + xAI conv header
            max_tokens: int | None — max_output_tokens
            request_overrides: dict | None — extra kwargs merged in
            provider: str | None — provider name for backend-specific logic
            base_url: str | None — endpoint URL
            base_url_hostname: str | None — hostname for backend detection
            is_github_responses: bool — Copilot/GitHub models backend
            is_codex_backend: bool — chatgpt.com/backend-api/codex
            is_xai_responses: bool — xAI/Grok backend
            github_reasoning_extra: dict | None — Copilot reasoning params

        Returns:
            The keyword-argument dict for the Responses API call.
        """
        from agent.codex_responses_adapter import (
            _chat_messages_to_responses_input,
            _responses_tools,
        )
        from run_agent import DEFAULT_AGENT_IDENTITY

        # Resolve the system prompt: explicit param, then a leading system
        # message (which is removed from the payload), then the default.
        instructions = params.get("instructions", "")
        payload_messages = messages
        if not instructions:
            if messages and messages[0].get("role") == "system":
                instructions = str(messages[0].get("content") or "").strip()
                payload_messages = messages[1:]
        if not instructions:
            instructions = DEFAULT_AGENT_IDENTITY

        is_github_responses = params.get("is_github_responses", False)
        is_codex_backend = params.get("is_codex_backend", False)
        is_xai_responses = params.get("is_xai_responses", False)

        # Resolve reasoning effort
        reasoning_effort = "medium"
        reasoning_enabled = True
        reasoning_config = params.get("reasoning_config")
        if reasoning_config and isinstance(reasoning_config, dict):
            if reasoning_config.get("enabled") is False:
                reasoning_enabled = False
            elif reasoning_config.get("effort"):
                reasoning_effort = reasoning_config["effort"]

        # "minimal" is sent as "low"; every other effort passes through.
        _effort_clamp = {"minimal": "low"}
        reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)

        kwargs = {
            "model": model,
            "instructions": instructions,
            "input": _chat_messages_to_responses_input(payload_messages),
            "tools": _responses_tools(tools),
            "tool_choice": "auto",
            "parallel_tool_calls": True,
            "store": False,
        }

        session_id = params.get("session_id")
        if not is_github_responses and session_id:
            kwargs["prompt_cache_key"] = session_id

        if reasoning_enabled and is_xai_responses:
            # xAI: no effort parameter is sent, only the include list.
            kwargs["include"] = ["reasoning.encrypted_content"]
        elif reasoning_enabled:
            if is_github_responses:
                github_reasoning = params.get("github_reasoning_extra")
                if github_reasoning is not None:
                    kwargs["reasoning"] = github_reasoning
            else:
                kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
                kwargs["include"] = ["reasoning.encrypted_content"]
        elif not is_github_responses and not is_xai_responses:
            kwargs["include"] = []

        request_overrides = params.get("request_overrides")
        if request_overrides:
            kwargs.update(request_overrides)

        # Applied after the overrides, so an explicit max_tokens wins.
        max_tokens = params.get("max_tokens")
        if max_tokens is not None and not is_codex_backend:
            kwargs["max_output_tokens"] = max_tokens

        if is_xai_responses and session_id:
            # Merge instead of assigning so that headers supplied through
            # request_overrides are not discarded; the conversation id still
            # takes precedence over a same-named override.
            merged_headers = dict(kwargs.get("extra_headers") or {})
            merged_headers["x-grok-conv-id"] = session_id
            kwargs["extra_headers"] = merged_headers

        return kwargs

    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
        """Normalize Codex Responses API response to NormalizedResponse."""
        from agent.codex_responses_adapter import _normalize_codex_response

        # _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
        msg, finish_reason = _normalize_codex_response(response)

        tool_calls = None
        if msg and msg.tool_calls:
            tool_calls = []
            for tc in msg.tool_calls:
                # Per-call metadata, kept separate from the response-level
                # provider_data assembled below.
                tc_provider_data = {}
                if hasattr(tc, "call_id") and tc.call_id:
                    tc_provider_data["call_id"] = tc.call_id
                if hasattr(tc, "response_item_id") and tc.response_item_id:
                    tc_provider_data["response_item_id"] = tc.response_item_id

                # Items may expose name/arguments nested under ``function``
                # or flat on the item itself; support both shapes.
                has_function = hasattr(tc, "function")
                tool_calls.append(ToolCall(
                    id=tc.id if hasattr(tc, "id") else (tc.function.name if has_function else None),
                    name=tc.function.name if has_function else getattr(tc, "name", ""),
                    arguments=tc.function.arguments if has_function else getattr(tc, "arguments", "{}"),
                    provider_data=tc_provider_data or None,
                ))

        # Extract reasoning items for provider_data
        provider_data = {}
        if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
            provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
        if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
            provider_data["reasoning_details"] = msg.reasoning_details

        return NormalizedResponse(
            content=msg.content if msg else None,
            tool_calls=tool_calls,
            finish_reason=finish_reason or "stop",
            reasoning=msg.reasoning if msg and hasattr(msg, "reasoning") else None,
            usage=None,  # Codex usage is extracted separately in normalize_usage()
            provider_data=provider_data or None,
        )

    def validate_response(self, response: Any) -> bool:
        """Check Codex Responses API response has valid output structure.

        Returns True only if response.output is a non-empty list.
        Does NOT check output_text fallback — the caller handles that
        with diagnostic logging for stream backfill recovery.
        """
        if response is None:
            return False
        output = getattr(response, "output", None)
        if not isinstance(output, list) or not output:
            return False
        return True

    def preflight_kwargs(self, api_kwargs: Any, *, allow_stream: bool = False) -> dict:
        """Validate and sanitize Codex API kwargs before the call.

        Delegates to ``_preflight_codex_api_kwargs`` in the adapter module,
        forwarding ``allow_stream`` unchanged.
        """
        from agent.codex_responses_adapter import _preflight_codex_api_kwargs

        return _preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream)

    def map_finish_reason(self, raw_reason: str) -> str:
        """Map Codex response.status to OpenAI finish_reason.

        Codex uses response.status ('completed', 'incomplete') +
        response.incomplete_details.reason for granular mapping.
        This method handles the simple status string; the caller
        should check incomplete_details separately for 'max_output_tokens'.

        Unknown statuses map to ``"stop"``.
        """
        _MAP = {
            "completed": "stop",
            "incomplete": "length",
            "failed": "stop",
            "cancelled": "stop",
        }
        return _MAP.get(raw_reason, "stop")
|
||||
|
||||
|
||||
# Auto-register on import
|
||||
from agent.transports import register_transport # noqa: E402
|
||||
|
||||
register_transport("codex_responses", ResponsesApiTransport)
|
||||
@@ -1,142 +0,0 @@
|
||||
"""Shared types for normalized provider responses.
|
||||
|
||||
These dataclasses define the canonical shape that all provider adapters
|
||||
normalize responses to. The shared surface is intentionally minimal —
|
||||
only fields that every downstream consumer reads are top-level.
|
||||
Protocol-specific state goes in ``provider_data`` dicts (response-level
|
||||
and per-tool-call) so that protocol-aware code paths can access it
|
||||
without polluting the shared type.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
@dataclass
class ToolCall:
    """Provider-agnostic record of one tool call.

    ``id`` is the protocol's canonical identifier — the value used for
    ``tool_call_id`` / ``tool_use_id`` when building tool result messages.
    It may be ``None`` when the provider omits it; the agent fills it via
    ``_deterministic_call_id()`` before storing in history.

    ``provider_data`` holds per-call protocol metadata read only by
    protocol-aware code:

    * Codex: ``{"call_id": "call_XXX", "response_item_id": "fc_XXX"}``
    * Gemini: ``{"extra_content": {"google": {"thought_signature": "..."}}}``
    * Others: ``None``
    """

    id: Optional[str]
    name: str
    arguments: str  # JSON string
    # Excluded from repr(); defaults to None when there is no metadata.
    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)

    # ── Backward compatibility ──────────────────────────────────
    # run_agent.py reads tc.function.name / tc.function.arguments at many
    # call sites. The properties below expose that nested shape on top of
    # the flat canonical fields, so no _nr_to_assistant_message shim is
    # needed.
    @property
    def type(self) -> str:
        """Always ``"function"``."""
        return "function"

    @property
    def function(self) -> "ToolCall":
        """Alias for this object, so ``tc.function.name`` resolves."""
        return self

    @property
    def call_id(self) -> Optional[str]:
        """Codex ``call_id`` taken from ``provider_data``, or ``None``."""
        extras = self.provider_data
        return extras.get("call_id") if extras else None

    @property
    def response_item_id(self) -> Optional[str]:
        """Codex ``response_item_id`` taken from ``provider_data``, or ``None``."""
        extras = self.provider_data
        return extras.get("response_item_id") if extras else None
|
||||
|
||||
|
||||
@dataclass
class Usage:
    """Token counters reported for one API response.

    Every counter defaults to ``0``, so a bare ``Usage()`` is all zeros.
    """

    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0
    cached_tokens: int = 0
|
||||
|
||||
|
||||
@dataclass
class NormalizedResponse:
    """Provider-agnostic shape of one API response.

    The top-level fields are shared by every provider, so callers can read
    them without branching on api_mode. Anything protocol-specific lives in
    ``provider_data`` and is read only by protocol-aware code paths.

    Response-level ``provider_data`` examples:

    * Anthropic: ``{"reasoning_details": [...]}``
    * Codex: ``{"codex_reasoning_items": [...]}``
    * Others: ``None``
    """

    content: Optional[str]
    tool_calls: Optional[List[ToolCall]]
    finish_reason: str  # "stop", "tool_calls", "length", "content_filter"
    reasoning: Optional[str] = None
    usage: Optional[Usage] = None
    # Excluded from repr(); defaults to None when there is no metadata.
    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)

    # ── Backward compatibility ──────────────────────────────────
    # These values used to be copied out of provider_data by the
    # _nr_to_assistant_message() shim. Exposing them as read-only properties
    # lets a NormalizedResponse be passed through directly.
    @property
    def reasoning_content(self) -> Optional[str]:
        """``provider_data["reasoning_content"]``, or ``None`` when absent."""
        extras = self.provider_data
        return extras.get("reasoning_content") if extras else None

    @property
    def reasoning_details(self):
        """``provider_data["reasoning_details"]``, or ``None`` when absent."""
        extras = self.provider_data
        return extras.get("reasoning_details") if extras else None

    @property
    def codex_reasoning_items(self):
        """``provider_data["codex_reasoning_items"]``, or ``None`` when absent."""
        extras = self.provider_data
        return extras.get("codex_reasoning_items") if extras else None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Factory helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_tool_call(
    id: Optional[str],
    name: str,
    arguments: Any,
    **provider_fields: Any,
) -> ToolCall:
    """Build a ``ToolCall``, serialising *arguments* to a JSON string.

    Args:
        id: Protocol identifier for the call; may be ``None``.
        name: Name of the tool being called.
        arguments: Call arguments. Dicts, lists and tuples are encoded with
            ``json.dumps``; ``None`` becomes ``"{}"``; any other value
            (including an already-serialised string) is passed through
            ``str()``.
        **provider_fields: Extra keyword arguments, collected into
            ``provider_data`` (``None`` when no extras are given).

    Returns:
        The constructed ``ToolCall``.

    Raises:
        TypeError: If a dict/list/tuple contains values ``json.dumps``
            cannot encode.
    """
    if arguments is None:
        # ``str(None)`` would yield the literal "None", which is not JSON.
        args_str = "{}"
    elif isinstance(arguments, (dict, list, tuple)):
        # ``str()`` on a container gives a Python repr (single quotes,
        # True/None), not JSON, so all JSON containers go through dumps.
        args_str = json.dumps(arguments)
    else:
        args_str = str(arguments)
    pd = dict(provider_fields) if provider_fields else None
    return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
|
||||
|
||||
|
||||
def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
    """Look up a provider stop reason in ``mapping``.

    Returns the mapped value, or ``"stop"`` when ``reason`` is ``None`` or
    has no entry in ``mapping``.
    """
    fallback = "stop"
    # None is short-circuited rather than looked up, so a mapping that
    # happens to contain a None key is never consulted for it.
    return fallback if reason is None else mapping.get(reason, fallback)
|
||||
+1
-14
@@ -6,7 +6,6 @@ from decimal import Decimal
|
||||
from typing import Any, Dict, Literal, Optional
|
||||
|
||||
from agent.model_metadata import fetch_endpoint_model_metadata, fetch_model_metadata
|
||||
from utils import base_url_host_matches
|
||||
|
||||
DEFAULT_PRICING = {"input": 0.0, "output": 0.0}
|
||||
|
||||
@@ -394,7 +393,7 @@ def resolve_billing_route(
|
||||
|
||||
if provider_name == "openai-codex":
|
||||
return BillingRoute(provider="openai-codex", model=model, base_url=base_url or "", billing_mode="subscription_included")
|
||||
if provider_name == "openrouter" or base_url_host_matches(base_url or "", "openrouter.ai"):
|
||||
if provider_name == "openrouter" or "openrouter.ai" in base:
|
||||
return BillingRoute(provider="openrouter", model=model, base_url=base_url or "", billing_mode="official_models_api")
|
||||
if provider_name == "anthropic":
|
||||
return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
|
||||
@@ -533,22 +532,10 @@ def normalize_usage(
|
||||
prompt_total = _to_int(getattr(response_usage, "prompt_tokens", 0))
|
||||
output_tokens = _to_int(getattr(response_usage, "completion_tokens", 0))
|
||||
details = getattr(response_usage, "prompt_tokens_details", None)
|
||||
# Primary: OpenAI-style prompt_tokens_details. Fallback: Anthropic-style
|
||||
# top-level fields that some OpenAI-compatible proxies (OpenRouter, Vercel
|
||||
# AI Gateway, Cline) expose when routing Claude models — without this
|
||||
# fallback, cache writes are undercounted as 0 and cache reads can be
|
||||
# missed when the proxy only surfaces them at the top level.
|
||||
# Port of cline/cline#10266.
|
||||
cache_read_tokens = _to_int(getattr(details, "cached_tokens", 0) if details else 0)
|
||||
if not cache_read_tokens:
|
||||
cache_read_tokens = _to_int(getattr(response_usage, "cache_read_input_tokens", 0))
|
||||
cache_write_tokens = _to_int(
|
||||
getattr(details, "cache_write_tokens", 0) if details else 0
|
||||
)
|
||||
if not cache_write_tokens:
|
||||
cache_write_tokens = _to_int(
|
||||
getattr(response_usage, "cache_creation_input_tokens", 0)
|
||||
)
|
||||
input_tokens = max(0, prompt_total - cache_read_tokens - cache_write_tokens)
|
||||
|
||||
reasoning_tokens = 0
|
||||
|
||||
+4
-5
@@ -444,7 +444,6 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]:
|
||||
if not reasoning.get("has_any_reasoning", True):
|
||||
print(f" 🚫 Prompt {prompt_index} discarded (no reasoning in any turn)")
|
||||
discarded_no_reasoning += 1
|
||||
completed_in_batch.append(prompt_index)
|
||||
continue
|
||||
|
||||
# Get and normalize tool stats for consistent schema across all entries
|
||||
@@ -1190,12 +1189,12 @@ def main(
|
||||
"""
|
||||
# Handle list distributions
|
||||
if list_distributions:
|
||||
from toolset_distributions import print_distribution_info
|
||||
|
||||
from toolset_distributions import list_distributions as get_all_dists, print_distribution_info
|
||||
|
||||
print("📊 Available Toolset Distributions")
|
||||
print("=" * 70)
|
||||
|
||||
all_dists = list_distributions()
|
||||
|
||||
all_dists = get_all_dists()
|
||||
for dist_name in sorted(all_dists.keys()):
|
||||
print_distribution_info(dist_name)
|
||||
|
||||
|
||||
+17
-85
@@ -63,38 +63,7 @@ model:
|
||||
# Leave unset to use the model's native output ceiling (recommended).
|
||||
# Set only if you want to deliberately limit individual response length.
|
||||
#
|
||||
# max_tokens: 8192
|
||||
|
||||
# Named provider overrides (optional)
|
||||
# Use this for per-provider request timeouts, non-stream stale timeouts,
|
||||
# and per-model exceptions.
|
||||
# Applies to the primary turn client on every api_mode (OpenAI-wire, native
|
||||
# Anthropic, and Anthropic-compatible providers), the fallback chain, and
|
||||
# client rebuilds during credential rotation. For OpenAI-wire chat
|
||||
# completions (streaming and non-streaming) the configured value is also
|
||||
# used as the per-request ``timeout=`` kwarg so it wins over the legacy
|
||||
# HERMES_API_TIMEOUT env var (which still applies when no config is set).
|
||||
# ``stale_timeout_seconds`` controls the non-streaming stale-call detector and
|
||||
# wins over the legacy HERMES_API_CALL_STALE_TIMEOUT env var. Leaving these
|
||||
# unset keeps the legacy defaults (HERMES_API_TIMEOUT=1800s,
|
||||
# HERMES_API_CALL_STALE_TIMEOUT=300s, native Anthropic 900s).
|
||||
#
|
||||
# Not currently wired for AWS Bedrock (bedrock_converse + AnthropicBedrock
|
||||
# SDK paths) — those use boto3 with its own timeout configuration.
|
||||
#
|
||||
# providers:
|
||||
# ollama-local:
|
||||
# request_timeout_seconds: 300 # Longer timeout for local cold-starts
|
||||
# stale_timeout_seconds: 900 # Explicitly re-enable stale detection on local endpoints
|
||||
# anthropic:
|
||||
# request_timeout_seconds: 30 # Fast-fail cloud requests
|
||||
# models:
|
||||
# claude-opus-4.6:
|
||||
# timeout_seconds: 600 # Longer timeout for extended-thinking Opus calls
|
||||
# openai-codex:
|
||||
# models:
|
||||
# gpt-5.4:
|
||||
# stale_timeout_seconds: 1800 # Longer non-stream stale timeout for slow large-context turns
|
||||
# max_tokens: 8192
|
||||
|
||||
# =============================================================================
|
||||
# OpenRouter Provider Routing (only applies when using OpenRouter)
|
||||
@@ -122,6 +91,20 @@ model:
|
||||
# # Data policy: "allow" (default) or "deny" to exclude providers that may store data
|
||||
# # data_collection: "deny"
|
||||
|
||||
# =============================================================================
|
||||
# Smart Model Routing (optional)
|
||||
# =============================================================================
|
||||
# Use a cheaper model for short/simple turns while keeping your main model for
|
||||
# more complex requests. Disabled by default.
|
||||
#
|
||||
# smart_model_routing:
|
||||
# enabled: true
|
||||
# max_simple_chars: 160
|
||||
# max_simple_words: 28
|
||||
# cheap_model:
|
||||
# provider: openrouter
|
||||
# model: google/gemini-2.5-flash
|
||||
|
||||
# =============================================================================
|
||||
# Git Worktree Isolation
|
||||
# =============================================================================
|
||||
@@ -374,18 +357,6 @@ compression:
|
||||
# web_extract:
|
||||
# provider: "auto"
|
||||
# model: ""
|
||||
#
|
||||
# # Session search — summarizes matching past sessions
|
||||
# session_search:
|
||||
# provider: "auto"
|
||||
# model: ""
|
||||
# timeout: 30
|
||||
# max_concurrency: 3 # Limit parallel summaries to reduce request-burst 429s
|
||||
# extra_body: {} # Provider-specific OpenAI-compatible request fields
|
||||
# # Example for providers that support request-body
|
||||
# # reasoning controls:
|
||||
# # extra_body:
|
||||
# # enable_thinking: false
|
||||
|
||||
# =============================================================================
|
||||
# Persistent Memory
|
||||
@@ -770,13 +741,10 @@ code_execution:
|
||||
# Subagent Delegation
|
||||
# =============================================================================
|
||||
# The delegate_task tool spawns child agents with isolated context.
|
||||
# Supports single tasks and batch mode (default 3 parallel, configurable).
|
||||
# Supports single tasks and batch mode (up to 3 parallel).
|
||||
delegation:
|
||||
max_iterations: 50 # Max tool-calling turns per child (default: 50)
|
||||
# max_concurrent_children: 3 # Max parallel child agents (default: 3)
|
||||
# max_spawn_depth: 1 # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
|
||||
# orchestrator_enabled: true # Kill switch for role="orchestrator" children (default: true).
|
||||
# inherit_mcp_toolsets: true # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
|
||||
default_toolsets: ["terminal", "file", "web"] # Default toolsets for subagents
|
||||
# model: "google/gemini-3-flash-preview" # Override model for subagents (empty = inherit parent)
|
||||
# provider: "openrouter" # Override provider for subagents (empty = inherit parent)
|
||||
# # Resolves full credentials (base_url, api_key) automatically.
|
||||
@@ -920,39 +888,3 @@ display:
|
||||
# # Names and usernames are NOT affected (user-chosen, publicly visible).
|
||||
# # Routing/delivery still uses the original values internally.
|
||||
# redact_pii: false
|
||||
|
||||
# =============================================================================
|
||||
# Shell-script hooks
|
||||
# =============================================================================
|
||||
# Register shell scripts as plugin-hook callbacks. Each entry is executed as
|
||||
# a subprocess (shell=False, shlex.split) with a JSON payload on stdin. On
|
||||
# stdout the script may return JSON that either blocks the tool call or
|
||||
# injects context into the next LLM call.
|
||||
#
|
||||
# Valid events (mirror hermes_cli.plugins.VALID_HOOKS):
|
||||
# pre_tool_call, post_tool_call, pre_llm_call, post_llm_call,
|
||||
# pre_api_request, post_api_request, on_session_start, on_session_end,
|
||||
# on_session_finalize, on_session_reset, subagent_stop
|
||||
#
|
||||
# First-use consent: each (event, command) pair prompts once on a TTY, then
|
||||
# is persisted to ~/.hermes/shell-hooks-allowlist.json. Non-interactive
|
||||
# runs (gateway, cron) need --accept-hooks, HERMES_ACCEPT_HOOKS=1, or the
|
||||
# hooks_auto_accept key below.
|
||||
#
|
||||
# See website/docs/user-guide/features/hooks.md for the full JSON wire
|
||||
# protocol and worked examples.
|
||||
#
|
||||
# hooks:
|
||||
# pre_tool_call:
|
||||
# - matcher: "terminal"
|
||||
# command: "~/.hermes/agent-hooks/block-rm-rf.sh"
|
||||
# timeout: 10
|
||||
# post_tool_call:
|
||||
# - matcher: "write_file|patch"
|
||||
# command: "~/.hermes/agent-hooks/auto-format.sh"
|
||||
# pre_llm_call:
|
||||
# - command: "~/.hermes/agent-hooks/inject-cwd-context.sh"
|
||||
# subagent_stop:
|
||||
# - command: "~/.hermes/agent-hooks/log-orchestration.sh"
|
||||
#
|
||||
# hooks_auto_accept: false
|
||||
|
||||
+46
-54
@@ -9,7 +9,6 @@ import copy
|
||||
import json
|
||||
import logging
|
||||
import tempfile
|
||||
import threading
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
@@ -35,11 +34,6 @@ except ImportError:
|
||||
HERMES_DIR = get_hermes_home().resolve()
|
||||
CRON_DIR = HERMES_DIR / "cron"
|
||||
JOBS_FILE = CRON_DIR / "jobs.json"
|
||||
|
||||
# In-process lock protecting load_jobs→modify→save_jobs cycles.
|
||||
# Required when tick() runs jobs in parallel threads — without this,
|
||||
# concurrent mark_job_run / advance_next_run calls can clobber each other.
|
||||
_jobs_file_lock = threading.Lock()
|
||||
OUTPUT_DIR = CRON_DIR / "output"
|
||||
ONESHOT_GRACE_SECONDS = 120
|
||||
|
||||
@@ -600,44 +594,43 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
|
||||
``delivery_error`` is tracked separately from the agent error — a job
|
||||
can succeed (agent produced output) but fail delivery (platform down).
|
||||
"""
|
||||
with _jobs_file_lock:
|
||||
jobs = load_jobs()
|
||||
for i, job in enumerate(jobs):
|
||||
if job["id"] == job_id:
|
||||
now = _hermes_now().isoformat()
|
||||
job["last_run_at"] = now
|
||||
job["last_status"] = "ok" if success else "error"
|
||||
job["last_error"] = error if not success else None
|
||||
# Track delivery failures separately — cleared on successful delivery
|
||||
job["last_delivery_error"] = delivery_error
|
||||
jobs = load_jobs()
|
||||
for i, job in enumerate(jobs):
|
||||
if job["id"] == job_id:
|
||||
now = _hermes_now().isoformat()
|
||||
job["last_run_at"] = now
|
||||
job["last_status"] = "ok" if success else "error"
|
||||
job["last_error"] = error if not success else None
|
||||
# Track delivery failures separately — cleared on successful delivery
|
||||
job["last_delivery_error"] = delivery_error
|
||||
|
||||
# Increment completed count
|
||||
if job.get("repeat"):
|
||||
job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
|
||||
|
||||
# Increment completed count
|
||||
if job.get("repeat"):
|
||||
job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
|
||||
|
||||
# Check if we've hit the repeat limit
|
||||
times = job["repeat"].get("times")
|
||||
completed = job["repeat"]["completed"]
|
||||
if times is not None and times > 0 and completed >= times:
|
||||
# Remove the job (limit reached)
|
||||
jobs.pop(i)
|
||||
save_jobs(jobs)
|
||||
return
|
||||
|
||||
# Compute next run
|
||||
job["next_run_at"] = compute_next_run(job["schedule"], now)
|
||||
# Check if we've hit the repeat limit
|
||||
times = job["repeat"].get("times")
|
||||
completed = job["repeat"]["completed"]
|
||||
if times is not None and times > 0 and completed >= times:
|
||||
# Remove the job (limit reached)
|
||||
jobs.pop(i)
|
||||
save_jobs(jobs)
|
||||
return
|
||||
|
||||
# Compute next run
|
||||
job["next_run_at"] = compute_next_run(job["schedule"], now)
|
||||
|
||||
# If no next run (one-shot completed), disable
|
||||
if job["next_run_at"] is None:
|
||||
job["enabled"] = False
|
||||
job["state"] = "completed"
|
||||
elif job.get("state") != "paused":
|
||||
job["state"] = "scheduled"
|
||||
# If no next run (one-shot completed), disable
|
||||
if job["next_run_at"] is None:
|
||||
job["enabled"] = False
|
||||
job["state"] = "completed"
|
||||
elif job.get("state") != "paused":
|
||||
job["state"] = "scheduled"
|
||||
|
||||
save_jobs(jobs)
|
||||
return
|
||||
save_jobs(jobs)
|
||||
return
|
||||
|
||||
logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
|
||||
logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
|
||||
|
||||
|
||||
def advance_next_run(job_id: str) -> bool:
|
||||
@@ -652,21 +645,20 @@ def advance_next_run(job_id: str) -> bool:
|
||||
|
||||
Returns True if next_run_at was advanced, False otherwise.
|
||||
"""
|
||||
with _jobs_file_lock:
|
||||
jobs = load_jobs()
|
||||
for job in jobs:
|
||||
if job["id"] == job_id:
|
||||
kind = job.get("schedule", {}).get("kind")
|
||||
if kind not in ("cron", "interval"):
|
||||
return False
|
||||
now = _hermes_now().isoformat()
|
||||
new_next = compute_next_run(job["schedule"], now)
|
||||
if new_next and new_next != job.get("next_run_at"):
|
||||
job["next_run_at"] = new_next
|
||||
save_jobs(jobs)
|
||||
return True
|
||||
jobs = load_jobs()
|
||||
for job in jobs:
|
||||
if job["id"] == job_id:
|
||||
kind = job.get("schedule", {}).get("kind")
|
||||
if kind not in ("cron", "interval"):
|
||||
return False
|
||||
return False
|
||||
now = _hermes_now().isoformat()
|
||||
new_next = compute_next_run(job["schedule"], now)
|
||||
if new_next and new_next != job.get("next_run_at"):
|
||||
job["next_run_at"] = new_next
|
||||
save_jobs(jobs)
|
||||
return True
|
||||
return False
|
||||
return False
|
||||
|
||||
|
||||
def get_due_jobs() -> List[Dict[str, Any]]:
|
||||
|
||||
+57
-84
@@ -252,11 +252,7 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
|
||||
coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)
|
||||
|
||||
future = asyncio.run_coroutine_threadsafe(coro, loop)
|
||||
try:
|
||||
result = future.result(timeout=30)
|
||||
except TimeoutError:
|
||||
future.cancel()
|
||||
raise
|
||||
result = future.result(timeout=30)
|
||||
if result and not getattr(result, "success", True):
|
||||
logger.warning(
|
||||
"Job '%s': media send failed for %s: %s",
|
||||
@@ -386,11 +382,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
|
||||
loop,
|
||||
)
|
||||
try:
|
||||
send_result = future.result(timeout=60)
|
||||
except TimeoutError:
|
||||
future.cancel()
|
||||
raise
|
||||
send_result = future.result(timeout=60)
|
||||
if send_result and not getattr(send_result, "success", True):
|
||||
err = getattr(send_result, "error", "unknown")
|
||||
logger.warning(
|
||||
@@ -430,6 +422,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
# prevent "coroutine was never awaited" RuntimeWarning, then retry in a
|
||||
# fresh thread that has no running loop.
|
||||
coro.close()
|
||||
import concurrent.futures
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
|
||||
future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
|
||||
result = future.result(timeout=30)
|
||||
@@ -754,17 +747,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
# scheduler process — every job this process runs is a cron job.
|
||||
os.environ["HERMES_CRON_SESSION"] = "1"
|
||||
|
||||
# Use ContextVars for per-job session/delivery state so parallel jobs
|
||||
# don't clobber each other's targets (os.environ is process-global).
|
||||
from gateway.session_context import set_session_vars, clear_session_vars, _VAR_MAP
|
||||
|
||||
_ctx_tokens = set_session_vars(
|
||||
platform=origin["platform"] if origin else "",
|
||||
chat_id=str(origin["chat_id"]) if origin else "",
|
||||
chat_name=origin.get("chat_name", "") if origin else "",
|
||||
)
|
||||
|
||||
try:
|
||||
# Inject origin context so the agent's send_message tool knows the chat.
|
||||
# Must be INSIDE the try block so the finally cleanup always runs.
|
||||
if origin:
|
||||
os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
|
||||
os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
|
||||
if origin.get("chat_name"):
|
||||
os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
|
||||
# Re-read .env and config.yaml fresh every run so provider/key
|
||||
# changes take effect without a gateway restart.
|
||||
from dotenv import load_dotenv
|
||||
@@ -775,10 +765,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
|
||||
delivery_target = _resolve_delivery_target(job)
|
||||
if delivery_target:
|
||||
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"])
|
||||
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"]))
|
||||
os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"]
|
||||
os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"])
|
||||
if delivery_target.get("thread_id") is not None:
|
||||
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"]))
|
||||
os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])
|
||||
|
||||
model = job.get("model") or os.getenv("HERMES_MODEL") or ""
|
||||
|
||||
@@ -817,13 +807,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
prefill_messages = None
|
||||
prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "")
|
||||
if prefill_file:
|
||||
import json as _json
|
||||
pfpath = Path(prefill_file).expanduser()
|
||||
if not pfpath.is_absolute():
|
||||
pfpath = _hermes_home / pfpath
|
||||
if pfpath.exists():
|
||||
try:
|
||||
with open(pfpath, "r", encoding="utf-8") as _pf:
|
||||
prefill_messages = json.load(_pf)
|
||||
prefill_messages = _json.load(_pf)
|
||||
if not isinstance(prefill_messages, list):
|
||||
prefill_messages = None
|
||||
except Exception as e:
|
||||
@@ -835,6 +826,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
|
||||
# Provider routing
|
||||
pr = _cfg.get("provider_routing", {})
|
||||
smart_routing = _cfg.get("smart_model_routing", {}) or {}
|
||||
|
||||
from hermes_cli.runtime_provider import (
|
||||
resolve_runtime_provider,
|
||||
@@ -851,9 +843,24 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
message = format_runtime_provider_error(exc)
|
||||
raise RuntimeError(message) from exc
|
||||
|
||||
from agent.smart_model_routing import resolve_turn_route
|
||||
turn_route = resolve_turn_route(
|
||||
prompt,
|
||||
smart_routing,
|
||||
{
|
||||
"model": model,
|
||||
"api_key": runtime.get("api_key"),
|
||||
"base_url": runtime.get("base_url"),
|
||||
"provider": runtime.get("provider"),
|
||||
"api_mode": runtime.get("api_mode"),
|
||||
"command": runtime.get("command"),
|
||||
"args": list(runtime.get("args") or []),
|
||||
},
|
||||
)
|
||||
|
||||
fallback_model = _cfg.get("fallback_providers") or _cfg.get("fallback_model") or None
|
||||
credential_pool = None
|
||||
runtime_provider = str(runtime.get("provider") or "").strip().lower()
|
||||
runtime_provider = str(turn_route["runtime"].get("provider") or "").strip().lower()
|
||||
if runtime_provider:
|
||||
try:
|
||||
from agent.credential_pool import load_pool
|
||||
@@ -870,13 +877,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
logger.debug("Job '%s': failed to load credential pool for %s: %s", job_id, runtime_provider, e)
|
||||
|
||||
agent = AIAgent(
|
||||
model=model,
|
||||
api_key=runtime.get("api_key"),
|
||||
base_url=runtime.get("base_url"),
|
||||
provider=runtime.get("provider"),
|
||||
api_mode=runtime.get("api_mode"),
|
||||
acp_command=runtime.get("command"),
|
||||
acp_args=runtime.get("args"),
|
||||
model=turn_route["model"],
|
||||
api_key=turn_route["runtime"].get("api_key"),
|
||||
base_url=turn_route["runtime"].get("base_url"),
|
||||
provider=turn_route["runtime"].get("provider"),
|
||||
api_mode=turn_route["runtime"].get("api_mode"),
|
||||
acp_command=turn_route["runtime"].get("command"),
|
||||
acp_args=turn_route["runtime"].get("args"),
|
||||
max_iterations=max_iterations,
|
||||
reasoning_config=reasoning_config,
|
||||
prefill_messages=prefill_messages,
|
||||
@@ -972,12 +979,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
f"— last activity: {_last_desc}"
|
||||
)
|
||||
|
||||
# Guard against non-dict returns from run_conversation under error conditions
|
||||
if not isinstance(result, dict):
|
||||
raise RuntimeError(
|
||||
f"agent.run_conversation returned {type(result).__name__} instead of dict: {result!r}"
|
||||
)
|
||||
|
||||
final_response = result.get("final_response", "") or ""
|
||||
# Strip leaked placeholder text that upstream may inject on empty completions.
|
||||
if final_response.strip() == "(No response generated)":
|
||||
@@ -1027,8 +1028,16 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
return False, output, "", error_msg
|
||||
|
||||
finally:
|
||||
# Clean up ContextVar session/delivery state for this job.
|
||||
clear_session_vars(_ctx_tokens)
|
||||
# Clean up injected env vars so they don't leak to other jobs
|
||||
for key in (
|
||||
"HERMES_SESSION_PLATFORM",
|
||||
"HERMES_SESSION_CHAT_ID",
|
||||
"HERMES_SESSION_CHAT_NAME",
|
||||
"HERMES_CRON_AUTO_DELIVER_PLATFORM",
|
||||
"HERMES_CRON_AUTO_DELIVER_CHAT_ID",
|
||||
"HERMES_CRON_AUTO_DELIVER_THREAD_ID",
|
||||
):
|
||||
os.environ.pop(key, None)
|
||||
if _session_db:
|
||||
try:
|
||||
_session_db.end_session(_cron_session_id, "cron_complete")
|
||||
@@ -1081,41 +1090,15 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
|
||||
if verbose:
|
||||
logger.info("%s - %s job(s) due", _hermes_now().strftime('%H:%M:%S'), len(due_jobs))
|
||||
|
||||
# Advance next_run_at for all recurring jobs FIRST, under the file lock,
|
||||
# before any execution begins. This preserves at-most-once semantics.
|
||||
executed = 0
|
||||
for job in due_jobs:
|
||||
advance_next_run(job["id"])
|
||||
|
||||
# Resolve max parallel workers: env var > config.yaml > unbounded.
|
||||
# Set HERMES_CRON_MAX_PARALLEL=1 to restore old serial behaviour.
|
||||
_max_workers: Optional[int] = None
|
||||
try:
|
||||
_env_par = os.getenv("HERMES_CRON_MAX_PARALLEL", "").strip()
|
||||
if _env_par:
|
||||
_max_workers = int(_env_par) or None
|
||||
except (ValueError, TypeError):
|
||||
logger.warning("Invalid HERMES_CRON_MAX_PARALLEL value; defaulting to unbounded")
|
||||
if _max_workers is None:
|
||||
try:
|
||||
_ucfg = load_config() or {}
|
||||
_cfg_par = (
|
||||
_ucfg.get("cron", {}) if isinstance(_ucfg, dict) else {}
|
||||
).get("max_parallel_jobs")
|
||||
if _cfg_par is not None:
|
||||
_max_workers = int(_cfg_par) or None
|
||||
except Exception:
|
||||
pass
|
||||
# For recurring jobs (cron/interval), advance next_run_at to the
|
||||
# next future occurrence BEFORE execution. This way, if the
|
||||
# process crashes mid-run, the job won't re-fire on restart.
|
||||
# One-shot jobs are left alone so they can retry on restart.
|
||||
advance_next_run(job["id"])
|
||||
|
||||
if verbose:
|
||||
logger.info(
|
||||
"Running %d job(s) in parallel (max_workers=%s)",
|
||||
len(due_jobs),
|
||||
_max_workers if _max_workers else "unbounded",
|
||||
)
|
||||
|
||||
def _process_job(job: dict) -> bool:
|
||||
"""Run one due job end-to-end: execute, save, deliver, mark."""
|
||||
try:
|
||||
success, output, final_response, error = run_job(job)
|
||||
|
||||
output_file = save_job_output(job["id"], output)
|
||||
@@ -1147,23 +1130,13 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
|
||||
error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"
|
||||
|
||||
mark_job_run(job["id"], success, error, delivery_error=delivery_error)
|
||||
return True
|
||||
executed += 1
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Error processing job %s: %s", job['id'], e)
|
||||
mark_job_run(job["id"], False, str(e))
|
||||
return False
|
||||
|
||||
# Run all due jobs concurrently, each in its own ContextVar copy
|
||||
# so session/delivery state stays isolated per-thread.
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
|
||||
_futures = []
|
||||
for job in due_jobs:
|
||||
_ctx = contextvars.copy_context()
|
||||
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
|
||||
_results = [f.result() for f in _futures]
|
||||
|
||||
return sum(_results)
|
||||
return executed
|
||||
finally:
|
||||
if fcntl:
|
||||
fcntl.flock(lock_fd, fcntl.LOCK_UN)
|
||||
|
||||
@@ -58,13 +58,6 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
|
||||
cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
|
||||
fi
|
||||
|
||||
# Ensure the main config file remains accessible to the hermes runtime user
|
||||
# even if it was edited on the host after initial ownership setup.
|
||||
if [ -f "$HERMES_HOME/config.yaml" ]; then
|
||||
chown hermes:hermes "$HERMES_HOME/config.yaml"
|
||||
chmod 640 "$HERMES_HOME/config.yaml"
|
||||
fi
|
||||
|
||||
# SOUL.md
|
||||
if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
|
||||
cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
|
||||
@@ -75,19 +68,4 @@ if [ -d "$INSTALL_DIR/skills" ]; then
|
||||
python3 "$INSTALL_DIR/tools/skills_sync.py"
|
||||
fi
|
||||
|
||||
# Final exec: two supported invocation patterns.
|
||||
#
|
||||
# docker run <image> -> exec `hermes` with no args (legacy default)
|
||||
# docker run <image> chat -q "..." -> exec `hermes chat -q "..."` (legacy wrap)
|
||||
# docker run <image> sleep infinity -> exec `sleep infinity` directly
|
||||
# docker run <image> bash -> exec `bash` directly
|
||||
#
|
||||
# If the first positional arg resolves to an executable on PATH, we assume the
|
||||
# caller wants to run it directly (needed by the launcher which runs long-lived
|
||||
# `sleep infinity` sandbox containers — see tools/environments/docker.py).
|
||||
# Otherwise we treat the args as a hermes subcommand and wrap with `hermes`,
|
||||
# preserving the documented `docker run <image> <subcommand>` behavior.
|
||||
if [ $# -gt 0 ] && command -v "$1" >/dev/null 2>&1; then
|
||||
exec "$@"
|
||||
fi
|
||||
exec hermes "$@"
|
||||
|
||||
@@ -53,6 +53,7 @@ def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str)
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
# We're in an async context -- need to run in thread
|
||||
import concurrent.futures
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
|
||||
future = pool.submit(
|
||||
handle_function_call, tool_name, arguments, task_id
|
||||
|
||||
+3
-25
@@ -576,14 +576,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
bridged["free_response_channels"] = platform_cfg["free_response_channels"]
|
||||
if "mention_patterns" in platform_cfg:
|
||||
bridged["mention_patterns"] = platform_cfg["mention_patterns"]
|
||||
if "dm_policy" in platform_cfg:
|
||||
bridged["dm_policy"] = platform_cfg["dm_policy"]
|
||||
if "allow_from" in platform_cfg:
|
||||
bridged["allow_from"] = platform_cfg["allow_from"]
|
||||
if "group_policy" in platform_cfg:
|
||||
bridged["group_policy"] = platform_cfg["group_policy"]
|
||||
if "group_allow_from" in platform_cfg:
|
||||
bridged["group_allow_from"] = platform_cfg["group_allow_from"]
|
||||
if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
|
||||
bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
|
||||
if "channel_prompts" in platform_cfg:
|
||||
@@ -616,8 +608,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
if isinstance(frc, list):
|
||||
frc = ",".join(str(v) for v in frc)
|
||||
os.environ["SLACK_FREE_RESPONSE_CHANNELS"] = str(frc)
|
||||
if "reactions" in slack_cfg and not os.getenv("SLACK_REACTIONS"):
|
||||
os.environ["SLACK_REACTIONS"] = str(slack_cfg["reactions"]).lower()
|
||||
|
||||
# Discord settings → env vars (env vars take precedence)
|
||||
discord_cfg = yaml_cfg.get("discord", {})
|
||||
@@ -672,7 +662,8 @@ def load_gateway_config() -> GatewayConfig:
|
||||
if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"):
|
||||
os.environ["TELEGRAM_REQUIRE_MENTION"] = str(telegram_cfg["require_mention"]).lower()
|
||||
if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"):
|
||||
os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"])
|
||||
import json as _json
|
||||
os.environ["TELEGRAM_MENTION_PATTERNS"] = _json.dumps(telegram_cfg["mention_patterns"])
|
||||
frc = telegram_cfg.get("free_response_chats")
|
||||
if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"):
|
||||
if isinstance(frc, list):
|
||||
@@ -709,20 +700,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
if isinstance(frc, list):
|
||||
frc = ",".join(str(v) for v in frc)
|
||||
os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc)
|
||||
if "dm_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_DM_POLICY"):
|
||||
os.environ["WHATSAPP_DM_POLICY"] = str(whatsapp_cfg["dm_policy"]).lower()
|
||||
af = whatsapp_cfg.get("allow_from")
|
||||
if af is not None and not os.getenv("WHATSAPP_ALLOWED_USERS"):
|
||||
if isinstance(af, list):
|
||||
af = ",".join(str(v) for v in af)
|
||||
os.environ["WHATSAPP_ALLOWED_USERS"] = str(af)
|
||||
if "group_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_GROUP_POLICY"):
|
||||
os.environ["WHATSAPP_GROUP_POLICY"] = str(whatsapp_cfg["group_policy"]).lower()
|
||||
gaf = whatsapp_cfg.get("group_allow_from")
|
||||
if gaf is not None and not os.getenv("WHATSAPP_GROUP_ALLOWED_USERS"):
|
||||
if isinstance(gaf, list):
|
||||
gaf = ",".join(str(v) for v in gaf)
|
||||
os.environ["WHATSAPP_GROUP_ALLOWED_USERS"] = str(gaf)
|
||||
|
||||
# DingTalk settings → env vars (env vars take precedence)
|
||||
dingtalk_cfg = yaml_cfg.get("dingtalk", {})
|
||||
@@ -1260,6 +1237,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
if legacy_home:
|
||||
qq_home = legacy_home
|
||||
qq_home_name_env = "QQ_HOME_CHANNEL_NAME"
|
||||
import logging
|
||||
logging.getLogger(__name__).warning(
|
||||
"QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL "
|
||||
"in your .env for consistency with the platform key."
|
||||
|
||||
+11
-44
@@ -135,22 +135,9 @@ class HookRegistry:
|
||||
except Exception as e:
|
||||
print(f"[hooks] Error loading hook {hook_dir.name}: {e}", flush=True)
|
||||
|
||||
def _resolve_handlers(self, event_type: str) -> List[Callable]:
|
||||
"""Return all handlers that should fire for ``event_type``.
|
||||
|
||||
Exact matches fire first, followed by wildcard matches (e.g.
|
||||
``command:*`` matches ``command:reset``).
|
||||
"""
|
||||
handlers = list(self._handlers.get(event_type, []))
|
||||
if ":" in event_type:
|
||||
base = event_type.split(":")[0]
|
||||
wildcard_key = f"{base}:*"
|
||||
handlers.extend(self._handlers.get(wildcard_key, []))
|
||||
return handlers
|
||||
|
||||
async def emit(self, event_type: str, context: Optional[Dict[str, Any]] = None) -> None:
|
||||
"""
|
||||
Fire all handlers registered for an event, discarding return values.
|
||||
Fire all handlers registered for an event.
|
||||
|
||||
Supports wildcard matching: handlers registered for "command:*" will
|
||||
fire for any "command:..." event. Handlers registered for a base type
|
||||
@@ -164,7 +151,16 @@ class HookRegistry:
|
||||
if context is None:
|
||||
context = {}
|
||||
|
||||
for fn in self._resolve_handlers(event_type):
|
||||
# Collect handlers: exact match + wildcard match
|
||||
handlers = list(self._handlers.get(event_type, []))
|
||||
|
||||
# Check for wildcard patterns (e.g., "command:*" matches "command:reset")
|
||||
if ":" in event_type:
|
||||
base = event_type.split(":")[0]
|
||||
wildcard_key = f"{base}:*"
|
||||
handlers.extend(self._handlers.get(wildcard_key, []))
|
||||
|
||||
for fn in handlers:
|
||||
try:
|
||||
result = fn(event_type, context)
|
||||
# Support both sync and async handlers
|
||||
@@ -172,32 +168,3 @@ class HookRegistry:
|
||||
await result
|
||||
except Exception as e:
|
||||
print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
|
||||
|
||||
async def emit_collect(
|
||||
self,
|
||||
event_type: str,
|
||||
context: Optional[Dict[str, Any]] = None,
|
||||
) -> List[Any]:
|
||||
"""Fire handlers and return their non-None return values in order.
|
||||
|
||||
Like :meth:`emit` but captures each handler's return value. Used for
|
||||
decision-style hooks (e.g. ``command:<name>`` policies that want to
|
||||
allow/deny/rewrite the command before normal dispatch).
|
||||
|
||||
Exceptions from individual handlers are logged but do not abort the
|
||||
remaining handlers.
|
||||
"""
|
||||
if context is None:
|
||||
context = {}
|
||||
|
||||
results: List[Any] = []
|
||||
for fn in self._resolve_handlers(event_type):
|
||||
try:
|
||||
result = fn(event_type, context)
|
||||
if asyncio.iscoroutine(result):
|
||||
result = await result
|
||||
if result is not None:
|
||||
results.append(result)
|
||||
except Exception as e:
|
||||
print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
|
||||
return results
|
||||
|
||||
+60
-237
@@ -117,160 +117,6 @@ def _normalize_chat_content(
|
||||
return ""
|
||||
|
||||
|
||||
# Content part type aliases used by the OpenAI Chat Completions and Responses
|
||||
# APIs. We accept both spellings on input and emit a single canonical internal
|
||||
# shape (``{"type": "text", ...}`` / ``{"type": "image_url", ...}``) that the
|
||||
# rest of the agent pipeline already understands.
|
||||
_TEXT_PART_TYPES = frozenset({"text", "input_text", "output_text"})
|
||||
_IMAGE_PART_TYPES = frozenset({"image_url", "input_image"})
|
||||
_FILE_PART_TYPES = frozenset({"file", "input_file"})
|
||||
|
||||
|
||||
def _normalize_multimodal_content(content: Any) -> Any:
|
||||
"""Validate and normalize multimodal content for the API server.
|
||||
|
||||
Returns a plain string when the content is text-only, or a list of
|
||||
``{"type": "text"|"image_url", ...}`` parts when images are present.
|
||||
The output shape is the native OpenAI Chat Completions vision format,
|
||||
which the agent pipeline accepts verbatim (OpenAI-wire providers) or
|
||||
converts (``_preprocess_anthropic_content`` for Anthropic).
|
||||
|
||||
Raises ``ValueError`` with an OpenAI-style code on invalid input:
|
||||
* ``unsupported_content_type`` — file/input_file/file_id parts, or
|
||||
non-image ``data:`` URLs.
|
||||
* ``invalid_image_url`` — missing URL or unsupported scheme.
|
||||
* ``invalid_content_part`` — malformed text/image objects.
|
||||
|
||||
Callers translate the ValueError into a 400 response.
|
||||
"""
|
||||
# Scalar passthrough mirrors ``_normalize_chat_content``.
|
||||
if content is None:
|
||||
return ""
|
||||
if isinstance(content, str):
|
||||
return content[:MAX_NORMALIZED_TEXT_LENGTH] if len(content) > MAX_NORMALIZED_TEXT_LENGTH else content
|
||||
if not isinstance(content, list):
|
||||
# Mirror the legacy text-normalizer's fallback so callers that
|
||||
# pre-existed image support still get a string back.
|
||||
return _normalize_chat_content(content)
|
||||
|
||||
items = content[:MAX_CONTENT_LIST_SIZE] if len(content) > MAX_CONTENT_LIST_SIZE else content
|
||||
normalized_parts: List[Dict[str, Any]] = []
|
||||
text_accum_len = 0
|
||||
|
||||
for part in items:
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
trimmed = part[:MAX_NORMALIZED_TEXT_LENGTH]
|
||||
normalized_parts.append({"type": "text", "text": trimmed})
|
||||
text_accum_len += len(trimmed)
|
||||
continue
|
||||
|
||||
if not isinstance(part, dict):
|
||||
# Ignore unknown scalars for forward compatibility with future
|
||||
# Responses API additions (e.g. ``refusal``). The same policy
|
||||
# the text normalizer applies.
|
||||
continue
|
||||
|
||||
raw_type = part.get("type")
|
||||
part_type = str(raw_type or "").strip().lower()
|
||||
|
||||
if part_type in _TEXT_PART_TYPES:
|
||||
text = part.get("text")
|
||||
if text is None:
|
||||
continue
|
||||
if not isinstance(text, str):
|
||||
text = str(text)
|
||||
if text:
|
||||
trimmed = text[:MAX_NORMALIZED_TEXT_LENGTH]
|
||||
normalized_parts.append({"type": "text", "text": trimmed})
|
||||
text_accum_len += len(trimmed)
|
||||
continue
|
||||
|
||||
if part_type in _IMAGE_PART_TYPES:
|
||||
detail = part.get("detail")
|
||||
image_ref = part.get("image_url")
|
||||
# OpenAI Responses sends ``input_image`` with a top-level
|
||||
# ``image_url`` string; Chat Completions sends ``image_url`` as
|
||||
# ``{"url": "...", "detail": "..."}``. Support both.
|
||||
if isinstance(image_ref, dict):
|
||||
url_value = image_ref.get("url")
|
||||
detail = image_ref.get("detail", detail)
|
||||
else:
|
||||
url_value = image_ref
|
||||
if not isinstance(url_value, str) or not url_value.strip():
|
||||
raise ValueError("invalid_image_url:Image parts must include a non-empty image URL.")
|
||||
url_value = url_value.strip()
|
||||
lowered = url_value.lower()
|
||||
if lowered.startswith("data:"):
|
||||
if not lowered.startswith("data:image/") or "," not in url_value:
|
||||
raise ValueError(
|
||||
"unsupported_content_type:Only image data URLs are supported. "
|
||||
"Non-image data payloads are not supported."
|
||||
)
|
||||
elif not (lowered.startswith("http://") or lowered.startswith("https://")):
|
||||
raise ValueError(
|
||||
"invalid_image_url:Image inputs must use http(s) URLs or data:image/... URLs."
|
||||
)
|
||||
image_part: Dict[str, Any] = {"type": "image_url", "image_url": {"url": url_value}}
|
||||
if detail is not None:
|
||||
if not isinstance(detail, str) or not detail.strip():
|
||||
raise ValueError("invalid_content_part:Image detail must be a non-empty string when provided.")
|
||||
image_part["image_url"]["detail"] = detail.strip()
|
||||
normalized_parts.append(image_part)
|
||||
continue
|
||||
|
||||
if part_type in _FILE_PART_TYPES:
|
||||
raise ValueError(
|
||||
"unsupported_content_type:Inline image inputs are supported, "
|
||||
"but uploaded files and document inputs are not supported on this endpoint."
|
||||
)
|
||||
|
||||
# Unknown part type — reject explicitly so clients get a clear error
|
||||
# instead of a silently dropped turn.
|
||||
raise ValueError(
|
||||
f"unsupported_content_type:Unsupported content part type {raw_type!r}. "
|
||||
"Only text and image_url/input_image parts are supported."
|
||||
)
|
||||
|
||||
if not normalized_parts:
|
||||
return ""
|
||||
|
||||
# Text-only: collapse to a plain string so downstream logging/trajectory
|
||||
# code sees the native shape and prompt caching on text-only turns is
|
||||
# unaffected.
|
||||
if all(p.get("type") == "text" for p in normalized_parts):
|
||||
return "\n".join(p["text"] for p in normalized_parts if p.get("text"))
|
||||
|
||||
return normalized_parts
|
||||
|
||||
|
||||
def _content_has_visible_payload(content: Any) -> bool:
|
||||
"""True when content has any text or image attachment. Used to reject empty turns."""
|
||||
if isinstance(content, str):
|
||||
return bool(content.strip())
|
||||
if isinstance(content, list):
|
||||
for part in content:
|
||||
if isinstance(part, dict):
|
||||
ptype = str(part.get("type") or "").strip().lower()
|
||||
if ptype in _TEXT_PART_TYPES and str(part.get("text") or "").strip():
|
||||
return True
|
||||
if ptype in _IMAGE_PART_TYPES:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _multimodal_validation_error(exc: ValueError, *, param: str) -> "web.Response":
|
||||
"""Translate a ``_normalize_multimodal_content`` ValueError into a 400 response."""
|
||||
raw = str(exc)
|
||||
code, _, message = raw.partition(":")
|
||||
if not message:
|
||||
code, message = "invalid_content_part", raw
|
||||
return web.json_response(
|
||||
_openai_error(message, code=code, param=param),
|
||||
status=400,
|
||||
)
|
||||
|
||||
|
||||
def check_api_server_requirements() -> bool:
|
||||
"""Check if API server dependencies are available."""
|
||||
return AIOHTTP_AVAILABLE
|
||||
@@ -323,6 +169,7 @@ class ResponseStore:
|
||||
).fetchone()
|
||||
if row is None:
|
||||
return None
|
||||
import time
|
||||
self._conn.execute(
|
||||
"UPDATE responses SET accessed_at = ? WHERE response_id = ?",
|
||||
(time.time(), response_id),
|
||||
@@ -332,6 +179,7 @@ class ResponseStore:
|
||||
|
||||
def put(self, response_id: str, data: Dict[str, Any]) -> None:
|
||||
"""Store a response, evicting the oldest if at capacity."""
|
||||
import time
|
||||
self._conn.execute(
|
||||
"INSERT OR REPLACE INTO responses (response_id, data, accessed_at) VALUES (?, ?, ?)",
|
||||
(response_id, json.dumps(data, default=str), time.time()),
|
||||
@@ -467,12 +315,12 @@ class _IdempotencyCache:
|
||||
def __init__(self, max_items: int = 1000, ttl_seconds: int = 300):
|
||||
from collections import OrderedDict
|
||||
self._store = OrderedDict()
|
||||
self._inflight: Dict[tuple[str, str], "asyncio.Task[Any]"] = {}
|
||||
self._ttl = ttl_seconds
|
||||
self._max = max_items
|
||||
|
||||
def _purge(self):
|
||||
now = time.time()
|
||||
import time as _t
|
||||
now = _t.time()
|
||||
expired = [k for k, v in self._store.items() if now - v["ts"] > self._ttl]
|
||||
for k in expired:
|
||||
self._store.pop(k, None)
|
||||
@@ -484,27 +332,11 @@ class _IdempotencyCache:
|
||||
item = self._store.get(key)
|
||||
if item and item["fp"] == fingerprint:
|
||||
return item["resp"]
|
||||
|
||||
inflight_key = (key, fingerprint)
|
||||
task = self._inflight.get(inflight_key)
|
||||
if task is None:
|
||||
async def _compute_and_store():
|
||||
resp = await compute_coro()
|
||||
import time as _t
|
||||
self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
|
||||
self._purge()
|
||||
return resp
|
||||
|
||||
task = asyncio.create_task(_compute_and_store())
|
||||
self._inflight[inflight_key] = task
|
||||
|
||||
def _clear_inflight(done_task: "asyncio.Task[Any]") -> None:
|
||||
if self._inflight.get(inflight_key) is done_task:
|
||||
self._inflight.pop(inflight_key, None)
|
||||
|
||||
task.add_done_callback(_clear_inflight)
|
||||
|
||||
return await asyncio.shield(task)
|
||||
resp = await compute_coro()
|
||||
import time as _t
|
||||
self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
|
||||
self._purge()
|
||||
return resp
|
||||
|
||||
|
||||
_idem_cache = _IdempotencyCache()
|
||||
@@ -534,30 +366,6 @@ def _derive_chat_session_id(
|
||||
return f"api-{digest}"
|
||||
|
||||
|
||||
_CRON_AVAILABLE = False
|
||||
try:
|
||||
from cron.jobs import (
|
||||
list_jobs as _cron_list,
|
||||
get_job as _cron_get,
|
||||
create_job as _cron_create,
|
||||
update_job as _cron_update,
|
||||
remove_job as _cron_remove,
|
||||
pause_job as _cron_pause,
|
||||
resume_job as _cron_resume,
|
||||
trigger_job as _cron_trigger,
|
||||
)
|
||||
_CRON_AVAILABLE = True
|
||||
except ImportError:
|
||||
_cron_list = None
|
||||
_cron_get = None
|
||||
_cron_create = None
|
||||
_cron_update = None
|
||||
_cron_remove = None
|
||||
_cron_pause = None
|
||||
_cron_resume = None
|
||||
_cron_trigger = None
|
||||
|
||||
|
||||
class APIServerAdapter(BasePlatformAdapter):
|
||||
"""
|
||||
OpenAI-compatible HTTP API server adapter.
|
||||
@@ -829,32 +637,26 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
system_prompt = None
|
||||
conversation_messages: List[Dict[str, str]] = []
|
||||
|
||||
for idx, msg in enumerate(messages):
|
||||
for msg in messages:
|
||||
role = msg.get("role", "")
|
||||
raw_content = msg.get("content", "")
|
||||
content = _normalize_chat_content(msg.get("content", ""))
|
||||
if role == "system":
|
||||
# System messages don't support images (Anthropic rejects, OpenAI
|
||||
# text-model systems don't render them). Flatten to text.
|
||||
content = _normalize_chat_content(raw_content)
|
||||
# Accumulate system messages
|
||||
if system_prompt is None:
|
||||
system_prompt = content
|
||||
else:
|
||||
system_prompt = system_prompt + "\n" + content
|
||||
elif role in ("user", "assistant"):
|
||||
try:
|
||||
content = _normalize_multimodal_content(raw_content)
|
||||
except ValueError as exc:
|
||||
return _multimodal_validation_error(exc, param=f"messages[{idx}].content")
|
||||
conversation_messages.append({"role": role, "content": content})
|
||||
|
||||
# Extract the last user message as the primary input
|
||||
user_message: Any = ""
|
||||
user_message = ""
|
||||
history = []
|
||||
if conversation_messages:
|
||||
user_message = conversation_messages[-1].get("content", "")
|
||||
history = conversation_messages[:-1]
|
||||
|
||||
if not _content_has_visible_payload(user_message):
|
||||
if not user_message:
|
||||
return web.json_response(
|
||||
{"error": {"message": "No user message found in messages", "type": "invalid_request_error"}},
|
||||
status=400,
|
||||
@@ -1622,19 +1424,16 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
# No error if conversation doesn't exist yet — it's a new conversation
|
||||
|
||||
# Normalize input to message list
|
||||
input_messages: List[Dict[str, Any]] = []
|
||||
input_messages: List[Dict[str, str]] = []
|
||||
if isinstance(raw_input, str):
|
||||
input_messages = [{"role": "user", "content": raw_input}]
|
||||
elif isinstance(raw_input, list):
|
||||
for idx, item in enumerate(raw_input):
|
||||
for item in raw_input:
|
||||
if isinstance(item, str):
|
||||
input_messages.append({"role": "user", "content": item})
|
||||
elif isinstance(item, dict):
|
||||
role = item.get("role", "user")
|
||||
try:
|
||||
content = _normalize_multimodal_content(item.get("content", ""))
|
||||
except ValueError as exc:
|
||||
return _multimodal_validation_error(exc, param=f"input[{idx}].content")
|
||||
content = _normalize_chat_content(item.get("content", ""))
|
||||
input_messages.append({"role": role, "content": content})
|
||||
else:
|
||||
return web.json_response(_openai_error("'input' must be a string or array"), status=400)
|
||||
@@ -1643,7 +1442,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
# This lets stateless clients supply their own history instead of
|
||||
# relying on server-side response chaining via previous_response_id.
|
||||
# Precedence: explicit conversation_history > previous_response_id.
|
||||
conversation_history: List[Dict[str, Any]] = []
|
||||
conversation_history: List[Dict[str, str]] = []
|
||||
raw_history = body.get("conversation_history")
|
||||
if raw_history:
|
||||
if not isinstance(raw_history, list):
|
||||
@@ -1657,11 +1456,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
_openai_error(f"conversation_history[{i}] must have 'role' and 'content' fields"),
|
||||
status=400,
|
||||
)
|
||||
try:
|
||||
entry_content = _normalize_multimodal_content(entry["content"])
|
||||
except ValueError as exc:
|
||||
return _multimodal_validation_error(exc, param=f"conversation_history[{i}].content")
|
||||
conversation_history.append({"role": str(entry["role"]), "content": entry_content})
|
||||
conversation_history.append({"role": str(entry["role"]), "content": str(entry["content"])})
|
||||
if previous_response_id:
|
||||
logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")
|
||||
|
||||
@@ -1681,8 +1476,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
conversation_history.append(msg)
|
||||
|
||||
# Last input message is the user_message
|
||||
user_message: Any = input_messages[-1].get("content", "") if input_messages else ""
|
||||
if not _content_has_visible_payload(user_message):
|
||||
user_message = input_messages[-1].get("content", "") if input_messages else ""
|
||||
if not user_message:
|
||||
return web.json_response(_openai_error("No user message found in input"), status=400)
|
||||
|
||||
# Truncation support
|
||||
@@ -1887,16 +1682,44 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
# Cron jobs API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
# Check cron module availability once (not per-request)
|
||||
_CRON_AVAILABLE = False
|
||||
try:
|
||||
from cron.jobs import (
|
||||
list_jobs as _cron_list,
|
||||
get_job as _cron_get,
|
||||
create_job as _cron_create,
|
||||
update_job as _cron_update,
|
||||
remove_job as _cron_remove,
|
||||
pause_job as _cron_pause,
|
||||
resume_job as _cron_resume,
|
||||
trigger_job as _cron_trigger,
|
||||
)
|
||||
# Wrap as staticmethod to prevent descriptor binding — these are plain
|
||||
# module functions, not instance methods. Without this, self._cron_*()
|
||||
# injects ``self`` as the first positional argument and every call
|
||||
# raises TypeError.
|
||||
_cron_list = staticmethod(_cron_list)
|
||||
_cron_get = staticmethod(_cron_get)
|
||||
_cron_create = staticmethod(_cron_create)
|
||||
_cron_update = staticmethod(_cron_update)
|
||||
_cron_remove = staticmethod(_cron_remove)
|
||||
_cron_pause = staticmethod(_cron_pause)
|
||||
_cron_resume = staticmethod(_cron_resume)
|
||||
_cron_trigger = staticmethod(_cron_trigger)
|
||||
_CRON_AVAILABLE = True
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
_JOB_ID_RE = __import__("re").compile(r"[a-f0-9]{12}")
|
||||
# Allowed fields for update — prevents clients injecting arbitrary keys
|
||||
_UPDATE_ALLOWED_FIELDS = {"name", "schedule", "prompt", "deliver", "skills", "skill", "repeat", "enabled"}
|
||||
_MAX_NAME_LENGTH = 200
|
||||
_MAX_PROMPT_LENGTH = 5000
|
||||
|
||||
@staticmethod
|
||||
def _check_jobs_available() -> Optional["web.Response"]:
|
||||
def _check_jobs_available(self) -> Optional["web.Response"]:
|
||||
"""Return error response if cron module isn't available."""
|
||||
if not _CRON_AVAILABLE:
|
||||
if not self._CRON_AVAILABLE:
|
||||
return web.json_response(
|
||||
{"error": "Cron module not available"}, status=501,
|
||||
)
|
||||
@@ -1921,7 +1744,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
return cron_err
|
||||
try:
|
||||
include_disabled = request.query.get("include_disabled", "").lower() in ("true", "1")
|
||||
jobs = _cron_list(include_disabled=include_disabled)
|
||||
jobs = self._cron_list(include_disabled=include_disabled)
|
||||
return web.json_response({"jobs": jobs})
|
||||
except Exception as e:
|
||||
return web.json_response({"error": str(e)}, status=500)
|
||||
@@ -1969,7 +1792,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if repeat is not None:
|
||||
kwargs["repeat"] = repeat
|
||||
|
||||
job = _cron_create(**kwargs)
|
||||
job = self._cron_create(**kwargs)
|
||||
return web.json_response({"job": job})
|
||||
except Exception as e:
|
||||
return web.json_response({"error": str(e)}, status=500)
|
||||
@@ -1986,7 +1809,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if id_err:
|
||||
return id_err
|
||||
try:
|
||||
job = _cron_get(job_id)
|
||||
job = self._cron_get(job_id)
|
||||
if not job:
|
||||
return web.json_response({"error": "Job not found"}, status=404)
|
||||
return web.json_response({"job": job})
|
||||
@@ -2019,7 +1842,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
return web.json_response(
|
||||
{"error": f"Prompt must be ≤ {self._MAX_PROMPT_LENGTH} characters"}, status=400,
|
||||
)
|
||||
job = _cron_update(job_id, sanitized)
|
||||
job = self._cron_update(job_id, sanitized)
|
||||
if not job:
|
||||
return web.json_response({"error": "Job not found"}, status=404)
|
||||
return web.json_response({"job": job})
|
||||
@@ -2038,7 +1861,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if id_err:
|
||||
return id_err
|
||||
try:
|
||||
success = _cron_remove(job_id)
|
||||
success = self._cron_remove(job_id)
|
||||
if not success:
|
||||
return web.json_response({"error": "Job not found"}, status=404)
|
||||
return web.json_response({"ok": True})
|
||||
@@ -2057,7 +1880,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if id_err:
|
||||
return id_err
|
||||
try:
|
||||
job = _cron_pause(job_id)
|
||||
job = self._cron_pause(job_id)
|
||||
if not job:
|
||||
return web.json_response({"error": "Job not found"}, status=404)
|
||||
return web.json_response({"job": job})
|
||||
@@ -2076,7 +1899,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if id_err:
|
||||
return id_err
|
||||
try:
|
||||
job = _cron_resume(job_id)
|
||||
job = self._cron_resume(job_id)
|
||||
if not job:
|
||||
return web.json_response({"error": "Job not found"}, status=404)
|
||||
return web.json_response({"job": job})
|
||||
@@ -2095,7 +1918,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if id_err:
|
||||
return id_err
|
||||
try:
|
||||
job = _cron_trigger(job_id)
|
||||
job = self._cron_trigger(job_id)
|
||||
if not job:
|
||||
return web.json_response({"error": "Job not found"}, status=404)
|
||||
return web.json_response({"job": job})
|
||||
|
||||
+52
-428
@@ -6,7 +6,6 @@ and implement the required methods.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
import ipaddress
|
||||
import logging
|
||||
import os
|
||||
@@ -19,8 +18,6 @@ import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
from utils import normalize_proxy_url
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -161,13 +158,13 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
|
||||
if platform_env_var:
|
||||
value = (os.environ.get(platform_env_var) or "").strip()
|
||||
if value:
|
||||
return normalize_proxy_url(value)
|
||||
return value
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy"):
|
||||
value = (os.environ.get(key) or "").strip()
|
||||
if value:
|
||||
return normalize_proxy_url(value)
|
||||
return normalize_proxy_url(_detect_macos_system_proxy())
|
||||
return value
|
||||
return _detect_macos_system_proxy()
|
||||
|
||||
|
||||
def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
|
||||
@@ -393,9 +390,12 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
|
||||
if not is_safe_url(url):
|
||||
raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")
|
||||
|
||||
import asyncio
|
||||
import httpx
|
||||
_log = logging.getLogger(__name__)
|
||||
import logging as _logging
|
||||
_log = _logging.getLogger(__name__)
|
||||
|
||||
last_exc = None
|
||||
async with httpx.AsyncClient(
|
||||
timeout=30.0,
|
||||
follow_redirects=True,
|
||||
@@ -413,6 +413,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
|
||||
response.raise_for_status()
|
||||
return cache_image_from_bytes(response.content, ext)
|
||||
except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
|
||||
last_exc = exc
|
||||
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
|
||||
raise
|
||||
if attempt < retries:
|
||||
@@ -428,6 +429,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
|
||||
await asyncio.sleep(wait)
|
||||
continue
|
||||
raise
|
||||
raise last_exc
|
||||
|
||||
|
||||
def cleanup_image_cache(max_age_hours: int = 24) -> int:
|
||||
@@ -507,9 +509,12 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
|
||||
if not is_safe_url(url):
|
||||
raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")
|
||||
|
||||
import asyncio
|
||||
import httpx
|
||||
_log = logging.getLogger(__name__)
|
||||
import logging as _logging
|
||||
_log = _logging.getLogger(__name__)
|
||||
|
||||
last_exc = None
|
||||
async with httpx.AsyncClient(
|
||||
timeout=30.0,
|
||||
follow_redirects=True,
|
||||
@@ -527,6 +532,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
|
||||
response.raise_for_status()
|
||||
return cache_audio_from_bytes(response.content, ext)
|
||||
except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
|
||||
last_exc = exc
|
||||
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
|
||||
raise
|
||||
if attempt < retries:
|
||||
@@ -542,39 +548,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
|
||||
await asyncio.sleep(wait)
|
||||
continue
|
||||
raise
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Video cache utilities
|
||||
#
|
||||
# Same pattern as image/audio cache -- videos from platforms are downloaded
|
||||
# here so the agent can reference them by local file path.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
VIDEO_CACHE_DIR = get_hermes_dir("cache/videos", "video_cache")
|
||||
|
||||
SUPPORTED_VIDEO_TYPES = {
|
||||
".mp4": "video/mp4",
|
||||
".mov": "video/quicktime",
|
||||
".webm": "video/webm",
|
||||
".mkv": "video/x-matroska",
|
||||
".avi": "video/x-msvideo",
|
||||
}
|
||||
|
||||
|
||||
def get_video_cache_dir() -> Path:
|
||||
"""Return the video cache directory, creating it if it doesn't exist."""
|
||||
VIDEO_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
return VIDEO_CACHE_DIR
|
||||
|
||||
|
||||
def cache_video_from_bytes(data: bytes, ext: str = ".mp4") -> str:
|
||||
"""Save raw video bytes to the cache and return the absolute file path."""
|
||||
cache_dir = get_video_cache_dir()
|
||||
filename = f"video_{uuid.uuid4().hex[:12]}{ext}"
|
||||
filepath = cache_dir / filename
|
||||
filepath.write_bytes(data)
|
||||
return str(filepath)
|
||||
raise last_exc
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -752,10 +726,7 @@ class MessageEvent:
|
||||
if not self.is_command():
|
||||
return self.text
|
||||
parts = self.text.split(maxsplit=1)
|
||||
args = parts[1] if len(parts) > 1 else ""
|
||||
# iOS auto-corrects -- to — (em dash) and - to – (en dash)
|
||||
args = args.replace("\u2014\u2014", "--").replace("\u2014", "--").replace("\u2013", "-")
|
||||
return args
|
||||
return parts[1] if len(parts) > 1 else ""
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -900,26 +871,19 @@ class BasePlatformAdapter(ABC):
|
||||
self._fatal_error_retryable = True
|
||||
self._fatal_error_handler: Optional[Callable[["BasePlatformAdapter"], Awaitable[None] | None]] = None
|
||||
|
||||
# Track active message handlers per session for interrupt support.
|
||||
# _active_sessions stores the per-session interrupt Event; _session_tasks
|
||||
# maps session → the specific Task currently processing it so that
|
||||
# session-terminating commands (/stop, /new, /reset) can cancel the
|
||||
# right task and release the adapter-level guard deterministically.
|
||||
# Without the owner-task map, an old task's finally block could delete
|
||||
# a newer task's guard, leaving stale busy state.
|
||||
# Track active message handlers per session for interrupt support
|
||||
# Key: session_key (e.g., chat_id), Value: (event, asyncio.Event for interrupt)
|
||||
self._active_sessions: Dict[str, asyncio.Event] = {}
|
||||
self._pending_messages: Dict[str, MessageEvent] = {}
|
||||
self._session_tasks: Dict[str, asyncio.Task] = {}
|
||||
# Background message-processing tasks spawned by handle_message().
|
||||
# Gateway shutdown cancels these so an old gateway instance doesn't keep
|
||||
# working on a task after --replace or manual restarts.
|
||||
self._background_tasks: set[asyncio.Task] = set()
|
||||
# One-shot callbacks to fire after the main response is delivered.
|
||||
# Keyed by session_key. Values are either a bare callback (legacy) or
|
||||
# a ``(generation, callback)`` tuple so GatewayRunner can make deferred
|
||||
# deliveries generation-aware and avoid stale runs clearing callbacks
|
||||
# registered by a fresher run for the same session.
|
||||
self._post_delivery_callbacks: Dict[str, Any] = {}
|
||||
# Keyed by session_key. GatewayRunner uses this to defer
|
||||
# background-review notifications ("💾 Skill created") until the
|
||||
# primary reply has been sent.
|
||||
self._post_delivery_callbacks: Dict[str, Callable] = {}
|
||||
self._expected_cancelled_tasks: set[asyncio.Task] = set()
|
||||
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
|
||||
# Chats where auto-TTS on voice input is disabled (set by /voice off)
|
||||
@@ -1352,7 +1316,7 @@ class BasePlatformAdapter(ABC):
|
||||
# Extract MEDIA:<path> tags, allowing optional whitespace after the colon
|
||||
# and quoted/backticked paths for LLM-formatted outputs.
|
||||
media_pattern = re.compile(
|
||||
r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
|
||||
r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
|
||||
)
|
||||
for match in media_pattern.finditer(content):
|
||||
path = match.group("path").strip()
|
||||
@@ -1437,13 +1401,7 @@ class BasePlatformAdapter(ABC):
|
||||
|
||||
return paths, cleaned
|
||||
|
||||
async def _keep_typing(
|
||||
self,
|
||||
chat_id: str,
|
||||
interval: float = 2.0,
|
||||
metadata=None,
|
||||
stop_event: asyncio.Event | None = None,
|
||||
) -> None:
|
||||
async def _keep_typing(self, chat_id: str, interval: float = 2.0, metadata=None) -> None:
|
||||
"""
|
||||
Continuously send typing indicator until cancelled.
|
||||
|
||||
@@ -1457,18 +1415,9 @@ class BasePlatformAdapter(ABC):
|
||||
"""
|
||||
try:
|
||||
while True:
|
||||
if stop_event is not None and stop_event.is_set():
|
||||
return
|
||||
if chat_id not in self._typing_paused:
|
||||
await self.send_typing(chat_id, metadata=metadata)
|
||||
if stop_event is None:
|
||||
await asyncio.sleep(interval)
|
||||
continue
|
||||
try:
|
||||
await asyncio.wait_for(stop_event.wait(), timeout=interval)
|
||||
except asyncio.TimeoutError:
|
||||
continue
|
||||
return
|
||||
await asyncio.sleep(interval)
|
||||
except asyncio.CancelledError:
|
||||
pass # Normal cancellation when handler completes
|
||||
finally:
|
||||
@@ -1495,59 +1444,6 @@ class BasePlatformAdapter(ABC):
|
||||
"""Resume typing indicator for a chat after approval resolves."""
|
||||
self._typing_paused.discard(chat_id)
|
||||
|
||||
async def interrupt_session_activity(self, session_key: str, chat_id: str) -> None:
|
||||
"""Signal the active session loop to stop and clear typing immediately."""
|
||||
if session_key:
|
||||
interrupt_event = self._active_sessions.get(session_key)
|
||||
if interrupt_event is not None:
|
||||
interrupt_event.set()
|
||||
try:
|
||||
await self.stop_typing(chat_id)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def register_post_delivery_callback(
|
||||
self,
|
||||
session_key: str,
|
||||
callback: Callable,
|
||||
*,
|
||||
generation: int | None = None,
|
||||
) -> None:
|
||||
"""Register a deferred callback to fire after the main response.
|
||||
|
||||
``generation`` lets callers tie the callback to a specific gateway run
|
||||
generation so stale runs cannot clear callbacks owned by a fresher run.
|
||||
"""
|
||||
if not session_key or not callable(callback):
|
||||
return
|
||||
if generation is None:
|
||||
self._post_delivery_callbacks[session_key] = callback
|
||||
else:
|
||||
self._post_delivery_callbacks[session_key] = (int(generation), callback)
|
||||
|
||||
def pop_post_delivery_callback(
|
||||
self,
|
||||
session_key: str,
|
||||
*,
|
||||
generation: int | None = None,
|
||||
) -> Callable | None:
|
||||
"""Pop a deferred callback, optionally requiring generation ownership."""
|
||||
if not session_key:
|
||||
return None
|
||||
entry = self._post_delivery_callbacks.get(session_key)
|
||||
if entry is None:
|
||||
return None
|
||||
if isinstance(entry, tuple) and len(entry) == 2:
|
||||
entry_generation, callback = entry
|
||||
if generation is not None and int(entry_generation) != int(generation):
|
||||
return None
|
||||
self._post_delivery_callbacks.pop(session_key, None)
|
||||
return callback if callable(callback) else None
|
||||
if generation is not None:
|
||||
return None
|
||||
self._post_delivery_callbacks.pop(session_key, None)
|
||||
return entry if callable(entry) else None
|
||||
|
||||
# ── Processing lifecycle hooks ──────────────────────────────────────────
|
||||
# Subclasses override these to react to message processing events
|
||||
# (e.g. Discord adds 👀/✅/❌ reactions).
|
||||
@@ -1686,222 +1582,6 @@ class BasePlatformAdapter(ABC):
|
||||
return f"{existing_text}\n\n{new_text}".strip()
|
||||
return existing_text
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Session task + guard ownership helpers
|
||||
# ------------------------------------------------------------------
|
||||
# These were introduced together with the _session_tasks owner map to
|
||||
# make session lifecycle reconciliation deterministic across (a) the
|
||||
# normal completion path, (b) /stop/ /new/ /reset bypass commands,
|
||||
# and (c) stale-lock self-heal on the next inbound message.
|
||||
|
||||
def _release_session_guard(
|
||||
self,
|
||||
session_key: str,
|
||||
*,
|
||||
guard: Optional[asyncio.Event] = None,
|
||||
) -> None:
|
||||
"""Release the adapter-level guard for a session.
|
||||
|
||||
When ``guard`` is provided, only release the entry if it still points
|
||||
at that exact Event. This lets reset-like commands swap in a temporary
|
||||
guard while the old processing task unwinds, without having the old
|
||||
task's cleanup accidentally clear the replacement guard.
|
||||
"""
|
||||
current_guard = self._active_sessions.get(session_key)
|
||||
if current_guard is None:
|
||||
return
|
||||
if guard is not None and current_guard is not guard:
|
||||
return
|
||||
del self._active_sessions[session_key]
|
||||
|
||||
def _session_task_is_stale(self, session_key: str) -> bool:
|
||||
"""Return True if the owner task for ``session_key`` is done/cancelled.
|
||||
|
||||
A lock is "stale" when the adapter still has ``_active_sessions[key]``
|
||||
AND a known owner task in ``_session_tasks`` that has already exited.
|
||||
When there is no owner task at all, that usually means the guard was
|
||||
installed by some path other than handle_message() (tests sometimes
|
||||
install guards directly) — don't treat that as stale. The on-entry
|
||||
self-heal only needs to handle the production split-brain case where
|
||||
an owner task was recorded, then exited without clearing its guard.
|
||||
"""
|
||||
task = self._session_tasks.get(session_key)
|
||||
if task is None:
|
||||
return False
|
||||
done = getattr(task, "done", None)
|
||||
return bool(done and done())
|
||||
|
||||
def _heal_stale_session_lock(self, session_key: str) -> bool:
|
||||
"""Clear a stale session lock if the owner task is already gone.
|
||||
|
||||
Returns True if a stale lock was healed. Returns False if there is
|
||||
no lock, or the owner task is still alive (the normal busy case).
|
||||
|
||||
This is the on-entry safety net sidbin's issue #11016 analysis calls
|
||||
for: without it, a split-brain — adapter still thinks the session is
|
||||
active, but nothing is actually processing — traps the chat in
|
||||
infinite "Interrupting current task..." until the gateway is
|
||||
restarted.
|
||||
"""
|
||||
if session_key not in self._active_sessions:
|
||||
return False
|
||||
if not self._session_task_is_stale(session_key):
|
||||
return False
|
||||
logger.warning(
|
||||
"[%s] Healing stale session lock for %s (owner task is done/absent)",
|
||||
self.name,
|
||||
session_key,
|
||||
)
|
||||
self._active_sessions.pop(session_key, None)
|
||||
self._pending_messages.pop(session_key, None)
|
||||
self._session_tasks.pop(session_key, None)
|
||||
return True
|
||||
|
||||
def _start_session_processing(
|
||||
self,
|
||||
event: MessageEvent,
|
||||
session_key: str,
|
||||
*,
|
||||
interrupt_event: Optional[asyncio.Event] = None,
|
||||
) -> bool:
|
||||
"""Spawn a background processing task under the given session guard.
|
||||
|
||||
Returns True on success. If the runtime stubs ``create_task`` with a
|
||||
non-Task sentinel (some tests do this), the guard is rolled back and
|
||||
False is returned so the caller isn't left holding a half-installed
|
||||
session lock.
|
||||
"""
|
||||
guard = interrupt_event or asyncio.Event()
|
||||
self._active_sessions[session_key] = guard
|
||||
|
||||
task = asyncio.create_task(self._process_message_background(event, session_key))
|
||||
self._session_tasks[session_key] = task
|
||||
try:
|
||||
self._background_tasks.add(task)
|
||||
except TypeError:
|
||||
# Tests stub create_task() with lightweight sentinels that are not
|
||||
# hashable and do not support lifecycle callbacks.
|
||||
self._session_tasks.pop(session_key, None)
|
||||
self._release_session_guard(session_key, guard=guard)
|
||||
return False
|
||||
if hasattr(task, "add_done_callback"):
|
||||
task.add_done_callback(self._background_tasks.discard)
|
||||
task.add_done_callback(self._expected_cancelled_tasks.discard)
|
||||
return True
|
||||
|
||||
async def cancel_session_processing(
|
||||
self,
|
||||
session_key: str,
|
||||
*,
|
||||
release_guard: bool = True,
|
||||
discard_pending: bool = True,
|
||||
) -> None:
|
||||
"""Cancel in-flight processing for a single session.
|
||||
|
||||
``release_guard=False`` keeps the adapter-level session guard in place
|
||||
so reset-like commands can finish atomically before follow-up messages
|
||||
are allowed to start a fresh background task.
|
||||
"""
|
||||
task = self._session_tasks.pop(session_key, None)
|
||||
if task is not None and not task.done():
|
||||
logger.debug(
|
||||
"[%s] Cancelling active processing for session %s",
|
||||
self.name,
|
||||
session_key,
|
||||
)
|
||||
self._expected_cancelled_tasks.add(task)
|
||||
task.cancel()
|
||||
try:
|
||||
await task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"[%s] Session cancellation raised while unwinding %s",
|
||||
self.name,
|
||||
session_key,
|
||||
exc_info=True,
|
||||
)
|
||||
if discard_pending:
|
||||
self._pending_messages.pop(session_key, None)
|
||||
if release_guard:
|
||||
self._release_session_guard(session_key)
|
||||
|
||||
async def _drain_pending_after_session_command(
|
||||
self,
|
||||
session_key: str,
|
||||
command_guard: asyncio.Event,
|
||||
) -> None:
|
||||
"""Resume the latest queued follow-up once a session command completes.
|
||||
|
||||
Called at the tail of /stop, /new, and /reset dispatch. Releases the
|
||||
command-scoped guard, then — if a follow-up message landed while the
|
||||
command was running — spawns a fresh processing task for it.
|
||||
"""
|
||||
pending_event = self._pending_messages.pop(session_key, None)
|
||||
self._release_session_guard(session_key, guard=command_guard)
|
||||
if pending_event is None:
|
||||
return
|
||||
self._start_session_processing(pending_event, session_key)
|
||||
|
||||
async def _dispatch_active_session_command(
|
||||
self,
|
||||
event: MessageEvent,
|
||||
session_key: str,
|
||||
cmd: str,
|
||||
) -> None:
|
||||
"""Dispatch a reset-like bypass command while preserving guard ordering.
|
||||
|
||||
/stop, /new, and /reset must:
|
||||
1. Keep the session guard installed while the runner processes the
|
||||
command (so a racing follow-up message stays queued, not
|
||||
dispatched as a second parallel run).
|
||||
2. Cancel the old in-flight adapter task only AFTER the runner has
|
||||
finished handling the command (so the runner sees consistent
|
||||
state and its response is sent in order).
|
||||
3. Release the command-scoped guard and drain the latest queued
|
||||
follow-up exactly once, after 1 and 2 complete.
|
||||
"""
|
||||
logger.debug(
|
||||
"[%s] Command '/%s' bypassing active-session guard for %s",
|
||||
self.name,
|
||||
cmd,
|
||||
session_key,
|
||||
)
|
||||
|
||||
current_guard = self._active_sessions.get(session_key)
|
||||
command_guard = asyncio.Event()
|
||||
self._active_sessions[session_key] = command_guard
|
||||
thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
|
||||
|
||||
try:
|
||||
response = await self._message_handler(event)
|
||||
# Old adapter task (if any) is cancelled AFTER the runner has
|
||||
# fully handled the command — keeps ordering deterministic.
|
||||
await self.cancel_session_processing(
|
||||
session_key,
|
||||
release_guard=False,
|
||||
discard_pending=False,
|
||||
)
|
||||
if response:
|
||||
await self._send_with_retry(
|
||||
chat_id=event.source.chat_id,
|
||||
content=response,
|
||||
reply_to=event.message_id,
|
||||
metadata=thread_meta,
|
||||
)
|
||||
except Exception:
|
||||
# On failure, restore the original guard if one still exists so
|
||||
# we don't leave the session in a half-reset state.
|
||||
if self._active_sessions.get(session_key) is command_guard:
|
||||
if session_key in self._session_tasks and current_guard is not None:
|
||||
self._active_sessions[session_key] = current_guard
|
||||
else:
|
||||
self._release_session_guard(session_key, guard=command_guard)
|
||||
raise
|
||||
|
||||
await self._drain_pending_after_session_command(session_key, command_guard)
|
||||
|
||||
async def handle_message(self, event: MessageEvent) -> None:
|
||||
"""
|
||||
Process an incoming message.
|
||||
@@ -1918,15 +1598,7 @@ class BasePlatformAdapter(ABC):
|
||||
group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
|
||||
thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
|
||||
)
|
||||
|
||||
# On-entry self-heal: if the adapter still has an _active_sessions
|
||||
# entry for this key but the owner task has already exited (done or
|
||||
# cancelled), the lock is stale. Clear it and fall through to
|
||||
# normal dispatch so the user isn't trapped behind a dead guard —
|
||||
# this is the split-brain tail described in issue #11016.
|
||||
if session_key in self._active_sessions:
|
||||
self._heal_stale_session_lock(session_key)
|
||||
|
||||
|
||||
# Check if there's already an active handler for this session
|
||||
if session_key in self._active_sessions:
|
||||
# Certain commands must bypass the active-session guard and be
|
||||
@@ -1943,23 +1615,6 @@ class BasePlatformAdapter(ABC):
|
||||
from hermes_cli.commands import should_bypass_active_session
|
||||
|
||||
if should_bypass_active_session(cmd):
|
||||
# /stop, /new, /reset must cancel the in-flight adapter task
|
||||
# and preserve ordering of queued follow-ups. Route those
|
||||
# through the dedicated handoff path that serializes
|
||||
# cancellation + runner response + pending drain.
|
||||
if cmd in ("stop", "new", "reset"):
|
||||
try:
|
||||
await self._dispatch_active_session_command(event, session_key, cmd)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"[%s] Command '/%s' dispatch failed: %s",
|
||||
self.name, cmd, e, exc_info=True,
|
||||
)
|
||||
return
|
||||
|
||||
# Other bypass commands (/approve, /deny, /status,
|
||||
# /background, /restart) just need direct dispatch — they
|
||||
# don't cancel the running task.
|
||||
logger.debug(
|
||||
"[%s] Command '/%s' bypassing active-session guard for %s",
|
||||
self.name, cmd, session_key,
|
||||
@@ -2005,9 +1660,19 @@ class BasePlatformAdapter(ABC):
|
||||
# starts would also pass the _active_sessions check and spawn a
|
||||
# duplicate task. (grammY sequentialize / aiogram EventIsolation
|
||||
# pattern — set the guard synchronously, not inside the task.)
|
||||
# _start_session_processing installs the guard AND the owner-task
|
||||
# mapping atomically so stale-lock detection works.
|
||||
self._start_session_processing(event, session_key)
|
||||
self._active_sessions[session_key] = asyncio.Event()
|
||||
|
||||
# Spawn background task to process this message
|
||||
task = asyncio.create_task(self._process_message_background(event, session_key))
|
||||
try:
|
||||
self._background_tasks.add(task)
|
||||
except TypeError:
|
||||
# Some tests stub create_task() with lightweight sentinels that are not
|
||||
# hashable and do not support lifecycle callbacks.
|
||||
return
|
||||
if hasattr(task, "add_done_callback"):
|
||||
task.add_done_callback(self._background_tasks.discard)
|
||||
task.add_done_callback(self._expected_cancelled_tasks.discard)
|
||||
|
||||
@staticmethod
|
||||
def _get_human_delay() -> float:
|
||||
@@ -2019,6 +1684,8 @@ class BasePlatformAdapter(ABC):
|
||||
HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode)
|
||||
HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode)
|
||||
"""
|
||||
import random
|
||||
|
||||
mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
|
||||
if mode == "off":
|
||||
return 0.0
|
||||
@@ -2047,23 +1714,10 @@ class BasePlatformAdapter(ABC):
|
||||
# Fall back to a new Event only if the entry was removed externally.
|
||||
interrupt_event = self._active_sessions.get(session_key) or asyncio.Event()
|
||||
self._active_sessions[session_key] = interrupt_event
|
||||
callback_generation = getattr(interrupt_event, "_hermes_run_generation", None)
|
||||
|
||||
# Start continuous typing indicator (refreshes every 2 seconds)
|
||||
_thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None
|
||||
_keep_typing_kwargs = {"metadata": _thread_metadata}
|
||||
try:
|
||||
_keep_typing_sig = inspect.signature(self._keep_typing)
|
||||
except (TypeError, ValueError):
|
||||
_keep_typing_sig = None
|
||||
if _keep_typing_sig is None or "stop_event" in _keep_typing_sig.parameters:
|
||||
_keep_typing_kwargs["stop_event"] = interrupt_event
|
||||
typing_task = asyncio.create_task(
|
||||
self._keep_typing(
|
||||
event.source.chat_id,
|
||||
**_keep_typing_kwargs,
|
||||
)
|
||||
)
|
||||
typing_task = asyncio.create_task(self._keep_typing(event.source.chat_id, metadata=_thread_metadata))
|
||||
|
||||
try:
|
||||
await self._run_processing_hook("on_processing_start", event)
|
||||
@@ -2322,14 +1976,7 @@ class BasePlatformAdapter(ABC):
|
||||
finally:
|
||||
# Fire any one-shot post-delivery callback registered for this
|
||||
# session (e.g. deferred background-review notifications).
|
||||
_callback_generation = callback_generation
|
||||
if hasattr(self, "pop_post_delivery_callback"):
|
||||
_post_cb = self.pop_post_delivery_callback(
|
||||
session_key,
|
||||
generation=_callback_generation,
|
||||
)
|
||||
else:
|
||||
_post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None)
|
||||
_post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None)
|
||||
if callable(_post_cb):
|
||||
try:
|
||||
_post_cb()
|
||||
@@ -2367,9 +2014,6 @@ class BasePlatformAdapter(ABC):
|
||||
drain_task = asyncio.create_task(
|
||||
self._process_message_background(late_pending, session_key)
|
||||
)
|
||||
# Hand ownership of the session to the drain task so stale-lock
|
||||
# detection keeps working while it runs.
|
||||
self._session_tasks[session_key] = drain_task
|
||||
try:
|
||||
self._background_tasks.add(drain_task)
|
||||
drain_task.add_done_callback(self._background_tasks.discard)
|
||||
@@ -2378,15 +2022,10 @@ class BasePlatformAdapter(ABC):
|
||||
pass
|
||||
# Leave _active_sessions[session_key] populated — the drain
|
||||
# task's own lifecycle will clean it up.
|
||||
else:
|
||||
# Clean up session tracking. Guard-match both deletes so a
|
||||
# reset-like command that already swapped in its own
|
||||
# command_guard (and cancelled us) can't be accidentally
|
||||
# cleared by our unwind. The command owns the session now.
|
||||
current_task = asyncio.current_task()
|
||||
if current_task is not None and self._session_tasks.get(session_key) is current_task:
|
||||
del self._session_tasks[session_key]
|
||||
self._release_session_guard(session_key, guard=interrupt_event)
|
||||
return
|
||||
# Clean up session tracking
|
||||
if session_key in self._active_sessions:
|
||||
del self._active_sessions[session_key]
|
||||
|
||||
async def cancel_background_tasks(self) -> None:
|
||||
"""Cancel any in-flight background message-processing tasks.
|
||||
@@ -2394,29 +2033,14 @@ class BasePlatformAdapter(ABC):
|
||||
Used during gateway shutdown/replacement so active sessions from the old
|
||||
process do not keep running after adapters are being torn down.
|
||||
"""
|
||||
# Loop until no new tasks appear. Without this, a message
|
||||
# arriving during the `await asyncio.gather` below would spawn
|
||||
# a fresh _process_message_background task (added to
|
||||
# self._background_tasks at line ~1668 via handle_message),
|
||||
# and the _background_tasks.clear() at the end of this method
|
||||
# would drop the reference — the task runs untracked against a
|
||||
# disconnecting adapter, logs send-failures, and may linger
|
||||
# until it completes on its own. Retrying the drain until the
|
||||
# task set stabilizes closes the window.
|
||||
MAX_DRAIN_ROUNDS = 5
|
||||
for _ in range(MAX_DRAIN_ROUNDS):
|
||||
tasks = [task for task in self._background_tasks if not task.done()]
|
||||
if not tasks:
|
||||
break
|
||||
for task in tasks:
|
||||
self._expected_cancelled_tasks.add(task)
|
||||
task.cancel()
|
||||
tasks = [task for task in self._background_tasks if not task.done()]
|
||||
for task in tasks:
|
||||
self._expected_cancelled_tasks.add(task)
|
||||
task.cancel()
|
||||
if tasks:
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
# Loop: late-arrival tasks spawned during the gather above
|
||||
# will be in self._background_tasks now. Re-check.
|
||||
self._background_tasks.clear()
|
||||
self._expected_cancelled_tasks.clear()
|
||||
self._session_tasks.clear()
|
||||
self._pending_messages.clear()
|
||||
self._active_sessions.clear()
|
||||
|
||||
|
||||
@@ -75,7 +75,7 @@ def _redact(text: str) -> str:
|
||||
def check_bluebubbles_requirements() -> bool:
|
||||
try:
|
||||
import aiohttp # noqa: F401
|
||||
import httpx # noqa: F401
|
||||
import httpx as _httpx # noqa: F401
|
||||
except ImportError:
|
||||
return False
|
||||
return True
|
||||
|
||||
+82
-137
@@ -498,7 +498,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
self._allowed_role_ids: set = set() # For DISCORD_ALLOWED_ROLES filtering
|
||||
# Voice channel state (per-guild)
|
||||
self._voice_clients: Dict[int, Any] = {} # guild_id -> VoiceClient
|
||||
self._voice_locks: Dict[int, asyncio.Lock] = {} # guild_id -> serialize join/leave
|
||||
# Text batching: merge rapid successive messages (Telegram-style)
|
||||
self._text_batch_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS", "0.6"))
|
||||
self._text_batch_split_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0"))
|
||||
@@ -527,7 +526,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
# Reply threading mode: "off" (no replies), "first" (reply on first
|
||||
# chunk only, default), "all" (reply-reference on every chunk).
|
||||
self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
|
||||
self._slash_commands: bool = self.config.extra.get("slash_commands", True)
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Discord and start receiving events."""
|
||||
@@ -542,6 +540,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
# ctypes.util.find_library fails on macOS with Homebrew-installed libs,
|
||||
# so fall back to known Homebrew paths if needed.
|
||||
if not opus_path:
|
||||
import sys
|
||||
_homebrew_paths = (
|
||||
"/opt/homebrew/lib/libopus.dylib", # Apple Silicon
|
||||
"/usr/local/lib/libopus.dylib", # Intel Mac
|
||||
@@ -637,15 +636,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
|
||||
@self._client.event
|
||||
async def on_message(message: DiscordMessage):
|
||||
# Block until _resolve_allowed_usernames has swapped
|
||||
# any raw usernames in DISCORD_ALLOWED_USERS for numeric
|
||||
# IDs (otherwise on_message's author.id lookup can miss).
|
||||
if not adapter_self._ready_event.is_set():
|
||||
try:
|
||||
await asyncio.wait_for(adapter_self._ready_event.wait(), timeout=30.0)
|
||||
except asyncio.TimeoutError:
|
||||
pass
|
||||
|
||||
# Dedup: Discord RESUME replays events after reconnects (#4777)
|
||||
if adapter_self._dedup.is_duplicate(str(message.id)):
|
||||
return
|
||||
@@ -745,8 +735,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
)
|
||||
|
||||
# Register slash commands
|
||||
if self._slash_commands:
|
||||
self._register_slash_commands()
|
||||
self._register_slash_commands()
|
||||
|
||||
# Start the bot in background
|
||||
self._bot_task = asyncio.create_task(self._client.start(self.config.token))
|
||||
@@ -1082,8 +1071,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
chat_id: str,
|
||||
message_id: str,
|
||||
content: str,
|
||||
*,
|
||||
finalize: bool = False,
|
||||
) -> SendResult:
|
||||
"""Edit a previously sent Discord message."""
|
||||
if not self._client:
|
||||
@@ -1250,53 +1237,51 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
return False
|
||||
guild_id = channel.guild.id
|
||||
|
||||
async with self._voice_locks.setdefault(guild_id, asyncio.Lock()):
|
||||
# Already connected in this guild?
|
||||
existing = self._voice_clients.get(guild_id)
|
||||
if existing and existing.is_connected():
|
||||
if existing.channel.id == channel.id:
|
||||
self._reset_voice_timeout(guild_id)
|
||||
return True
|
||||
await existing.move_to(channel)
|
||||
# Already connected in this guild?
|
||||
existing = self._voice_clients.get(guild_id)
|
||||
if existing and existing.is_connected():
|
||||
if existing.channel.id == channel.id:
|
||||
self._reset_voice_timeout(guild_id)
|
||||
return True
|
||||
|
||||
vc = await channel.connect()
|
||||
self._voice_clients[guild_id] = vc
|
||||
await existing.move_to(channel)
|
||||
self._reset_voice_timeout(guild_id)
|
||||
|
||||
# Start voice receiver (Phase 2: listen to users)
|
||||
try:
|
||||
receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids)
|
||||
receiver.start()
|
||||
self._voice_receivers[guild_id] = receiver
|
||||
self._voice_listen_tasks[guild_id] = asyncio.ensure_future(
|
||||
self._voice_listen_loop(guild_id)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("Voice receiver failed to start: %s", e)
|
||||
|
||||
return True
|
||||
|
||||
vc = await channel.connect()
|
||||
self._voice_clients[guild_id] = vc
|
||||
self._reset_voice_timeout(guild_id)
|
||||
|
||||
# Start voice receiver (Phase 2: listen to users)
|
||||
try:
|
||||
receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids)
|
||||
receiver.start()
|
||||
self._voice_receivers[guild_id] = receiver
|
||||
self._voice_listen_tasks[guild_id] = asyncio.ensure_future(
|
||||
self._voice_listen_loop(guild_id)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("Voice receiver failed to start: %s", e)
|
||||
|
||||
return True
|
||||
|
||||
async def leave_voice_channel(self, guild_id: int) -> None:
|
||||
"""Disconnect from the voice channel in a guild."""
|
||||
async with self._voice_locks.setdefault(guild_id, asyncio.Lock()):
|
||||
# Stop voice receiver first
|
||||
receiver = self._voice_receivers.pop(guild_id, None)
|
||||
if receiver:
|
||||
receiver.stop()
|
||||
listen_task = self._voice_listen_tasks.pop(guild_id, None)
|
||||
if listen_task:
|
||||
listen_task.cancel()
|
||||
# Stop voice receiver first
|
||||
receiver = self._voice_receivers.pop(guild_id, None)
|
||||
if receiver:
|
||||
receiver.stop()
|
||||
listen_task = self._voice_listen_tasks.pop(guild_id, None)
|
||||
if listen_task:
|
||||
listen_task.cancel()
|
||||
|
||||
vc = self._voice_clients.pop(guild_id, None)
|
||||
if vc and vc.is_connected():
|
||||
await vc.disconnect()
|
||||
task = self._voice_timeout_tasks.pop(guild_id, None)
|
||||
if task:
|
||||
task.cancel()
|
||||
self._voice_text_channels.pop(guild_id, None)
|
||||
self._voice_sources.pop(guild_id, None)
|
||||
vc = self._voice_clients.pop(guild_id, None)
|
||||
if vc and vc.is_connected():
|
||||
await vc.disconnect()
|
||||
task = self._voice_timeout_tasks.pop(guild_id, None)
|
||||
if task:
|
||||
task.cancel()
|
||||
self._voice_text_channels.pop(guild_id, None)
|
||||
self._voice_sources.pop(guild_id, None)
|
||||
|
||||
# Maximum seconds to wait for voice playback before giving up
|
||||
PLAYBACK_TIMEOUT = 120
|
||||
@@ -1423,7 +1408,8 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
speaking_user_ids: set = set()
|
||||
receiver = self._voice_receivers.get(guild_id)
|
||||
if receiver:
|
||||
now = time.monotonic()
|
||||
import time as _time
|
||||
now = _time.monotonic()
|
||||
with receiver._lock:
|
||||
for ssrc, last_t in receiver._last_packet_time.items():
|
||||
# Consider "speaking" if audio received within last 2 seconds
|
||||
@@ -2131,42 +2117,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
# This ensures new commands added to COMMAND_REGISTRY in
|
||||
# hermes_cli/commands.py automatically appear as Discord slash
|
||||
# commands without needing a manual entry here.
|
||||
def _build_auto_slash_command(_name: str, _description: str, _args_hint: str = ""):
|
||||
"""Build a discord.app_commands.Command that proxies to _run_simple_slash."""
|
||||
discord_name = _name.lower()[:32]
|
||||
desc = (_description or f"Run /{_name}")[:100]
|
||||
has_args = bool(_args_hint)
|
||||
|
||||
if has_args:
|
||||
def _make_args_handler(__name: str, __hint: str):
|
||||
@discord.app_commands.describe(args=f"Arguments: {__hint}"[:100])
|
||||
async def _handler(interaction: discord.Interaction, args: str = ""):
|
||||
await self._run_simple_slash(
|
||||
interaction, f"/{__name} {args}".strip()
|
||||
)
|
||||
_handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
|
||||
return _handler
|
||||
|
||||
handler = _make_args_handler(_name, _args_hint)
|
||||
else:
|
||||
def _make_simple_handler(__name: str):
|
||||
async def _handler(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, f"/{__name}")
|
||||
_handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
|
||||
return _handler
|
||||
|
||||
handler = _make_simple_handler(_name)
|
||||
|
||||
return discord.app_commands.Command(
|
||||
name=discord_name,
|
||||
description=desc,
|
||||
callback=handler,
|
||||
)
|
||||
|
||||
already_registered: set[str] = set()
|
||||
try:
|
||||
from hermes_cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates
|
||||
|
||||
already_registered = set()
|
||||
try:
|
||||
already_registered = {cmd.name for cmd in tree.get_commands()}
|
||||
except Exception:
|
||||
@@ -2181,10 +2135,38 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
discord_name = cmd_def.name.lower()[:32]
|
||||
if discord_name in already_registered:
|
||||
continue
|
||||
auto_cmd = _build_auto_slash_command(
|
||||
cmd_def.name,
|
||||
cmd_def.description,
|
||||
cmd_def.args_hint,
|
||||
# Skip aliases that overlap with already-registered names
|
||||
# (aliases for explicitly registered commands are handled above).
|
||||
desc = (cmd_def.description or f"Run /{cmd_def.name}")[:100]
|
||||
has_args = bool(cmd_def.args_hint)
|
||||
|
||||
if has_args:
|
||||
# Command takes optional arguments — create handler with
|
||||
# an optional ``args`` string parameter.
|
||||
def _make_args_handler(_name: str, _hint: str):
|
||||
@discord.app_commands.describe(args=f"Arguments: {_hint}"[:100])
|
||||
async def _handler(interaction: discord.Interaction, args: str = ""):
|
||||
await self._run_simple_slash(
|
||||
interaction, f"/{_name} {args}".strip()
|
||||
)
|
||||
_handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
|
||||
return _handler
|
||||
|
||||
handler = _make_args_handler(cmd_def.name, cmd_def.args_hint)
|
||||
else:
|
||||
# Parameterless command.
|
||||
def _make_simple_handler(_name: str):
|
||||
async def _handler(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, f"/{_name}")
|
||||
_handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
|
||||
return _handler
|
||||
|
||||
handler = _make_simple_handler(cmd_def.name)
|
||||
|
||||
auto_cmd = discord.app_commands.Command(
|
||||
name=discord_name,
|
||||
description=desc,
|
||||
callback=handler,
|
||||
)
|
||||
try:
|
||||
tree.add_command(auto_cmd)
|
||||
@@ -2201,35 +2183,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
except Exception as e:
|
||||
logger.warning("Discord auto-register from COMMAND_REGISTRY failed: %s", e)
|
||||
|
||||
# ── Plugin-registered slash commands ──
|
||||
# Plugins register via PluginContext.register_command(); we mirror
|
||||
# those into Discord's native slash picker so users get the same
|
||||
# autocomplete UX as for built-in commands. No per-platform plugin
|
||||
# API needed — plugin commands are platform-agnostic.
|
||||
try:
|
||||
from hermes_cli.commands import _iter_plugin_command_entries
|
||||
|
||||
for plugin_name, plugin_desc, plugin_args_hint in _iter_plugin_command_entries():
|
||||
discord_name = plugin_name.lower()[:32]
|
||||
if discord_name in already_registered:
|
||||
continue
|
||||
auto_cmd = _build_auto_slash_command(
|
||||
plugin_name,
|
||||
plugin_desc,
|
||||
plugin_args_hint,
|
||||
)
|
||||
try:
|
||||
tree.add_command(auto_cmd)
|
||||
already_registered.add(discord_name)
|
||||
except Exception:
|
||||
# Silently skip commands that fail registration (e.g.
|
||||
# name conflict with a subcommand group).
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Discord auto-register from plugin commands failed: %s", e
|
||||
)
|
||||
|
||||
# Register skills under a single /skill command group with category
|
||||
# subcommand groups. This uses 1 top-level slot instead of N,
|
||||
# supporting up to 25 categories × 25 skills = 625 skills.
|
||||
@@ -2995,17 +2948,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
parent_channel_id = self._get_parent_channel_id(message.channel)
|
||||
|
||||
is_voice_linked_channel = False
|
||||
|
||||
# Save mention-stripped text before auto-threading since create_thread()
|
||||
# can clobber message.content, breaking /command detection in channels.
|
||||
raw_content = message.content.strip()
|
||||
normalized_content = raw_content
|
||||
mention_prefix = False
|
||||
if self._client.user and self._client.user in message.mentions:
|
||||
mention_prefix = True
|
||||
normalized_content = normalized_content.replace(f"<@{self._client.user.id}>", "").strip()
|
||||
normalized_content = normalized_content.replace(f"<@!{self._client.user.id}>", "").strip()
|
||||
message.content = normalized_content
|
||||
if not isinstance(message.channel, discord.DMChannel):
|
||||
channel_ids = {str(message.channel.id)}
|
||||
if parent_channel_id:
|
||||
@@ -3043,8 +2985,13 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
in_bot_thread = is_thread and thread_id in self._threads
|
||||
|
||||
if require_mention and not is_free_channel and not in_bot_thread:
|
||||
if self._client.user not in message.mentions and not mention_prefix:
|
||||
if self._client.user not in message.mentions:
|
||||
return
|
||||
|
||||
if self._client.user and self._client.user in message.mentions:
|
||||
message.content = message.content.replace(f"<@{self._client.user.id}>", "").strip()
|
||||
message.content = message.content.replace(f"<@!{self._client.user.id}>", "").strip()
|
||||
|
||||
# Auto-thread: when enabled, automatically create a thread for every
|
||||
# @mention in a text channel so each conversation is isolated (like Slack).
|
||||
# Messages already inside threads or DMs are unaffected.
|
||||
@@ -3066,7 +3013,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
|
||||
# Determine message type
|
||||
msg_type = MessageType.TEXT
|
||||
if normalized_content.startswith("/"):
|
||||
if message.content.startswith("/"):
|
||||
msg_type = MessageType.COMMAND
|
||||
elif message.attachments:
|
||||
# Check attachment types
|
||||
@@ -3206,9 +3153,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
att.filename, e, exc_info=True,
|
||||
)
|
||||
|
||||
# Use normalized_content (saved before auto-threading) instead of message.content,
|
||||
# to detect /slash commands in channel messages.
|
||||
event_text = normalized_content
|
||||
event_text = message.content
|
||||
if pending_text_injection:
|
||||
event_text = f"{pending_text_injection}\n\n{event_text}" if event_text else pending_text_injection
|
||||
|
||||
|
||||
@@ -545,7 +545,6 @@ class EmailAdapter(BasePlatformAdapter):
|
||||
caption: Optional[str] = None,
|
||||
file_name: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> SendResult:
|
||||
"""Send a file as an email attachment."""
|
||||
try:
|
||||
|
||||
+108
-581
File diff suppressed because it is too large
Load Diff
@@ -825,7 +825,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
|
||||
|
||||
async def edit_message(
|
||||
self, chat_id: str, message_id: str, content: str, *, finalize: bool = False
|
||||
self, chat_id: str, message_id: str, content: str
|
||||
) -> SendResult:
|
||||
"""Edit an existing message (via m.replace)."""
|
||||
|
||||
|
||||
@@ -304,7 +304,7 @@ class MattermostAdapter(BasePlatformAdapter):
|
||||
)
|
||||
|
||||
async def edit_message(
|
||||
self, chat_id: str, message_id: str, content: str, *, finalize: bool = False
|
||||
self, chat_id: str, message_id: str, content: str
|
||||
) -> SendResult:
|
||||
"""Edit an existing post."""
|
||||
formatted = self.format_message(content)
|
||||
@@ -410,6 +410,7 @@ class MattermostAdapter(BasePlatformAdapter):
|
||||
logger.warning("Mattermost: blocked unsafe URL (SSRF protection)")
|
||||
return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to)
|
||||
|
||||
import asyncio
|
||||
import aiohttp
|
||||
|
||||
last_exc = None
|
||||
|
||||
@@ -26,8 +26,9 @@ from .adapter import ( # noqa: F401
|
||||
# -- Onboard (QR-code scan-to-configure) -----------------------------------
|
||||
from .onboard import ( # noqa: F401
|
||||
BindStatus,
|
||||
create_bind_task,
|
||||
poll_bind_result,
|
||||
build_connect_url,
|
||||
qr_register,
|
||||
)
|
||||
from .crypto import decrypt_secret, generate_bind_key # noqa: F401
|
||||
|
||||
@@ -43,8 +44,9 @@ __all__ = [
|
||||
"_ssrf_redirect_guard",
|
||||
# onboard
|
||||
"BindStatus",
|
||||
"create_bind_task",
|
||||
"poll_bind_result",
|
||||
"build_connect_url",
|
||||
"qr_register",
|
||||
# crypto
|
||||
"decrypt_secret",
|
||||
"generate_bind_key",
|
||||
|
||||
@@ -535,9 +535,6 @@ class QQAdapter(BasePlatformAdapter):
|
||||
quick_disconnect_count = 0
|
||||
else:
|
||||
backoff_idx += 1
|
||||
if backoff_idx >= MAX_RECONNECT_ATTEMPTS:
|
||||
logger.error("[%s] Max reconnect attempts reached (QQCloseError)", self._log_tag)
|
||||
return
|
||||
|
||||
except Exception as exc:
|
||||
if not self._running:
|
||||
@@ -1089,8 +1086,11 @@ class QQAdapter(BasePlatformAdapter):
|
||||
return MessageType.VIDEO
|
||||
if "image" in first_type or "photo" in first_type:
|
||||
return MessageType.PHOTO
|
||||
# Unknown content type with an attachment — don't assume PHOTO
|
||||
# to prevent non-image files from being sent to vision analysis.
|
||||
logger.debug(
|
||||
"Unknown media content_type '%s', defaulting to TEXT",
|
||||
"[%s] Unknown media content_type '%s', defaulting to TEXT",
|
||||
self._log_tag,
|
||||
first_type,
|
||||
)
|
||||
return MessageType.TEXT
|
||||
@@ -1826,12 +1826,14 @@ class QQAdapter(BasePlatformAdapter):
|
||||
body["file_name"] = file_name
|
||||
|
||||
# Retry transient upload failures
|
||||
last_exc = None
|
||||
for attempt in range(3):
|
||||
try:
|
||||
return await self._api_request(
|
||||
"POST", path, body, timeout=FILE_UPLOAD_TIMEOUT
|
||||
)
|
||||
except RuntimeError as exc:
|
||||
last_exc = exc
|
||||
err_msg = str(exc)
|
||||
if any(
|
||||
kw in err_msg
|
||||
@@ -1840,8 +1842,8 @@ class QQAdapter(BasePlatformAdapter):
|
||||
raise
|
||||
if attempt < 2:
|
||||
await asyncio.sleep(1.5 * (attempt + 1))
|
||||
else:
|
||||
raise
|
||||
|
||||
raise last_exc # type: ignore[misc]
|
||||
|
||||
# Maximum time (seconds) to wait for reconnection before giving up on send.
|
||||
_RECONNECT_WAIT_SECONDS = 15.0
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
"""
|
||||
QQBot scan-to-configure (QR code onboard) module.
|
||||
|
||||
Mirrors the Feishu onboarding pattern: synchronous HTTP + a single public
|
||||
entry-point ``qr_register()`` that handles the full flow (create task →
|
||||
display QR code → poll → decrypt credentials).
|
||||
|
||||
Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to
|
||||
generate a QR-code URL and poll for scan completion. On success the caller
|
||||
receives the bot's *app_id*, *client_secret* (decrypted locally), and the
|
||||
@@ -16,20 +12,18 @@ Reference: https://bot.q.qq.com/wiki/develop/api-v2/
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from enum import IntEnum
|
||||
from typing import Optional, Tuple
|
||||
from typing import Tuple
|
||||
from urllib.parse import quote
|
||||
|
||||
from .constants import (
|
||||
ONBOARD_API_TIMEOUT,
|
||||
ONBOARD_CREATE_PATH,
|
||||
ONBOARD_POLL_INTERVAL,
|
||||
ONBOARD_POLL_PATH,
|
||||
PORTAL_HOST,
|
||||
QR_URL_TEMPLATE,
|
||||
)
|
||||
from .crypto import decrypt_secret, generate_bind_key
|
||||
from .crypto import generate_bind_key
|
||||
from .utils import get_api_headers
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -41,7 +35,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BindStatus(IntEnum):
|
||||
"""Status codes returned by ``_poll_bind_result``."""
|
||||
"""Status codes returned by ``poll_bind_result``."""
|
||||
|
||||
NONE = 0
|
||||
PENDING = 1
|
||||
@@ -50,40 +44,18 @@ class BindStatus(IntEnum):
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# QR rendering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
try:
|
||||
import qrcode as _qrcode_mod
|
||||
except (ImportError, TypeError):
|
||||
_qrcode_mod = None # type: ignore[assignment]
|
||||
|
||||
|
||||
def _render_qr(url: str) -> bool:
|
||||
"""Try to render a QR code in the terminal. Returns True if successful."""
|
||||
if _qrcode_mod is None:
|
||||
return False
|
||||
try:
|
||||
qr = _qrcode_mod.QRCode(
|
||||
error_correction=_qrcode_mod.constants.ERROR_CORRECT_M,
|
||||
border=2,
|
||||
)
|
||||
qr.add_data(url)
|
||||
qr.make(fit=True)
|
||||
qr.print_ascii(invert=True)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Synchronous HTTP helpers (mirrors Feishu _post_registration pattern)
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
|
||||
async def create_bind_task(
|
||||
timeout: float = ONBOARD_API_TIMEOUT,
|
||||
) -> Tuple[str, str]:
|
||||
"""Create a bind task and return *(task_id, aes_key_base64)*.
|
||||
|
||||
The AES key is generated locally and sent to the server so it can
|
||||
encrypt the bot credentials before returning them.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the API returns a non-zero ``retcode``.
|
||||
"""
|
||||
@@ -92,8 +64,8 @@ def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
|
||||
url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}"
|
||||
key = generate_bind_key()
|
||||
|
||||
with httpx.Client(timeout=timeout, follow_redirects=True) as client:
|
||||
resp = client.post(url, json={"key": key}, headers=get_api_headers())
|
||||
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
|
||||
resp = await client.post(url, json={"key": key}, headers=get_api_headers())
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
@@ -108,7 +80,7 @@ def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
|
||||
return task_id, key
|
||||
|
||||
|
||||
def _poll_bind_result(
|
||||
async def poll_bind_result(
|
||||
task_id: str,
|
||||
timeout: float = ONBOARD_API_TIMEOUT,
|
||||
) -> Tuple[BindStatus, str, str, str]:
|
||||
@@ -117,6 +89,12 @@ def _poll_bind_result(
|
||||
Returns:
|
||||
A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``.
|
||||
|
||||
* ``bot_encrypt_secret`` is AES-256-GCM encrypted — decrypt it with
|
||||
:func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the
|
||||
key from :func:`create_bind_task`.
|
||||
* ``user_openid`` is the OpenID of the person who scanned the code
|
||||
(available when ``status == COMPLETED``).
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the API returns a non-zero ``retcode``.
|
||||
"""
|
||||
@@ -124,8 +102,8 @@ def _poll_bind_result(
|
||||
|
||||
url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}"
|
||||
|
||||
with httpx.Client(timeout=timeout, follow_redirects=True) as client:
|
||||
resp = client.post(url, json={"task_id": task_id}, headers=get_api_headers())
|
||||
async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
|
||||
resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers())
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
@@ -144,77 +122,3 @@ def _poll_bind_result(
|
||||
def build_connect_url(task_id: str) -> str:
|
||||
"""Build the QR-code target URL for a given *task_id*."""
|
||||
return QR_URL_TEMPLATE.format(task_id=quote(task_id))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public entry-point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_MAX_REFRESHES = 3
|
||||
|
||||
|
||||
def qr_register(timeout_seconds: int = 600) -> Optional[dict]:
|
||||
"""Run the QQBot scan-to-configure QR registration flow.
|
||||
|
||||
Mirrors ``feishu.qr_register()``: handles create → display → poll →
|
||||
decrypt in one call. Unexpected errors propagate to the caller.
|
||||
|
||||
:returns:
|
||||
``{"app_id": ..., "client_secret": ..., "user_openid": ...}`` on
|
||||
success, or ``None`` on failure / expiry / cancellation.
|
||||
"""
|
||||
deadline = time.monotonic() + timeout_seconds
|
||||
|
||||
for refresh_count in range(_MAX_REFRESHES + 1):
|
||||
# ── Create bind task ──
|
||||
try:
|
||||
task_id, aes_key = _create_bind_task()
|
||||
except Exception as exc:
|
||||
logger.warning("[QQBot onboard] Failed to create bind task: %s", exc)
|
||||
return None
|
||||
|
||||
url = build_connect_url(task_id)
|
||||
|
||||
# ── Display QR code + URL ──
|
||||
print()
|
||||
if _render_qr(url):
|
||||
print(f" Scan the QR code above, or open this URL directly:\n {url}")
|
||||
else:
|
||||
print(f" Open this URL in QQ on your phone:\n {url}")
|
||||
print(" Tip: pip install qrcode to display a scannable QR code here")
|
||||
print()
|
||||
|
||||
# ── Poll loop ──
|
||||
while time.monotonic() < deadline:
|
||||
try:
|
||||
status, app_id, encrypted_secret, user_openid = _poll_bind_result(task_id)
|
||||
except Exception:
|
||||
time.sleep(ONBOARD_POLL_INTERVAL)
|
||||
continue
|
||||
|
||||
if status == BindStatus.COMPLETED:
|
||||
client_secret = decrypt_secret(encrypted_secret, aes_key)
|
||||
print()
|
||||
print(f" QR scan complete! (App ID: {app_id})")
|
||||
if user_openid:
|
||||
print(f" Scanner's OpenID: {user_openid}")
|
||||
return {
|
||||
"app_id": app_id,
|
||||
"client_secret": client_secret,
|
||||
"user_openid": user_openid,
|
||||
}
|
||||
|
||||
if status == BindStatus.EXPIRED:
|
||||
if refresh_count >= _MAX_REFRESHES:
|
||||
logger.warning("[QQBot onboard] QR code expired %d times — giving up", _MAX_REFRESHES)
|
||||
return None
|
||||
print(f"\n QR code expired, refreshing... ({refresh_count + 1}/{_MAX_REFRESHES})")
|
||||
break # next for-loop iteration creates a new task
|
||||
|
||||
time.sleep(ONBOARD_POLL_INTERVAL)
|
||||
else:
|
||||
# deadline reached without completing
|
||||
logger.warning("[QQBot onboard] Poll timed out after %ds", timeout_seconds)
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
+18
-114
@@ -18,7 +18,6 @@ import logging
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any
|
||||
@@ -128,27 +127,6 @@ def _render_mentions(text: str, mentions: list) -> str:
|
||||
return text
|
||||
|
||||
|
||||
def _is_signal_service_id(value: str) -> bool:
|
||||
"""Return True if *value* already looks like a Signal service identifier."""
|
||||
if not value:
|
||||
return False
|
||||
if value.startswith("PNI:") or value.startswith("u:"):
|
||||
return True
|
||||
try:
|
||||
uuid.UUID(value)
|
||||
return True
|
||||
except (ValueError, AttributeError, TypeError):
|
||||
return False
|
||||
|
||||
|
||||
def _looks_like_e164_number(value: str) -> bool:
|
||||
"""Return True for a plausible E.164 phone number."""
|
||||
if not value or not value.startswith("+"):
|
||||
return False
|
||||
digits = value[1:]
|
||||
return digits.isdigit() and 7 <= len(digits) <= 15
|
||||
|
||||
|
||||
def check_signal_requirements() -> bool:
|
||||
"""Check if Signal is configured (has URL and account)."""
|
||||
return bool(os.getenv("SIGNAL_HTTP_URL") and os.getenv("SIGNAL_ACCOUNT"))
|
||||
@@ -201,12 +179,6 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
# in Note to Self / self-chat mode (mirrors WhatsApp recentlySentIds)
|
||||
self._recent_sent_timestamps: set = set()
|
||||
self._max_recent_timestamps = 50
|
||||
# Signal increasingly exposes ACI/PNI UUIDs as stable recipient IDs.
|
||||
# Keep a best-effort mapping so outbound sends can upgrade from a
|
||||
# phone number to the corresponding UUID when signal-cli prefers it.
|
||||
self._recipient_uuid_by_number: Dict[str, str] = {}
|
||||
self._recipient_number_by_uuid: Dict[str, str] = {}
|
||||
self._recipient_cache_lock = asyncio.Lock()
|
||||
|
||||
logger.info("Signal adapter initialized: url=%s account=%s groups=%s",
|
||||
self.http_url, redact_phone(self.account),
|
||||
@@ -223,40 +195,31 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
return False
|
||||
|
||||
# Acquire scoped lock to prevent duplicate Signal listeners for the same phone
|
||||
lock_acquired = False
|
||||
try:
|
||||
if not self._acquire_platform_lock('signal-phone', self.account, 'Signal account'):
|
||||
return False
|
||||
lock_acquired = True
|
||||
except Exception as e:
|
||||
logger.warning("Signal: Could not acquire phone lock (non-fatal): %s", e)
|
||||
|
||||
self.client = httpx.AsyncClient(timeout=30.0)
|
||||
|
||||
# Health check — verify signal-cli daemon is reachable
|
||||
try:
|
||||
# Health check — verify signal-cli daemon is reachable
|
||||
try:
|
||||
resp = await self.client.get(f"{self.http_url}/api/v1/check", timeout=10.0)
|
||||
if resp.status_code != 200:
|
||||
logger.error("Signal: health check failed (status %d)", resp.status_code)
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error("Signal: cannot reach signal-cli at %s: %s", self.http_url, e)
|
||||
resp = await self.client.get(f"{self.http_url}/api/v1/check", timeout=10.0)
|
||||
if resp.status_code != 200:
|
||||
logger.error("Signal: health check failed (status %d)", resp.status_code)
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error("Signal: cannot reach signal-cli at %s: %s", self.http_url, e)
|
||||
return False
|
||||
|
||||
self._running = True
|
||||
self._last_sse_activity = time.time()
|
||||
self._sse_task = asyncio.create_task(self._sse_listener())
|
||||
self._health_monitor_task = asyncio.create_task(self._health_monitor())
|
||||
self._running = True
|
||||
self._last_sse_activity = time.time()
|
||||
self._sse_task = asyncio.create_task(self._sse_listener())
|
||||
self._health_monitor_task = asyncio.create_task(self._health_monitor())
|
||||
|
||||
logger.info("Signal: connected to %s", self.http_url)
|
||||
return True
|
||||
finally:
|
||||
if not self._running:
|
||||
if self.client:
|
||||
await self.client.aclose()
|
||||
self.client = None
|
||||
if lock_acquired:
|
||||
self._release_platform_lock()
|
||||
logger.info("Signal: connected to %s", self.http_url)
|
||||
return True
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
"""Stop SSE listener and clean up."""
|
||||
@@ -437,7 +400,6 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
)
|
||||
sender_name = envelope_data.get("sourceName", "")
|
||||
sender_uuid = envelope_data.get("sourceUuid", "")
|
||||
self._remember_recipient_identifiers(sender, sender_uuid)
|
||||
|
||||
if not sender:
|
||||
logger.debug("Signal: ignoring envelope with no sender")
|
||||
@@ -556,64 +518,6 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
|
||||
await self.handle_message(event)
|
||||
|
||||
def _remember_recipient_identifiers(self, number: Optional[str], service_id: Optional[str]) -> None:
    """Cache any number↔UUID mapping observed from Signal envelopes.

    Args:
        number: Phone number seen for the peer; ignored when empty.
        service_id: Service identifier seen for the same peer; ignored when
            empty or when ``_is_signal_service_id`` rejects it.

    Both lookup directions are updated together so either identifier can be
    used as the key. An existing entry for the same key is overwritten.
    """
    if not number or not service_id or not _is_signal_service_id(service_id):
        return
    self._recipient_uuid_by_number[number] = service_id
    self._recipient_number_by_uuid[service_id] = number
|
||||
|
||||
def _extract_contact_uuid(self, contact: Any, phone_number: str) -> Optional[str]:
    """Best-effort extraction of a Signal service ID from listContacts output.

    Args:
        contact: One entry of the ``listContacts`` result. Anything that is
            not a dict is ignored.
        phone_number: The number the caller is trying to resolve.

    Returns:
        The contact's service ID when it passes ``_is_signal_service_id`` and
        the contact's ``number`` or ``recipient`` field equals
        *phone_number*; otherwise ``None``.
    """
    if not isinstance(contact, dict):
        return None

    number = contact.get("number")
    recipient = contact.get("recipient")
    service_id = contact.get("uuid") or contact.get("serviceId")
    if not service_id:
        # Fall back to the identifiers nested under "profile", if present.
        profile = contact.get("profile")
        if isinstance(profile, dict):
            service_id = profile.get("serviceId") or profile.get("uuid")

    if service_id and _is_signal_service_id(service_id):
        matches_number = number == phone_number or recipient == phone_number
        if matches_number:
            return service_id
    return None
|
||||
|
||||
async def _resolve_recipient(self, chat_id: str) -> str:
    """Return the preferred Signal recipient identifier for a direct chat.

    Empty values, ``group:`` chat IDs, values that already look like service
    IDs, and anything that is not a plausible E.164 number are returned
    unchanged. For a phone number, the cached service ID is returned when
    one is known; otherwise the contact list is requested through the
    ``listContacts`` RPC and the cache is updated from any matching entry.

    Args:
        chat_id: Chat identifier as used by the send paths.

    Returns:
        The cached or freshly resolved service ID for *chat_id*, or *chat_id*
        itself when no mapping could be found.
    """
    if (
        not chat_id
        or chat_id.startswith("group:")
        or _is_signal_service_id(chat_id)
        or not _looks_like_e164_number(chat_id)
    ):
        return chat_id

    cached = self._recipient_uuid_by_number.get(chat_id)
    if cached:
        return cached

    async with self._recipient_cache_lock:
        # Re-check under the lock: another task may have filled the cache
        # while this one was waiting to acquire it.
        cached = self._recipient_uuid_by_number.get(chat_id)
        if cached:
            return cached

        contacts = await self._rpc("listContacts", {
            "account": self.account,
            "allRecipients": True,
        })
        if isinstance(contacts, list):
            for contact in contacts:
                service_id = self._extract_contact_uuid(contact, chat_id)
                if service_id:
                    # _extract_contact_uuid only yields an ID for a contact
                    # whose "number" or "recipient" equals chat_id, so key
                    # the cache on chat_id. Keying on contact["number"]
                    # would miss contacts matched via "recipient" and make
                    # the lookup below fall through.
                    self._remember_recipient_identifiers(chat_id, service_id)

    return self._recipient_uuid_by_number.get(chat_id, chat_id)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Attachment Handling
|
||||
# ------------------------------------------------------------------
|
||||
@@ -729,7 +633,7 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
if chat_id.startswith("group:"):
|
||||
params["groupId"] = chat_id[6:]
|
||||
else:
|
||||
params["recipient"] = [await self._resolve_recipient(chat_id)]
|
||||
params["recipient"] = [chat_id]
|
||||
|
||||
result = await self._rpc("send", params)
|
||||
|
||||
@@ -780,7 +684,7 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
if chat_id.startswith("group:"):
|
||||
params["groupId"] = chat_id[6:]
|
||||
else:
|
||||
params["recipient"] = [await self._resolve_recipient(chat_id)]
|
||||
params["recipient"] = [chat_id]
|
||||
|
||||
fails = self._typing_failures.get(chat_id, 0)
|
||||
result = await self._rpc(
|
||||
@@ -841,7 +745,7 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
if chat_id.startswith("group:"):
|
||||
params["groupId"] = chat_id[6:]
|
||||
else:
|
||||
params["recipient"] = [await self._resolve_recipient(chat_id)]
|
||||
params["recipient"] = [chat_id]
|
||||
|
||||
result = await self._rpc("send", params)
|
||||
if result is not None:
|
||||
@@ -880,7 +784,7 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
if chat_id.startswith("group:"):
|
||||
params["groupId"] = chat_id[6:]
|
||||
else:
|
||||
params["recipient"] = [await self._resolve_recipient(chat_id)]
|
||||
params["recipient"] = [chat_id]
|
||||
|
||||
result = await self._rpc("send", params)
|
||||
if result is not None:
|
||||
|
||||
+15
-64
@@ -38,7 +38,6 @@ from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
ProcessingOutcome,
|
||||
SendResult,
|
||||
SUPPORTED_DOCUMENT_TYPES,
|
||||
safe_url_for_log,
|
||||
@@ -114,11 +113,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
# Cache for _fetch_thread_context results: cache_key → _ThreadContextCache
|
||||
self._thread_context_cache: Dict[str, _ThreadContextCache] = {}
|
||||
self._THREAD_CACHE_TTL = 60.0
|
||||
# Track message IDs that should get reaction lifecycle (DMs / @mentions).
|
||||
self._reacting_message_ids: set = set()
|
||||
# Track active assistant thread status indicators so stop_typing can
|
||||
# clear them (chat_id → thread_ts).
|
||||
self._active_status_threads: Dict[str, str] = {}
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Slack via Socket Mode."""
|
||||
@@ -156,11 +150,9 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
except Exception as e:
|
||||
logger.warning("[Slack] Failed to read %s: %s", tokens_file, e)
|
||||
|
||||
lock_acquired = False
|
||||
try:
|
||||
if not self._acquire_platform_lock('slack-app-token', app_token, 'Slack app token'):
|
||||
return False
|
||||
lock_acquired = True
|
||||
|
||||
# First token is the primary — used for AsyncApp / Socket Mode
|
||||
primary_token = bot_tokens[0]
|
||||
@@ -236,9 +228,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
except Exception as e: # pragma: no cover - defensive logging
|
||||
logger.error("[Slack] Connection failed: %s", e, exc_info=True)
|
||||
return False
|
||||
finally:
|
||||
if lock_acquired and not self._running:
|
||||
self._release_platform_lock()
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
"""Disconnect from Slack."""
|
||||
@@ -327,8 +316,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
chat_id: str,
|
||||
message_id: str,
|
||||
content: str,
|
||||
*,
|
||||
finalize: bool = False,
|
||||
) -> SendResult:
|
||||
"""Edit a previously sent Slack message."""
|
||||
if not self._app:
|
||||
@@ -368,7 +355,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
if not thread_ts:
|
||||
return # Can only set status in a thread context
|
||||
|
||||
self._active_status_threads[chat_id] = thread_ts
|
||||
try:
|
||||
await self._get_client(chat_id).assistant_threads_setStatus(
|
||||
channel_id=chat_id,
|
||||
@@ -380,22 +366,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
# in an assistant-enabled context. Falls back to reactions.
|
||||
logger.debug("[Slack] assistant.threads.setStatus failed: %s", e)
|
||||
|
||||
async def stop_typing(self, chat_id: str) -> None:
    """Clear the assistant thread status indicator.

    Does nothing when the adapter has no app or when no status thread is
    recorded for *chat_id*. The recorded thread is removed from
    ``_active_status_threads`` before the API call, so a failed call is not
    retried by a later invocation.

    Args:
        chat_id: Channel whose status indicator should be cleared.
    """
    if not self._app:
        return
    thread_ts = self._active_status_threads.pop(chat_id, None)
    if not thread_ts:
        return
    try:
        # An empty status string is what clears the indicator.
        await self._get_client(chat_id).assistant_threads_setStatus(
            channel_id=chat_id,
            thread_ts=thread_ts,
            status="",
        )
    except Exception as e:
        # Best effort: failure to clear is only logged at debug level.
        logger.debug("[Slack] assistant.threads.setStatus clear failed: %s", e)
|
||||
|
||||
def _dm_top_level_threads_as_sessions(self) -> bool:
|
||||
"""Whether top-level Slack DMs get per-message session threads.
|
||||
|
||||
@@ -607,38 +577,6 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
logger.debug("[Slack] reactions.remove failed (%s): %s", emoji, e)
|
||||
return False
|
||||
|
||||
def _reactions_enabled(self) -> bool:
|
||||
"""Check if message reactions are enabled via config/env."""
|
||||
return os.getenv("SLACK_REACTIONS", "true").lower() not in ("false", "0", "no")
|
||||
|
||||
async def on_processing_start(self, event: MessageEvent) -> None:
    """Add an in-progress reaction when message processing begins.

    The ``eyes`` reaction is added only when reactions are enabled, the
    event carries a ``message_id`` that is present in
    ``_reacting_message_ids``, and ``event.source`` exposes a ``chat_id``.

    Args:
        event: The message event whose processing is starting.
    """
    if not self._reactions_enabled():
        return
    ts = getattr(event, "message_id", None)
    # Only messages previously registered for the reaction lifecycle qualify.
    if not ts or ts not in self._reacting_message_ids:
        return
    channel_id = getattr(event.source, "chat_id", None)
    if channel_id:
        await self._add_reaction(channel_id, ts, "eyes")
|
||||
|
||||
async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None:
    """Swap the in-progress reaction for a final success/failure reaction.

    Skipped entirely when reactions are disabled or when the event's
    ``message_id`` is not in ``_reacting_message_ids``. Otherwise the ID is
    dropped from that set, the ``eyes`` reaction is removed, and
    ``white_check_mark`` (success) or ``x`` (failure) is added. Any other
    outcome leaves the message without a final reaction.

    Args:
        event: The message event whose processing has finished.
        outcome: Result of the processing run.
    """
    if not self._reactions_enabled():
        return
    ts = getattr(event, "message_id", None)
    if not ts or ts not in self._reacting_message_ids:
        return
    # Forget the message first so the lifecycle ends even if no channel is
    # available for the reaction calls below.
    self._reacting_message_ids.discard(ts)
    channel_id = getattr(event.source, "chat_id", None)
    if not channel_id:
        return
    await self._remove_reaction(channel_id, ts, "eyes")
    if outcome == ProcessingOutcome.SUCCESS:
        await self._add_reaction(channel_id, ts, "white_check_mark")
    elif outcome == ProcessingOutcome.FAILURE:
        await self._add_reaction(channel_id, ts, "x")
|
||||
|
||||
# ----- User identity resolution -----
|
||||
|
||||
async def _resolve_user_name(self, user_id: str, chat_id: str = "") -> str:
|
||||
@@ -1268,12 +1206,17 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
# Only react when bot is directly addressed (DM or @mention).
|
||||
# In listen-all channels (require_mention=false), reacting to every
|
||||
# casual message would be noisy.
|
||||
_should_react = (is_dm or is_mentioned) and self._reactions_enabled()
|
||||
_should_react = is_dm or is_mentioned
|
||||
|
||||
if _should_react:
|
||||
self._reacting_message_ids.add(ts)
|
||||
await self._add_reaction(channel_id, ts, "eyes")
|
||||
|
||||
await self.handle_message(msg_event)
|
||||
|
||||
if _should_react:
|
||||
await self._remove_reaction(channel_id, ts, "eyes")
|
||||
await self._add_reaction(channel_id, ts, "white_check_mark")
|
||||
|
||||
# ----- Approval button support (Block Kit) -----
|
||||
|
||||
async def send_exec_approval(
|
||||
@@ -1650,9 +1593,11 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
|
||||
async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str:
|
||||
"""Download a Slack file using the bot token for auth, with retry."""
|
||||
import asyncio
|
||||
import httpx
|
||||
|
||||
bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
|
||||
last_exc = None
|
||||
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
for attempt in range(3):
|
||||
@@ -1682,6 +1627,7 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
from gateway.platforms.base import cache_image_from_bytes
|
||||
return cache_image_from_bytes(response.content, ext)
|
||||
except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
|
||||
last_exc = exc
|
||||
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
|
||||
raise
|
||||
if attempt < 2:
|
||||
@@ -1690,12 +1636,15 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
await asyncio.sleep(1.5 * (attempt + 1))
|
||||
continue
|
||||
raise
|
||||
raise last_exc
|
||||
|
||||
async def _download_slack_file_bytes(self, url: str, team_id: str = "") -> bytes:
|
||||
"""Download a Slack file and return raw bytes, with retry."""
|
||||
import asyncio
|
||||
import httpx
|
||||
|
||||
bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
|
||||
last_exc = None
|
||||
|
||||
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
||||
for attempt in range(3):
|
||||
@@ -1707,6 +1656,7 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
response.raise_for_status()
|
||||
return response.content
|
||||
except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
|
||||
last_exc = exc
|
||||
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
|
||||
raise
|
||||
if attempt < 2:
|
||||
@@ -1715,6 +1665,7 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
await asyncio.sleep(1.5 * (attempt + 1))
|
||||
continue
|
||||
raise
|
||||
raise last_exc
|
||||
|
||||
# ── Channel mention gating ─────────────────────────────────────────────
|
||||
|
||||
|
||||
+20
-119
@@ -11,7 +11,6 @@ import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
import html as _html
|
||||
import re
|
||||
from typing import Dict, List, Optional, Any
|
||||
@@ -71,10 +70,8 @@ from gateway.platforms.base import (
|
||||
SendResult,
|
||||
cache_image_from_bytes,
|
||||
cache_audio_from_bytes,
|
||||
cache_video_from_bytes,
|
||||
cache_document_from_bytes,
|
||||
resolve_proxy_url,
|
||||
SUPPORTED_VIDEO_TYPES,
|
||||
SUPPORTED_DOCUMENT_TYPES,
|
||||
utf16_len,
|
||||
_prefix_within_utf16_limit,
|
||||
@@ -496,13 +493,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
"[%s] DM topic '%s' already exists in chat %s (will be mapped from incoming messages)",
|
||||
self.name, name, chat_id,
|
||||
)
|
||||
elif "not a forum" in error_text or "forums_disabled" in error_text:
|
||||
logger.warning(
|
||||
"[%s] Cannot create DM topic '%s' in chat %s: Topics mode is not enabled. "
|
||||
"The user must open the DM with this bot in Telegram, tap the bot name "
|
||||
"at the top, and enable 'Topics' in chat settings before topics can be created.",
|
||||
self.name, name, chat_id,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"[%s] Failed to create DM topic '%s' in chat %s: %s",
|
||||
@@ -544,23 +534,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
break
|
||||
|
||||
if changed:
|
||||
fd, tmp_path = tempfile.mkstemp(
|
||||
dir=str(config_path.parent),
|
||||
suffix=".tmp",
|
||||
prefix=".config_",
|
||||
)
|
||||
try:
|
||||
with os.fdopen(fd, "w", encoding="utf-8") as f:
|
||||
_yaml.dump(config, f, default_flow_style=False, sort_keys=False)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_path, config_path)
|
||||
except BaseException:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
with open(config_path, "w") as f:
|
||||
_yaml.dump(config, f, default_flow_style=False, sort_keys=False)
|
||||
logger.info(
|
||||
"[%s] Persisted thread_id=%s for topic '%s' in config.yaml",
|
||||
self.name, thread_id, topic_name,
|
||||
@@ -794,28 +769,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
# Telegram pushes updates to our HTTP endpoint. This
|
||||
# enables cloud platforms (Fly.io, Railway) to auto-wake
|
||||
# suspended machines on inbound HTTP traffic.
|
||||
#
|
||||
# SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it,
|
||||
# python-telegram-bot passes secret_token=None and the
|
||||
# webhook endpoint accepts any HTTP POST — attackers can
|
||||
# inject forged updates as if from Telegram. Refuse to
|
||||
# start rather than silently run in fail-open mode.
|
||||
# See GHSA-3vpc-7q5r-276h.
|
||||
webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443"))
|
||||
webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip()
|
||||
if not webhook_secret:
|
||||
raise RuntimeError(
|
||||
"TELEGRAM_WEBHOOK_SECRET is required when "
|
||||
"TELEGRAM_WEBHOOK_URL is set. Without it, the "
|
||||
"webhook endpoint accepts forged updates from "
|
||||
"anyone who can reach it — see "
|
||||
"https://github.com/NousResearch/hermes-agent/"
|
||||
"security/advisories/GHSA-3vpc-7q5r-276h.\n\n"
|
||||
"Generate a secret and set it in your .env:\n"
|
||||
" export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n"
|
||||
"Then register it with Telegram when setting the "
|
||||
"webhook via setWebhook's secret_token parameter."
|
||||
)
|
||||
webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None
|
||||
from urllib.parse import urlparse
|
||||
webhook_path = urlparse(webhook_url).path or "/telegram"
|
||||
|
||||
@@ -1126,8 +1081,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
chat_id: str,
|
||||
message_id: str,
|
||||
content: str,
|
||||
*,
|
||||
finalize: bool = False,
|
||||
) -> SendResult:
|
||||
"""Edit a previously sent Telegram message."""
|
||||
if not self._bot:
|
||||
@@ -1704,21 +1657,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
except Exception as exc:
|
||||
logger.error("Failed to write update response from callback: %s", exc)
|
||||
|
||||
def _missing_media_path_error(self, label: str, path: str) -> str:
|
||||
"""Build an actionable file-not-found error for gateway MEDIA delivery.
|
||||
|
||||
Paths like /workspace/... or /output/... often only exist inside the
|
||||
Docker sandbox, while the gateway process runs on the host.
|
||||
"""
|
||||
error = f"{label} file not found: {path}"
|
||||
if path.startswith(("/workspace/", "/output/", "/outputs/")):
|
||||
error += (
|
||||
" (path may only exist inside the Docker sandbox. "
|
||||
"Bind-mount a host directory and emit the host-visible "
|
||||
"path in MEDIA: for gateway file delivery.)"
|
||||
)
|
||||
return error
|
||||
|
||||
async def send_voice(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -1733,8 +1671,9 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
import os
|
||||
if not os.path.exists(audio_path):
|
||||
return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
|
||||
return SendResult(success=False, error=f"Audio file not found: {audio_path}")
|
||||
|
||||
with open(audio_path, "rb") as audio_file:
|
||||
# .ogg files -> send as voice (round playable bubble)
|
||||
@@ -1781,8 +1720,9 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
try:
|
||||
import os
|
||||
if not os.path.exists(image_path):
|
||||
return SendResult(success=False, error=self._missing_media_path_error("Image", image_path))
|
||||
return SendResult(success=False, error=f"Image file not found: {image_path}")
|
||||
|
||||
_thread = self._metadata_thread_id(metadata)
|
||||
with open(image_path, "rb") as image_file:
|
||||
@@ -1819,7 +1759,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
try:
|
||||
if not os.path.exists(file_path):
|
||||
return SendResult(success=False, error=self._missing_media_path_error("File", file_path))
|
||||
return SendResult(success=False, error=f"File not found: {file_path}")
|
||||
|
||||
display_name = file_name or os.path.basename(file_path)
|
||||
_thread = self._metadata_thread_id(metadata)
|
||||
@@ -1853,7 +1793,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
try:
|
||||
if not os.path.exists(video_path):
|
||||
return SendResult(success=False, error=self._missing_media_path_error("Video", video_path))
|
||||
return SendResult(success=False, error=f"Video file not found: {video_path}")
|
||||
|
||||
_thread = self._metadata_thread_id(metadata)
|
||||
with open(video_path, "rb") as f:
|
||||
@@ -2093,7 +2033,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
|
||||
return _ph(f'[{display}]({url})')
|
||||
|
||||
text = re.sub(r'\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)', _convert_link, text)
|
||||
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
|
||||
|
||||
# 4) Convert markdown headers (## Title) → bold *Title*
|
||||
def _convert_header(m):
|
||||
@@ -2301,27 +2241,22 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
bot_username = (getattr(self._bot, "username", None) or "").lstrip("@").lower()
|
||||
bot_id = getattr(self._bot, "id", None)
|
||||
expected = f"@{bot_username}" if bot_username else None
|
||||
|
||||
def _iter_sources():
|
||||
yield getattr(message, "text", None) or "", getattr(message, "entities", None) or []
|
||||
yield getattr(message, "caption", None) or "", getattr(message, "caption_entities", None) or []
|
||||
|
||||
# Telegram parses mentions server-side and emits MessageEntity objects
|
||||
# (type=mention for @username, type=text_mention for @FirstName targeting
|
||||
# a user without a public username). Only those entities are authoritative —
|
||||
# raw substring matches like "foo@hermes_bot.example" are not mentions
|
||||
# (bug #12545). Entities also correctly handle @handles inside URLs, code
|
||||
# blocks, and quoted text, where a regex scan would over-match.
|
||||
for source_text, entities in _iter_sources():
|
||||
if bot_username and f"@{bot_username}" in source_text.lower():
|
||||
return True
|
||||
for entity in entities:
|
||||
entity_type = str(getattr(entity, "type", "")).split(".")[-1].lower()
|
||||
if entity_type == "mention" and expected:
|
||||
if entity_type == "mention" and bot_username:
|
||||
offset = int(getattr(entity, "offset", -1))
|
||||
length = int(getattr(entity, "length", 0))
|
||||
if offset < 0 or length <= 0:
|
||||
continue
|
||||
if source_text[offset:offset + length].strip().lower() == expected:
|
||||
if source_text[offset:offset + length].strip().lower() == f"@{bot_username}":
|
||||
return True
|
||||
elif entity_type == "text_mention":
|
||||
user = getattr(entity, "user", None)
|
||||
@@ -2353,16 +2288,10 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
DMs remain unrestricted. Group/supergroup messages are accepted when:
|
||||
- the chat is explicitly allowlisted in ``free_response_chats``
|
||||
- ``require_mention`` is disabled
|
||||
- the message is a command
|
||||
- the message replies to the bot
|
||||
- the bot is @mentioned
|
||||
- the text/caption matches a configured regex wake-word pattern
|
||||
|
||||
When ``require_mention`` is enabled, slash commands are not given
|
||||
special treatment — they must pass the same mention/reply checks
|
||||
as any other group message. Users can still trigger commands via
|
||||
the Telegram bot menu (``/command@botname``) or by explicitly
|
||||
mentioning the bot (``@botname /command``), both of which are
|
||||
recognised as mentions by :meth:`_message_mentions_bot`.
|
||||
"""
|
||||
if not self._is_group_chat(message):
|
||||
return True
|
||||
@@ -2377,6 +2306,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
return True
|
||||
if not self._telegram_require_mention():
|
||||
return True
|
||||
if is_command:
|
||||
return True
|
||||
if self._is_reply_to_bot(message):
|
||||
return True
|
||||
if self._message_mentions_bot(message):
|
||||
@@ -2659,23 +2590,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
except Exception as e:
|
||||
logger.warning("[Telegram] Failed to cache audio: %s", e, exc_info=True)
|
||||
|
||||
elif msg.video:
|
||||
try:
|
||||
file_obj = await msg.video.get_file()
|
||||
video_bytes = await file_obj.download_as_bytearray()
|
||||
ext = ".mp4"
|
||||
if getattr(file_obj, "file_path", None):
|
||||
for candidate in SUPPORTED_VIDEO_TYPES:
|
||||
if file_obj.file_path.lower().endswith(candidate):
|
||||
ext = candidate
|
||||
break
|
||||
cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext)
|
||||
event.media_urls = [cached_path]
|
||||
event.media_types = [SUPPORTED_VIDEO_TYPES.get(ext, "video/mp4")]
|
||||
logger.info("[Telegram] Cached user video at %s", cached_path)
|
||||
except Exception as e:
|
||||
logger.warning("[Telegram] Failed to cache video: %s", e, exc_info=True)
|
||||
|
||||
# Download document files to cache for agent processing
|
||||
elif msg.document:
|
||||
doc = msg.document
|
||||
@@ -2692,21 +2606,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
|
||||
ext = mime_to_ext.get(doc.mime_type, "")
|
||||
|
||||
if not ext and doc.mime_type:
|
||||
video_mime_to_ext = {v: k for k, v in SUPPORTED_VIDEO_TYPES.items()}
|
||||
ext = video_mime_to_ext.get(doc.mime_type, "")
|
||||
|
||||
if ext in SUPPORTED_VIDEO_TYPES:
|
||||
file_obj = await doc.get_file()
|
||||
video_bytes = await file_obj.download_as_bytearray()
|
||||
cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext)
|
||||
event.media_urls = [cached_path]
|
||||
event.media_types = [SUPPORTED_VIDEO_TYPES[ext]]
|
||||
event.message_type = MessageType.VIDEO
|
||||
logger.info("[Telegram] Cached user video document at %s", cached_path)
|
||||
await self.handle_message(event)
|
||||
return
|
||||
|
||||
# Check if supported
|
||||
if ext not in SUPPORTED_DOCUMENT_TYPES:
|
||||
supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))
|
||||
@@ -2845,11 +2744,13 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
logger.info("[Telegram] Analyzing sticker at %s", cached_path)
|
||||
|
||||
from tools.vision_tools import vision_analyze_tool
|
||||
import json as _json
|
||||
|
||||
result_json = await vision_analyze_tool(
|
||||
image_url=cached_path,
|
||||
user_prompt=STICKER_VISION_PROMPT,
|
||||
)
|
||||
result = json.loads(result_json)
|
||||
result = _json.loads(result_json)
|
||||
|
||||
if result.get("success"):
|
||||
description = result.get("analysis", "a sticker")
|
||||
|
||||
+12
-115
@@ -13,10 +13,6 @@ Each route defines:
|
||||
- skills: optional list of skills to load for the agent
|
||||
- deliver: where to send the response (github_comment, telegram, etc.)
|
||||
- deliver_extra: additional delivery config (repo, pr_number, chat_id)
|
||||
- deliver_only: if true, skip the agent — the rendered prompt IS the
|
||||
message that gets delivered. Use for external push notifications
|
||||
(Supabase, monitoring alerts, inter-agent pings) where zero LLM cost
|
||||
and sub-second delivery matter more than agent reasoning.
|
||||
|
||||
Security:
|
||||
- HMAC secret is required per route (validated at startup)
|
||||
@@ -126,19 +122,6 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
f"For testing without auth, set secret to '{_INSECURE_NO_AUTH}'."
|
||||
)
|
||||
|
||||
# deliver_only routes bypass the agent — the POST body becomes a
|
||||
# direct push notification via the configured delivery target.
|
||||
# Validate up-front so misconfiguration surfaces at startup rather
|
||||
# than on the first webhook POST.
|
||||
if route.get("deliver_only"):
|
||||
deliver = route.get("deliver", "log")
|
||||
if not deliver or deliver == "log":
|
||||
raise ValueError(
|
||||
f"[webhook] Route '{name}' has deliver_only=true but "
|
||||
f"deliver is '{deliver}'. Direct delivery requires a "
|
||||
f"real target (telegram, discord, slack, github_comment, etc.)."
|
||||
)
|
||||
|
||||
app = web.Application()
|
||||
app.router.add_get("/health", self._handle_health)
|
||||
app.router.add_post("/webhooks/{route_name}", self._handle_webhook)
|
||||
@@ -313,14 +296,24 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
{"error": "Payload too large"}, status=413
|
||||
)
|
||||
|
||||
# Read body (must be done before any validation)
|
||||
# ── Rate limiting ────────────────────────────────────────
|
||||
now = time.time()
|
||||
window = self._rate_counts.setdefault(route_name, [])
|
||||
window[:] = [t for t in window if now - t < 60]
|
||||
if len(window) >= self._rate_limit:
|
||||
return web.json_response(
|
||||
{"error": "Rate limit exceeded"}, status=429
|
||||
)
|
||||
window.append(now)
|
||||
|
||||
# Read body
|
||||
try:
|
||||
raw_body = await request.read()
|
||||
except Exception as e:
|
||||
logger.error("[webhook] Failed to read body: %s", e)
|
||||
return web.json_response({"error": "Bad request"}, status=400)
|
||||
|
||||
# Validate HMAC signature FIRST (skip for INSECURE_NO_AUTH testing mode)
|
||||
# Validate HMAC signature (skip for INSECURE_NO_AUTH testing mode)
|
||||
secret = route_config.get("secret", self._global_secret)
|
||||
if secret and secret != _INSECURE_NO_AUTH:
|
||||
if not self._validate_signature(request, raw_body, secret):
|
||||
@@ -331,16 +324,6 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
{"error": "Invalid signature"}, status=401
|
||||
)
|
||||
|
||||
# ── Rate limiting (after auth) ───────────────────────────
|
||||
now = time.time()
|
||||
window = self._rate_counts.setdefault(route_name, [])
|
||||
window[:] = [t for t in window if now - t < 60]
|
||||
if len(window) >= self._rate_limit:
|
||||
return web.json_response(
|
||||
{"error": "Rate limit exceeded"}, status=429
|
||||
)
|
||||
window.append(now)
|
||||
|
||||
# Parse payload
|
||||
try:
|
||||
payload = json.loads(raw_body)
|
||||
@@ -436,64 +419,6 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
)
|
||||
self._seen_deliveries[delivery_id] = now
|
||||
|
||||
# ── Direct delivery mode (deliver_only) ─────────────────
|
||||
# Skip the agent entirely — the rendered prompt IS the message we
|
||||
# deliver. Use case: external services (Supabase, monitoring,
|
||||
# cron jobs, other agents) that need to push a plain notification
|
||||
# to a user's chat with zero LLM cost. Reuses the same HMAC auth,
|
||||
# rate limiting, idempotency, and template rendering as agent mode.
|
||||
if route_config.get("deliver_only"):
|
||||
delivery = {
|
||||
"deliver": route_config.get("deliver", "log"),
|
||||
"deliver_extra": self._render_delivery_extra(
|
||||
route_config.get("deliver_extra", {}), payload
|
||||
),
|
||||
"payload": payload,
|
||||
}
|
||||
logger.info(
|
||||
"[webhook] direct-deliver event=%s route=%s target=%s msg_len=%d delivery=%s",
|
||||
event_type,
|
||||
route_name,
|
||||
delivery["deliver"],
|
||||
len(prompt),
|
||||
delivery_id,
|
||||
)
|
||||
try:
|
||||
result = await self._direct_deliver(prompt, delivery)
|
||||
except Exception:
|
||||
logger.exception(
|
||||
"[webhook] direct-deliver failed route=%s delivery=%s",
|
||||
route_name,
|
||||
delivery_id,
|
||||
)
|
||||
return web.json_response(
|
||||
{"status": "error", "error": "Delivery failed", "delivery_id": delivery_id},
|
||||
status=502,
|
||||
)
|
||||
|
||||
if result.success:
|
||||
return web.json_response(
|
||||
{
|
||||
"status": "delivered",
|
||||
"route": route_name,
|
||||
"target": delivery["deliver"],
|
||||
"delivery_id": delivery_id,
|
||||
},
|
||||
status=200,
|
||||
)
|
||||
# Delivery attempted but target rejected it — surface as 502
|
||||
# with a generic error (don't leak adapter-level detail).
|
||||
logger.warning(
|
||||
"[webhook] direct-deliver target rejected route=%s target=%s error=%s",
|
||||
route_name,
|
||||
delivery["deliver"],
|
||||
result.error,
|
||||
)
|
||||
return web.json_response(
|
||||
{"status": "error", "error": "Delivery failed", "delivery_id": delivery_id},
|
||||
status=502,
|
||||
)
|
||||
|
||||
# Use delivery_id in session key so concurrent webhooks on the
|
||||
# same route get independent agent runs (not queued/interrupted).
|
||||
session_chat_id = f"webhook:{route_name}:{delivery_id}"
|
||||
@@ -647,34 +572,6 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
# Response delivery
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _direct_deliver(
|
||||
self, content: str, delivery: dict
|
||||
) -> SendResult:
|
||||
"""Deliver *content* directly without invoking the agent.
|
||||
|
||||
Used by ``deliver_only`` routes: the rendered template becomes the
|
||||
literal message body, and we dispatch to the same delivery helpers
|
||||
that the agent-mode ``send()`` flow uses. All target types that
|
||||
work in agent mode work here — Telegram, Discord, Slack, GitHub
|
||||
PR comments, etc.
|
||||
"""
|
||||
deliver_type = delivery.get("deliver", "log")
|
||||
|
||||
if deliver_type == "log":
|
||||
# Shouldn't reach here — startup validation rejects deliver_only
|
||||
# with deliver=log — but guard defensively.
|
||||
logger.info("[webhook] direct-deliver log-only: %s", content[:200])
|
||||
return SendResult(success=True)
|
||||
|
||||
if deliver_type == "github_comment":
|
||||
return await self._deliver_github_comment(content, delivery)
|
||||
|
||||
# Fall through to the cross-platform dispatcher, which validates the
|
||||
# target name and routes via the gateway runner.
|
||||
return await self._deliver_cross_platform(
|
||||
deliver_type, content, delivery
|
||||
)
|
||||
|
||||
async def _deliver_github_comment(
|
||||
self, content: str, delivery: dict
|
||||
) -> SendResult:
|
||||
|
||||
+5
-146
@@ -508,11 +508,6 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
self._remember_chat_req_id(chat_id, self._payload_req_id(payload))
|
||||
|
||||
text, reply_text = self._extract_text(body)
|
||||
# Strip leading @mention in group chats so slash commands like
|
||||
# "@BotName /approve" are correctly recognized as "/approve".
|
||||
# Mirrors what the Telegram adapter does (re.sub @botname).
|
||||
if is_group and text:
|
||||
text = re.sub(r"^@\S+\s*", "", text).strip()
|
||||
media_urls, media_types = await self._extract_media(body)
|
||||
message_type = self._derive_message_type(body, text, media_types)
|
||||
has_reply_context = bool(reply_text and (text or media_urls))
|
||||
@@ -629,16 +624,13 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
msgtype = str(body.get("msgtype") or "").lower()
|
||||
|
||||
if msgtype == "mixed":
|
||||
_raw_mixed = body.get("mixed")
|
||||
mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
|
||||
_raw_items = mixed.get("msg_item")
|
||||
items = _raw_items if isinstance(_raw_items, list) else []
|
||||
mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
|
||||
items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
|
||||
for item in items:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
if str(item.get("msgtype") or "").lower() == "text":
|
||||
_raw_text = item.get("text")
|
||||
text_block = _raw_text if isinstance(_raw_text, dict) else {}
|
||||
text_block = item.get("text") if isinstance(item.get("text"), dict) else {}
|
||||
content = str(text_block.get("content") or "").strip()
|
||||
if content:
|
||||
text_parts.append(content)
|
||||
@@ -680,10 +672,8 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
msgtype = str(body.get("msgtype") or "").lower()
|
||||
|
||||
if msgtype == "mixed":
|
||||
_raw_mixed = body.get("mixed")
|
||||
mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
|
||||
_raw_items = mixed.get("msg_item")
|
||||
items = _raw_items if isinstance(_raw_items, list) else []
|
||||
mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
|
||||
items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
|
||||
for item in items:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
@@ -1469,134 +1459,3 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
"name": chat_id,
|
||||
"type": "group" if chat_id and chat_id.lower().startswith("group") else "dm",
|
||||
}
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# QR code scan flow for obtaining bot credentials
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
_QR_GENERATE_URL = "https://work.weixin.qq.com/ai/qc/generate"
|
||||
_QR_QUERY_URL = "https://work.weixin.qq.com/ai/qc/query_result"
|
||||
_QR_CODE_PAGE = "https://work.weixin.qq.com/ai/qc/gen?source=hermes&scode="
|
||||
_QR_POLL_INTERVAL = 3 # seconds
|
||||
_QR_POLL_TIMEOUT = 300 # 5 minutes
|
||||
|
||||
|
||||
def qr_scan_for_bot_info(
|
||||
*,
|
||||
timeout_seconds: int = _QR_POLL_TIMEOUT,
|
||||
) -> Optional[Dict[str, str]]:
|
||||
"""Run the WeCom QR scan flow to obtain bot_id and secret.
|
||||
|
||||
Fetches a QR code from WeCom, renders it in the terminal, and polls
|
||||
until the user scans it or the timeout expires.
|
||||
|
||||
Returns ``{"bot_id": ..., "secret": ...}`` on success, ``None`` on
|
||||
failure or timeout.
|
||||
|
||||
Note: the ``work.weixin.qq.com/ai/qc/{generate,query_result}`` endpoints
|
||||
used here are not part of WeCom's public developer API — they back the
|
||||
admin-console web UI's bot-creation flow and may change without notice.
|
||||
The same pattern is used by the feishu/dingtalk QR setup wizards.
|
||||
"""
|
||||
try:
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
except ImportError: # pragma: no cover
|
||||
logger.error("urllib is required for WeCom QR scan")
|
||||
return None
|
||||
|
||||
generate_url = f"{_QR_GENERATE_URL}?source=hermes"
|
||||
|
||||
# ── Step 1: Fetch QR code ──
|
||||
print(" Connecting to WeCom...", end="", flush=True)
|
||||
try:
|
||||
req = urllib.request.Request(generate_url, headers={"User-Agent": "HermesAgent/1.0"})
|
||||
with urllib.request.urlopen(req, timeout=15) as resp:
|
||||
raw = json.loads(resp.read().decode("utf-8"))
|
||||
except Exception as exc:
|
||||
logger.error("WeCom QR: failed to fetch QR code: %s", exc)
|
||||
print(f" failed: {exc}")
|
||||
return None
|
||||
|
||||
data = raw.get("data") or {}
|
||||
scode = str(data.get("scode") or "").strip()
|
||||
auth_url = str(data.get("auth_url") or "").strip()
|
||||
|
||||
if not scode or not auth_url:
|
||||
logger.error("WeCom QR: unexpected response format: %s", raw)
|
||||
print(" failed: unexpected response format")
|
||||
return None
|
||||
|
||||
print(" done.")
|
||||
|
||||
# ── Step 2: Render QR code in terminal ──
|
||||
print()
|
||||
qr_rendered = False
|
||||
try:
|
||||
import qrcode as _qrcode
|
||||
qr = _qrcode.QRCode()
|
||||
qr.add_data(auth_url)
|
||||
qr.make(fit=True)
|
||||
qr.print_ascii(invert=True)
|
||||
qr_rendered = True
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
page_url = f"{_QR_CODE_PAGE}{urllib.parse.quote(scode)}"
|
||||
if qr_rendered:
|
||||
print(f"\n Scan the QR code above, or open this URL directly:\n {page_url}")
|
||||
else:
|
||||
print(f" Open this URL in WeCom on your phone:\n\n {page_url}\n")
|
||||
print(" Tip: pip install qrcode to display a scannable QR code here next time")
|
||||
print()
|
||||
print(" Fetching configuration results...", end="", flush=True)
|
||||
|
||||
# ── Step 3: Poll for result ──
|
||||
import time
|
||||
deadline = time.time() + timeout_seconds
|
||||
query_url = f"{_QR_QUERY_URL}?scode={urllib.parse.quote(scode)}"
|
||||
poll_count = 0
|
||||
|
||||
while time.time() < deadline:
|
||||
try:
|
||||
req = urllib.request.Request(query_url, headers={"User-Agent": "HermesAgent/1.0"})
|
||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||
result = json.loads(resp.read().decode("utf-8"))
|
||||
except Exception as exc:
|
||||
logger.debug("WeCom QR poll error: %s", exc)
|
||||
time.sleep(_QR_POLL_INTERVAL)
|
||||
continue
|
||||
|
||||
poll_count += 1
|
||||
# Print a dot on every poll so progress is visible within 3s.
|
||||
print(".", end="", flush=True)
|
||||
|
||||
result_data = result.get("data") or {}
|
||||
status = str(result_data.get("status") or "").lower()
|
||||
|
||||
if status == "success":
|
||||
print() # newline after "Fetching configuration results..." dots
|
||||
bot_info = result_data.get("bot_info") or {}
|
||||
bot_id = str(bot_info.get("botid") or bot_info.get("bot_id") or "").strip()
|
||||
secret = str(bot_info.get("secret") or "").strip()
|
||||
if bot_id and secret:
|
||||
return {"bot_id": bot_id, "secret": secret}
|
||||
logger.warning(
|
||||
"WeCom QR: scan reported success but bot_info missing or incomplete: %s",
|
||||
result_data,
|
||||
)
|
||||
print(
|
||||
" QR scan reported success but no bot credentials were returned.\n"
|
||||
" This usually means the bot was not actually created on the WeCom side.\n"
|
||||
" Falling back to manual credential entry."
|
||||
)
|
||||
return None
|
||||
|
||||
time.sleep(_QR_POLL_INTERVAL)
|
||||
|
||||
print() # newline after dots
|
||||
print(f" QR scan timed out ({timeout_seconds // 60} minutes). Please try again.")
|
||||
return None
|
||||
|
||||
+34
-119
@@ -66,37 +66,6 @@ def _kill_port_process(port: int) -> None:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _terminate_bridge_process(proc, *, force: bool = False) -> None:
|
||||
"""Terminate the bridge process using process-tree semantics where possible."""
|
||||
if _IS_WINDOWS:
|
||||
cmd = ["taskkill", "/PID", str(proc.pid), "/T"]
|
||||
if force:
|
||||
cmd.append("/F")
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10,
|
||||
)
|
||||
except FileNotFoundError:
|
||||
if force:
|
||||
proc.kill()
|
||||
else:
|
||||
proc.terminate()
|
||||
return
|
||||
|
||||
if result.returncode != 0:
|
||||
details = (result.stderr or result.stdout or "").strip()
|
||||
raise OSError(details or f"taskkill failed for PID {proc.pid}")
|
||||
return
|
||||
|
||||
import signal
|
||||
|
||||
sig = signal.SIGTERM if not force else signal.SIGKILL
|
||||
os.killpg(os.getpgid(proc.pid), sig)
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
|
||||
|
||||
@@ -149,10 +118,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
- bridge_script: Path to the Node.js bridge script
|
||||
- bridge_port: Port for HTTP communication (default: 3000)
|
||||
- session_path: Path to store WhatsApp session data
|
||||
- dm_policy: "open" | "allowlist" | "disabled" — how DMs are handled (default: "open")
|
||||
- allow_from: List of sender IDs allowed in DMs (when dm_policy="allowlist")
|
||||
- group_policy: "open" | "allowlist" | "disabled" — which groups are processed (default: "open")
|
||||
- group_allow_from: List of group JIDs allowed (when group_policy="allowlist")
|
||||
"""
|
||||
|
||||
# WhatsApp message limits — practical UX limit, not protocol max.
|
||||
@@ -175,10 +140,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
get_hermes_dir("platforms/whatsapp/session", "whatsapp/session")
|
||||
))
|
||||
self._reply_prefix: Optional[str] = config.extra.get("reply_prefix")
|
||||
self._dm_policy = str(config.extra.get("dm_policy") or os.getenv("WHATSAPP_DM_POLICY", "open")).strip().lower()
|
||||
self._allow_from = self._coerce_allow_list(config.extra.get("allow_from") or config.extra.get("allowFrom"))
|
||||
self._group_policy = str(config.extra.get("group_policy") or os.getenv("WHATSAPP_GROUP_POLICY", "open")).strip().lower()
|
||||
self._group_allow_from = self._coerce_allow_list(config.extra.get("group_allow_from") or config.extra.get("groupAllowFrom"))
|
||||
self._mention_patterns = self._compile_mention_patterns()
|
||||
self._message_queue: asyncio.Queue = asyncio.Queue()
|
||||
self._bridge_log_fh = None
|
||||
@@ -202,33 +163,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
return {str(part).strip() for part in raw if str(part).strip()}
|
||||
return {part.strip() for part in str(raw).split(",") if part.strip()}
|
||||
|
||||
@staticmethod
|
||||
def _coerce_allow_list(raw) -> set[str]:
|
||||
"""Parse allow_from / group_allow_from from config or env var."""
|
||||
if raw is None:
|
||||
return set()
|
||||
if isinstance(raw, list):
|
||||
return {str(part).strip() for part in raw if str(part).strip()}
|
||||
return {part.strip() for part in str(raw).split(",") if part.strip()}
|
||||
|
||||
def _is_dm_allowed(self, sender_id: str) -> bool:
|
||||
"""Check whether a DM from the given sender should be processed."""
|
||||
if self._dm_policy == "disabled":
|
||||
return False
|
||||
if self._dm_policy == "allowlist":
|
||||
return sender_id in self._allow_from
|
||||
# "open" — all DMs allowed
|
||||
return True
|
||||
|
||||
def _is_group_allowed(self, chat_id: str) -> bool:
|
||||
"""Check whether a group chat should be processed."""
|
||||
if self._group_policy == "disabled":
|
||||
return False
|
||||
if self._group_policy == "allowlist":
|
||||
return chat_id in self._group_allow_from
|
||||
# "open" — all groups allowed
|
||||
return True
|
||||
|
||||
def _compile_mention_patterns(self):
|
||||
patterns = self.config.extra.get("mention_patterns")
|
||||
if patterns is None:
|
||||
@@ -321,18 +255,8 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
return cleaned.strip() or text
|
||||
|
||||
def _should_process_message(self, data: Dict[str, Any]) -> bool:
|
||||
is_group = data.get("isGroup", False)
|
||||
if is_group:
|
||||
chat_id = str(data.get("chatId") or "")
|
||||
if not self._is_group_allowed(chat_id):
|
||||
return False
|
||||
else:
|
||||
sender_id = str(data.get("senderId") or data.get("from") or "")
|
||||
if not self._is_dm_allowed(sender_id):
|
||||
return False
|
||||
# DMs that pass the policy gate are always processed
|
||||
if not data.get("isGroup"):
|
||||
return True
|
||||
# Group messages: check mention / free-response settings
|
||||
chat_id = str(data.get("chatId") or "")
|
||||
if chat_id in self._whatsapp_free_response_chats():
|
||||
return True
|
||||
@@ -365,40 +289,39 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
logger.info("[%s] Bridge found at %s", self.name, bridge_path)
|
||||
|
||||
# Acquire scoped lock to prevent duplicate sessions
|
||||
lock_acquired = False
|
||||
try:
|
||||
if not self._acquire_platform_lock('whatsapp-session', str(self._session_path), 'WhatsApp session'):
|
||||
return False
|
||||
lock_acquired = True
|
||||
except Exception as e:
|
||||
logger.warning("[%s] Could not acquire session lock (non-fatal): %s", self.name, e)
|
||||
|
||||
try:
|
||||
# Auto-install npm dependencies if node_modules doesn't exist
|
||||
bridge_dir = bridge_path.parent
|
||||
if not (bridge_dir / "node_modules").exists():
|
||||
print(f"[{self.name}] Installing WhatsApp bridge dependencies...")
|
||||
try:
|
||||
install_result = subprocess.run(
|
||||
["npm", "install", "--silent"],
|
||||
cwd=str(bridge_dir),
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=60,
|
||||
)
|
||||
if install_result.returncode != 0:
|
||||
print(f"[{self.name}] npm install failed: {install_result.stderr}")
|
||||
return False
|
||||
print(f"[{self.name}] Dependencies installed")
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to install dependencies: {e}")
|
||||
# Auto-install npm dependencies if node_modules doesn't exist
|
||||
bridge_dir = bridge_path.parent
|
||||
if not (bridge_dir / "node_modules").exists():
|
||||
print(f"[{self.name}] Installing WhatsApp bridge dependencies...")
|
||||
try:
|
||||
install_result = subprocess.run(
|
||||
["npm", "install", "--silent"],
|
||||
cwd=str(bridge_dir),
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=60,
|
||||
)
|
||||
if install_result.returncode != 0:
|
||||
print(f"[{self.name}] npm install failed: {install_result.stderr}")
|
||||
return False
|
||||
|
||||
print(f"[{self.name}] Dependencies installed")
|
||||
except Exception as e:
|
||||
print(f"[{self.name}] Failed to install dependencies: {e}")
|
||||
return False
|
||||
|
||||
try:
|
||||
# Ensure session directory exists
|
||||
self._session_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Check if bridge is already running and connected
|
||||
import aiohttp
|
||||
import asyncio
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(
|
||||
@@ -529,13 +452,10 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
self._release_platform_lock()
|
||||
logger.error("[%s] Failed to start bridge: %s", self.name, e, exc_info=True)
|
||||
self._close_bridge_log()
|
||||
return False
|
||||
finally:
|
||||
if not self._running:
|
||||
if lock_acquired:
|
||||
self._release_platform_lock()
|
||||
self._close_bridge_log()
|
||||
|
||||
def _close_bridge_log(self) -> None:
|
||||
"""Close the bridge log file handle if open."""
|
||||
@@ -567,14 +487,22 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
"""Stop the WhatsApp bridge and clean up any orphaned processes."""
|
||||
if self._bridge_process:
|
||||
try:
|
||||
# Kill the entire process group so child node processes die too
|
||||
import signal
|
||||
try:
|
||||
_terminate_bridge_process(self._bridge_process, force=False)
|
||||
if _IS_WINDOWS:
|
||||
self._bridge_process.terminate()
|
||||
else:
|
||||
os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGTERM)
|
||||
except (ProcessLookupError, PermissionError):
|
||||
self._bridge_process.terminate()
|
||||
await asyncio.sleep(1)
|
||||
if self._bridge_process.poll() is None:
|
||||
try:
|
||||
_terminate_bridge_process(self._bridge_process, force=True)
|
||||
if _IS_WINDOWS:
|
||||
self._bridge_process.kill()
|
||||
else:
|
||||
os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGKILL)
|
||||
except (ProcessLookupError, PermissionError):
|
||||
self._bridge_process.kill()
|
||||
except Exception as e:
|
||||
@@ -727,8 +655,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
chat_id: str,
|
||||
message_id: str,
|
||||
content: str,
|
||||
*,
|
||||
finalize: bool = False,
|
||||
) -> SendResult:
|
||||
"""Edit a previously sent message via the WhatsApp bridge."""
|
||||
if not self._running or not self._http_session:
|
||||
@@ -840,17 +766,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
"""Send a video natively via bridge — plays inline in WhatsApp."""
|
||||
return await self._send_media_to_bridge(chat_id, video_path, "video", caption)
|
||||
|
||||
async def send_voice(
|
||||
self,
|
||||
chat_id: str,
|
||||
audio_path: str,
|
||||
caption: Optional[str] = None,
|
||||
reply_to: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> SendResult:
|
||||
"""Send an audio file as a WhatsApp voice message via bridge."""
|
||||
return await self._send_media_to_bridge(chat_id, audio_path, "audio", caption)
|
||||
|
||||
async def send_document(
|
||||
self,
|
||||
chat_id: str,
|
||||
|
||||
+194
-824
File diff suppressed because it is too large
Load Diff
+15
-52
@@ -80,7 +80,7 @@ class SessionSource:
|
||||
user_name: Optional[str] = None
|
||||
thread_id: Optional[str] = None # For forum topics, Discord threads, etc.
|
||||
chat_topic: Optional[str] = None # Channel topic/description (Discord, Slack)
|
||||
user_id_alt: Optional[str] = None # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
|
||||
user_id_alt: Optional[str] = None # Signal UUID (alternative to phone number)
|
||||
chat_id_alt: Optional[str] = None # Signal group internal ID
|
||||
is_bot: bool = False # True when the message author is a bot/webhook (Discord)
|
||||
|
||||
@@ -152,7 +152,6 @@ class SessionContext:
|
||||
source: SessionSource
|
||||
connected_platforms: List[Platform]
|
||||
home_channels: Dict[Platform, HomeChannel]
|
||||
shared_multi_user_session: bool = False
|
||||
|
||||
# Session metadata
|
||||
session_key: str = ""
|
||||
@@ -167,7 +166,6 @@ class SessionContext:
|
||||
"home_channels": {
|
||||
p.value: hc.to_dict() for p, hc in self.home_channels.items()
|
||||
},
|
||||
"shared_multi_user_session": self.shared_multi_user_session,
|
||||
"session_key": self.session_key,
|
||||
"session_id": self.session_id,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
@@ -242,16 +240,18 @@ def build_session_context_prompt(
|
||||
lines.append(f"**Channel Topic:** {context.source.chat_topic}")
|
||||
|
||||
# User identity.
|
||||
# In shared multi-user sessions (shared threads OR shared non-thread groups
|
||||
# when group_sessions_per_user=False), multiple users contribute to the same
|
||||
# conversation. Don't pin a single user name in the system prompt — it
|
||||
# changes per-turn and would bust the prompt cache. Instead, note that
|
||||
# this is a multi-user session; individual sender names are prefixed on
|
||||
# each user message by the gateway.
|
||||
if context.shared_multi_user_session:
|
||||
session_label = "Multi-user thread" if context.source.thread_id else "Multi-user session"
|
||||
# In shared thread sessions (non-DM with thread_id), multiple users
|
||||
# contribute to the same conversation. Don't pin a single user name
|
||||
# in the system prompt — it changes per-turn and would bust the prompt
|
||||
# cache. Instead, note that this is a multi-user thread; individual
|
||||
# sender names are prefixed on each user message by the gateway.
|
||||
_is_shared_thread = (
|
||||
context.source.chat_type != "dm"
|
||||
and context.source.thread_id
|
||||
)
|
||||
if _is_shared_thread:
|
||||
lines.append(
|
||||
f"**Session type:** {session_label} — messages are prefixed "
|
||||
"**Session type:** Multi-user thread — messages are prefixed "
|
||||
"with [sender name]. Multiple users may participate."
|
||||
)
|
||||
elif context.source.user_name:
|
||||
@@ -467,27 +467,6 @@ class SessionEntry:
|
||||
)
|
||||
|
||||
|
||||
def is_shared_multi_user_session(
|
||||
source: SessionSource,
|
||||
*,
|
||||
group_sessions_per_user: bool = True,
|
||||
thread_sessions_per_user: bool = False,
|
||||
) -> bool:
|
||||
"""Return True when a non-DM session is shared across participants.
|
||||
|
||||
Mirrors the isolation rules in :func:`build_session_key`:
|
||||
- DMs are never shared.
|
||||
- Threads are shared unless ``thread_sessions_per_user`` is True.
|
||||
- Non-thread group/channel sessions are shared unless
|
||||
``group_sessions_per_user`` is True (default: True = isolated).
|
||||
"""
|
||||
if source.chat_type == "dm":
|
||||
return False
|
||||
if source.thread_id:
|
||||
return not thread_sessions_per_user
|
||||
return not group_sessions_per_user
|
||||
|
||||
|
||||
def build_session_key(
|
||||
source: SessionSource,
|
||||
group_sessions_per_user: bool = True,
|
||||
@@ -947,18 +926,12 @@ class SessionStore:
|
||||
continue
|
||||
# Never prune sessions with an active background process
|
||||
# attached — the user may still be waiting on output.
|
||||
# The callback is keyed by session_key (see process_registry.
|
||||
# has_active_for_session); passing session_id here used to
|
||||
# never match, so active sessions got pruned anyway.
|
||||
if self._has_active_processes_fn is not None:
|
||||
try:
|
||||
if self._has_active_processes_fn(entry.session_key):
|
||||
if self._has_active_processes_fn(entry.session_id):
|
||||
continue
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"has_active_processes_fn raised during prune for %s: %s",
|
||||
entry.session_key, exc,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
if entry.updated_at < cutoff:
|
||||
removed_keys.append(key)
|
||||
for key in removed_keys:
|
||||
@@ -1147,10 +1120,6 @@ class SessionStore:
|
||||
tool_name=message.get("tool_name"),
|
||||
tool_calls=message.get("tool_calls"),
|
||||
tool_call_id=message.get("tool_call_id"),
|
||||
reasoning=message.get("reasoning") if message.get("role") == "assistant" else None,
|
||||
reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
|
||||
reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
|
||||
codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Session DB operation failed: %s", e)
|
||||
@@ -1180,7 +1149,6 @@ class SessionStore:
|
||||
tool_calls=msg.get("tool_calls"),
|
||||
tool_call_id=msg.get("tool_call_id"),
|
||||
reasoning=msg.get("reasoning") if role == "assistant" else None,
|
||||
reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
|
||||
reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
|
||||
codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
|
||||
)
|
||||
@@ -1264,11 +1232,6 @@ def build_session_context(
|
||||
source=source,
|
||||
connected_platforms=connected,
|
||||
home_channels=home_channels,
|
||||
shared_multi_user_session=is_shared_multi_user_session(
|
||||
source,
|
||||
group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
|
||||
thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False),
|
||||
),
|
||||
)
|
||||
|
||||
if session_entry:
|
||||
|
||||
@@ -56,12 +56,6 @@ _SESSION_USER_ID: ContextVar = ContextVar("HERMES_SESSION_USER_ID", default=_UNS
|
||||
_SESSION_USER_NAME: ContextVar = ContextVar("HERMES_SESSION_USER_NAME", default=_UNSET)
|
||||
_SESSION_KEY: ContextVar = ContextVar("HERMES_SESSION_KEY", default=_UNSET)
|
||||
|
||||
# Cron auto-delivery vars — set per-job in run_job() so concurrent jobs
|
||||
# don't clobber each other's delivery targets.
|
||||
_CRON_AUTO_DELIVER_PLATFORM: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_PLATFORM", default=_UNSET)
|
||||
_CRON_AUTO_DELIVER_CHAT_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_CHAT_ID", default=_UNSET)
|
||||
_CRON_AUTO_DELIVER_THREAD_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_THREAD_ID", default=_UNSET)
|
||||
|
||||
_VAR_MAP = {
|
||||
"HERMES_SESSION_PLATFORM": _SESSION_PLATFORM,
|
||||
"HERMES_SESSION_CHAT_ID": _SESSION_CHAT_ID,
|
||||
@@ -70,9 +64,6 @@ _VAR_MAP = {
|
||||
"HERMES_SESSION_USER_ID": _SESSION_USER_ID,
|
||||
"HERMES_SESSION_USER_NAME": _SESSION_USER_NAME,
|
||||
"HERMES_SESSION_KEY": _SESSION_KEY,
|
||||
"HERMES_CRON_AUTO_DELIVER_PLATFORM": _CRON_AUTO_DELIVER_PLATFORM,
|
||||
"HERMES_CRON_AUTO_DELIVER_CHAT_ID": _CRON_AUTO_DELIVER_CHAT_ID,
|
||||
"HERMES_CRON_AUTO_DELIVER_THREAD_ID": _CRON_AUTO_DELIVER_THREAD_ID,
|
||||
}
|
||||
|
||||
|
||||
|
||||
+29
-196
@@ -22,18 +22,11 @@ from pathlib import Path
|
||||
from hermes_constants import get_hermes_home
|
||||
from typing import Any, Optional
|
||||
|
||||
if sys.platform == "win32":
|
||||
import msvcrt
|
||||
else:
|
||||
import fcntl
|
||||
|
||||
_GATEWAY_KIND = "hermes-gateway"
|
||||
_RUNTIME_STATUS_FILE = "gateway_state.json"
|
||||
_LOCKS_DIRNAME = "gateway-locks"
|
||||
_IS_WINDOWS = sys.platform == "win32"
|
||||
_UNSET = object()
|
||||
_GATEWAY_LOCK_FILENAME = "gateway.lock"
|
||||
_gateway_lock_handle = None
|
||||
|
||||
|
||||
def _get_pid_path() -> Path:
|
||||
@@ -42,14 +35,6 @@ def _get_pid_path() -> Path:
|
||||
return home / "gateway.pid"
|
||||
|
||||
|
||||
def _get_gateway_lock_path(pid_path: Optional[Path] = None) -> Path:
|
||||
"""Return the path to the runtime gateway lock file."""
|
||||
if pid_path is not None:
|
||||
return pid_path.with_name(_GATEWAY_LOCK_FILENAME)
|
||||
home = get_hermes_home()
|
||||
return home / _GATEWAY_LOCK_FILENAME
|
||||
|
||||
|
||||
def _get_runtime_status_path() -> Path:
|
||||
"""Return the persisted runtime health/status file path."""
|
||||
return _get_pid_path().with_name(_RUNTIME_STATUS_FILE)
|
||||
@@ -136,7 +121,6 @@ def _looks_like_gateway_process(pid: int) -> bool:
|
||||
"hermes_cli.main gateway",
|
||||
"hermes_cli/main.py gateway",
|
||||
"hermes gateway",
|
||||
"hermes-gateway",
|
||||
"gateway/run.py",
|
||||
)
|
||||
return any(pattern in cmdline for pattern in patterns)
|
||||
@@ -228,160 +212,21 @@ def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
|
||||
return None
|
||||
|
||||
|
||||
def _read_gateway_lock_record(lock_path: Optional[Path] = None) -> Optional[dict[str, Any]]:
|
||||
return _read_pid_record(lock_path or _get_gateway_lock_path())
|
||||
|
||||
|
||||
def _pid_from_record(record: Optional[dict[str, Any]]) -> Optional[int]:
|
||||
if not record:
|
||||
return None
|
||||
try:
|
||||
return int(record["pid"])
|
||||
except (KeyError, TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:
|
||||
"""Delete a stale gateway PID file (and its sibling lock metadata).
|
||||
|
||||
Called from ``get_running_pid()`` after the runtime lock has already been
|
||||
confirmed inactive, so the on-disk metadata is known to belong to a dead
|
||||
process. Unlike ``remove_pid_file()`` (which defensively refuses to delete
|
||||
a PID file whose ``pid`` field differs from ``os.getpid()`` to protect
|
||||
``--replace`` handoffs), this path force-unlinks both files so the next
|
||||
startup sees a clean slate.
|
||||
"""
|
||||
if not cleanup_stale:
|
||||
return
|
||||
try:
|
||||
pid_path.unlink(missing_ok=True)
|
||||
if pid_path == _get_pid_path():
|
||||
remove_pid_file()
|
||||
else:
|
||||
pid_path.unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
_get_gateway_lock_path(pid_path).unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _write_gateway_lock_record(handle) -> None:
|
||||
handle.seek(0)
|
||||
handle.truncate()
|
||||
json.dump(_build_pid_record(), handle)
|
||||
handle.flush()
|
||||
try:
|
||||
os.fsync(handle.fileno())
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def _try_acquire_file_lock(handle) -> bool:
|
||||
try:
|
||||
if _IS_WINDOWS:
|
||||
handle.seek(0, os.SEEK_END)
|
||||
if handle.tell() == 0:
|
||||
handle.write("\n")
|
||||
handle.flush()
|
||||
handle.seek(0)
|
||||
msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
|
||||
else:
|
||||
fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
return True
|
||||
except (BlockingIOError, OSError):
|
||||
return False
|
||||
|
||||
|
||||
def _release_file_lock(handle) -> None:
|
||||
try:
|
||||
if _IS_WINDOWS:
|
||||
handle.seek(0)
|
||||
msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
|
||||
else:
|
||||
fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def acquire_gateway_runtime_lock() -> bool:
|
||||
"""Claim the cross-process runtime lock for the gateway.
|
||||
|
||||
Unlike the PID file, the lock is owned by the live process itself. If the
|
||||
process dies abruptly, the OS releases the lock automatically.
|
||||
"""
|
||||
global _gateway_lock_handle
|
||||
if _gateway_lock_handle is not None:
|
||||
return True
|
||||
|
||||
path = _get_gateway_lock_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
handle = open(path, "a+", encoding="utf-8")
|
||||
if not _try_acquire_file_lock(handle):
|
||||
handle.close()
|
||||
return False
|
||||
_write_gateway_lock_record(handle)
|
||||
_gateway_lock_handle = handle
|
||||
return True
|
||||
|
||||
|
||||
def release_gateway_runtime_lock() -> None:
|
||||
"""Release the gateway runtime lock when owned by this process."""
|
||||
global _gateway_lock_handle
|
||||
handle = _gateway_lock_handle
|
||||
if handle is None:
|
||||
return
|
||||
_gateway_lock_handle = None
|
||||
_release_file_lock(handle)
|
||||
try:
|
||||
handle.close()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def is_gateway_runtime_lock_active(lock_path: Optional[Path] = None) -> bool:
|
||||
"""Return True when some process currently owns the gateway runtime lock."""
|
||||
global _gateway_lock_handle
|
||||
resolved_lock_path = lock_path or _get_gateway_lock_path()
|
||||
if _gateway_lock_handle is not None and resolved_lock_path == _get_gateway_lock_path():
|
||||
return True
|
||||
|
||||
if not resolved_lock_path.exists():
|
||||
return False
|
||||
|
||||
handle = open(resolved_lock_path, "a+", encoding="utf-8")
|
||||
try:
|
||||
if _try_acquire_file_lock(handle):
|
||||
_release_file_lock(handle)
|
||||
return False
|
||||
return True
|
||||
finally:
|
||||
try:
|
||||
handle.close()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def write_pid_file() -> None:
|
||||
"""Write the current process PID and metadata to the gateway PID file.
|
||||
|
||||
Uses atomic O_CREAT | O_EXCL creation so that concurrent --replace
|
||||
invocations race: exactly one process wins and the rest get
|
||||
FileExistsError.
|
||||
"""
|
||||
path = _get_pid_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
record = json.dumps(_build_pid_record())
|
||||
try:
|
||||
fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
|
||||
except FileExistsError:
|
||||
raise # Let caller decide: another gateway is racing us
|
||||
try:
|
||||
with os.fdopen(fd, "w", encoding="utf-8") as f:
|
||||
f.write(record)
|
||||
except Exception:
|
||||
try:
|
||||
path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
"""Write the current process PID and metadata to the gateway PID file."""
|
||||
_write_json_file(_get_pid_path(), _build_pid_record())
|
||||
|
||||
|
||||
def write_runtime_status(
|
||||
@@ -496,8 +341,7 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
|
||||
if not stale:
|
||||
try:
|
||||
os.kill(existing_pid, 0)
|
||||
except (ProcessLookupError, PermissionError, OSError):
|
||||
# Windows raises OSError with WinError 87 for invalid pid check
|
||||
except (ProcessLookupError, PermissionError):
|
||||
stale = True
|
||||
else:
|
||||
current_start = _get_process_start_time(existing_pid)
|
||||
@@ -719,46 +563,35 @@ def get_running_pid(
|
||||
Cleans up stale PID files automatically.
|
||||
"""
|
||||
resolved_pid_path = pid_path or _get_pid_path()
|
||||
resolved_lock_path = _get_gateway_lock_path(resolved_pid_path)
|
||||
lock_active = is_gateway_runtime_lock_active(resolved_lock_path)
|
||||
if not lock_active:
|
||||
record = _read_pid_record(resolved_pid_path)
|
||||
if not record:
|
||||
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
|
||||
return None
|
||||
|
||||
primary_record = _read_pid_record(resolved_pid_path)
|
||||
fallback_record = _read_gateway_lock_record(resolved_lock_path)
|
||||
try:
|
||||
pid = int(record["pid"])
|
||||
except (KeyError, TypeError, ValueError):
|
||||
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
|
||||
return None
|
||||
|
||||
for record in (primary_record, fallback_record):
|
||||
pid = _pid_from_record(record)
|
||||
if pid is None:
|
||||
continue
|
||||
try:
|
||||
os.kill(pid, 0) # signal 0 = existence check, no actual signal sent
|
||||
except (ProcessLookupError, PermissionError):
|
||||
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
|
||||
return None
|
||||
|
||||
try:
|
||||
os.kill(pid, 0) # signal 0 = existence check, no actual signal sent
|
||||
except ProcessLookupError:
|
||||
continue
|
||||
except PermissionError:
|
||||
# The process exists but belongs to another user/service scope.
|
||||
# With the runtime lock still held, prefer keeping it visible
|
||||
# rather than deleting the PID file as "stale".
|
||||
if _record_looks_like_gateway(record):
|
||||
return pid
|
||||
continue
|
||||
except OSError:
|
||||
# Windows raises OSError with WinError 87 for an invalid pid
|
||||
# (process is definitely gone). Treat as "process doesn't exist".
|
||||
continue
|
||||
recorded_start = record.get("start_time")
|
||||
current_start = _get_process_start_time(pid)
|
||||
if recorded_start is not None and current_start is not None and current_start != recorded_start:
|
||||
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
|
||||
return None
|
||||
|
||||
recorded_start = record.get("start_time")
|
||||
current_start = _get_process_start_time(pid)
|
||||
if recorded_start is not None and current_start is not None and current_start != recorded_start:
|
||||
continue
|
||||
if not _looks_like_gateway_process(pid):
|
||||
if not _record_looks_like_gateway(record):
|
||||
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
|
||||
return None
|
||||
|
||||
if _looks_like_gateway_process(pid) or _record_looks_like_gateway(record):
|
||||
return pid
|
||||
|
||||
_cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
|
||||
return None
|
||||
return pid
|
||||
|
||||
|
||||
def is_gateway_running(
|
||||
|
||||
@@ -571,30 +571,6 @@ class GatewayStreamConsumer:
|
||||
if final_text.strip() and final_text != self._visible_prefix():
|
||||
continuation = final_text
|
||||
else:
|
||||
# Defence-in-depth for #7183: the last edit may still show the
|
||||
# cursor character because fallback mode was entered after an
|
||||
# edit failure left it stuck. Try one final edit to strip it
|
||||
# so the message doesn't freeze with a visible ▉. Best-effort
|
||||
# — if this edit also fails (flood control still active),
|
||||
# _try_strip_cursor has already been called on fallback entry
|
||||
# and the adaptive-backoff retries will have had their shot.
|
||||
if (
|
||||
self._message_id
|
||||
and self._last_sent_text
|
||||
and self.cfg.cursor
|
||||
and self._last_sent_text.endswith(self.cfg.cursor)
|
||||
):
|
||||
clean_text = self._last_sent_text[:-len(self.cfg.cursor)]
|
||||
try:
|
||||
result = await self.adapter.edit_message(
|
||||
chat_id=self.chat_id,
|
||||
message_id=self._message_id,
|
||||
content=clean_text,
|
||||
)
|
||||
if result.success:
|
||||
self._last_sent_text = clean_text
|
||||
except Exception:
|
||||
pass
|
||||
self._already_sent = True
|
||||
self._final_response_sent = True
|
||||
return
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
"""Hermes Agent — The self-improving AI agent."""
|
||||
@@ -1,5 +0,0 @@
|
||||
"""Allow running the ACP adapter as ``python -m hermes_agent.acp``."""
|
||||
|
||||
from hermes_agent.acp.entry import main
|
||||
|
||||
main()
|
||||
+11
-53
@@ -20,7 +20,6 @@ import logging
|
||||
import os
|
||||
import shutil
|
||||
import shlex
|
||||
import ssl
|
||||
import stat
|
||||
import base64
|
||||
import hashlib
|
||||
@@ -72,8 +71,6 @@ DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
|
||||
DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
|
||||
DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
|
||||
DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1"
|
||||
STEPFUN_STEP_PLAN_INTL_BASE_URL = "https://api.stepfun.ai/step_plan/v1"
|
||||
STEPFUN_STEP_PLAN_CN_BASE_URL = "https://api.stepfun.com/step_plan/v1"
|
||||
CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
|
||||
CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
|
||||
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
|
||||
@@ -154,7 +151,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
id="gemini",
|
||||
name="Google AI Studio",
|
||||
auth_type="api_key",
|
||||
inference_base_url="https://generativelanguage.googleapis.com/v1beta",
|
||||
inference_base_url="https://generativelanguage.googleapis.com/v1beta/openai",
|
||||
api_key_env_vars=("GOOGLE_API_KEY", "GEMINI_API_KEY"),
|
||||
base_url_env_var="GEMINI_BASE_URL",
|
||||
),
|
||||
@@ -170,11 +167,8 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
id="kimi-coding",
|
||||
name="Kimi / Moonshot",
|
||||
auth_type="api_key",
|
||||
# Legacy platform.moonshot.ai keys use this endpoint (OpenAI-compat).
|
||||
# sk-kimi- (Kimi Code) keys are auto-redirected to api.kimi.com/coding
|
||||
# by _resolve_kimi_base_url() below.
|
||||
inference_base_url="https://api.moonshot.ai/v1",
|
||||
api_key_env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"),
|
||||
api_key_env_vars=("KIMI_API_KEY",),
|
||||
base_url_env_var="KIMI_BASE_URL",
|
||||
),
|
||||
"kimi-coding-cn": ProviderConfig(
|
||||
@@ -184,14 +178,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
inference_base_url="https://api.moonshot.cn/v1",
|
||||
api_key_env_vars=("KIMI_CN_API_KEY",),
|
||||
),
|
||||
"stepfun": ProviderConfig(
|
||||
id="stepfun",
|
||||
name="StepFun Step Plan",
|
||||
auth_type="api_key",
|
||||
inference_base_url=STEPFUN_STEP_PLAN_INTL_BASE_URL,
|
||||
api_key_env_vars=("STEPFUN_API_KEY",),
|
||||
base_url_env_var="STEPFUN_BASE_URL",
|
||||
),
|
||||
"arcee": ProviderConfig(
|
||||
id="arcee",
|
||||
name="Arcee AI",
|
||||
@@ -214,7 +200,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
auth_type="api_key",
|
||||
inference_base_url="https://api.anthropic.com",
|
||||
api_key_env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"),
|
||||
base_url_env_var="ANTHROPIC_BASE_URL",
|
||||
),
|
||||
"alibaba": ProviderConfig(
|
||||
id="alibaba",
|
||||
@@ -354,16 +339,10 @@ def get_anthropic_key() -> str:
|
||||
# =============================================================================
|
||||
|
||||
# Kimi Code (kimi.com/code) issues keys prefixed "sk-kimi-" that only work
|
||||
# on api.kimi.com/coding. Legacy keys from platform.moonshot.ai work on
|
||||
# api.moonshot.ai/v1 (the old default). Auto-detect when user hasn't set
|
||||
# on api.kimi.com/coding/v1. Legacy keys from platform.moonshot.ai work on
|
||||
# api.moonshot.ai/v1 (the default). Auto-detect when user hasn't set
|
||||
# KIMI_BASE_URL explicitly.
|
||||
#
|
||||
# Note: the base URL intentionally has NO /v1 suffix. The /coding endpoint
|
||||
# speaks the Anthropic Messages protocol, and the anthropic SDK appends
|
||||
# "/v1/messages" internally — so "/coding" + SDK suffix → "/coding/v1/messages"
|
||||
# (the correct target). Using "/coding/v1" here would produce
|
||||
# "/coding/v1/v1/messages" (a 404).
|
||||
KIMI_CODE_BASE_URL = "https://api.kimi.com/coding"
|
||||
KIMI_CODE_BASE_URL = "https://api.kimi.com/coding/v1"
|
||||
|
||||
|
||||
def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> str:
|
||||
@@ -374,9 +353,6 @@ def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) ->
|
||||
"""
|
||||
if env_override:
|
||||
return env_override
|
||||
# No key → nothing to infer from. Return default without inspecting.
|
||||
if not api_key:
|
||||
return default_url
|
||||
if api_key.startswith("sk-kimi-"):
|
||||
return KIMI_CODE_BASE_URL
|
||||
return default_url
|
||||
@@ -504,14 +480,6 @@ def _resolve_zai_base_url(api_key: str, default_url: str, env_override: str) ->
|
||||
if env_override:
|
||||
return env_override
|
||||
|
||||
# No API key set → don't probe (would fire N×M HTTPS requests with an
|
||||
# empty Bearer token, all returning 401). This path is hit during
|
||||
# auxiliary-client auto-detection when the user has no Z.AI credentials
|
||||
# at all — the caller discards the result immediately, so the probe is
|
||||
# pure latency for every AIAgent construction.
|
||||
if not api_key:
|
||||
return default_url
|
||||
|
||||
# Check provider-state cache for a previously-detected endpoint.
|
||||
auth_store = _load_auth_store()
|
||||
state = _load_provider_state(auth_store, "zai") or {}
|
||||
@@ -1003,7 +971,6 @@ def resolve_provider(
|
||||
"x-ai": "xai", "x.ai": "xai", "grok": "xai",
|
||||
"kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
|
||||
"kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
|
||||
"step": "stepfun", "stepfun-coding-plan": "stepfun",
|
||||
"arcee-ai": "arcee", "arceeai": "arcee",
|
||||
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
|
||||
"claude": "anthropic", "claude-code": "anthropic",
|
||||
@@ -1685,7 +1652,7 @@ def _resolve_verify(
|
||||
insecure: Optional[bool] = None,
|
||||
ca_bundle: Optional[str] = None,
|
||||
auth_state: Optional[Dict[str, Any]] = None,
|
||||
) -> bool | ssl.SSLContext:
|
||||
) -> bool | str:
|
||||
tls_state = auth_state.get("tls") if isinstance(auth_state, dict) else {}
|
||||
tls_state = tls_state if isinstance(tls_state, dict) else {}
|
||||
|
||||
@@ -1705,12 +1672,13 @@ def _resolve_verify(
|
||||
if effective_ca:
|
||||
ca_path = str(effective_ca)
|
||||
if not os.path.isfile(ca_path):
|
||||
logger.warning(
|
||||
import logging
|
||||
logging.getLogger("hermes.auth").warning(
|
||||
"CA bundle path does not exist: %s — falling back to default certificates",
|
||||
ca_path,
|
||||
)
|
||||
return True
|
||||
return ssl.create_default_context(cafile=ca_path)
|
||||
return ca_path
|
||||
return True
|
||||
|
||||
|
||||
@@ -2753,17 +2721,6 @@ def _update_config_for_provider(
|
||||
# Clear stale base_url to prevent contamination when switching providers
|
||||
model_cfg.pop("base_url", None)
|
||||
|
||||
# Clear stale api_key/api_mode left over from a previous custom provider.
|
||||
# When the user switches from e.g. a MiniMax custom endpoint
|
||||
# (api_mode=anthropic_messages, api_key=mxp-...) to a built-in provider
|
||||
# (e.g. OpenRouter), the stale api_key/api_mode would override the new
|
||||
# provider's credentials and transport choice. Built-in providers that
|
||||
# need a specific api_mode (copilot, xai) set it at request-resolution
|
||||
# time via `_copilot_runtime_api_mode` / `_detect_api_mode_for_url`, so
|
||||
# removing the persisted value here is safe.
|
||||
model_cfg.pop("api_key", None)
|
||||
model_cfg.pop("api_mode", None)
|
||||
|
||||
# When switching to a non-OpenRouter provider, ensure model.default is
|
||||
# valid for the new provider. An OpenRouter-formatted name like
|
||||
# "anthropic/claude-opus-4.6" will fail on direct-API providers.
|
||||
@@ -3396,7 +3353,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
)
|
||||
|
||||
from hermes_cli.models import (
|
||||
_PROVIDER_MODELS, get_pricing_for_provider,
|
||||
_PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models,
|
||||
check_nous_free_tier, partition_nous_models_by_tier,
|
||||
)
|
||||
model_ids = _PROVIDER_MODELS.get("nous", [])
|
||||
@@ -3405,6 +3362,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
unavailable_models: list = []
|
||||
if model_ids:
|
||||
pricing = get_pricing_for_provider("nous")
|
||||
model_ids = filter_nous_free_models(model_ids, pricing)
|
||||
free_tier = check_nous_free_tier()
|
||||
if free_tier:
|
||||
model_ids, unavailable_models = partition_nous_models_by_tier(
|
||||
|
||||
+63
-37
@@ -152,23 +152,6 @@ def auth_add_command(args) -> None:
|
||||
|
||||
pool = load_pool(provider)
|
||||
|
||||
# Clear ALL suppressions for this provider — re-adding a credential is
|
||||
# a strong signal the user wants auth re-enabled. This covers env:*
|
||||
# (shell-exported vars), gh_cli (copilot), claude_code, qwen-cli,
|
||||
# device_code (codex), etc. One consistent re-engagement pattern.
|
||||
# Matches the Codex device_code re-link pattern that predates this.
|
||||
if not provider.startswith(CUSTOM_POOL_PREFIX):
|
||||
try:
|
||||
from hermes_cli.auth import (
|
||||
_load_auth_store,
|
||||
unsuppress_credential_source,
|
||||
)
|
||||
suppressed = _load_auth_store().get("suppressed_sources", {})
|
||||
for src in list(suppressed.get(provider, []) or []):
|
||||
unsuppress_credential_source(provider, src)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if requested_type == AUTH_TYPE_API_KEY:
|
||||
token = (getattr(args, "api_key", None) or "").strip()
|
||||
if not token:
|
||||
@@ -355,28 +338,71 @@ def auth_remove_command(args) -> None:
|
||||
raise SystemExit(f'No credential matching "{target}" for provider {provider}.')
|
||||
print(f"Removed {provider} credential #{index} ({removed.label})")
|
||||
|
||||
# Unified removal dispatch. Every credential source Hermes reads from
|
||||
# (env vars, external OAuth files, auth.json blocks, custom config)
|
||||
# has a RemovalStep registered in agent.credential_sources. The step
|
||||
# handles its source-specific cleanup and we centralise suppression +
|
||||
# user-facing output here so every source behaves identically from
|
||||
# the user's perspective.
|
||||
from agent.credential_sources import find_removal_step
|
||||
from hermes_cli.auth import suppress_credential_source
|
||||
# If this was an env-seeded credential, also clear the env var from .env
|
||||
# so it doesn't get re-seeded on the next load_pool() call.
|
||||
if removed.source.startswith("env:"):
|
||||
env_var = removed.source[len("env:"):]
|
||||
if env_var:
|
||||
from hermes_cli.config import remove_env_value
|
||||
cleared = remove_env_value(env_var)
|
||||
if cleared:
|
||||
print(f"Cleared {env_var} from .env")
|
||||
|
||||
step = find_removal_step(provider, removed.source)
|
||||
if step is None:
|
||||
# Unregistered source — e.g. "manual", which has nothing external
|
||||
# to clean up. The pool entry is already gone; we're done.
|
||||
return
|
||||
# If this was a singleton-seeded credential (OAuth device_code, hermes_pkce),
|
||||
# clear the underlying auth store / credential file so it doesn't get
|
||||
# re-seeded on the next load_pool() call.
|
||||
elif provider == "openai-codex" and (
|
||||
removed.source == "device_code" or removed.source.endswith(":device_code")
|
||||
):
|
||||
# Codex tokens live in TWO places: the Hermes auth store and
|
||||
# ~/.codex/auth.json (the Codex CLI shared file). On every refresh,
|
||||
# refresh_codex_oauth_pure() writes to both. So clearing only the
|
||||
# Hermes auth store is not enough — _seed_from_singletons() will
|
||||
# auto-import from ~/.codex/auth.json on the next load_pool() and
|
||||
# the removal is instantly undone. Mark the source as suppressed
|
||||
# so auto-import is skipped; leave ~/.codex/auth.json untouched so
|
||||
# the Codex CLI itself keeps working.
|
||||
from hermes_cli.auth import (
|
||||
_load_auth_store, _save_auth_store, _auth_store_lock,
|
||||
suppress_credential_source,
|
||||
)
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
providers_dict = auth_store.get("providers")
|
||||
if isinstance(providers_dict, dict) and provider in providers_dict:
|
||||
del providers_dict[provider]
|
||||
_save_auth_store(auth_store)
|
||||
print(f"Cleared {provider} OAuth tokens from auth store")
|
||||
suppress_credential_source(provider, "device_code")
|
||||
print("Suppressed openai-codex device_code source — it will not be re-seeded.")
|
||||
print("Note: Codex CLI credentials still live in ~/.codex/auth.json")
|
||||
print("Run `hermes auth add openai-codex` to re-enable if needed.")
|
||||
|
||||
result = step.remove_fn(provider, removed)
|
||||
for line in result.cleaned:
|
||||
print(line)
|
||||
if result.suppress:
|
||||
suppress_credential_source(provider, removed.source)
|
||||
for line in result.hints:
|
||||
print(line)
|
||||
elif removed.source == "device_code" and provider == "nous":
|
||||
from hermes_cli.auth import (
|
||||
_load_auth_store, _save_auth_store, _auth_store_lock,
|
||||
)
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
providers_dict = auth_store.get("providers")
|
||||
if isinstance(providers_dict, dict) and provider in providers_dict:
|
||||
del providers_dict[provider]
|
||||
_save_auth_store(auth_store)
|
||||
print(f"Cleared {provider} OAuth tokens from auth store")
|
||||
|
||||
elif removed.source == "hermes_pkce" and provider == "anthropic":
|
||||
from hermes_constants import get_hermes_home
|
||||
oauth_file = get_hermes_home() / ".anthropic_oauth.json"
|
||||
if oauth_file.exists():
|
||||
oauth_file.unlink()
|
||||
print("Cleared Hermes Anthropic OAuth credentials")
|
||||
|
||||
elif removed.source == "claude_code" and provider == "anthropic":
|
||||
from hermes_cli.auth import suppress_credential_source
|
||||
suppress_credential_source(provider, "claude_code")
|
||||
print("Suppressed claude_code credential — it will not be re-seeded.")
|
||||
print("Note: Claude Code credentials still live in ~/.claude/.credentials.json")
|
||||
print("Run `hermes auth add anthropic` to re-enable if needed.")
|
||||
|
||||
|
||||
def auth_reset_command(args) -> None:
|
||||
|
||||
@@ -201,7 +201,7 @@ def run_backup(args) -> None:
|
||||
else:
|
||||
zf.write(abs_path, arcname=str(rel_path))
|
||||
total_bytes += abs_path.stat().st_size
|
||||
except (PermissionError, OSError, ValueError) as exc:
|
||||
except (PermissionError, OSError) as exc:
|
||||
errors.append(f" {rel_path}: {exc}")
|
||||
continue
|
||||
|
||||
|
||||
+1
-1
@@ -249,7 +249,7 @@ def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
|
||||
state_path = child / state_name
|
||||
if state_path.exists():
|
||||
kind = "directory" if state_path.is_dir() else "file"
|
||||
rel = state_path.relative_to(source_dir).as_posix()
|
||||
rel = state_path.relative_to(source_dir)
|
||||
findings.append((state_path, f"Workspace {kind}: {rel}"))
|
||||
|
||||
return findings
|
||||
|
||||
@@ -24,6 +24,7 @@ _FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [
|
||||
("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")),
|
||||
("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")),
|
||||
("gpt-5.3-codex", ("gpt-5.2-codex",)),
|
||||
("gpt-5.3-codex-spark", ("gpt-5.3-codex", "gpt-5.2-codex")),
|
||||
]
|
||||
|
||||
|
||||
|
||||
+11
-91
@@ -260,26 +260,6 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset(
|
||||
)
|
||||
|
||||
|
||||
def is_gateway_known_command(name: str | None) -> bool:
|
||||
"""Return True if ``name`` resolves to a gateway-dispatchable slash command.
|
||||
|
||||
This covers both built-in commands (``GATEWAY_KNOWN_COMMANDS`` derived
|
||||
from ``COMMAND_REGISTRY``) and plugin-registered commands, which are
|
||||
looked up lazily so importing this module never forces plugin
|
||||
discovery. Gateway code uses this to decide whether to emit
|
||||
``command:<name>`` hooks — plugin commands get the same lifecycle
|
||||
events as built-ins.
|
||||
"""
|
||||
if not name:
|
||||
return False
|
||||
if name in GATEWAY_KNOWN_COMMANDS:
|
||||
return True
|
||||
for plugin_name, _description, _args_hint in _iter_plugin_command_entries():
|
||||
if plugin_name == name:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
# Commands with explicit Level-2 running-agent handlers in gateway/run.py.
|
||||
# Listed here for introspection / tests; semantically a subset of
|
||||
# "all resolvable commands" — which is the real bypass set (see
|
||||
@@ -391,47 +371,12 @@ def gateway_help_lines() -> list[str]:
|
||||
return lines
|
||||
|
||||
|
||||
def _iter_plugin_command_entries() -> list[tuple[str, str, str]]:
|
||||
"""Yield (name, description, args_hint) tuples for all plugin slash commands.
|
||||
|
||||
Plugin commands are registered via
|
||||
:func:`hermes_cli.plugins.PluginContext.register_command`. They behave
|
||||
like ``CommandDef`` entries for gateway surfacing: they appear in the
|
||||
Telegram command menu, in Slack's ``/hermes`` subcommand mapping, and
|
||||
(via :func:`gateway.platforms.discord._register_slash_commands`) in
|
||||
Discord's native slash command picker.
|
||||
|
||||
Lookup is lazy so importing this module never forces plugin discovery
|
||||
(which can trigger filesystem scans and environment-dependent
|
||||
behavior).
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.plugins import get_plugin_commands
|
||||
except Exception:
|
||||
return []
|
||||
try:
|
||||
commands = get_plugin_commands() or {}
|
||||
except Exception:
|
||||
return []
|
||||
entries: list[tuple[str, str, str]] = []
|
||||
for name, meta in commands.items():
|
||||
if not isinstance(name, str) or not isinstance(meta, dict):
|
||||
continue
|
||||
description = str(meta.get("description") or f"Run /{name}")
|
||||
args_hint = str(meta.get("args_hint") or "").strip()
|
||||
entries.append((name, description, args_hint))
|
||||
return entries
|
||||
|
||||
|
||||
def telegram_bot_commands() -> list[tuple[str, str]]:
|
||||
"""Return (command_name, description) pairs for Telegram setMyCommands.
|
||||
|
||||
Telegram command names cannot contain hyphens, so they are replaced with
|
||||
underscores. Aliases are skipped -- Telegram shows one menu entry per
|
||||
canonical command.
|
||||
|
||||
Plugin-registered slash commands are included so plugins get native
|
||||
autocomplete in Telegram without touching core code.
|
||||
"""
|
||||
overrides = _resolve_config_gates()
|
||||
result: list[tuple[str, str]] = []
|
||||
@@ -441,10 +386,6 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
|
||||
tg_name = _sanitize_telegram_name(cmd.name)
|
||||
if tg_name:
|
||||
result.append((tg_name, cmd.description))
|
||||
for name, description, _args_hint in _iter_plugin_command_entries():
|
||||
tg_name = _sanitize_telegram_name(name)
|
||||
if tg_name:
|
||||
result.append((tg_name, description))
|
||||
return result
|
||||
|
||||
|
||||
@@ -556,8 +497,9 @@ def _collect_gateway_skill_entries(
|
||||
# --- Tier 1: Plugin slash commands (never trimmed) ---------------------
|
||||
plugin_pairs: list[tuple[str, str]] = []
|
||||
try:
|
||||
from hermes_cli.plugins import get_plugin_commands
|
||||
plugin_cmds = get_plugin_commands()
|
||||
from hermes_cli.plugins import get_plugin_manager
|
||||
pm = get_plugin_manager()
|
||||
plugin_cmds = getattr(pm, "_plugin_commands", {})
|
||||
for cmd_name in sorted(plugin_cmds):
|
||||
name = sanitize_name(cmd_name) if sanitize_name else cmd_name
|
||||
if not name:
|
||||
@@ -809,9 +751,6 @@ def slack_subcommand_map() -> dict[str, str]:
|
||||
|
||||
Maps both canonical names and aliases so /hermes bg do stuff works
|
||||
the same as /hermes background do stuff.
|
||||
|
||||
Plugin-registered slash commands are included so ``/hermes <plugin-cmd>``
|
||||
routes through the plugin handler.
|
||||
"""
|
||||
overrides = _resolve_config_gates()
|
||||
mapping: dict[str, str] = {}
|
||||
@@ -821,9 +760,6 @@ def slack_subcommand_map() -> dict[str, str]:
|
||||
mapping[cmd.name] = f"/{cmd.name}"
|
||||
for alias in cmd.aliases:
|
||||
mapping[alias] = f"/{alias}"
|
||||
for name, _description, _args_hint in _iter_plugin_command_entries():
|
||||
if name not in mapping:
|
||||
mapping[name] = f"/{name}"
|
||||
return mapping
|
||||
|
||||
|
||||
@@ -989,22 +925,12 @@ class SlashCommandCompleter(Completer):
|
||||
display_meta=meta,
|
||||
)
|
||||
|
||||
# If the user typed @file: / @folder: (or just @file / @folder with
|
||||
# no colon yet), delegate to path completions. Accepting the bare
|
||||
# form lets the picker surface directories as soon as the user has
|
||||
# typed `@folder`, without requiring them to first accept the static
|
||||
# `@folder:` hint and re-trigger completion.
|
||||
# If the user typed @file: or @folder:, delegate to path completions
|
||||
for prefix in ("@file:", "@folder:"):
|
||||
bare = prefix[:-1]
|
||||
|
||||
if word == bare or word.startswith(prefix):
|
||||
want_dir = prefix == "@folder:"
|
||||
path_part = '' if word == bare else word[len(prefix):]
|
||||
if word.startswith(prefix):
|
||||
path_part = word[len(prefix):] or "."
|
||||
expanded = os.path.expanduser(path_part)
|
||||
|
||||
if not expanded or expanded == ".":
|
||||
search_dir, match_prefix = ".", ""
|
||||
elif expanded.endswith("/"):
|
||||
if expanded.endswith("/"):
|
||||
search_dir, match_prefix = expanded, ""
|
||||
else:
|
||||
search_dir = os.path.dirname(expanded) or "."
|
||||
@@ -1020,21 +946,15 @@ class SlashCommandCompleter(Completer):
|
||||
for entry in sorted(entries):
|
||||
if match_prefix and not entry.lower().startswith(prefix_lower):
|
||||
continue
|
||||
full_path = os.path.join(search_dir, entry)
|
||||
is_dir = os.path.isdir(full_path)
|
||||
# `@folder:` must only surface directories; `@file:` only
|
||||
# regular files. Without this filter `@folder:` listed
|
||||
# every .env / .gitignore in the cwd, defeating the
|
||||
# explicit prefix and confusing users expecting a
|
||||
# directory picker.
|
||||
if want_dir != is_dir:
|
||||
continue
|
||||
if count >= limit:
|
||||
break
|
||||
full_path = os.path.join(search_dir, entry)
|
||||
is_dir = os.path.isdir(full_path)
|
||||
display_path = os.path.relpath(full_path)
|
||||
suffix = "/" if is_dir else ""
|
||||
kind = "folder" if is_dir else "file"
|
||||
meta = "dir" if is_dir else _file_size_label(full_path)
|
||||
completion = f"{prefix}{display_path}{suffix}"
|
||||
completion = f"@{kind}:{display_path}{suffix}"
|
||||
yield Completion(
|
||||
completion,
|
||||
start_position=-len(word),
|
||||
|
||||
+42
-287
@@ -13,7 +13,6 @@ This module provides:
|
||||
"""
|
||||
|
||||
import copy
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
@@ -25,7 +24,6 @@ from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional, List, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_IS_WINDOWS = platform.system() == "Windows"
|
||||
_ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
|
||||
@@ -387,32 +385,6 @@ DEFAULT_CONFIG = {
|
||||
# (terminal and execute_code). Skill-declared required_environment_variables
|
||||
# are passed through automatically; this list is for non-skill use cases.
|
||||
"env_passthrough": [],
|
||||
# Extra files to source in the login shell when building the
|
||||
# per-session environment snapshot. Use this when tools like nvm,
|
||||
# pyenv, asdf, or custom PATH entries are registered by files that
|
||||
# a bash login shell would skip — most commonly ``~/.bashrc``
|
||||
# (bash doesn't source bashrc in non-interactive login mode) or
|
||||
# zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``.
|
||||
# Paths support ``~`` / ``${VAR}``. Missing files are silently
|
||||
# skipped. When empty, Hermes auto-sources ``~/.profile``,
|
||||
# ``~/.bash_profile``, and ``~/.bashrc`` (in that order) if the
|
||||
# snapshot shell is bash (this is the ``auto_source_bashrc``
|
||||
# behaviour — disable with that key if you want strict login-only
|
||||
# semantics).
|
||||
"shell_init_files": [],
|
||||
# When true (default), Hermes sources the user's shell rc files
|
||||
# (``~/.profile``, ``~/.bash_profile``, ``~/.bashrc``) in the
|
||||
# login shell used to build the environment snapshot. This
|
||||
# captures PATH additions, shell functions, and aliases — which a
|
||||
# plain ``bash -l -c`` would otherwise miss because bash skips
|
||||
# bashrc in non-interactive login mode, and because a default
|
||||
# Debian/Ubuntu ``~/.bashrc`` short-circuits on non-interactive
|
||||
# sources. ``~/.profile`` and ``~/.bash_profile`` are tried first
|
||||
# because ``n`` / ``nvm`` / ``asdf`` installers typically write
|
||||
# their PATH exports there without an interactivity guard. Turn
|
||||
# this off if your rc files misbehave when sourced
|
||||
# non-interactively (e.g. one that hard-exits on TTY checks).
|
||||
"auto_source_bashrc": True,
|
||||
"docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
|
||||
"docker_forward_env": [],
|
||||
# Explicit environment variables to set inside Docker containers.
|
||||
@@ -431,11 +403,7 @@ DEFAULT_CONFIG = {
|
||||
"container_persistent": True, # Persist filesystem across sessions
|
||||
# Docker volume mounts — share host directories with the container.
|
||||
# Each entry is "host_path:container_path" (standard Docker -v syntax).
|
||||
# Example:
|
||||
# ["/home/user/projects:/workspace/projects",
|
||||
# "/home/user/.hermes/cache/documents:/output"]
|
||||
# For gateway MEDIA delivery, write inside Docker to /output/... and emit
|
||||
# the host-visible path in MEDIA:, not the container path.
|
||||
# Example: ["/home/user/projects:/workspace/projects", "/data:/data"]
|
||||
"docker_volumes": [],
|
||||
# Explicit opt-in: mount the host cwd into /workspace for Docker sessions.
|
||||
# Default off because passing host directories into a sandbox weakens isolation.
|
||||
@@ -502,6 +470,13 @@ DEFAULT_CONFIG = {
|
||||
},
|
||||
},
|
||||
|
||||
"smart_model_routing": {
|
||||
"enabled": False,
|
||||
"max_simple_chars": 160,
|
||||
"max_simple_words": 28,
|
||||
"cheap_model": {},
|
||||
},
|
||||
|
||||
# Auxiliary model config — provider:model for each side task.
|
||||
# Format: provider is the provider name, model is the model slug.
|
||||
# "auto" for provider = auto-detect best available provider.
|
||||
@@ -515,7 +490,6 @@ DEFAULT_CONFIG = {
|
||||
"base_url": "", # direct OpenAI-compatible endpoint (takes precedence over provider)
|
||||
"api_key": "", # API key for base_url (falls back to OPENAI_API_KEY)
|
||||
"timeout": 120, # seconds — LLM API call timeout; vision payloads need generous timeout
|
||||
"extra_body": {}, # OpenAI-compatible provider-specific request fields
|
||||
"download_timeout": 30, # seconds — image HTTP download timeout; increase for slow connections
|
||||
},
|
||||
"web_extract": {
|
||||
@@ -524,7 +498,6 @@ DEFAULT_CONFIG = {
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 360, # seconds (6min) — per-attempt LLM summarization timeout; increase for slow local models
|
||||
"extra_body": {},
|
||||
},
|
||||
"compression": {
|
||||
"provider": "auto",
|
||||
@@ -532,7 +505,6 @@ DEFAULT_CONFIG = {
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 120, # seconds — compression summarises large contexts; increase for local models
|
||||
"extra_body": {},
|
||||
},
|
||||
"session_search": {
|
||||
"provider": "auto",
|
||||
@@ -540,8 +512,6 @@ DEFAULT_CONFIG = {
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
"max_concurrency": 3, # Clamp parallel summaries to avoid request-burst 429s on small providers
|
||||
},
|
||||
"skills_hub": {
|
||||
"provider": "auto",
|
||||
@@ -549,7 +519,6 @@ DEFAULT_CONFIG = {
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
"approval": {
|
||||
"provider": "auto",
|
||||
@@ -557,7 +526,6 @@ DEFAULT_CONFIG = {
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
"mcp": {
|
||||
"provider": "auto",
|
||||
@@ -565,7 +533,6 @@ DEFAULT_CONFIG = {
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
"flush_memories": {
|
||||
"provider": "auto",
|
||||
@@ -573,7 +540,6 @@ DEFAULT_CONFIG = {
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
"title_generation": {
|
||||
"provider": "auto",
|
||||
@@ -581,7 +547,6 @@ DEFAULT_CONFIG = {
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -593,14 +558,9 @@ DEFAULT_CONFIG = {
|
||||
"bell_on_complete": False,
|
||||
"show_reasoning": False,
|
||||
"streaming": False,
|
||||
"final_response_markdown": "strip", # render | strip | raw
|
||||
"inline_diffs": True, # Show inline diff previews for write actions (write_file, patch, skill_manage)
|
||||
"show_cost": False, # Show $ cost in the status bar (off by default)
|
||||
"skin": "default",
|
||||
"user_message_preview": { # CLI: how many submitted user-message lines to echo back in scrollback
|
||||
"first_lines": 2,
|
||||
"last_lines": 2,
|
||||
},
|
||||
"interim_assistant_messages": True, # Gateway: show natural mid-turn assistant status messages
|
||||
"tool_progress_command": False, # Enable /verbose command in messaging gateway
|
||||
"tool_progress_overrides": {}, # DEPRECATED — use display.platforms instead
|
||||
@@ -619,10 +579,6 @@ DEFAULT_CONFIG = {
|
||||
},
|
||||
|
||||
# Text-to-speech configuration
|
||||
# Each provider supports an optional `max_text_length:` override for the
|
||||
# per-request input-character cap. Omit it to use the provider's documented
|
||||
# limit (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k model-aware,
|
||||
# Gemini 5000, Edge 5000, Mistral 4000, NeuTTS/KittenTTS 2000).
|
||||
"tts": {
|
||||
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
|
||||
"edge": {
|
||||
@@ -675,7 +631,6 @@ DEFAULT_CONFIG = {
|
||||
"record_key": "ctrl+b",
|
||||
"max_recording_seconds": 120,
|
||||
"auto_tts": False,
|
||||
"beep_enabled": True, # Play record start/stop beeps in CLI voice mode
|
||||
"silence_threshold": 200, # RMS below this = silence (0-32767)
|
||||
"silence_duration": 3.0, # Seconds of silence before auto-stop
|
||||
},
|
||||
@@ -718,22 +673,10 @@ DEFAULT_CONFIG = {
|
||||
"provider": "", # e.g. "openrouter" (empty = inherit parent provider + credentials)
|
||||
"base_url": "", # direct OpenAI-compatible endpoint for subagents
|
||||
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
|
||||
# When delegate_task narrows child toolsets explicitly, preserve any
|
||||
# MCP toolsets the parent already has enabled. On by default so
|
||||
# narrowing (e.g. toolsets=["web","browser"]) expresses "I want these
|
||||
# extras" without silently stripping MCP tools the parent already has.
|
||||
# Set to false for strict intersection.
|
||||
"inherit_mcp_toolsets": True,
|
||||
"max_iterations": 50, # per-subagent iteration cap (each subagent gets its own budget,
|
||||
# independent of the parent's max_iterations)
|
||||
"reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium",
|
||||
# "low", "minimal", "none" (empty = inherit parent's level)
|
||||
"max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling
|
||||
# Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth
|
||||
# and _get_orchestrator_enabled). Values are clamped to [1, 3] with a
|
||||
# warning log if out of range.
|
||||
"max_spawn_depth": 1, # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
|
||||
"orchestrator_enabled": True, # kill switch for role="orchestrator"
|
||||
},
|
||||
|
||||
# Ephemeral prefill messages file — JSON list of {role, content} dicts
|
||||
@@ -746,31 +689,6 @@ DEFAULT_CONFIG = {
|
||||
# always goes to ~/.hermes/skills/.
|
||||
"skills": {
|
||||
"external_dirs": [], # e.g. ["~/.agents/skills", "/shared/team-skills"]
|
||||
# Substitute ${HERMES_SKILL_DIR} and ${HERMES_SESSION_ID} in SKILL.md
|
||||
# content with the absolute skill directory and the active session id
|
||||
# before the agent sees it. Lets skill authors reference bundled
|
||||
# scripts without the agent having to join paths.
|
||||
"template_vars": True,
|
||||
# Pre-execute inline shell snippets written as !`cmd` in SKILL.md
|
||||
# body. Their stdout is inlined into the skill message before the
|
||||
# agent reads it, so skills can inject dynamic context (dates, git
|
||||
# state, detected tool versions, …). Off by default because any
|
||||
# content from the skill author runs on the host without approval;
|
||||
# only enable for skill sources you trust.
|
||||
"inline_shell": False,
|
||||
# Timeout (seconds) for each !`cmd` snippet when inline_shell is on.
|
||||
"inline_shell_timeout": 10,
|
||||
# Run the keyword/pattern security scanner on skills the agent
|
||||
# writes via skill_manage (create/edit/patch). Off by default
|
||||
# because the agent can already execute the same code paths via
|
||||
# terminal() with no gate, so the scan adds friction (blocks
|
||||
# skills that mention risky keywords in prose) without meaningful
|
||||
# security. Turn on if you want the belt-and-suspenders — a
|
||||
# dangerous verdict will then surface as a tool error to the
|
||||
# agent, which can retry with the flagged content removed.
|
||||
# External hub installs (trusted/community sources) are always
|
||||
# scanned regardless of this setting.
|
||||
"guard_agent_created": False,
|
||||
},
|
||||
|
||||
# Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
|
||||
@@ -790,14 +708,6 @@ DEFAULT_CONFIG = {
|
||||
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
|
||||
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing
|
||||
"channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads)
|
||||
# discord_server tool: restrict which actions the agent may call.
|
||||
# Default (empty) = all actions allowed (subject to bot privileged intents).
|
||||
# Accepts comma-separated string ("list_guilds,list_channels,fetch_messages")
|
||||
# or YAML list. Unknown names are dropped with a warning at load time.
|
||||
# Actions: list_guilds, server_info, list_channels, channel_info,
|
||||
# list_roles, member_info, search_members, fetch_messages, list_pins,
|
||||
# pin_message, unpin_message, create_thread, add_role, remove_role.
|
||||
"server_actions": "",
|
||||
},
|
||||
|
||||
# WhatsApp platform settings (gateway mode)
|
||||
@@ -841,21 +751,6 @@ DEFAULT_CONFIG = {
|
||||
"command_allowlist": [],
|
||||
# User-defined quick commands that bypass the agent loop (type: exec only)
|
||||
"quick_commands": {},
|
||||
|
||||
# Shell-script hooks — declarative bridge that invokes shell scripts
|
||||
# on plugin-hook events (pre_tool_call, post_tool_call, pre_llm_call,
|
||||
# subagent_stop, etc.). Each entry maps an event name to a list of
|
||||
# {matcher, command, timeout} dicts. First registration of a new
|
||||
# command prompts the user for consent; subsequent runs reuse the
|
||||
# stored approval from ~/.hermes/shell-hooks-allowlist.json.
|
||||
# See `website/docs/user-guide/features/hooks.md` for schema + examples.
|
||||
"hooks": {},
|
||||
|
||||
# Auto-accept shell-hook registrations without a TTY prompt. Also
|
||||
# toggleable per-invocation via --accept-hooks or HERMES_ACCEPT_HOOKS=1.
|
||||
# Gateway / cron / non-interactive runs need this (or one of the other
|
||||
# channels) to pick up newly-added hooks.
|
||||
"hooks_auto_accept": False,
|
||||
# Custom personalities — add your own entries here
|
||||
# Supports string format: {"name": "system prompt"}
|
||||
# Or dict format: {"name": {"description": "...", "system_prompt": "...", "tone": "...", "style": "..."}}
|
||||
@@ -863,7 +758,6 @@ DEFAULT_CONFIG = {
|
||||
|
||||
# Pre-exec security scanning via tirith
|
||||
"security": {
|
||||
"allow_private_urls": False, # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs)
|
||||
"redact_secrets": True,
|
||||
"tirith_enabled": True,
|
||||
"tirith_path": "tirith",
|
||||
@@ -880,11 +774,6 @@ DEFAULT_CONFIG = {
|
||||
# Wrap delivered cron responses with a header (task name) and footer
|
||||
# ("The agent cannot see this message"). Set to false for clean output.
|
||||
"wrap_response": True,
|
||||
# Maximum number of due jobs to run in parallel per tick.
|
||||
# null/0 = unbounded (limited only by thread count).
|
||||
# 1 = serial (pre-v0.9 behaviour).
|
||||
# Also overridable via HERMES_CRON_MAX_PARALLEL env var.
|
||||
"max_parallel_jobs": None,
|
||||
},
|
||||
|
||||
# execute_code settings — controls the tool used for programmatic tool calls.
|
||||
@@ -917,36 +806,8 @@ DEFAULT_CONFIG = {
|
||||
"force_ipv4": False,
|
||||
},
|
||||
|
||||
# Session storage — controls automatic cleanup of ~/.hermes/state.db.
|
||||
# state.db accumulates every session, message, tool call, and FTS5 index
|
||||
# entry forever. Without auto-pruning, a heavy user (gateway + cron)
|
||||
# reports 384MB+ databases with 68K+ messages, which slows down FTS5
|
||||
# inserts, /resume listing, and insights queries.
|
||||
"sessions": {
|
||||
# When true, prune ended sessions older than retention_days once
|
||||
# per (roughly) min_interval_hours at CLI/gateway/cron startup.
|
||||
# Only touches ended sessions — active sessions are always preserved.
|
||||
# Default false: session history is valuable for search recall, and
|
||||
# silently deleting it could surprise users. Opt in explicitly.
|
||||
"auto_prune": False,
|
||||
# How many days of ended-session history to keep. Matches the
|
||||
# default of ``hermes sessions prune``.
|
||||
"retention_days": 90,
|
||||
# VACUUM after a prune that actually deleted rows. SQLite does not
|
||||
# reclaim disk space on DELETE — freed pages are just reused on
|
||||
# subsequent INSERTs — so without VACUUM the file stays bloated
|
||||
# even after pruning. VACUUM blocks writes for a few seconds per
|
||||
# 100MB, so it only runs at startup, and only when prune deleted
|
||||
# ≥1 session.
|
||||
"vacuum_after_prune": True,
|
||||
# Minimum hours between auto-maintenance runs (avoids repeating
|
||||
# the sweep on every CLI invocation). Tracked via state_meta in
|
||||
# state.db itself, so it's shared across all processes.
|
||||
"min_interval_hours": 24,
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 22,
|
||||
"_config_version": 19,
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
@@ -1102,22 +963,6 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"STEPFUN_API_KEY": {
|
||||
"description": "StepFun Step Plan API key",
|
||||
"prompt": "StepFun Step Plan API key",
|
||||
"url": "https://platform.stepfun.com/",
|
||||
"password": True,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"STEPFUN_BASE_URL": {
|
||||
"description": "StepFun Step Plan base URL override",
|
||||
"prompt": "StepFun Step Plan base URL (leave empty for default)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"ARCEEAI_API_KEY": {
|
||||
"description": "Arcee AI API key",
|
||||
"prompt": "Arcee AI API key",
|
||||
@@ -1985,53 +1830,12 @@ def _normalize_custom_provider_entry(
|
||||
if not isinstance(entry, dict):
|
||||
return None
|
||||
|
||||
# Accept camelCase aliases commonly used in hand-written configs.
|
||||
_CAMEL_ALIASES: Dict[str, str] = {
|
||||
"apiKey": "api_key",
|
||||
"baseUrl": "base_url",
|
||||
"apiMode": "api_mode",
|
||||
"keyEnv": "key_env",
|
||||
"defaultModel": "default_model",
|
||||
"contextLength": "context_length",
|
||||
"rateLimitDelay": "rate_limit_delay",
|
||||
}
|
||||
_KNOWN_KEYS = {
|
||||
"name", "api", "url", "base_url", "api_key", "key_env",
|
||||
"api_mode", "transport", "model", "default_model", "models",
|
||||
"context_length", "rate_limit_delay",
|
||||
}
|
||||
for camel, snake in _CAMEL_ALIASES.items():
|
||||
if camel in entry and snake not in entry:
|
||||
logger.warning(
|
||||
"providers.%s: camelCase key '%s' auto-mapped to '%s' "
|
||||
"(use snake_case to avoid this warning)",
|
||||
provider_key or "?", camel, snake,
|
||||
)
|
||||
entry[snake] = entry[camel]
|
||||
unknown = set(entry.keys()) - _KNOWN_KEYS - set(_CAMEL_ALIASES.keys())
|
||||
if unknown:
|
||||
logger.warning(
|
||||
"providers.%s: unknown config keys ignored: %s",
|
||||
provider_key or "?", ", ".join(sorted(unknown)),
|
||||
)
|
||||
|
||||
from urllib.parse import urlparse
|
||||
|
||||
base_url = ""
|
||||
for url_key in ("base_url", "url", "api"):
|
||||
for url_key in ("api", "url", "base_url"):
|
||||
raw_url = entry.get(url_key)
|
||||
if isinstance(raw_url, str) and raw_url.strip():
|
||||
candidate = raw_url.strip()
|
||||
parsed = urlparse(candidate)
|
||||
if parsed.scheme and parsed.netloc:
|
||||
base_url = candidate
|
||||
break
|
||||
else:
|
||||
logger.warning(
|
||||
"providers.%s: '%s' value '%s' is not a valid URL "
|
||||
"(no scheme or host) — skipped",
|
||||
provider_key or "?", url_key, candidate,
|
||||
)
|
||||
base_url = raw_url.strip()
|
||||
break
|
||||
if not base_url:
|
||||
return None
|
||||
|
||||
@@ -2072,14 +1876,6 @@ def _normalize_custom_provider_entry(
|
||||
models = entry.get("models")
|
||||
if isinstance(models, dict) and models:
|
||||
normalized["models"] = models
|
||||
elif isinstance(models, list) and models:
|
||||
# Hand-edited configs (and older Hermes versions) write ``models`` as
|
||||
# a plain list of model ids. Preserve them by converting to the dict
|
||||
# shape downstream code expects; otherwise normalize silently drops
|
||||
# the list and /model shows the provider with (0) models.
|
||||
normalized["models"] = {
|
||||
str(m): {} for m in models if isinstance(m, str) and m.strip()
|
||||
}
|
||||
|
||||
context_length = entry.get("context_length")
|
||||
if isinstance(context_length, int) and context_length > 0:
|
||||
@@ -2178,7 +1974,6 @@ _KNOWN_ROOT_KEYS = {
|
||||
"fallback_providers", "credential_pool_strategies", "toolsets",
|
||||
"agent", "terminal", "display", "compression", "delegation",
|
||||
"auxiliary", "custom_providers", "context", "memory", "gateway",
|
||||
"sessions",
|
||||
}
|
||||
|
||||
# Valid fields inside a custom_providers list entry
|
||||
@@ -2336,6 +2131,7 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None:
|
||||
if not issues:
|
||||
return
|
||||
|
||||
import sys
|
||||
lines = ["\033[33m⚠ Config issues detected in config.yaml:\033[0m"]
|
||||
for ci in issues:
|
||||
marker = "\033[31m✗\033[0m" if ci.severity == "error" else "\033[33m⚠\033[0m"
|
||||
@@ -2350,6 +2146,7 @@ def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> Non
|
||||
These env vars are deprecated — the canonical setting is terminal.cwd
|
||||
in config.yaml. Prints a migration hint to stderr.
|
||||
"""
|
||||
import os, sys
|
||||
messaging_cwd = os.environ.get("MESSAGING_CWD")
|
||||
terminal_cwd_env = os.environ.get("TERMINAL_CWD")
|
||||
|
||||
@@ -2667,71 +2464,6 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
|
||||
else:
|
||||
print(" ✓ Removed unused compression.summary_* keys")
|
||||
|
||||
# ── Version 20 → 21: plugins are now opt-in; grandfather existing user plugins ──
|
||||
# The loader now requires plugins to appear in ``plugins.enabled`` before
|
||||
# loading. Existing installs had all discovered plugins loading by default
|
||||
# (minus anything in ``plugins.disabled``). To avoid silently breaking
|
||||
# those setups on upgrade, populate ``plugins.enabled`` with the set of
|
||||
# currently-installed user plugins that aren't already disabled.
|
||||
#
|
||||
# Bundled plugins (shipped in the repo itself) are NOT grandfathered —
|
||||
# they ship off for everyone, including existing users, so any user who
|
||||
# wants one has to opt in explicitly.
|
||||
if current_ver < 21:
|
||||
config = read_raw_config()
|
||||
plugins_cfg = config.get("plugins")
|
||||
if not isinstance(plugins_cfg, dict):
|
||||
plugins_cfg = {}
|
||||
# Only migrate if the enabled allow-list hasn't been set yet.
|
||||
if "enabled" not in plugins_cfg:
|
||||
disabled = plugins_cfg.get("disabled", []) or []
|
||||
if not isinstance(disabled, list):
|
||||
disabled = []
|
||||
disabled_set = set(disabled)
|
||||
|
||||
# Scan ``$HERMES_HOME/plugins/`` for currently installed user plugins.
|
||||
grandfathered: List[str] = []
|
||||
try:
|
||||
user_plugins_dir = get_hermes_home() / "plugins"
|
||||
if user_plugins_dir.is_dir():
|
||||
for child in sorted(user_plugins_dir.iterdir()):
|
||||
if not child.is_dir():
|
||||
continue
|
||||
manifest_file = child / "plugin.yaml"
|
||||
if not manifest_file.exists():
|
||||
manifest_file = child / "plugin.yml"
|
||||
if not manifest_file.exists():
|
||||
continue
|
||||
try:
|
||||
with open(manifest_file) as _mf:
|
||||
manifest = yaml.safe_load(_mf) or {}
|
||||
except Exception:
|
||||
manifest = {}
|
||||
name = manifest.get("name") or child.name
|
||||
if name in disabled_set:
|
||||
continue
|
||||
grandfathered.append(name)
|
||||
except Exception:
|
||||
grandfathered = []
|
||||
|
||||
plugins_cfg["enabled"] = grandfathered
|
||||
config["plugins"] = plugins_cfg
|
||||
save_config(config)
|
||||
results["config_added"].append(
|
||||
f"plugins.enabled (opt-in allow-list, {len(grandfathered)} grandfathered)"
|
||||
)
|
||||
if not quiet:
|
||||
if grandfathered:
|
||||
print(
|
||||
f" ✓ Plugins now opt-in: grandfathered "
|
||||
f"{len(grandfathered)} existing plugin(s) into plugins.enabled"
|
||||
)
|
||||
else:
|
||||
print(
|
||||
" ✓ Plugins now opt-in: no existing plugins to grandfather. "
|
||||
"Use `hermes plugins enable <name>` to activate."
|
||||
)
|
||||
|
||||
if current_ver < latest_ver and not quiet:
|
||||
print(f"Config version: {current_ver} → {latest_ver}")
|
||||
|
||||
@@ -3134,6 +2866,19 @@ _FALLBACK_COMMENT = """
|
||||
# fallback_model:
|
||||
# provider: openrouter
|
||||
# model: anthropic/claude-sonnet-4
|
||||
#
|
||||
# ── Smart Model Routing ────────────────────────────────────────────────
|
||||
# Optional cheap-vs-strong routing for simple turns.
|
||||
# Keeps the primary model for complex work, but can route short/simple
|
||||
# messages to a cheaper model across providers.
|
||||
#
|
||||
# smart_model_routing:
|
||||
# enabled: true
|
||||
# max_simple_chars: 160
|
||||
# max_simple_words: 28
|
||||
# cheap_model:
|
||||
# provider: openrouter
|
||||
# model: google/gemini-2.5-flash
|
||||
"""
|
||||
|
||||
|
||||
@@ -3165,6 +2910,19 @@ _COMMENTED_SECTIONS = """
|
||||
# fallback_model:
|
||||
# provider: openrouter
|
||||
# model: anthropic/claude-sonnet-4
|
||||
#
|
||||
# ── Smart Model Routing ────────────────────────────────────────────────
|
||||
# Optional cheap-vs-strong routing for simple turns.
|
||||
# Keeps the primary model for complex work, but can route short/simple
|
||||
# messages to a cheaper model across providers.
|
||||
#
|
||||
# smart_model_routing:
|
||||
# enabled: true
|
||||
# max_simple_chars: 160
|
||||
# max_simple_words: 28
|
||||
# cheap_model:
|
||||
# provider: openrouter
|
||||
# model: google/gemini-2.5-flash
|
||||
"""
|
||||
|
||||
|
||||
@@ -3194,7 +2952,7 @@ def save_config(config: Dict[str, Any]):
|
||||
if not sec or sec.get("redact_secrets") is None:
|
||||
parts.append(_SECURITY_COMMENT)
|
||||
fb = normalized.get("fallback_model", {})
|
||||
if not fb or not isinstance(fb, dict) or not (fb.get("provider") and fb.get("model")):
|
||||
if not fb or not (fb.get("provider") and fb.get("model")):
|
||||
parts.append(_FALLBACK_COMMENT)
|
||||
|
||||
atomic_yaml_write(
|
||||
@@ -3357,6 +3115,7 @@ def _check_non_ascii_credential(key: str, value: str) -> str:
|
||||
bad_chars.append(f" position {i}: {ch!r} (U+{ord(ch):04X})")
|
||||
sanitized = value.encode("ascii", errors="ignore").decode("ascii")
|
||||
|
||||
import sys
|
||||
print(
|
||||
f"\n Warning: {key} contains non-ASCII characters that will break API requests.\n"
|
||||
f" This usually happens when copy-pasting from a PDF, rich-text editor,\n"
|
||||
@@ -3626,10 +3385,6 @@ def show_config():
|
||||
print(f" Personality: {display.get('personality', 'kawaii')}")
|
||||
print(f" Reasoning: {'on' if display.get('show_reasoning', False) else 'off'}")
|
||||
print(f" Bell: {'on' if display.get('bell_on_complete', False) else 'off'}")
|
||||
ump = display.get('user_message_preview', {}) if isinstance(display.get('user_message_preview', {}), dict) else {}
|
||||
ump_first = ump.get('first_lines', 2)
|
||||
ump_last = ump.get('last_lines', 2)
|
||||
print(f" User preview: first {ump_first} line(s), last {ump_last} line(s)")
|
||||
|
||||
# Terminal
|
||||
print()
|
||||
|
||||
+48
-128
@@ -13,7 +13,6 @@ import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
@@ -148,14 +147,6 @@ def _sweep_expired_pastes(now: Optional[float] = None) -> tuple[int, int]:
|
||||
return (deleted, len(remaining))
|
||||
|
||||
|
||||
def _best_effort_sweep_expired_pastes() -> None:
|
||||
"""Attempt pending-paste cleanup without letting /debug fail offline."""
|
||||
try:
|
||||
_sweep_expired_pastes()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Privacy / delete helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -323,128 +314,72 @@ def upload_to_pastebin(content: str, expiry_days: int = 7) -> str:
|
||||
# Log file reading
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
|
||||
class LogSnapshot:
|
||||
"""Single-read snapshot of a log file used by debug-share."""
|
||||
|
||||
path: Optional[Path]
|
||||
tail_text: str
|
||||
full_text: Optional[str]
|
||||
|
||||
|
||||
def _primary_log_path(log_name: str) -> Optional[Path]:
|
||||
"""Where *log_name* would live if present. Doesn't check existence."""
|
||||
from hermes_cli.logs import LOG_FILES
|
||||
|
||||
filename = LOG_FILES.get(log_name)
|
||||
return (get_hermes_home() / "logs" / filename) if filename else None
|
||||
|
||||
|
||||
def _resolve_log_path(log_name: str) -> Optional[Path]:
|
||||
"""Find the log file for *log_name*, falling back to the .1 rotation.
|
||||
|
||||
Returns the first non-empty candidate (primary, then .1), or None.
|
||||
Callers distinguish 'empty primary' from 'truly missing' via
|
||||
:func:`_primary_log_path`.
|
||||
Returns the path if found, or None.
|
||||
"""
|
||||
primary = _primary_log_path(log_name)
|
||||
if primary is None:
|
||||
from hermes_cli.logs import LOG_FILES
|
||||
|
||||
filename = LOG_FILES.get(log_name)
|
||||
if not filename:
|
||||
return None
|
||||
|
||||
log_dir = get_hermes_home() / "logs"
|
||||
primary = log_dir / filename
|
||||
if primary.exists() and primary.stat().st_size > 0:
|
||||
return primary
|
||||
|
||||
rotated = primary.parent / f"{primary.name}.1"
|
||||
# Fall back to the most recent rotated file (.1).
|
||||
rotated = log_dir / f"{filename}.1"
|
||||
if rotated.exists() and rotated.stat().st_size > 0:
|
||||
return rotated
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _capture_log_snapshot(
|
||||
log_name: str,
|
||||
*,
|
||||
tail_lines: int,
|
||||
max_bytes: int = _MAX_LOG_BYTES,
|
||||
) -> LogSnapshot:
|
||||
"""Capture a log once and derive summary/full-log views from it.
|
||||
def _read_log_tail(log_name: str, num_lines: int) -> str:
|
||||
"""Read the last *num_lines* from a log file, or return a placeholder."""
|
||||
from hermes_cli.logs import _read_last_n_lines
|
||||
|
||||
The report tail and standalone log upload must come from the same file
|
||||
snapshot. Otherwise a rotation/truncate between reads can make the report
|
||||
look newer than the uploaded ``agent.log`` paste.
|
||||
log_path = _resolve_log_path(log_name)
|
||||
if log_path is None:
|
||||
return "(file not found)"
|
||||
|
||||
try:
|
||||
lines = _read_last_n_lines(log_path, num_lines)
|
||||
return "".join(lines).rstrip("\n")
|
||||
except Exception as exc:
|
||||
return f"(error reading: {exc})"
|
||||
|
||||
|
||||
def _read_full_log(log_name: str, max_bytes: int = _MAX_LOG_BYTES) -> Optional[str]:
|
||||
"""Read a log file for standalone upload.
|
||||
|
||||
Returns the file content (last *max_bytes* if truncated), or None if the
|
||||
file doesn't exist or is empty.
|
||||
"""
|
||||
log_path = _resolve_log_path(log_name)
|
||||
if log_path is None:
|
||||
primary = _primary_log_path(log_name)
|
||||
tail = "(file empty)" if primary and primary.exists() else "(file not found)"
|
||||
return LogSnapshot(path=None, tail_text=tail, full_text=None)
|
||||
return None
|
||||
|
||||
try:
|
||||
size = log_path.stat().st_size
|
||||
if size == 0:
|
||||
# race: file was truncated between _resolve_log_path and stat
|
||||
return LogSnapshot(path=log_path, tail_text="(file empty)", full_text=None)
|
||||
return None
|
||||
|
||||
if size <= max_bytes:
|
||||
return log_path.read_text(encoding="utf-8", errors="replace")
|
||||
|
||||
# File is larger than max_bytes — read the tail.
|
||||
with open(log_path, "rb") as f:
|
||||
if size <= max_bytes:
|
||||
raw = f.read()
|
||||
truncated = False
|
||||
else:
|
||||
# Read from the end until we have enough bytes for the
|
||||
# standalone upload and enough newline context to render the
|
||||
# summary tail from the same snapshot.
|
||||
chunk_size = 8192
|
||||
pos = size
|
||||
chunks: list[bytes] = []
|
||||
total = 0
|
||||
newline_count = 0
|
||||
|
||||
while pos > 0 and (total < max_bytes or newline_count <= tail_lines + 1) and total < max_bytes * 2:
|
||||
read_size = min(chunk_size, pos)
|
||||
pos -= read_size
|
||||
f.seek(pos)
|
||||
chunk = f.read(read_size)
|
||||
chunks.insert(0, chunk)
|
||||
total += len(chunk)
|
||||
newline_count += chunk.count(b"\n")
|
||||
chunk_size = min(chunk_size * 2, 65536)
|
||||
|
||||
raw = b"".join(chunks)
|
||||
truncated = pos > 0
|
||||
|
||||
full_raw = raw
|
||||
if truncated and len(full_raw) > max_bytes:
|
||||
cut = len(full_raw) - max_bytes
|
||||
# Check whether the cut lands exactly on a line boundary. If the
|
||||
# byte just before the cut position is a newline the first retained
|
||||
# byte starts a complete line and we should keep it. Only drop a
|
||||
# partial first line when we're genuinely mid-line.
|
||||
on_boundary = cut > 0 and full_raw[cut - 1 : cut] == b"\n"
|
||||
full_raw = full_raw[cut:]
|
||||
if not on_boundary and b"\n" in full_raw:
|
||||
full_raw = full_raw.split(b"\n", 1)[1]
|
||||
|
||||
all_text = raw.decode("utf-8", errors="replace")
|
||||
tail_text = "".join(all_text.splitlines(keepends=True)[-tail_lines:]).rstrip("\n")
|
||||
|
||||
full_text = full_raw.decode("utf-8", errors="replace")
|
||||
if truncated:
|
||||
full_text = f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{full_text}"
|
||||
|
||||
return LogSnapshot(path=log_path, tail_text=tail_text, full_text=full_text)
|
||||
except Exception as exc:
|
||||
return LogSnapshot(path=log_path, tail_text=f"(error reading: {exc})", full_text=None)
|
||||
|
||||
|
||||
def _capture_default_log_snapshots(log_lines: int) -> dict[str, LogSnapshot]:
|
||||
"""Capture all logs used by debug-share exactly once."""
|
||||
errors_lines = min(log_lines, 100)
|
||||
return {
|
||||
"agent": _capture_log_snapshot("agent", tail_lines=log_lines),
|
||||
"errors": _capture_log_snapshot("errors", tail_lines=errors_lines),
|
||||
"gateway": _capture_log_snapshot("gateway", tail_lines=errors_lines),
|
||||
}
|
||||
f.seek(size - max_bytes)
|
||||
# Skip partial line at the seek point.
|
||||
f.readline()
|
||||
content = f.read().decode("utf-8", errors="replace")
|
||||
return f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{content}"
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -470,12 +405,7 @@ def _capture_dump() -> str:
|
||||
return capture.getvalue()
|
||||
|
||||
|
||||
def collect_debug_report(
|
||||
*,
|
||||
log_lines: int = 200,
|
||||
dump_text: str = "",
|
||||
log_snapshots: Optional[dict[str, LogSnapshot]] = None,
|
||||
) -> str:
|
||||
def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
|
||||
"""Build the summary debug report: system dump + log tails.
|
||||
|
||||
Parameters
|
||||
@@ -494,22 +424,19 @@ def collect_debug_report(
|
||||
dump_text = _capture_dump()
|
||||
buf.write(dump_text)
|
||||
|
||||
if log_snapshots is None:
|
||||
log_snapshots = _capture_default_log_snapshots(log_lines)
|
||||
|
||||
# ── Recent log tails (summary only) ──────────────────────────────────
|
||||
buf.write("\n\n")
|
||||
buf.write(f"--- agent.log (last {log_lines} lines) ---\n")
|
||||
buf.write(log_snapshots["agent"].tail_text)
|
||||
buf.write(_read_log_tail("agent", log_lines))
|
||||
buf.write("\n\n")
|
||||
|
||||
errors_lines = min(log_lines, 100)
|
||||
buf.write(f"--- errors.log (last {errors_lines} lines) ---\n")
|
||||
buf.write(log_snapshots["errors"].tail_text)
|
||||
buf.write(_read_log_tail("errors", errors_lines))
|
||||
buf.write("\n\n")
|
||||
|
||||
buf.write(f"--- gateway.log (last {errors_lines} lines) ---\n")
|
||||
buf.write(log_snapshots["gateway"].tail_text)
|
||||
buf.write(_read_log_tail("gateway", errors_lines))
|
||||
buf.write("\n")
|
||||
|
||||
return buf.getvalue()
|
||||
@@ -521,8 +448,6 @@ def collect_debug_report(
|
||||
|
||||
def run_debug_share(args):
|
||||
"""Collect debug report + full logs, upload each, print URLs."""
|
||||
_best_effort_sweep_expired_pastes()
|
||||
|
||||
log_lines = getattr(args, "lines", 200)
|
||||
expiry = getattr(args, "expire", 7)
|
||||
local_only = getattr(args, "local", False)
|
||||
@@ -534,15 +459,10 @@ def run_debug_share(args):
|
||||
|
||||
# Capture dump once — prepended to every paste for context.
|
||||
dump_text = _capture_dump()
|
||||
log_snapshots = _capture_default_log_snapshots(log_lines)
|
||||
|
||||
report = collect_debug_report(
|
||||
log_lines=log_lines,
|
||||
dump_text=dump_text,
|
||||
log_snapshots=log_snapshots,
|
||||
)
|
||||
agent_log = log_snapshots["agent"].full_text
|
||||
gateway_log = log_snapshots["gateway"].full_text
|
||||
report = collect_debug_report(log_lines=log_lines, dump_text=dump_text)
|
||||
agent_log = _read_full_log("agent")
|
||||
gateway_log = _read_full_log("gateway")
|
||||
|
||||
# Prepend dump header to each full log so every paste is self-contained.
|
||||
if agent_log:
|
||||
|
||||
+5
-101
@@ -30,7 +30,6 @@ load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")
|
||||
|
||||
from hermes_cli.colors import Colors, color
|
||||
from hermes_constants import OPENROUTER_MODELS_URL
|
||||
from utils import base_url_host_matches
|
||||
|
||||
|
||||
_PROVIDER_ENV_HINTS = (
|
||||
@@ -278,86 +277,6 @@ def run_doctor(args):
|
||||
config_path = HERMES_HOME / 'config.yaml'
|
||||
if config_path.exists():
|
||||
check_ok(f"{_DHH}/config.yaml exists")
|
||||
|
||||
# Validate model.provider and model.default values
|
||||
try:
|
||||
import yaml as _yaml
|
||||
cfg = _yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
|
||||
model_section = cfg.get("model") or {}
|
||||
provider_raw = (model_section.get("provider") or "").strip()
|
||||
provider = provider_raw.lower()
|
||||
default_model = (model_section.get("default") or model_section.get("model") or "").strip()
|
||||
|
||||
known_providers: set = set()
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
known_providers = set(PROVIDER_REGISTRY.keys()) | {"openrouter", "custom", "auto"}
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
from hermes_cli.auth import resolve_provider as _resolve_provider
|
||||
except Exception:
|
||||
_resolve_provider = None
|
||||
|
||||
canonical_provider = provider
|
||||
if provider and _resolve_provider is not None and provider != "auto":
|
||||
try:
|
||||
canonical_provider = _resolve_provider(provider)
|
||||
except Exception:
|
||||
canonical_provider = None
|
||||
|
||||
if provider and provider != "auto":
|
||||
if canonical_provider is None or (known_providers and canonical_provider not in known_providers):
|
||||
known_list = ", ".join(sorted(known_providers)) if known_providers else "(unavailable)"
|
||||
check_fail(
|
||||
f"model.provider '{provider_raw}' is not a recognised provider",
|
||||
f"(known: {known_list})",
|
||||
)
|
||||
issues.append(
|
||||
f"model.provider '{provider_raw}' is unknown. "
|
||||
f"Valid providers: {known_list}. "
|
||||
f"Fix: run 'hermes config set model.provider <valid_provider>'"
|
||||
)
|
||||
|
||||
# Warn if model is set to a provider-prefixed name on a provider that doesn't use them
|
||||
if default_model and "/" in default_model and canonical_provider and canonical_provider not in ("openrouter", "custom", "auto", "ai-gateway", "kilocode", "opencode-zen", "huggingface", "nous"):
|
||||
check_warn(
|
||||
f"model.default '{default_model}' uses a vendor/model slug but provider is '{provider_raw}'",
|
||||
"(vendor-prefixed slugs belong to aggregators like openrouter)",
|
||||
)
|
||||
issues.append(
|
||||
f"model.default '{default_model}' is vendor-prefixed but model.provider is '{provider_raw}'. "
|
||||
"Either set model.provider to 'openrouter', or drop the vendor prefix."
|
||||
)
|
||||
|
||||
# Check credentials for the configured provider.
|
||||
# Limit to API-key providers in PROVIDER_REGISTRY — other provider
|
||||
# types (OAuth, SDK, openrouter/anthropic/custom/auto) have their
|
||||
# own env-var checks elsewhere in doctor, and get_auth_status()
|
||||
# returns a bare {logged_in: False} for anything it doesn't
|
||||
# explicitly dispatch, which would produce false positives.
|
||||
if canonical_provider and canonical_provider not in ("auto", "custom", "openrouter"):
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY, get_auth_status
|
||||
pconfig = PROVIDER_REGISTRY.get(canonical_provider)
|
||||
if pconfig and getattr(pconfig, "auth_type", "") == "api_key":
|
||||
status = get_auth_status(canonical_provider) or {}
|
||||
configured = bool(status.get("configured") or status.get("logged_in") or status.get("api_key"))
|
||||
if not configured:
|
||||
check_fail(
|
||||
f"model.provider '{canonical_provider}' is set but no API key is configured",
|
||||
"(check ~/.hermes/.env or run 'hermes setup')",
|
||||
)
|
||||
issues.append(
|
||||
f"No credentials found for provider '{canonical_provider}'. "
|
||||
f"Run 'hermes setup' or set the provider's API key in {_DHH}/.env, "
|
||||
f"or switch providers with 'hermes config set model.provider <name>'"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
check_warn("Could not validate model/provider config", f"({e})")
|
||||
else:
|
||||
fallback_config = PROJECT_ROOT / 'cli-config.yaml'
|
||||
if fallback_config.exists():
|
||||
@@ -859,16 +778,6 @@ def run_doctor(args):
|
||||
elif response.status_code == 401:
|
||||
print(f"\r {color('✗', Colors.RED)} OpenRouter API {color('(invalid API key)', Colors.DIM)} ")
|
||||
issues.append("Check OPENROUTER_API_KEY in .env")
|
||||
elif response.status_code == 402:
|
||||
print(f"\r {color('✗', Colors.RED)} OpenRouter API {color('(out of credits — payment required)', Colors.DIM)}")
|
||||
issues.append(
|
||||
"OpenRouter account has insufficient credits. "
|
||||
"Fix: run 'hermes config set model.provider <provider>' to switch providers, "
|
||||
"or fund your OpenRouter account at https://openrouter.ai/settings/credits"
|
||||
)
|
||||
elif response.status_code == 429:
|
||||
print(f"\r {color('✗', Colors.RED)} OpenRouter API {color('(rate limited)', Colors.DIM)} ")
|
||||
issues.append("OpenRouter rate limit hit — consider switching to a different provider or waiting")
|
||||
else:
|
||||
print(f"\r {color('✗', Colors.RED)} OpenRouter API {color(f'(HTTP {response.status_code})', Colors.DIM)} ")
|
||||
except Exception as e:
|
||||
@@ -912,7 +821,6 @@ def run_doctor(args):
|
||||
_apikey_providers = [
|
||||
("Z.AI / GLM", ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
|
||||
("Kimi / Moonshot", ("KIMI_API_KEY",), "https://api.moonshot.ai/v1/models", "KIMI_BASE_URL", True),
|
||||
("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
|
||||
("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
|
||||
("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
|
||||
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
|
||||
@@ -944,22 +852,18 @@ def run_doctor(args):
|
||||
try:
|
||||
import httpx
|
||||
_base = os.getenv(_base_env, "") if _base_env else ""
|
||||
# Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1
|
||||
# (OpenAI-compat surface, which exposes /models for health check).
|
||||
# Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com
|
||||
if not _base and _key.startswith("sk-kimi-"):
|
||||
_base = "https://api.kimi.com/coding/v1"
|
||||
# Anthropic-compat endpoints (/anthropic, api.kimi.com/coding
|
||||
# with no /v1) don't support /models. Rewrite to the OpenAI-compat
|
||||
# /v1 surface for health checks.
|
||||
# Anthropic-compat endpoints (/anthropic) don't support /models.
|
||||
# Rewrite to the OpenAI-compat /v1 surface for health checks.
|
||||
if _base and _base.rstrip("/").endswith("/anthropic"):
|
||||
from agent.auxiliary_client import _to_openai_base_url
|
||||
_base = _to_openai_base_url(_base)
|
||||
if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"):
|
||||
_base = _base.rstrip("/") + "/v1"
|
||||
_url = (_base.rstrip("/") + "/models") if _base else _default_url
|
||||
_headers = {"Authorization": f"Bearer {_key}"}
|
||||
if base_url_host_matches(_base, "api.kimi.com"):
|
||||
_headers["User-Agent"] = "claude-code/0.1.0"
|
||||
if "api.kimi.com" in _url.lower():
|
||||
_headers["User-Agent"] = "KimiCLI/1.30.0"
|
||||
_resp = httpx.get(
|
||||
_url,
|
||||
headers=_headers,
|
||||
|
||||
@@ -160,6 +160,7 @@ def _config_overrides(config: dict) -> dict[str, str]:
|
||||
("display", "streaming"),
|
||||
("display", "skin"),
|
||||
("display", "show_reasoning"),
|
||||
("smart_model_routing", "enabled"),
|
||||
("privacy", "redact_pii"),
|
||||
("tts", "provider"),
|
||||
]
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user