Compare commits
442 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| d049d88dd7 | |||
| f4b76fa272 | |||
| 564a649e6a | |||
| 7913d6a90f | |||
| 8b290a5908 | |||
| cdf9793d6d | |||
| 29bcd2f6e9 | |||
| b9d9fa7df8 | |||
| d499d17271 | |||
| 2d3c041338 | |||
| 4e296dcdda | |||
| d954d6fbcf | |||
| e30de51ee9 | |||
| 285e9efb3f | |||
| cad7944b92 | |||
| 407dfbb021 | |||
| 9a14540603 | |||
| ae8930afa5 | |||
| 4178ab3c07 | |||
| 4c792865b4 | |||
| a845177ebe | |||
| eda1d516dc | |||
| e8e5985ce6 | |||
| d1d0ef6dbd | |||
| 87f5e1a25a | |||
| b50bc13ef9 | |||
| 3fc4c63d38 | |||
| 61fec7689d | |||
| 201f7caed8 | |||
| e0fa2cf972 | |||
| 70ae678af1 | |||
| 0dd373ec43 | |||
| fbb3775770 | |||
| cc5b9fb581 | |||
| 3de8e21683 | |||
| 04ea895ffb | |||
| 411f586c67 | |||
| ca87c822ed | |||
| 5af8fa5c8c | |||
| 19f9be1dff | |||
| 3858f9419e | |||
| 01d7c87ecc | |||
| 362996e269 | |||
| f54935738c | |||
| f44f1f9615 | |||
| 663ba9a58f | |||
| cb130bf776 | |||
| 8d302e37a8 | |||
| 2662bfb756 | |||
| 0da968e521 | |||
| 658947480a | |||
| d2536a72bf | |||
| 5d253e65b7 | |||
| 10e43edc09 | |||
| bff8ab0311 | |||
| 97a851bf97 | |||
| 25caaa4a70 | |||
| 0ad4f55aa8 | |||
| 2facea7f71 | |||
| 5b85a7d351 | |||
| fd0796947f | |||
| aa7bf329bc | |||
| 26787ce638 | |||
| e23bb18dac | |||
| 45780edbbf | |||
| 39b0bc377c | |||
| ca5bebef00 | |||
| a696bceafa | |||
| b3137d758c | |||
| 21e695fcb6 | |||
| 3c27efbb91 | |||
| 718e4e2e7e | |||
| 62a5d7207d | |||
| ce0c3ae493 | |||
| f73364b1c4 | |||
| e7beaaf184 | |||
| b06a06e608 | |||
| 828d3a320b | |||
| 4d363499db | |||
| 71c8ca17dc | |||
| 868bc1c242 | |||
| 6e42daf7dd | |||
| 1f1608067c | |||
| 52d9e57825 | |||
| e464cde58f | |||
| 457128d4e8 | |||
| 2e20f6ae2d | |||
| 8f144fe36b | |||
| 4d7fc0f37c | |||
| 7fae87bc00 | |||
| a7fb79efb2 | |||
| 502debed91 | |||
| ffa65291d1 | |||
| 16233711d9 | |||
| d69a0b2c29 | |||
| 763aadd6bf | |||
| 1f712173b2 | |||
| dd2d1ba5e6 | |||
| 7966560fb5 | |||
| 113239f6e3 | |||
| e6b05eaf63 | |||
| 289cc47631 | |||
| 51b44b6e3f | |||
| a7780fe05f | |||
| 7d48a16f14 | |||
| 3c673468b4 | |||
| 95f2802f84 | |||
| 22ff6ca32b | |||
| 8dcab19d02 | |||
| 49fcad8cf8 | |||
| 24b5279f43 | |||
| 0ba451d004 | |||
| 4cc6da84a1 | |||
| 87e259a678 | |||
| 31f70d1f2a | |||
| d05497f812 | |||
| 98a428fd61 | |||
| 8cce85b819 | |||
| fc0f358f37 | |||
| 7a4da315a2 | |||
| b978fd8b26 | |||
| 9fc9c15b4a | |||
| fc7f55f490 | |||
| 98f5be13fa | |||
| 5e6e8b6af3 | |||
| d9bf093728 | |||
| faa467ccaf | |||
| f45434d3c6 | |||
| 2a9a5fffa5 | |||
| c2cb6d1071 | |||
| b52b63396c | |||
| 528e7dc176 | |||
| 4899bd99c0 | |||
| 8652d47eaa | |||
| 7d96a5ab6e | |||
| d3ab2b2e13 | |||
| f7abcb4f01 | |||
| 10fcd620d2 | |||
| d8afafd22b | |||
| 456955c2e4 | |||
| 9be3ab1a5b | |||
| ffe1d660a0 | |||
| 9d7ece362d | |||
| 528a13b37a | |||
| 9835f57e9c | |||
| d7d1503595 | |||
| b81638d749 | |||
| 165d766891 | |||
| 258449c468 | |||
| 2e991770fc | |||
| c5a5e586d7 | |||
| 5a61c116e1 | |||
| 69d4800db7 | |||
| 9ee540a5e2 | |||
| 0e577fb1be | |||
| c61b2e0af7 | |||
| b01656d116 | |||
| 430302c197 | |||
| 40a98fb0fa | |||
| eafa637287 | |||
| f3aa989b1b | |||
| 0b2f1bb27b | |||
| 9eb16025bd | |||
| b2820cd207 | |||
| e0c0167428 | |||
| 6d8423761b | |||
| ec27f0a3fa | |||
| 8c8fc6c1ec | |||
| e120cd5941 | |||
| fa3338c171 | |||
| fd5479a4fc | |||
| fd7188a7c6 | |||
| 60c6b07128 | |||
| 0a5ee01e48 | |||
| c38dac742b | |||
| 1bedc836b5 | |||
| e0a03f3f40 | |||
| 13c238327e | |||
| 5a1d4f6804 | |||
| 810d98e892 | |||
| 83c288da01 | |||
| 398945e7b1 | |||
| 7141cda967 | |||
| 0565497dcc | |||
| 5662ac2afc | |||
| cf83982da0 | |||
| 835f9adec0 | |||
| b85fff9495 | |||
| f317325279 | |||
| 9e63062b6c | |||
| 33967b4e52 | |||
| 40d25e125b | |||
| ff687c019e | |||
| aea72c0936 | |||
| fe6c86623f | |||
| 258755a24f | |||
| e9b96fd050 | |||
| b0435cc164 | |||
| 46437966cc | |||
| 3606414ec7 | |||
| 20b759cd02 | |||
| 13683c0842 | |||
| d244596dba | |||
| 37d107e03d | |||
| df0e97a168 | |||
| 860ff445f6 | |||
| ecaf8008bb | |||
| 4a62ba9ccd | |||
| 23f5fc6765 | |||
| ed170f4333 | |||
| be57af7188 | |||
| 059980727a | |||
| 21676e80cc | |||
| 58a6171bfb | |||
| bc0d8a941e | |||
| 2d137074a3 | |||
| 5531c0df82 | |||
| 5e68503d2f | |||
| 22cc7492ff | |||
| c2fd0fa684 | |||
| fa9383d27b | |||
| 019d4c1c3f | |||
| a12f7aa8bb | |||
| 0d31864e3b | |||
| c8b7e7268a | |||
| bc79e227e6 | |||
| 88602376d4 | |||
| ded12f0968 | |||
| 80e474f11f | |||
| d341af22c0 | |||
| 88e07c42b4 | |||
| cc5efb6fc1 | |||
| 97a2474b39 | |||
| 6b4ef00a2c | |||
| 4858e26eaa | |||
| dcd7b717f8 | |||
| ac855bba0e | |||
| f95c34f415 | |||
| 679a27498d | |||
| d1ee4915f3 | |||
| 26816d1f77 | |||
| e750829015 | |||
| 7d39a45749 | |||
| 69ff114ee2 | |||
| f10a3df632 | |||
| 88a9efdb1a | |||
| 72a3af63d4 | |||
| a2819e1820 | |||
| 0a6ecea676 | |||
| b66cbb7b4c | |||
| 1d4218be56 | |||
| c4db1ce08c | |||
| 8c892c1453 | |||
| 9e398e1809 | |||
| 6e9691ff12 | |||
| 10ad7006b6 | |||
| f542d17b00 | |||
| d7ae8dfd0a | |||
| ce2cc7302e | |||
| afb20a1d67 | |||
| e4120d1e6d | |||
| cd7150a195 | |||
| 3379f88ea4 | |||
| adef1f33ab | |||
| fe295f9836 | |||
| fd943461ca | |||
| cb039ac000 | |||
| 9f004b6d94 | |||
| 0399d4b976 | |||
| 188eaa57c4 | |||
| 6b09df39be | |||
| a9efa46b69 | |||
| b2f936fd37 | |||
| ec11aa64ee | |||
| 7d81d76366 | |||
| c3d39feb3a | |||
| 258efb2575 | |||
| 1e326c686d | |||
| af6b1a3343 | |||
| 8d591fe3c7 | |||
| 15ef11a8b8 | |||
| 87d3fa6f1c | |||
| 75d9811393 | |||
| e42065b1f7 | |||
| a830f25f71 | |||
| 50edbe6f46 | |||
| 4689ace7cb | |||
| 9eabc24e24 | |||
| 0d957a8d48 | |||
| 5f215b13ce | |||
| 124da27767 | |||
| 5d2f9b5d7d | |||
| 433d38da09 | |||
| a0105a7f81 | |||
| 01ad0aacaf | |||
| fa2bee1215 | |||
| 214ca943ac | |||
| 7d4648461a | |||
| faa15772b7 | |||
| 74c209534c | |||
| 18f585f091 | |||
| 4bf0e75ae9 | |||
| a3c27b5cd1 | |||
| 47d4b6e31a | |||
| a1921c43cc | |||
| 912590a143 | |||
| 1285172aca | |||
| b53a091b97 | |||
| b5128a751b | |||
| 663602f6b0 | |||
| e1027134cd | |||
| f62272b203 | |||
| 0348a69c51 | |||
| 753a071491 | |||
| e5601d1e85 | |||
| df51ad7973 | |||
| 42be5e49b0 | |||
| e0f5d39837 | |||
| 5ed1eb0d0f | |||
| be41ccd0af | |||
| e4b69bf149 | |||
| 1d8b9e6458 | |||
| e123f4ecf0 | |||
| 6085d7a93e | |||
| 3d8be2c617 | |||
| 89e8c87354 | |||
| 20c9340c34 | |||
| b2339c87e4 | |||
| 8cced33784 | |||
| 69b8fa65d4 | |||
| 5f84eac451 | |||
| b5905f0d4a | |||
| d6137453ac | |||
| a9369fc193 | |||
| e116957a63 | |||
| 391f1ca1f4 | |||
| 72dea9f4f7 | |||
| 06164a7b28 | |||
| 529eb29b6a | |||
| dbbe2d1973 | |||
| 315a11a76f | |||
| a3b9343f08 | |||
| d8c5573ffe | |||
| c69310c625 | |||
| d3a9c69e9b | |||
| a54106bbc8 | |||
| 1a4289b6b7 | |||
| 052b3449e5 | |||
| fb112d6a73 | |||
| 7444e49d4e | |||
| 93feffbcfa | |||
| b61d9b297a | |||
| 3ab97a32d1 | |||
| 1369dae226 | |||
| 7996c14795 | |||
| 4aa0a7c195 | |||
| 7428abd54e | |||
| 0f473d643d | |||
| aa94883288 | |||
| 1350d12b0b | |||
| 02ae152222 | |||
| 9cd02b1698 | |||
| 37551ee53e | |||
| a23f18cc3e | |||
| 023f5c74b1 | |||
| 2b728e1274 | |||
| 5316ce95de | |||
| a6a6cf047d | |||
| bd10acd747 | |||
| 4148e85b3a | |||
| 4462b349b2 | |||
| 4e5ebf07ea | |||
| 447d800b81 | |||
| e63364b8df | |||
| cf0852f92e | |||
| a83f669bcf | |||
| 6c78305294 | |||
| 1b9b5d2957 | |||
| 2f9243c333 | |||
| 22ddac4b14 | |||
| f3371c39a4 | |||
| 20b49b71cd | |||
| 1791324604 | |||
| 632ddf2a0a | |||
| afb9588298 | |||
| b4a8031b2e | |||
| 413ee1a286 | |||
| dad10a78d0 | |||
| 42cc905c13 | |||
| 4d3e3ff8a2 | |||
| c9d8b916d1 | |||
| a8f9c56cb4 | |||
| 0edcc57d9a | |||
| de03a332f7 | |||
| efb7d27609 | |||
| 8d76d69d48 | |||
| cfcad80ee1 | |||
| 7d884f81c4 | |||
| abefd89059 | |||
| 0169c51820 | |||
| db305bba8b | |||
| d293e0051e | |||
| 185ecc71f1 | |||
| 40bd6d4709 | |||
| d63abbc329 | |||
| 66a05e44d6 | |||
| fdfe40a48b | |||
| dd789a4fdf | |||
| c8ef786926 | |||
| 1d5e25f353 | |||
| 9e4d79b17f | |||
| 9048fd020f | |||
| 66b1142384 | |||
| 6b6fc28e85 | |||
| 54e24f7758 | |||
| b52ceccfa8 | |||
| 755f050c67 | |||
| 07a818804e | |||
| 474c725b49 | |||
| 8269f9056c | |||
| 6ce796b495 | |||
| cff29fa7fd | |||
| 2dfd73a497 | |||
| 8081425a1c | |||
| ec8243fe2a | |||
| 3d67364b8f | |||
| 38a6bada92 | |||
| 6c70ac8eef | |||
| d497387cec | |||
| 32d4048c6b | |||
| 1eab5960f0 | |||
| 74a4832b74 | |||
| fbbcfa24c5 | |||
| f223346eb7 | |||
| 57f8cf00e9 | |||
| 6649e7e746 | |||
| 32b78578e0 | |||
| 6769a0aece | |||
| d7528d43ac | |||
| a7cdd4133c | |||
| 461ef88705 | |||
| 12d745bd7e |
@@ -5,7 +5,9 @@
|
||||
|
||||
# Dependencies
|
||||
node_modules
|
||||
**/node_modules
|
||||
.venv
|
||||
**/.venv
|
||||
|
||||
# CI/CD
|
||||
.github
|
||||
|
||||
@@ -398,3 +398,19 @@ IMAGE_TOOLS_DEBUG=false
|
||||
# Override STT provider endpoints (for proxies or self-hosted instances)
|
||||
# GROQ_BASE_URL=https://api.groq.com/openai/v1
|
||||
# STT_OPENAI_BASE_URL=https://api.openai.com/v1
|
||||
|
||||
# =============================================================================
|
||||
# MICROSOFT TEAMS INTEGRATION
|
||||
# =============================================================================
|
||||
# Register a Bot in Azure: https://dev.botframework.com/ → "Register a bot"
|
||||
# Or use Azure Portal: Azure Active Directory → App registrations → New registration
|
||||
# Then add the bot to Teams via the Bot Framework or App Studio.
|
||||
#
|
||||
# TEAMS_CLIENT_ID= # Azure AD App (client) ID
|
||||
# TEAMS_CLIENT_SECRET= # Azure AD client secret value
|
||||
# TEAMS_TENANT_ID= # Azure AD tenant ID (or "common" for multi-tenant)
|
||||
# TEAMS_ALLOWED_USERS= # Comma-separated AAD object IDs or UPNs
|
||||
# TEAMS_ALLOW_ALL_USERS=false # Set true to skip the allowlist
|
||||
# TEAMS_HOME_CHANNEL= # Default channel/chat ID for cron delivery
|
||||
# TEAMS_HOME_CHANNEL_NAME= # Display name for the home channel
|
||||
# TEAMS_PORT=3978 # Webhook listen port (Bot Framework default)
|
||||
|
||||
@@ -1,8 +1,18 @@
|
||||
name: 'Setup Nix'
|
||||
description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
|
||||
description: 'Install Nix and configure Cachix binary cache'
|
||||
|
||||
inputs:
|
||||
cachix-auth-token:
|
||||
description: 'Cachix auth token (enables push). Omit for read-only.'
|
||||
required: false
|
||||
default: ''
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
|
||||
- uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
|
||||
- uses: cachix/cachix-action@1eb2ef646ac0255473d23a5907ad7b04ce94065c # v17
|
||||
with:
|
||||
name: hermes-agent
|
||||
authToken: ${{ inputs.cachix-auth-token }}
|
||||
continue-on-error: true
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
name: Nix Lockfile Check
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
concurrency:
|
||||
group: nix-lockfile-check-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
check:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
|
||||
- name: Resolve head SHA
|
||||
id: sha
|
||||
shell: bash
|
||||
run: |
|
||||
FULL="${{ github.event.pull_request.head.sha || github.sha }}"
|
||||
echo "full=$FULL" >> "$GITHUB_OUTPUT"
|
||||
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Check lockfile hashes
|
||||
id: check
|
||||
continue-on-error: true
|
||||
env:
|
||||
LINK_SHA: ${{ steps.sha.outputs.full }}
|
||||
run: nix run .#fix-lockfiles -- --check
|
||||
|
||||
- name: Post sticky PR comment (stale)
|
||||
if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
message: |
|
||||
### ⚠️ npm lockfile hash out of date
|
||||
|
||||
Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
|
||||
|
||||
The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
|
||||
|
||||
${{ steps.check.outputs.report }}
|
||||
|
||||
#### Apply the fix
|
||||
|
||||
- [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
|
||||
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
|
||||
- Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
|
||||
|
||||
- name: Clear sticky PR comment (resolved)
|
||||
if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
delete: true
|
||||
|
||||
- name: Fail if stale
|
||||
if: steps.check.outputs.stale == 'true'
|
||||
run: exit 1
|
||||
@@ -1,6 +1,13 @@
|
||||
name: Nix Lockfile Fix
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'ui-tui/package-lock.json'
|
||||
- 'ui-tui/package.json'
|
||||
- 'web/package-lock.json'
|
||||
- 'web/package.json'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
pr_number:
|
||||
@@ -19,9 +26,105 @@ concurrency:
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
# ── Auto-fix on main ───────────────────────────────────────────────
|
||||
# Fires when a push to main touches package.json or package-lock.json
|
||||
# in ui-tui/ or web/. Runs fix-lockfiles and pushes the hash
|
||||
# update commit directly to main so Nix builds never stay broken.
|
||||
#
|
||||
# Safety invariants:
|
||||
# 1. The fix commit only touches nix/*.nix files, which are NOT in
|
||||
# the paths filter above, so this cannot re-trigger itself.
|
||||
# 2. An explicit file-whitelist check before commit aborts if
|
||||
# fix-lockfiles ever modifies unexpected files.
|
||||
# 3. Job-level concurrency with cancel-in-progress: true ensures
|
||||
# back-to-back pushes collapse to the newest; ref: main checkout
|
||||
# always operates on the latest branch state.
|
||||
# 4. Uses a GitHub App token (not GITHUB_TOKEN) so the fix commit
|
||||
# triggers downstream nix.yml verification.
|
||||
auto-fix-main:
|
||||
if: github.event_name == 'push'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 25
|
||||
concurrency:
|
||||
group: auto-fix-main
|
||||
cancel-in-progress: true
|
||||
steps:
|
||||
- name: Generate GitHub App token
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@7bfa3a4717ef143a604ee0a99d859b8886a96d00 # v1.9.3
|
||||
with:
|
||||
app-id: ${{ secrets.APP_ID }}
|
||||
private-key: ${{ secrets.APP_PRIVATE_KEY }}
|
||||
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
ref: main
|
||||
token: ${{ steps.app-token.outputs.token }}
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Apply lockfile hashes
|
||||
id: apply
|
||||
run: nix run .#fix-lockfiles -- --apply
|
||||
|
||||
- name: Commit & push
|
||||
if: steps.apply.outputs.changed == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Ensure only nix files were modified — prevents accidental
|
||||
# self-triggering if fix-lockfiles ever touches package files.
|
||||
unexpected="$(git diff --name-only | grep -Ev '^nix/(tui|web)\.nix$' || true)"
|
||||
if [ -n "$unexpected" ]; then
|
||||
echo "::error::Unexpected modified files: $unexpected"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Record the base SHA before committing — used to detect package
|
||||
# file changes if we need to rebase after a non-fast-forward push.
|
||||
BASE_SHA="$(git rev-parse HEAD)"
|
||||
|
||||
git config user.name 'github-actions[bot]'
|
||||
git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
|
||||
git add nix/tui.nix nix/web.nix
|
||||
git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
|
||||
-m "Source: $GITHUB_SHA" \
|
||||
-m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
|
||||
|
||||
# Retry push with rebase in case main advanced with an unrelated
|
||||
# commit during the nix build. Without this, a non-fast-forward
|
||||
# rejection silently loses the fix. If package files changed during
|
||||
# the rebase, abort — a fresh auto-fix run will handle the new state.
|
||||
for attempt in 1 2 3; do
|
||||
if git push origin HEAD:main; then
|
||||
exit 0
|
||||
fi
|
||||
echo "::warning::Push attempt $attempt failed (non-fast-forward?), rebasing…"
|
||||
git fetch origin main
|
||||
|
||||
# If package files changed between our base and the new main,
|
||||
# our computed hashes are stale. Abort and let the next triggered
|
||||
# run recompute from the correct package-lock state.
|
||||
pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
|
||||
'ui-tui/package-lock.json' 'ui-tui/package.json' \
|
||||
'web/package-lock.json' 'web/package.json' || true)"
|
||||
if [ -n "$pkg_changed" ]; then
|
||||
echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
git rebase origin/main
|
||||
done
|
||||
echo "::error::Failed to push after 3 rebase attempts"
|
||||
exit 1
|
||||
|
||||
# ── PR fix (manual / checkbox) ─────────────────────────────────────
|
||||
# Existing behavior: run on manual dispatch OR when a task-list
|
||||
# checkbox in the sticky lockfile-check comment flips from [ ] to [x].
|
||||
fix:
|
||||
# Run on manual dispatch OR when a task-list checkbox in the sticky
|
||||
# lockfile-check comment flips from `[ ]` to `[x]`.
|
||||
if: |
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'issue_comment'
|
||||
@@ -99,10 +202,12 @@ jobs:
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Apply lockfile hashes
|
||||
id: apply
|
||||
run: nix run .#fix-lockfiles -- --apply
|
||||
run: nix run .#fix-lockfiles
|
||||
|
||||
- name: Commit & push
|
||||
if: steps.apply.outputs.changed == 'true'
|
||||
|
||||
@@ -7,6 +7,7 @@ on:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
concurrency:
|
||||
group: nix-${{ github.ref }}
|
||||
@@ -22,12 +23,95 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: ./.github/actions/nix-setup
|
||||
with:
|
||||
cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
|
||||
|
||||
- name: Resolve head SHA
|
||||
if: github.event_name == 'pull_request'
|
||||
id: sha
|
||||
shell: bash
|
||||
run: |
|
||||
FULL="${{ github.event.pull_request.head.sha || github.sha }}"
|
||||
echo "full=$FULL" >> "$GITHUB_OUTPUT"
|
||||
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Check flake
|
||||
id: flake
|
||||
if: runner.os == 'Linux'
|
||||
continue-on-error: true
|
||||
run: nix flake check --print-build-logs
|
||||
|
||||
- name: Build package
|
||||
id: build
|
||||
if: runner.os == 'Linux'
|
||||
continue-on-error: true
|
||||
run: nix build --print-build-logs
|
||||
|
||||
# When the real Nix build fails, run a targeted diagnostic to see if
|
||||
# the failure is specifically a stale npm lockfile hash in one of the
|
||||
# known npm subpackages (tui / web). This avoids surfacing a generic
|
||||
# "build failed" message when the fix is a single known command.
|
||||
- name: Diagnose npm lockfile hashes
|
||||
id: hash_check
|
||||
if: (steps.flake.outcome == 'failure' || steps.build.outcome == 'failure') && runner.os == 'Linux'
|
||||
continue-on-error: true
|
||||
env:
|
||||
LINK_SHA: ${{ steps.sha.outputs.full }}
|
||||
run: nix run .#fix-lockfiles -- --check
|
||||
|
||||
# If fix-lockfiles itself crashes (infrastructure blip, cache throttle,
|
||||
# etc.) it won't set stale=true/false. Treat that as a distinct failure
|
||||
# mode rather than silently ignoring it.
|
||||
- name: Fail if hash check crashed without reporting
|
||||
if: steps.hash_check.outcome == 'failure' && steps.hash_check.outputs.stale != 'true' && steps.hash_check.outputs.stale != 'false'
|
||||
run: |
|
||||
echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
|
||||
exit 1
|
||||
|
||||
- name: Post sticky PR comment (stale hashes)
|
||||
if: steps.hash_check.outputs.stale == 'true' && github.event_name == 'pull_request'
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
message: |
|
||||
### ⚠️ npm lockfile hash out of date
|
||||
|
||||
Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
|
||||
|
||||
The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
|
||||
|
||||
${{ steps.hash_check.outputs.report }}
|
||||
|
||||
#### Apply the fix
|
||||
|
||||
- [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
|
||||
- Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
|
||||
- Or locally: `nix run .#fix-lockfiles` and commit the diff
|
||||
|
||||
# Clear the sticky comment when either the build passed outright (no
|
||||
# hash check needed) or the hash check explicitly returned stale=false
|
||||
# (build failed for a non-hash reason).
|
||||
- name: Clear sticky PR comment (resolved)
|
||||
if: |
|
||||
github.event_name == 'pull_request' &&
|
||||
runner.os == 'Linux' &&
|
||||
(steps.hash_check.outputs.stale == 'false' ||
|
||||
(steps.flake.outcome == 'success' && steps.build.outcome == 'success'))
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
with:
|
||||
header: nix-lockfile-check
|
||||
delete: true
|
||||
|
||||
- name: Final fail if build or flake failed
|
||||
if: steps.flake.outcome == 'failure' || steps.build.outcome == 'failure'
|
||||
run: |
|
||||
if [ "${{ steps.hash_check.outputs.stale }}" == "true" ]; then
|
||||
echo "::error::Nix build failed due to stale npm lockfile hash. Run: nix run .#fix-lockfiles"
|
||||
else
|
||||
echo "::error::Nix build/flake check failed. See logs above."
|
||||
fi
|
||||
exit 1
|
||||
|
||||
- name: Evaluate flake (macOS)
|
||||
if: runner.os == 'macOS'
|
||||
run: nix flake show --json > /dev/null
|
||||
|
||||
@@ -38,7 +38,7 @@ hermes-agent/
|
||||
│ │ # homeassistant, signal, matrix, mattermost, email, sms,
|
||||
│ │ # dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
|
||||
│ │ # webhook, api_server, ...). See ADDING_A_PLATFORM.md.
|
||||
│ └── builtin_hooks/ # Always-registered gateway hooks (boot-md, ...)
|
||||
│ └── builtin_hooks/ # Extension point for always-registered gateway hooks (none shipped)
|
||||
├── plugins/ # Plugin system (see "Plugins" section below)
|
||||
│ ├── memory/ # Memory-provider plugins (honcho, mem0, supermemory, ...)
|
||||
│ ├── context_engine/ # Context-engine plugins
|
||||
|
||||
+1
-1
@@ -494,7 +494,7 @@ branding:
|
||||
agent_name: "My Agent"
|
||||
welcome: "Welcome message"
|
||||
response_label: " ⚔ Agent "
|
||||
prompt_symbol: "⚔ ❯ "
|
||||
prompt_symbol: "⚔"
|
||||
|
||||
tool_prefix: "╎" # Tool output line prefix
|
||||
```
|
||||
|
||||
+9
-2
@@ -14,7 +14,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
|
||||
# that would otherwise accumulate when hermes runs as PID 1. See #15012.
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
|
||||
build-essential curl nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
|
||||
@@ -45,7 +45,14 @@ COPY --chown=hermes:hermes . .
|
||||
|
||||
# Build browser dashboard and terminal UI assets.
|
||||
RUN cd web && npm run build && \
|
||||
cd ../ui-tui && npm run build
|
||||
cd ../ui-tui && npm run build && \
|
||||
rm -rf node_modules/@hermes/ink && \
|
||||
rm -rf packages/hermes-ink/node_modules && \
|
||||
cp -R packages/hermes-ink node_modules/@hermes/ink && \
|
||||
npm install --omit=dev --prefer-offline --no-audit --prefix node_modules/@hermes/ink && \
|
||||
rm -rf node_modules/@hermes/ink/node_modules/react && \
|
||||
node --input-type=module -e "await import('@hermes/ink')" && \
|
||||
touch .hermes-prebuilt-tui
|
||||
|
||||
# ---------- Permissions ----------
|
||||
# Make install dir world-readable so any HERMES_UID can read it at runtime.
|
||||
|
||||
@@ -112,6 +112,17 @@ def main() -> None:
|
||||
import acp
|
||||
from .server import HermesACPAgent
|
||||
|
||||
# MCP tool discovery from config.yaml — run before asyncio.run() so
|
||||
# it's safe to use blocking waits. (ACP also registers per-session
|
||||
# MCP servers dynamically via asyncio.to_thread inside the event
|
||||
# loop; that path is unaffected.) Moved from model_tools.py module
|
||||
# scope to avoid freezing the gateway's loop on lazy import (#16856).
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
logger.debug("MCP tool discovery failed at ACP startup", exc_info=True)
|
||||
|
||||
agent = HermesACPAgent()
|
||||
try:
|
||||
asyncio.run(acp.run_agent(agent, use_unstable_protocol=True))
|
||||
|
||||
+172
-7
@@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextvars
|
||||
import logging
|
||||
import os
|
||||
from collections import defaultdict, deque
|
||||
@@ -12,6 +13,7 @@ from typing import Any, Deque, Optional
|
||||
import acp
|
||||
from acp.schema import (
|
||||
AgentCapabilities,
|
||||
AgentMessageChunk,
|
||||
AuthenticateResponse,
|
||||
AvailableCommand,
|
||||
AvailableCommandsUpdate,
|
||||
@@ -29,6 +31,7 @@ from acp.schema import (
|
||||
McpServerStdio,
|
||||
ModelInfo,
|
||||
NewSessionResponse,
|
||||
PromptCapabilities,
|
||||
PromptResponse,
|
||||
ResumeSessionResponse,
|
||||
SetSessionConfigOptionResponse,
|
||||
@@ -44,6 +47,7 @@ from acp.schema import (
|
||||
TextContentBlock,
|
||||
UnstructuredCommandInput,
|
||||
Usage,
|
||||
UserMessageChunk,
|
||||
)
|
||||
|
||||
# AuthMethodAgent was renamed from AuthMethod in agent-client-protocol 0.9.0
|
||||
@@ -87,17 +91,69 @@ def _extract_text(
|
||||
| EmbeddedResourceContentBlock
|
||||
],
|
||||
) -> str:
|
||||
"""Extract plain text from ACP content blocks."""
|
||||
"""Extract plain text from ACP content blocks for display/commands."""
|
||||
parts: list[str] = []
|
||||
for block in prompt:
|
||||
if isinstance(block, TextContentBlock):
|
||||
parts.append(block.text)
|
||||
elif hasattr(block, "text"):
|
||||
parts.append(str(block.text))
|
||||
# Non-text blocks are ignored for now.
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def _image_block_to_openai_part(block: ImageContentBlock) -> dict[str, Any] | None:
|
||||
"""Convert an ACP image content block to OpenAI-style multimodal content."""
|
||||
data = str(getattr(block, "data", "") or "").strip()
|
||||
uri = str(getattr(block, "uri", "") or "").strip()
|
||||
mime_type = str(getattr(block, "mime_type", "") or "image/png").strip() or "image/png"
|
||||
|
||||
if data:
|
||||
url = data if data.startswith("data:") else f"data:{mime_type};base64,{data}"
|
||||
elif uri:
|
||||
url = uri
|
||||
else:
|
||||
return None
|
||||
|
||||
return {"type": "image_url", "image_url": {"url": url}}
|
||||
|
||||
|
||||
def _content_blocks_to_openai_user_content(
|
||||
prompt: list[
|
||||
TextContentBlock
|
||||
| ImageContentBlock
|
||||
| AudioContentBlock
|
||||
| ResourceContentBlock
|
||||
| EmbeddedResourceContentBlock
|
||||
],
|
||||
) -> str | list[dict[str, Any]]:
|
||||
"""Convert ACP prompt blocks into a Hermes/OpenAI-compatible user content payload."""
|
||||
parts: list[dict[str, Any]] = []
|
||||
text_parts: list[str] = []
|
||||
|
||||
for block in prompt:
|
||||
if isinstance(block, TextContentBlock):
|
||||
if block.text:
|
||||
parts.append({"type": "text", "text": block.text})
|
||||
text_parts.append(block.text)
|
||||
continue
|
||||
if isinstance(block, ImageContentBlock):
|
||||
image_part = _image_block_to_openai_part(block)
|
||||
if image_part is not None:
|
||||
parts.append(image_part)
|
||||
continue
|
||||
|
||||
if not parts:
|
||||
return _extract_text(prompt)
|
||||
|
||||
# Keep pure text prompts as strings so slash-command handling and text-only
|
||||
# providers keep the exact legacy path. Switch to structured content only
|
||||
# when an actual non-text block is present.
|
||||
if all(part.get("type") == "text" for part in parts):
|
||||
return "\n".join(text_parts)
|
||||
|
||||
return parts
|
||||
|
||||
|
||||
class HermesACPAgent(acp.Agent):
|
||||
"""ACP Agent implementation wrapping Hermes AIAgent."""
|
||||
|
||||
@@ -351,6 +407,7 @@ class HermesACPAgent(acp.Agent):
|
||||
agent_info=Implementation(name="hermes-agent", version=HERMES_VERSION),
|
||||
agent_capabilities=AgentCapabilities(
|
||||
load_session=True,
|
||||
prompt_capabilities=PromptCapabilities(image=True),
|
||||
session_capabilities=SessionCapabilities(
|
||||
fork=SessionForkCapabilities(),
|
||||
list=SessionListCapabilities(),
|
||||
@@ -376,6 +433,78 @@ class HermesACPAgent(acp.Agent):
|
||||
|
||||
# ---- Session management -------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _history_message_text(message: dict[str, Any]) -> str:
|
||||
"""Extract displayable text from a persisted OpenAI-style message."""
|
||||
content = message.get("content")
|
||||
if isinstance(content, str):
|
||||
return content.strip()
|
||||
if isinstance(content, list):
|
||||
parts: list[str] = []
|
||||
for item in content:
|
||||
if isinstance(item, dict):
|
||||
text = item.get("text")
|
||||
if isinstance(text, str):
|
||||
parts.append(text)
|
||||
elif item.get("type") == "text" and isinstance(item.get("content"), str):
|
||||
parts.append(item["content"])
|
||||
elif isinstance(item, str):
|
||||
parts.append(item)
|
||||
return "\n".join(part.strip() for part in parts if part and part.strip()).strip()
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _history_message_update(
|
||||
*,
|
||||
role: str,
|
||||
text: str,
|
||||
) -> UserMessageChunk | AgentMessageChunk | None:
|
||||
"""Build an ACP history replay update for a user/assistant message."""
|
||||
block = TextContentBlock(type="text", text=text)
|
||||
if role == "user":
|
||||
return UserMessageChunk(
|
||||
session_update="user_message_chunk",
|
||||
content=block,
|
||||
)
|
||||
if role == "assistant":
|
||||
return AgentMessageChunk(
|
||||
session_update="agent_message_chunk",
|
||||
content=block,
|
||||
)
|
||||
return None
|
||||
|
||||
async def _replay_session_history(self, state: SessionState) -> None:
    """Replay persisted user/assistant turns to the client on session/load.

    Zed's ACP history UI calls ``session/load`` once the user picks an entry
    in the Agents sidebar. Restoring server-side state alone lets Hermes
    remember the context but leaves the editor showing an empty thread, so
    each stored message is re-sent as a ``user_message_chunk`` or
    ``agent_message_chunk`` notification.

    Messages with other roles or with no displayable text are skipped. The
    first failed notification is logged as a warning and ends the replay.
    """
    if not (self._conn and state.history):
        return

    for entry in state.history:
        speaker = str(entry.get("role") or "")
        if speaker != "user" and speaker != "assistant":
            continue

        body = self._history_message_text(entry)
        if not body:
            continue

        chunk = self._history_message_update(role=speaker, text=body)
        if chunk is None:
            continue

        try:
            await self._conn.session_update(
                session_id=state.session_id,
                update=chunk,
            )
        except Exception:
            # Best-effort replay: stop at the first failure instead of
            # raising out of the session load.
            logger.warning(
                "Failed to replay ACP history for session %s",
                state.session_id,
                exc_info=True,
            )
            return
|
||||
|
||||
async def new_session(
|
||||
self,
|
||||
cwd: str,
|
||||
@@ -404,6 +533,7 @@ class HermesACPAgent(acp.Agent):
|
||||
return None
|
||||
await self._register_session_mcp_servers(state, mcp_servers)
|
||||
logger.info("Loaded session %s", session_id)
|
||||
await self._replay_session_history(state)
|
||||
self._schedule_available_commands_update(session_id)
|
||||
return LoadSessionResponse(models=self._build_model_state(state))
|
||||
|
||||
@@ -420,6 +550,7 @@ class HermesACPAgent(acp.Agent):
|
||||
state = self.session_manager.create_session(cwd=cwd)
|
||||
await self._register_session_mcp_servers(state, mcp_servers)
|
||||
logger.info("Resumed session %s", state.session_id)
|
||||
await self._replay_session_history(state)
|
||||
self._schedule_available_commands_update(state.session_id)
|
||||
return ResumeSessionResponse(models=self._build_model_state(state))
|
||||
|
||||
@@ -516,11 +647,18 @@ class HermesACPAgent(acp.Agent):
|
||||
return PromptResponse(stop_reason="refusal")
|
||||
|
||||
user_text = _extract_text(prompt).strip()
|
||||
if not user_text:
|
||||
user_content = _content_blocks_to_openai_user_content(prompt)
|
||||
has_content = bool(user_text) or (
|
||||
isinstance(user_content, list) and bool(user_content)
|
||||
)
|
||||
if not has_content:
|
||||
return PromptResponse(stop_reason="end_turn")
|
||||
|
||||
# Intercept slash commands — handle locally without calling the LLM
|
||||
if user_text.startswith("/"):
|
||||
# Intercept slash commands — handle locally without calling the LLM.
|
||||
# Slash commands are text-only; if the client included images/resources,
|
||||
# send the whole multimodal prompt to the agent instead of treating it as
|
||||
# an ACP command.
|
||||
if isinstance(user_content, str) and user_text.startswith("/"):
|
||||
response_text = self._handle_slash_command(user_text, state)
|
||||
if response_text is not None:
|
||||
if self._conn:
|
||||
@@ -574,6 +712,22 @@ class HermesACPAgent(acp.Agent):
|
||||
|
||||
def _run_agent() -> dict:
|
||||
nonlocal previous_approval_cb, previous_interactive
|
||||
# Bind HERMES_SESSION_KEY for this session so per-session caches
|
||||
# (e.g. the interactive sudo password cache in tools.terminal_tool)
|
||||
# scope to the ACP session rather than leaking across sessions
|
||||
# that land on the same reused executor thread. This call runs
|
||||
# inside a contextvars.copy_context() below, so the ContextVar
|
||||
# write is isolated from other concurrent ACP sessions.
|
||||
try:
|
||||
from gateway.session_context import (
|
||||
clear_session_vars,
|
||||
set_session_vars,
|
||||
)
|
||||
session_tokens = set_session_vars(session_key=session_id)
|
||||
except Exception:
|
||||
session_tokens = None
|
||||
clear_session_vars = None # type: ignore[assignment]
|
||||
logger.debug("Could not set ACP session context", exc_info=True)
|
||||
if approval_cb:
|
||||
try:
|
||||
from tools import terminal_tool as _terminal_tool
|
||||
@@ -587,9 +741,10 @@ class HermesACPAgent(acp.Agent):
|
||||
os.environ["HERMES_INTERACTIVE"] = "1"
|
||||
try:
|
||||
result = agent.run_conversation(
|
||||
user_message=user_text,
|
||||
user_message=user_content,
|
||||
conversation_history=state.history,
|
||||
task_id=session_id,
|
||||
persist_user_message=user_text or "[Image attachment]",
|
||||
)
|
||||
return result
|
||||
except Exception as e:
|
||||
@@ -607,9 +762,19 @@ class HermesACPAgent(acp.Agent):
|
||||
_terminal_tool.set_approval_callback(previous_approval_cb)
|
||||
except Exception:
|
||||
logger.debug("Could not restore approval callback", exc_info=True)
|
||||
if session_tokens is not None and clear_session_vars is not None:
|
||||
try:
|
||||
clear_session_vars(session_tokens)
|
||||
except Exception:
|
||||
logger.debug("Could not clear ACP session context", exc_info=True)
|
||||
|
||||
try:
|
||||
result = await loop.run_in_executor(_executor, _run_agent)
|
||||
# Wrap the executor call in a fresh copy of the current context so
|
||||
# concurrent ACP sessions on the shared ThreadPoolExecutor don't
|
||||
# stomp on each other's ContextVar writes (HERMES_SESSION_KEY in
|
||||
# particular — used by the interactive sudo password cache scope).
|
||||
ctx = contextvars.copy_context()
|
||||
result = await loop.run_in_executor(_executor, ctx.run, _run_agent)
|
||||
except Exception:
|
||||
logger.exception("Executor error for session %s", session_id)
|
||||
return PromptResponse(stop_reason="end_turn")
|
||||
|
||||
+236
-30
@@ -20,12 +20,27 @@ from pathlib import Path
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from utils import normalize_proxy_env_vars
|
||||
from utils import base_url_host_matches, normalize_proxy_env_vars
|
||||
|
||||
try:
|
||||
import anthropic as _anthropic_sdk
|
||||
except ImportError:
|
||||
_anthropic_sdk = None # type: ignore[assignment]
|
||||
# NOTE: `import anthropic` is deliberately NOT at module top — the SDK pulls
|
||||
# ~220 ms of imports (anthropic.types, anthropic.lib.tools._beta_runner, etc.)
|
||||
# and the 3 usage sites (build_anthropic_client, build_anthropic_bedrock_client,
|
||||
# read_claude_code_credentials_from_keychain) are all on cold user-triggered
|
||||
# paths. Access via the `_get_anthropic_sdk()` accessor below, which caches
|
||||
# the module after the first call and returns None on ImportError.
|
||||
# Ellipsis means "import not attempted yet"; None means "tried and missing".
_anthropic_sdk: Any = ...


def _get_anthropic_sdk():
    """Return the lazily imported ``anthropic`` module, or None if unavailable.

    The import is attempted only on the first call; its outcome (the module
    or ``None`` after an ``ImportError``) is cached in the module-level
    ``_anthropic_sdk`` and returned by every later call.
    """
    global _anthropic_sdk
    if _anthropic_sdk is not ...:
        return _anthropic_sdk
    try:
        import anthropic as sdk_module
    except ImportError:
        sdk_module = None
    _anthropic_sdk = sdk_module
    return _anthropic_sdk
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -202,19 +217,33 @@ def _forbids_sampling_params(model: str) -> bool:
|
||||
|
||||
|
||||
# Beta headers for enhanced features (sent with ALL auth types).
|
||||
# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the
|
||||
# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
|
||||
# beta headers are still accepted (harmless no-op) but not required. Kept
|
||||
# here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
|
||||
# that still gate on the headers continue to get the enhanced features.
|
||||
# Migration guide: remove these if you no longer support ≤4.5 models.
|
||||
#
|
||||
# ``context-1m-2025-08-07`` unlocks the 1M context window on Claude Opus 4.6/4.7
|
||||
# and Sonnet 4.6 when served via AWS Bedrock or Azure AI Foundry. 1M is GA on
|
||||
# native Anthropic (api.anthropic.com) for Opus 4.6+, but Bedrock/Azure still
|
||||
# gate it behind this beta header as of 2026-04 — without it Bedrock caps Opus
|
||||
# at 200K even though model_metadata.py advertises 1M. The header is a harmless
|
||||
# no-op on endpoints where 1M is GA.
|
||||
#
|
||||
# Migration guide: remove these if you no longer support ≤4.5 models or once
|
||||
# Bedrock/Azure promote 1M to GA.
|
||||
_COMMON_BETAS = [
|
||||
"interleaved-thinking-2025-05-14",
|
||||
"fine-grained-tool-streaming-2025-05-14",
|
||||
"context-1m-2025-08-07",
|
||||
]
|
||||
# MiniMax's Anthropic-compatible endpoints fail tool-use requests when
|
||||
# the fine-grained tool streaming beta is present. Omit it so tool calls
|
||||
# fall back to the provider's default response path.
|
||||
_TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
|
||||
# 1M context beta — see comment on _COMMON_BETAS above. Stripped for
|
||||
# Bearer-auth (MiniMax) endpoints since they host their own models and
|
||||
# unknown Anthropic beta headers risk request rejection.
|
||||
_CONTEXT_1M_BETA = "context-1m-2025-08-07"
|
||||
|
||||
# Fast mode beta — enables the ``speed: "fast"`` request parameter for
|
||||
# significantly higher output token throughput on Opus 4.6 (~2.5x).
|
||||
@@ -336,6 +365,88 @@ def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
|
||||
return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
|
||||
|
||||
|
||||
# Model-name prefixes that identify the Kimi / Moonshot family. Covers
|
||||
# - official slugs: ``kimi-k2.5``, ``kimi_thinking``, ``moonshot-v1-8k``
|
||||
# - common release lines: ``k1.5-...``, ``k2-thinking``, ``k25-...``, ``k2.5-...``
|
||||
# Matched case-insensitively against the post-``normalize_model_name`` form,
|
||||
# so a caller's ``provider/vendor/model`` slug is handled the same as a
|
||||
# bare name.
|
||||
_KIMI_FAMILY_MODEL_PREFIXES = (
|
||||
"kimi-", "kimi_",
|
||||
"moonshot-", "moonshot_",
|
||||
"k1.", "k1-",
|
||||
"k2.", "k2-",
|
||||
"k25", "k2.5",
|
||||
)
|
||||
|
||||
|
||||
def _model_name_is_kimi_family(model: str | None) -> bool:
|
||||
if not isinstance(model, str):
|
||||
return False
|
||||
m = model.strip().lower()
|
||||
if not m:
|
||||
return False
|
||||
# Strip vendor prefix (e.g. ``moonshotai/kimi-k2.5`` → ``kimi-k2.5``)
|
||||
if "/" in m:
|
||||
m = m.rsplit("/", 1)[-1]
|
||||
return m.startswith(_KIMI_FAMILY_MODEL_PREFIXES)
|
||||
|
||||
|
||||
def _is_kimi_family_endpoint(base_url: str | None, model: str | None = None) -> bool:
    """Return True for any Kimi / Moonshot Anthropic-Messages-speaking endpoint.

    Broader than ``_is_kimi_coding_endpoint``. The checks run in this order
    and the first hit wins:

    1. Kimi's official ``/coding`` URL (legacy check, preserved).
    2. Any ``api.kimi.com`` / ``moonshot.ai`` / ``moonshot.cn`` host.
    3. A *model* name in the Kimi / Moonshot family (``kimi-*``,
       ``moonshot-*``, ``k1.*``, ``k2.*``, …). This covers custom or proxied
       endpoints, e.g. ``api_mode: anthropic_messages`` on a private gateway
       fronting Kimi, where the upstream still enforces Kimi's thinking
       semantics (reasoning_content required on every replayed tool-call
       message) regardless of the gateway's hostname.

    Used to decide whether to drop Anthropic's ``thinking`` kwarg and to
    preserve unsigned reasoning_content-derived thinking blocks on replay.
    See hermes-agent#13848, #17057.
    """
    if _is_kimi_coding_endpoint(base_url):
        return True

    url_text = base_url or ""
    kimi_hosts = ("api.kimi.com", "moonshot.ai", "moonshot.cn")
    if any(base_url_host_matches(url_text, host) for host in kimi_hosts):
        return True

    return bool(_model_name_is_kimi_family(model))
|
||||
|
||||
|
||||
def _is_deepseek_anthropic_endpoint(base_url: str | None) -> bool:
    """Return True for DeepSeek's Anthropic-compatible endpoint.

    DeepSeek's ``/anthropic`` route speaks the Anthropic Messages protocol.
    With thinking mode enabled it requires the ``thinking`` blocks from
    prior assistant turns to be sent back on later requests; the generic
    third-party path strips them, which triggers HTTP 400::

        The content[].thinking in the thinking mode must be passed back
        to the API.

    Per DeepSeek's published compatibility matrix the blocks are unsigned
    (no Anthropic-proprietary signature, no ``redacted_thinking`` support),
    so this endpoint gets the same strip-signed / keep-unsigned policy as
    Kimi's ``/coding`` endpoint. The host must be ``api.deepseek.com`` and
    the normalized URL must contain ``/anthropic``, so the OpenAI-compatible
    ``api.deepseek.com`` base URL (which never reaches this adapter) is not
    misclassified. See hermes-agent#16748.
    """
    if base_url_host_matches(base_url or "", "api.deepseek.com"):
        url_text = _normalize_base_url_text(base_url)
        if url_text:
            return "/anthropic" in url_text.rstrip("/").lower()
    return False
|
||||
|
||||
|
||||
def _requires_bearer_auth(base_url: str | None) -> bool:
|
||||
"""Return True for Anthropic-compatible providers that require Bearer auth.
|
||||
|
||||
@@ -350,20 +461,45 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
|
||||
return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
|
||||
|
||||
|
||||
def _common_betas_for_base_url(
    base_url: str | None,
    *,
    drop_context_1m_beta: bool = False,
) -> list[str]:
    """Return the beta headers that are safe for the configured endpoint.

    MiniMax's Anthropic-compatible endpoints (Bearer-auth) reject requests
    that include Anthropic's ``fine-grained-tool-streaming`` beta — every
    tool-use message triggers a connection error. That beta is stripped for
    Bearer-auth endpoints.

    The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
    endpoints — MiniMax hosts its own models, not Claude, so the header is
    irrelevant at best and risks request rejection at worst.

    ``drop_context_1m_beta=True`` additionally strips the 1M-context beta on
    otherwise-unrelated endpoints. The OAuth retry path flips this flag after
    a subscription rejects the beta with
    "The long context beta is not yet available for this subscription" so
    subsequent requests in the same session don't repeat the probe. See the
    reactive recovery loop in ``run_agent.py`` and issue-comment history on
    PR #17680 for the full rationale.

    Args:
        base_url: Endpoint base URL, or None for the default endpoint.
        drop_context_1m_beta: Strip the 1M-context beta even when the
            endpoint is not a Bearer-auth one.

    Returns:
        A new list on every call, in ``_COMMON_BETAS`` order. Callers may
        extend or mutate it without touching the module-level constant.
    """
    excluded: set[str] = set()
    if _requires_bearer_auth(base_url):
        excluded = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
    elif drop_context_1m_beta:
        excluded = {_CONTEXT_1M_BETA}
    # Always build a fresh list: handing out _COMMON_BETAS itself would let a
    # caller's in-place edit leak into every later request.
    return [beta for beta in _COMMON_BETAS if beta not in excluded]
|
||||
|
||||
|
||||
def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
|
||||
def build_anthropic_client(
|
||||
api_key: str,
|
||||
base_url: str = None,
|
||||
timeout: float = None,
|
||||
*,
|
||||
drop_context_1m_beta: bool = False,
|
||||
):
|
||||
"""Create an Anthropic client, auto-detecting setup-tokens vs API keys.
|
||||
|
||||
If *timeout* is provided it overrides the default 900s read timeout. The
|
||||
@@ -372,8 +508,15 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
|
||||
Anthropic-compatible providers respect the same knob as OpenAI-wire
|
||||
providers.
|
||||
|
||||
``drop_context_1m_beta=True`` strips ``context-1m-2025-08-07`` from the
|
||||
client-level ``anthropic-beta`` header. Used by the reactive OAuth retry
|
||||
path in ``run_agent.py`` when a subscription rejects the beta; leave at
|
||||
its default on fresh clients so 1M-capable subscriptions keep the
|
||||
capability.
|
||||
|
||||
Returns an anthropic.Anthropic instance.
|
||||
"""
|
||||
_anthropic_sdk = _get_anthropic_sdk()
|
||||
if _anthropic_sdk is None:
|
||||
raise ImportError(
|
||||
"The 'anthropic' package is required for the Anthropic provider. "
|
||||
@@ -400,7 +543,10 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
|
||||
kwargs["default_query"] = {"api-version": "2025-04-15"}
|
||||
else:
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
common_betas = _common_betas_for_base_url(normalized_base_url)
|
||||
common_betas = _common_betas_for_base_url(
|
||||
normalized_base_url,
|
||||
drop_context_1m_beta=drop_context_1m_beta,
|
||||
)
|
||||
|
||||
if _is_kimi_coding_endpoint(base_url):
|
||||
# Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
|
||||
@@ -456,8 +602,16 @@ def build_anthropic_bedrock_client(region: str):
|
||||
Claude feature parity: prompt caching, thinking budgets, adaptive
|
||||
thinking, fast mode — features not available via the Converse API.
|
||||
|
||||
Attaches the common Anthropic beta headers as client-level defaults so
|
||||
that Bedrock-hosted Claude models get the same enhanced features as
|
||||
native Anthropic. The ``context-1m-2025-08-07`` beta in particular
|
||||
unlocks the 1M context window for Opus 4.6/4.7 on Bedrock — without
|
||||
it, Bedrock caps these models at 200K even though the Anthropic API
|
||||
serves them with 1M natively.
|
||||
|
||||
Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
|
||||
"""
|
||||
_anthropic_sdk = _get_anthropic_sdk()
|
||||
if _anthropic_sdk is None:
|
||||
raise ImportError(
|
||||
"The 'anthropic' package is required for the Bedrock provider. "
|
||||
@@ -473,6 +627,7 @@ def build_anthropic_bedrock_client(region: str):
|
||||
return _anthropic_sdk.AnthropicBedrock(
|
||||
aws_region=region,
|
||||
timeout=Timeout(timeout=900.0, connect=10.0),
|
||||
default_headers={"anthropic-beta": ",".join(_COMMON_BETAS)},
|
||||
)
|
||||
|
||||
|
||||
@@ -488,9 +643,6 @@ def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
|
||||
|
||||
Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
|
||||
"""
|
||||
import platform
|
||||
import subprocess
|
||||
|
||||
if platform.system() != "Darwin":
|
||||
return None
|
||||
|
||||
@@ -1035,9 +1187,12 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
|
||||
# These must not be converted to hyphens. See issue #12295.
|
||||
if _is_bedrock_model_id(model):
|
||||
return model
|
||||
# OpenRouter uses dots for version separators (claude-opus-4.6),
|
||||
# Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
|
||||
model = model.replace(".", "-")
|
||||
# Only convert dots to hyphens for Anthropic/Claude models.
|
||||
# Non-Anthropic models (gpt-5.4, gemini-2.5, etc.) use dots
|
||||
# as part of their canonical names. See issue #17171.
|
||||
_lower = model.lower()
|
||||
if _lower.startswith("claude-") or _lower.startswith("anthropic/"):
|
||||
model = model.replace(".", "-")
|
||||
return model
|
||||
|
||||
|
||||
@@ -1054,6 +1209,33 @@ def _sanitize_tool_id(tool_id: str) -> str:
|
||||
return sanitized or "tool_0"
|
||||
|
||||
|
||||
def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
    """Normalize a tool schema before it is sent to Anthropic.

    Anthropic's tool schema validator rejects nullable unions such as
    ``anyOf: [{"type": "string"}, {"type": "null"}]`` that Pydantic/MCP
    commonly emits for optional fields. Optionality is already expressed by
    the parent ``required`` array, so the shared ``strip_nullable_unions``
    helper is used to collapse such unions to their non-null branch while
    preserving metadata like description/default.

    ``keep_nullable_hint=False`` because the Anthropic validator does not
    recognize the OpenAPI-style ``nullable: true`` extension and strict
    schema-to-grammar converters may reject unknown keywords.

    A falsy schema, or a helper result that is not a dict, yields an empty
    object schema. An object schema whose ``properties`` is not a dict is
    returned as a copy with ``properties`` set to ``{}``.
    """
    empty_object_schema: Dict[str, Any] = {"type": "object", "properties": {}}
    if not schema:
        return empty_object_schema

    from tools.schema_sanitizer import strip_nullable_unions

    cleaned = strip_nullable_unions(schema, keep_nullable_hint=False)
    if not isinstance(cleaned, dict):
        return empty_object_schema

    lacks_properties = cleaned.get("type") == "object" and not isinstance(
        cleaned.get("properties"), dict
    )
    return {**cleaned, "properties": {}} if lacks_properties else cleaned
|
||||
|
||||
|
||||
def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
|
||||
"""Convert OpenAI tool definitions to Anthropic format."""
|
||||
if not tools:
|
||||
@@ -1064,7 +1246,9 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
|
||||
result.append({
|
||||
"name": fn.get("name", ""),
|
||||
"description": fn.get("description", ""),
|
||||
"input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
|
||||
"input_schema": _normalize_tool_input_schema(
|
||||
fn.get("parameters", {"type": "object", "properties": {}})
|
||||
),
|
||||
})
|
||||
return result
|
||||
|
||||
@@ -1195,6 +1379,7 @@ def _convert_content_to_anthropic(content: Any) -> Any:
|
||||
def convert_messages_to_anthropic(
|
||||
messages: List[Dict],
|
||||
base_url: str | None = None,
|
||||
model: str | None = None,
|
||||
) -> Tuple[Optional[Any], List[Dict]]:
|
||||
"""Convert OpenAI-format messages to Anthropic format.
|
||||
|
||||
@@ -1206,6 +1391,12 @@ def convert_messages_to_anthropic(
|
||||
endpoint, all thinking block signatures are stripped. Signatures are
|
||||
Anthropic-proprietary — third-party endpoints cannot validate them and will
|
||||
reject them with HTTP 400 "Invalid signature in thinking block".
|
||||
|
||||
When *model* is provided and matches the Kimi / Moonshot family (or
|
||||
*base_url* is a Kimi / Moonshot host), unsigned thinking blocks
|
||||
synthesised from ``reasoning_content`` are preserved on replayed
|
||||
assistant tool-call messages — Kimi requires the field to exist, even
|
||||
if empty.
|
||||
"""
|
||||
system = None
|
||||
result = []
|
||||
@@ -1434,7 +1625,16 @@ def convert_messages_to_anthropic(
|
||||
# cache markers can interfere with signature validation.
|
||||
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
|
||||
_is_third_party = _is_third_party_anthropic_endpoint(base_url)
|
||||
_is_kimi = _is_kimi_coding_endpoint(base_url)
|
||||
# Kimi /coding and DeepSeek /anthropic share a contract: both speak the
|
||||
# Anthropic Messages protocol upstream but require that thinking blocks
|
||||
# synthesised from reasoning_content round-trip on subsequent turns when
|
||||
# thinking is enabled. Signed Anthropic blocks still have to be stripped
|
||||
# (neither endpoint can validate Anthropic's signatures); unsigned blocks
|
||||
# are preserved. See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
|
||||
_preserve_unsigned_thinking = (
|
||||
_is_kimi_family_endpoint(base_url, model)
|
||||
or _is_deepseek_anthropic_endpoint(base_url)
|
||||
)
|
||||
|
||||
last_assistant_idx = None
|
||||
for i in range(len(result) - 1, -1, -1):
|
||||
@@ -1446,22 +1646,22 @@ def convert_messages_to_anthropic(
|
||||
if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
|
||||
continue
|
||||
|
||||
if _is_kimi:
|
||||
# Kimi's /coding endpoint enables thinking server-side and
|
||||
# requires unsigned thinking blocks on replayed assistant
|
||||
# tool-call messages. Strip signed Anthropic blocks (Kimi
|
||||
# can't validate signatures) but preserve the unsigned ones
|
||||
# we synthesised from reasoning_content above.
|
||||
if _preserve_unsigned_thinking:
|
||||
# Kimi's /coding and DeepSeek's /anthropic endpoints both enable
|
||||
# thinking server-side and require unsigned thinking blocks on
|
||||
# replayed assistant tool-call messages. Strip signed Anthropic
|
||||
# blocks (neither upstream can validate Anthropic signatures) but
|
||||
# preserve the unsigned ones we synthesised from reasoning_content.
|
||||
new_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
new_content.append(b)
|
||||
continue
|
||||
if b.get("signature") or b.get("data"):
|
||||
# Anthropic-signed block — Kimi can't validate, strip
|
||||
# Anthropic-signed block — upstream can't validate, strip
|
||||
continue
|
||||
# Unsigned thinking (synthesised from reasoning_content) —
|
||||
# keep it: Kimi needs it for message-history validation.
|
||||
# keep it: the upstream needs it for message-history validation.
|
||||
new_content.append(b)
|
||||
m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
|
||||
elif _is_third_party or idx != last_assistant_idx:
|
||||
@@ -1518,6 +1718,7 @@ def build_anthropic_kwargs(
|
||||
context_length: Optional[int] = None,
|
||||
base_url: str | None = None,
|
||||
fast_mode: bool = False,
|
||||
drop_context_1m_beta: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build kwargs for anthropic.messages.create().
|
||||
|
||||
@@ -1557,7 +1758,9 @@ def build_anthropic_kwargs(
|
||||
Currently only supported on native Anthropic endpoints (not third-party
|
||||
compatible ones).
|
||||
"""
|
||||
system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
|
||||
system, anthropic_messages = convert_messages_to_anthropic(
|
||||
messages, base_url=base_url, model=model
|
||||
)
|
||||
anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
|
||||
|
||||
model = normalize_model_name(model, preserve_dots=preserve_dots)
|
||||
@@ -1663,7 +1866,7 @@ def build_anthropic_kwargs(
|
||||
# silently hides reasoning text that Hermes surfaces in its CLI. We
|
||||
# request "summarized" so the reasoning blocks stay populated — matching
|
||||
# 4.6 behavior and preserving the activity-feed UX during long tool runs.
|
||||
_is_kimi_coding = _is_kimi_coding_endpoint(base_url)
|
||||
_is_kimi_coding = _is_kimi_family_endpoint(base_url, model)
|
||||
if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
|
||||
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
|
||||
effort = str(reasoning_config.get("effort", "medium")).lower()
|
||||
@@ -1704,7 +1907,10 @@ def build_anthropic_kwargs(
|
||||
kwargs.setdefault("extra_body", {})["speed"] = "fast"
|
||||
# Build extra_headers with ALL applicable betas (the per-request
|
||||
# extra_headers override the client-level anthropic-beta header).
|
||||
betas = list(_common_betas_for_base_url(base_url))
|
||||
betas = list(_common_betas_for_base_url(
|
||||
base_url,
|
||||
drop_context_1m_beta=drop_context_1m_beta,
|
||||
))
|
||||
if is_oauth:
|
||||
betas.extend(_OAUTH_ONLY_BETAS)
|
||||
betas.append(_FAST_MODE_BETA)
|
||||
|
||||
+378
-55
@@ -5,11 +5,11 @@ session search, web extraction, vision analysis, browser vision) picks up
|
||||
the best available backend without duplicating fallback logic.
|
||||
|
||||
Resolution order for text tasks (auto mode):
|
||||
1. OpenRouter (OPENROUTER_API_KEY)
|
||||
2. Nous Portal (~/.hermes/auth.json active provider)
|
||||
3. Custom endpoint (config.yaml model.base_url + OPENAI_API_KEY)
|
||||
4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
|
||||
wrapped to look like a chat.completions client)
|
||||
1. User's main provider + main model (used regardless of provider type —
|
||||
aggregators, direct API-key providers, native Anthropic, Codex, etc.)
|
||||
2. OpenRouter (OPENROUTER_API_KEY)
|
||||
3. Nous Portal (~/.hermes/auth.json active provider)
|
||||
4. Custom endpoint (config.yaml model.base_url + OPENAI_API_KEY)
|
||||
5. Native Anthropic
|
||||
6. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, MiniMax-CN)
|
||||
7. None
|
||||
@@ -18,10 +18,16 @@ Resolution order for vision/multimodal tasks (auto mode):
|
||||
1. Selected main provider, if it is one of the supported vision backends below
|
||||
2. OpenRouter
|
||||
3. Nous Portal
|
||||
4. Codex OAuth (gpt-5.3-codex supports vision via Responses API)
|
||||
5. Native Anthropic
|
||||
6. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
|
||||
7. None
|
||||
4. Native Anthropic
|
||||
5. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
|
||||
6. None
|
||||
|
||||
Codex OAuth (ChatGPT-account auth) is intentionally NOT in either
|
||||
fallback chain: OpenAI gates this endpoint behind an undocumented,
|
||||
shifting model allow-list, so "just try Codex with a hardcoded model"
|
||||
rots on its own. Codex is used only when the user's main provider *is*
|
||||
openai-codex (Step 1 above) or when a caller explicitly requests it with
|
||||
a model (auxiliary.<task>.provider + auxiliary.<task>.model).
|
||||
|
||||
Per-task overrides are configured in config.yaml under the ``auxiliary:`` section
|
||||
(e.g. ``auxiliary.vision.provider``, ``auxiliary.compression.model``).
|
||||
@@ -41,10 +47,57 @@ import threading
|
||||
import time
|
||||
from pathlib import Path # noqa: F401 — used by test mocks
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
|
||||
from urllib.parse import urlparse, parse_qs, urlunparse
|
||||
|
||||
from openai import OpenAI
|
||||
# NOTE: `from openai import OpenAI` is deliberately NOT at module top — the
|
||||
# openai SDK pulls a large type tree (~240 ms cold, including responses/*,
|
||||
# graders/*). We expose `OpenAI` here as a thin proxy that imports the SDK on
|
||||
# first call and forwards, so:
|
||||
# (a) the 15+ in-module `OpenAI(...)` construction sites work unchanged
|
||||
# (Python's function-scope name lookup resolves `OpenAI` to the proxy
|
||||
# object bound in module globals here, without triggering any import);
|
||||
# (b) external code can still do `auxiliary_client.OpenAI` or
|
||||
# `patch("agent.auxiliary_client.OpenAI", ...)` — tests see the proxy,
|
||||
# and patch replaces the module attribute as usual;
|
||||
# (c) `OpenAI` as a type annotation resolves at runtime to the proxy class
|
||||
# (which is harmless — annotations aren't type-checked at runtime).
|
||||
# See tests/agent/test_auxiliary_client.py for patch patterns this supports.
|
||||
if TYPE_CHECKING:
|
||||
from openai import OpenAI # noqa: F401 — type hints only
|
||||
|
||||
# Holds ``openai.OpenAI`` once it has been imported; None until then.
_OPENAI_CLS_CACHE: Optional[type] = None


def _load_openai_cls() -> type:
    """Return ``openai.OpenAI``, importing the SDK on the first call only.

    The class is stored in ``_OPENAI_CLS_CACHE`` so later calls skip the
    import. An ``ImportError`` from a missing SDK propagates to the caller.
    """
    global _OPENAI_CLS_CACHE
    cached = _OPENAI_CLS_CACHE
    if cached is not None:
        return cached
    from openai import OpenAI as sdk_cls
    _OPENAI_CLS_CACHE = sdk_cls
    return sdk_cls
|
||||
|
||||
|
||||
class _OpenAIProxy:
    """Lazy stand-in for the ``openai.OpenAI`` class.

    Calling an instance constructs a real ``openai.OpenAI`` client, and
    ``isinstance(x, proxy)`` is answered against the real class. The SDK is
    loaded through ``_load_openai_cls()`` the first time either is used.
    """

    __slots__ = ()

    def __call__(self, *args, **kwargs):
        # Forward construction arguments unchanged to the real SDK class.
        openai_cls = _load_openai_cls()
        return openai_cls(*args, **kwargs)

    def __instancecheck__(self, obj):
        # isinstance() looks this hook up on the type of its second
        # argument, so defining it here covers ``isinstance(x, proxy)``.
        openai_cls = _load_openai_cls()
        return isinstance(obj, openai_cls)

    def __repr__(self):
        return "<lazy openai.OpenAI proxy>"
|
||||
|
||||
|
||||
OpenAI = _OpenAIProxy() # module-level name, resolves lazily on call/isinstance
|
||||
|
||||
from agent.credential_pool import load_pool
|
||||
from hermes_cli.config import get_hermes_home
|
||||
@@ -54,6 +107,14 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _safe_isinstance(obj: Any, maybe_type: Any) -> bool:
    """``isinstance`` that tolerates a second argument that is not a type.

    When ``maybe_type`` has been replaced by something ``isinstance`` cannot
    accept (for example a patched symbol that is not a class), the built-in
    raises ``TypeError``; this helper reports ``False`` in that case instead
    of propagating the error.
    """
    try:
        matched = isinstance(obj, maybe_type)
    except TypeError:
        matched = False
    return matched
|
||||
|
||||
|
||||
def _extract_url_query_params(url: str):
|
||||
"""Extract query params from URL, return (clean_url, default_query dict or None)."""
|
||||
parsed = urlparse(url)
|
||||
@@ -94,6 +155,10 @@ _PROVIDER_ALIASES = {
|
||||
"github-models": "copilot",
|
||||
"github-copilot-acp": "copilot-acp",
|
||||
"copilot-acp-agent": "copilot-acp",
|
||||
"tencent": "tencent-tokenhub",
|
||||
"tokenhub": "tencent-tokenhub",
|
||||
"tencent-cloud": "tencent-tokenhub",
|
||||
"tencentmaas": "tencent-tokenhub",
|
||||
}
|
||||
|
||||
|
||||
@@ -159,6 +224,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||
"kimi-coding-cn": "kimi-k2-turbo-preview",
|
||||
"gmi": "google/gemini-3.1-flash-lite-preview",
|
||||
"minimax": "MiniMax-M2.7",
|
||||
"minimax-oauth": "MiniMax-M2.7-highspeed",
|
||||
"minimax-cn": "MiniMax-M2.7",
|
||||
"anthropic": "claude-haiku-4-5-20251001",
|
||||
"ai-gateway": "google/gemini-3-flash",
|
||||
@@ -166,6 +232,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||
"opencode-go": "glm-5",
|
||||
"kilocode": "google/gemini-3-flash-preview",
|
||||
"ollama-cloud": "nemotron-3-nano:30b",
|
||||
"tencent-tokenhub": "hy3-preview",
|
||||
}
|
||||
|
||||
# Vision-specific model overrides for direct providers.
|
||||
@@ -177,6 +244,21 @@ _PROVIDER_VISION_MODELS: Dict[str, str] = {
|
||||
"zai": "glm-5v-turbo",
|
||||
}
|
||||
|
||||
# Main providers whose endpoint rejects image input, even though vision
# models exist elsewhere in the same vendor's ecosystem. When
# `auxiliary.vision.provider: auto` finds one of these configured as the main
# provider it has to go straight to the aggregator chain; handing back a
# client for the main provider would 404 on every vision request.
#
# kimi-coding / kimi-coding-cn: the Kimi Coding Plan is served from
#   api.kimi.com/coding (Anthropic Messages wire), which Kimi's own docs
#   describe as having no image_in capability. Vision is offered on the
#   separate Kimi Platform (api.moonshot.ai, OpenAI-wire, pay-as-you-go).
#   See #17076.
_PROVIDERS_WITHOUT_VISION: frozenset = frozenset(
    (
        "kimi-coding",
        "kimi-coding-cn",
    )
)
|
||||
|
||||
# OpenRouter app attribution headers
|
||||
_OR_HEADERS = {
|
||||
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
|
||||
@@ -209,12 +291,14 @@ _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
|
||||
_ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
|
||||
_AUTH_JSON_PATH = get_hermes_home() / "auth.json"
|
||||
|
||||
# Codex fallback: uses the Responses API (the only endpoint the Codex
|
||||
# OAuth token can access) with a fast model for auxiliary tasks.
|
||||
# ChatGPT-backed Codex accounts currently reject gpt-5.3-codex for these
|
||||
# auxiliary flows, while gpt-5.2-codex remains broadly available and supports
|
||||
# vision via Responses.
|
||||
_CODEX_AUX_MODEL = "gpt-5.2-codex"
|
||||
# Codex OAuth endpoint used when a caller explicitly requests
|
||||
# provider="openai-codex". There is deliberately no hardcoded default
|
||||
# model: the set of models OpenAI accepts on this endpoint for
|
||||
# ChatGPT-account auth is an undocumented, shifting allow-list, and
|
||||
# pinning one here has drifted silently twice (gpt-5.3-codex → gpt-5.2-codex
|
||||
# → gpt-5.4 over 6 weeks in early 2026). Callers must pass the model
|
||||
# they want explicitly (from config.yaml model.model, auxiliary.<task>.model,
|
||||
# or the user's active Codex model selection).
|
||||
_CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"
|
||||
|
||||
|
||||
@@ -271,6 +355,13 @@ def _to_openai_base_url(base_url: str) -> str:
|
||||
rewritten = url[: -len("/anthropic")] + "/v1"
|
||||
logger.debug("Auxiliary client: rewrote base URL %s → %s", url, rewritten)
|
||||
return rewritten
|
||||
if "api.kimi.com" in url and url.endswith("/coding"):
|
||||
# Kimi Code uses /coding/v1/messages for Anthropic SDK (appends /v1/messages)
|
||||
# but /coding/v1/chat/completions for OpenAI SDK (appends /chat/completions)
|
||||
# Without /v1 here, OpenAI SDK hits /coding/chat/completions — a 404.
|
||||
rewritten = url + "/v1"
|
||||
logger.debug("Auxiliary client: rewrote Kimi base URL %s → %s", url, rewritten)
|
||||
return rewritten
|
||||
return url
|
||||
|
||||
|
||||
@@ -405,6 +496,33 @@ class _CodexCompletionsAdapter:
|
||||
# Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
|
||||
# support max_output_tokens or temperature — omit to avoid 400 errors.
|
||||
|
||||
# Translate extra_body.reasoning (chat.completions shape) into the
|
||||
# Responses API's top-level reasoning + include fields. Mirrors
|
||||
# agent/transports/codex.py::build_kwargs() so auxiliary callers
|
||||
# that configure reasoning via auxiliary.<task>.extra_body get the
|
||||
# same behavior as the main agent's Codex transport.
|
||||
extra_body = kwargs.get("extra_body") or {}
|
||||
if isinstance(extra_body, dict):
|
||||
reasoning_cfg = extra_body.get("reasoning")
|
||||
if isinstance(reasoning_cfg, dict):
|
||||
if reasoning_cfg.get("enabled") is False:
|
||||
# Reasoning explicitly disabled — do not set reasoning
|
||||
# or include. The Codex backend still thinks by
|
||||
# default, but we honor the caller's intent where the
|
||||
# API allows it.
|
||||
pass
|
||||
else:
|
||||
effort = reasoning_cfg.get("effort", "medium")
|
||||
# Codex backend rejects "minimal"; clamp to "low" to
|
||||
# match the main-agent Codex transport behavior.
|
||||
if effort == "minimal":
|
||||
effort = "low"
|
||||
resp_kwargs["reasoning"] = {
|
||||
"effort": effort,
|
||||
"summary": "auto",
|
||||
}
|
||||
resp_kwargs["include"] = ["reasoning.encrypted_content"]
|
||||
|
||||
# Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
|
||||
tools = kwargs.get("tools")
|
||||
if tools:
|
||||
@@ -714,6 +832,116 @@ class AsyncAnthropicAuxiliaryClient:
|
||||
self.base_url = sync_wrapper.base_url
|
||||
|
||||
|
||||
def _endpoint_speaks_anthropic_messages(base_url: str) -> bool:
    """Decide whether ``base_url`` is an Anthropic Messages endpoint.

    Returns True when the endpoint should be spoken to with the Anthropic
    Messages protocol rather than OpenAI ``chat.completions``. The original
    author notes this mirrors
    ``hermes_cli.runtime_provider._detect_api_mode_for_url`` so that the
    auxiliary client and the main agent pick the same transport.

    Recognized endpoints:

    - any URL whose path ends in ``/anthropic`` (MiniMax, Zhipu GLM, LiteLLM
      proxies and other Anthropic-compatible gateways);
    - host ``api.anthropic.com`` (native Anthropic);
    - host ``api.kimi.com`` with ``/coding`` in the URL (Kimi Coding Plan,
      where ``chat.completions`` returns 404 on Anthropic-only model aliases
      such as ``kimi-for-coding``).

    An empty or ``None`` ``base_url`` yields False.
    """
    # Compare case-insensitively and ignore surrounding blanks / trailing "/".
    candidate = (base_url or "").strip().lower().rstrip("/")
    if not candidate:
        return False

    # Suffix match is decided before any hostname parsing is attempted.
    if candidate.endswith("/anthropic"):
        return True

    host = base_url_hostname(candidate)
    return host == "api.anthropic.com" or (
        host == "api.kimi.com" and "/coding" in candidate
    )
|
||||
|
||||
|
||||
def _maybe_wrap_anthropic(
    client_obj: Any,
    model: str,
    api_key: str,
    base_url: str,
    api_mode: Optional[str] = None,
) -> Any:
    """Swap a plain OpenAI client for ``AnthropicAuxiliaryClient`` if needed.

    Intended as the one place where the auxiliary client's transport is
    corrected: callers build an OpenAI-wire client first and pass it through
    here so that endpoints which really speak Anthropic Messages (Kimi Coding
    Plan, ``custom`` endpoints, ``/anthropic`` gateways) end up on the right
    wire format no matter which code path constructed the client.

    Args:
        client_obj: The client built by the caller.
        model: Model name handed to the Anthropic wrapper.
        api_key: Credential used to build the Anthropic SDK client.
        base_url: Endpoint URL; used both for detection and for the new client.
        api_mode: Optional explicit transport. Any non-empty value other than
            ``"anthropic_messages"`` disables wrapping.

    Returns:
        ``client_obj`` itself when it is already an Anthropic / Codex /
        Gemini-native / Copilot-ACP wrapper, when ``api_mode`` names a
        non-Anthropic transport, when the URL is not an Anthropic endpoint,
        when ``agent.anthropic_adapter`` cannot be imported, or when building
        the Anthropic client fails. Otherwise a new
        ``AnthropicAuxiliaryClient``.
    """
    # Wrappers from this module: never double-wrap or re-dispatch them.
    if _safe_isinstance(client_obj, AnthropicAuxiliaryClient) or _safe_isinstance(
        client_obj, CodexAuxiliaryClient
    ):
        return client_obj

    # Adapters living in optional modules; a failed import just means the
    # client cannot be one of them.
    try:
        from agent.gemini_native_adapter import GeminiNativeClient as gemini_cls

        if _safe_isinstance(client_obj, gemini_cls):
            return client_obj
    except ImportError:
        pass
    try:
        from agent.copilot_acp_client import CopilotACPClient as copilot_cls

        if _safe_isinstance(client_obj, copilot_cls):
            return client_obj
    except ImportError:
        pass

    if api_mode:
        # An explicit api_mode always beats the URL heuristic.
        if api_mode != "anthropic_messages":
            return client_obj
        wants_anthropic = True
    else:
        wants_anthropic = _endpoint_speaks_anthropic_messages(base_url)

    if not wants_anthropic:
        return client_obj

    try:
        from agent.anthropic_adapter import build_anthropic_client
    except ImportError:
        logger.warning(
            "Endpoint %s speaks Anthropic Messages but the anthropic SDK is "
            "not installed — falling back to OpenAI-wire (will likely 404).",
            base_url,
        )
        return client_obj

    try:
        anthropic_sdk_client = build_anthropic_client(api_key, base_url)
    except Exception as build_error:
        # Best effort: keep the caller's OpenAI-wire client rather than fail.
        logger.warning(
            "Failed to build Anthropic client for %s (%s) — falling back to "
            "OpenAI-wire client.", base_url, build_error,
        )
        return client_obj

    # Only the first 60 characters of the URL go into the debug log.
    url_preview = base_url[:60] if base_url else ""
    mode_label = api_mode or "auto-detected"
    logger.debug(
        "Auxiliary transport: wrapping client in AnthropicAuxiliaryClient "
        "(model=%s, base_url=%s, api_mode=%s)",
        model, url_preview, mode_label,
    )
    return AnthropicAuxiliaryClient(
        anthropic_sdk_client, model, api_key, base_url, is_oauth=False,
    )
|
||||
|
||||
|
||||
def _read_nous_auth() -> Optional[dict]:
|
||||
"""Read and validate ~/.hermes/auth.json for an active Nous provider.
|
||||
|
||||
@@ -865,9 +1093,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
if not api_key:
|
||||
continue
|
||||
|
||||
base_url = _to_openai_base_url(
|
||||
_pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
|
||||
)
|
||||
raw_base_url = _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
|
||||
base_url = _to_openai_base_url(raw_base_url)
|
||||
model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
|
||||
if model is None:
|
||||
continue # skip provider if we don't know a valid aux model
|
||||
@@ -884,16 +1111,17 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
return OpenAI(api_key=api_key, base_url=base_url, **extra), model
|
||||
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
|
||||
_client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
|
||||
return _client, model
|
||||
|
||||
creds = resolve_api_key_provider_credentials(provider_id)
|
||||
api_key = str(creds.get("api_key", "")).strip()
|
||||
if not api_key:
|
||||
continue
|
||||
|
||||
base_url = _to_openai_base_url(
|
||||
str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
|
||||
)
|
||||
raw_base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
|
||||
base_url = _to_openai_base_url(raw_base_url)
|
||||
model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
|
||||
if model is None:
|
||||
continue # skip provider if we don't know a valid aux model
|
||||
@@ -910,7 +1138,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
return OpenAI(api_key=api_key, base_url=base_url, **extra), model
|
||||
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
|
||||
_client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
|
||||
return _client, model
|
||||
|
||||
return None, None
|
||||
|
||||
@@ -1194,10 +1424,32 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
||||
AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
|
||||
model,
|
||||
)
|
||||
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
|
||||
# URL-based anthropic detection for custom endpoints that didn't set
|
||||
# api_mode explicitly (e.g. kimi.com/coding reached via custom config).
|
||||
_fallback_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
|
||||
_fallback_client = _maybe_wrap_anthropic(
|
||||
_fallback_client, model, custom_key, custom_base, custom_mode,
|
||||
)
|
||||
return _fallback_client, model
|
||||
|
||||
|
||||
def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
|
||||
def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Build a CodexAuxiliaryClient for an explicitly-requested model.
|
||||
|
||||
There is no auto-selection of the Codex model: the ChatGPT-account
|
||||
Codex endpoint's accepted model list is an undocumented, drifting
|
||||
allow-list, so any hardcoded default we pick goes stale. The caller
|
||||
is responsible for passing the model (e.g. from the user's own
|
||||
``model.model`` or ``auxiliary.<task>.model`` config).
|
||||
|
||||
Returns (None, None) when no Codex OAuth token is available.
|
||||
"""
|
||||
if not model:
|
||||
logger.warning(
|
||||
"Auxiliary client: openai-codex requested without a model; "
|
||||
"pass model explicitly (auxiliary.<task>.model in config.yaml)."
|
||||
)
|
||||
return None, None
|
||||
pool_present, entry = _select_pool_entry("openai-codex")
|
||||
if pool_present:
|
||||
codex_token = _pool_runtime_api_key(entry)
|
||||
@@ -1213,13 +1465,13 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
|
||||
if not codex_token:
|
||||
return None, None
|
||||
base_url = _CODEX_AUX_BASE_URL
|
||||
logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
|
||||
logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", model)
|
||||
real_client = OpenAI(
|
||||
api_key=codex_token,
|
||||
base_url=base_url,
|
||||
default_headers=_codex_cloudflare_headers(codex_token),
|
||||
)
|
||||
return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
|
||||
return CodexAuxiliaryClient(real_client, model), model
|
||||
|
||||
|
||||
def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
|
||||
@@ -1274,7 +1526,6 @@ _AUTO_PROVIDER_LABELS = {
|
||||
"_try_openrouter": "openrouter",
|
||||
"_try_nous": "nous",
|
||||
"_try_custom_endpoint": "local/custom",
|
||||
"_try_codex": "openai-codex",
|
||||
"_resolve_api_key_provider": "api-key",
|
||||
}
|
||||
|
||||
@@ -1301,12 +1552,18 @@ def _get_provider_chain() -> List[tuple]:
|
||||
|
||||
Built at call time (not module level) so that test patches
|
||||
on the ``_try_*`` functions are picked up correctly.
|
||||
|
||||
NOTE: ``openai-codex`` is deliberately NOT in this chain. The
|
||||
ChatGPT-account Codex endpoint only accepts a shifting, undocumented
|
||||
allow-list of model IDs, so falling back to it with a guessed model
|
||||
fails more often than not. Codex is used only when the user's main
|
||||
provider *is* openai-codex (see Step 1 of ``_resolve_auto``) or when
|
||||
a caller explicitly requests it with a model.
|
||||
"""
|
||||
return [
|
||||
("openrouter", _try_openrouter),
|
||||
("nous", _try_nous),
|
||||
("local/custom", _try_custom_endpoint),
|
||||
("openai-codex", _try_codex),
|
||||
("api-key", _resolve_api_key_provider),
|
||||
]
|
||||
|
||||
@@ -1745,8 +2002,20 @@ def resolve_provider_client(
|
||||
return True
|
||||
return False
|
||||
|
||||
def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
|
||||
"""Wrap a plain OpenAI client in CodexAuxiliaryClient if Responses API is needed."""
|
||||
def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = "",
|
||||
api_key_str: str = ""):
|
||||
"""Wrap a plain OpenAI client in the correct transport adapter.
|
||||
|
||||
Handles two cases:
|
||||
- ``CodexAuxiliaryClient`` when the endpoint needs the Responses API
|
||||
(explicit ``api_mode=codex_responses`` or api.openai.com + codex
|
||||
model name).
|
||||
- ``AnthropicAuxiliaryClient`` when the endpoint speaks Anthropic
|
||||
Messages (explicit ``api_mode=anthropic_messages``, any ``/anthropic``
|
||||
suffix, ``api.kimi.com/coding``, or ``api.anthropic.com``).
|
||||
|
||||
Clients that are already specialized wrappers pass through unchanged.
|
||||
"""
|
||||
if _needs_codex_wrap(client_obj, base_url_str, final_model_str):
|
||||
logger.debug(
|
||||
"resolve_provider_client: wrapping client in CodexAuxiliaryClient "
|
||||
@@ -1754,7 +2023,11 @@ def resolve_provider_client(
|
||||
api_mode or "auto-detected", final_model_str,
|
||||
base_url_str[:60] if base_url_str else "")
|
||||
return CodexAuxiliaryClient(client_obj, final_model_str)
|
||||
return client_obj
|
||||
# Anthropic-wire endpoints: rewrap plain OpenAI clients so
|
||||
# chat.completions.create() is translated to /v1/messages.
|
||||
return _maybe_wrap_anthropic(
|
||||
client_obj, final_model_str, api_key_str, base_url_str, api_mode,
|
||||
)
|
||||
|
||||
# ── Auto: try all providers in priority order ────────────────────
|
||||
if provider == "auto":
|
||||
@@ -1806,6 +2079,13 @@ def resolve_provider_client(
|
||||
|
||||
# ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
|
||||
if provider == "openai-codex":
|
||||
if not model:
|
||||
logger.warning(
|
||||
"resolve_provider_client: openai-codex requested without a "
|
||||
"model; pass model explicitly (e.g. model.model in config.yaml "
|
||||
"or auxiliary.<task>.model for per-task aux routing)."
|
||||
)
|
||||
return None, None
|
||||
if raw_codex:
|
||||
# Return the raw OpenAI client for callers that need direct
|
||||
# access to responses.stream() (e.g., the main agent loop).
|
||||
@@ -1814,7 +2094,7 @@ def resolve_provider_client(
|
||||
logger.warning("resolve_provider_client: openai-codex requested "
|
||||
"but no Codex OAuth token found (run: hermes model)")
|
||||
return None, None
|
||||
final_model = _normalize_resolved_model(model or _CODEX_AUX_MODEL, provider)
|
||||
final_model = _normalize_resolved_model(model, provider)
|
||||
raw_client = OpenAI(
|
||||
api_key=codex_token,
|
||||
base_url=_CODEX_AUX_BASE_URL,
|
||||
@@ -1822,7 +2102,7 @@ def resolve_provider_client(
|
||||
)
|
||||
return (raw_client, final_model)
|
||||
# Standard path: wrap in CodexAuxiliaryClient adapter
|
||||
client, default = _try_codex()
|
||||
client, default = _build_codex_client(model)
|
||||
if client is None:
|
||||
logger.warning("resolve_provider_client: openai-codex requested "
|
||||
"but no Codex OAuth token found (run: hermes model)")
|
||||
@@ -1834,7 +2114,7 @@ def resolve_provider_client(
|
||||
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
|
||||
if provider == "custom":
|
||||
if explicit_base_url:
|
||||
custom_base = explicit_base_url.strip()
|
||||
custom_base = _to_openai_base_url(explicit_base_url).strip()
|
||||
custom_key = (
|
||||
(explicit_api_key or "").strip()
|
||||
or os.getenv("OPENAI_API_KEY", "").strip()
|
||||
@@ -1847,7 +2127,7 @@ def resolve_provider_client(
|
||||
)
|
||||
return None, None
|
||||
final_model = _normalize_resolved_model(
|
||||
model or _read_main_model() or "gpt-4o-mini",
|
||||
model or (main_runtime.get("model") if main_runtime else None) or "gpt-4o-mini",
|
||||
provider,
|
||||
)
|
||||
extra = {}
|
||||
@@ -1862,17 +2142,18 @@ def resolve_provider_client(
|
||||
is_agent_turn=True, is_vision=is_vision
|
||||
)
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
client = _wrap_if_needed(client, final_model, custom_base, custom_key)
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
# Try custom first, then codex, then API-key providers
|
||||
for try_fn in (_try_custom_endpoint, _try_codex,
|
||||
_resolve_api_key_provider):
|
||||
# Try custom first, then API-key providers (Codex excluded here:
|
||||
# falling through to Codex with no model is a stale-constant trap).
|
||||
for try_fn in (_try_custom_endpoint, _resolve_api_key_provider):
|
||||
client, default = try_fn()
|
||||
if client is not None:
|
||||
final_model = _normalize_resolved_model(model or default, provider)
|
||||
_cbase = str(getattr(client, "base_url", "") or "")
|
||||
client = _wrap_if_needed(client, final_model, _cbase)
|
||||
_ckey = str(getattr(client, "api_key", "") or "")
|
||||
client = _wrap_if_needed(client, final_model, _cbase, _ckey)
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
logger.warning("resolve_provider_client: custom/main requested "
|
||||
@@ -1895,10 +2176,24 @@ def resolve_provider_client(
|
||||
entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()
|
||||
if custom_base:
|
||||
final_model = _normalize_resolved_model(
|
||||
model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
|
||||
model
|
||||
or custom_entry.get("model")
|
||||
or (main_runtime.get("model") if main_runtime else None)
|
||||
or _read_main_model()
|
||||
or "gpt-4o-mini",
|
||||
provider,
|
||||
)
|
||||
_clean_base2, _dq2 = _extract_url_query_params(custom_base)
|
||||
# anthropic_messages talks to the /anthropic surface directly;
|
||||
# OpenAI-wire paths (chat_completions / codex_responses) need the
|
||||
# /v1 equivalent. Rewrite only on the OpenAI-wire path so the
|
||||
# Anthropic fallback SDK still sees the original URL.
|
||||
if entry_api_mode == "anthropic_messages":
|
||||
openai_base = custom_base
|
||||
raw_base_for_wrap = custom_base
|
||||
else:
|
||||
openai_base = _to_openai_base_url(custom_base)
|
||||
raw_base_for_wrap = custom_base
|
||||
_clean_base2, _dq2 = _extract_url_query_params(openai_base)
|
||||
_extra2 = {"default_query": _dq2} if _dq2 else {}
|
||||
logger.debug(
|
||||
"resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
|
||||
@@ -1917,7 +2212,12 @@ def resolve_provider_client(
|
||||
"installed — falling back to OpenAI-wire.",
|
||||
provider,
|
||||
)
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
|
||||
# Fallback went OpenAI-wire after all — redo the query
|
||||
# extraction against the rewritten /v1 URL.
|
||||
_fallback_base = _to_openai_base_url(custom_base)
|
||||
_fb_clean, _fb_dq = _extract_url_query_params(_fallback_base)
|
||||
_fb_extra = {"default_query": _fb_dq} if _fb_dq else {}
|
||||
client = OpenAI(api_key=custom_key, base_url=_fb_clean, **_fb_extra)
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
sync_anthropic = AnthropicAuxiliaryClient(
|
||||
@@ -1936,7 +2236,7 @@ def resolve_provider_client(
|
||||
):
|
||||
client = CodexAuxiliaryClient(client, final_model)
|
||||
else:
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
client = _wrap_if_needed(client, final_model, raw_base_for_wrap, custom_key)
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
logger.warning(
|
||||
@@ -1982,9 +2282,8 @@ def resolve_provider_client(
|
||||
provider, ", ".join(tried_sources))
|
||||
return None, None
|
||||
|
||||
base_url = _to_openai_base_url(
|
||||
str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
|
||||
)
|
||||
raw_base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
|
||||
base_url = _to_openai_base_url(raw_base_url)
|
||||
|
||||
default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
|
||||
final_model = _normalize_resolved_model(model or default_model, provider)
|
||||
@@ -2029,8 +2328,11 @@ def resolve_provider_client(
|
||||
|
||||
# Honor api_mode for any API-key provider (e.g. direct OpenAI with
|
||||
# codex-family models). The copilot-specific wrapping above handles
|
||||
# copilot; this covers the general case (#6800).
|
||||
client = _wrap_if_needed(client, final_model, base_url)
|
||||
# copilot; this covers the general case (#6800). Also rewraps
|
||||
# Anthropic-wire endpoints (Kimi Coding Plan api.kimi.com/coding,
|
||||
# /anthropic-suffixed gateways) so named providers like kimi-coding
|
||||
# land on the right transport without needing per-provider branches.
|
||||
client = _wrap_if_needed(client, final_model, raw_base_url, api_key)
|
||||
|
||||
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
@@ -2038,7 +2340,12 @@ def resolve_provider_client(
|
||||
|
||||
if pconfig.auth_type == "external_process":
|
||||
creds = resolve_external_process_provider_credentials(provider)
|
||||
final_model = _normalize_resolved_model(model or _read_main_model(), provider)
|
||||
final_model = _normalize_resolved_model(
|
||||
model
|
||||
or (main_runtime.get("model") if main_runtime else None)
|
||||
or _read_main_model(),
|
||||
provider,
|
||||
)
|
||||
if provider == "copilot-acp":
|
||||
api_key = str(creds.get("api_key", "")).strip()
|
||||
base_url = str(creds.get("base_url", "")).strip()
|
||||
@@ -2188,7 +2495,10 @@ def _resolve_strict_vision_backend(
|
||||
if provider == "nous":
|
||||
return _try_nous(vision=True)
|
||||
if provider == "openai-codex":
|
||||
return _try_codex()
|
||||
# Route through resolve_provider_client so the caller's explicit
|
||||
# model is used. There is no safe default Codex model (shifting
|
||||
# allow-list); callers must specify via auxiliary.<task>.model.
|
||||
return resolve_provider_client("openai-codex", model, is_vision=True)
|
||||
if provider == "anthropic":
|
||||
return _try_anthropic()
|
||||
if provider == "custom":
|
||||
@@ -2293,6 +2603,19 @@ def resolve_vision_provider_client(
|
||||
main_provider, default_model or resolved_model or main_model,
|
||||
)
|
||||
return _finalize(main_provider, sync_client, default_model)
|
||||
elif main_provider in _PROVIDERS_WITHOUT_VISION:
|
||||
# Kimi Coding Plan's /coding endpoint (Anthropic Messages wire)
|
||||
# does not accept image input — Kimi's own docs say "Current
|
||||
# model does not support image input, switch to a model with
|
||||
# image_in capability" and vision lives on the separate Kimi
|
||||
# Platform (api.moonshot.ai). Skip the main provider and fall
|
||||
# through to the aggregator chain instead of returning a
|
||||
# client that will 404 on every vision request (#17076).
|
||||
logger.debug(
|
||||
"Vision auto-detect: skipping main provider %s (no "
|
||||
"vision support) — falling through to aggregator chain",
|
||||
main_provider,
|
||||
)
|
||||
else:
|
||||
rpc_client, rpc_model = resolve_provider_client(
|
||||
main_provider, vision_model,
|
||||
@@ -2774,7 +3097,7 @@ def _get_task_extra_body(task: str) -> Dict[str, Any]:
|
||||
|
||||
# Providers that use Anthropic-compatible endpoints (via OpenAI SDK wrapper).
|
||||
# Their image content blocks must use Anthropic format, not OpenAI format.
|
||||
_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-cn"})
|
||||
_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-oauth", "minimax-cn"})
|
||||
|
||||
|
||||
def _is_anthropic_compat_endpoint(provider: str, base_url: str) -> bool:
|
||||
|
||||
@@ -291,14 +291,52 @@ def has_aws_credentials(env: Optional[Dict[str, str]] = None) -> bool:
|
||||
def resolve_bedrock_region(env: Optional[Dict[str, str]] = None) -> str:
    """Resolve the AWS region for Bedrock API calls.

    Priority:
      1. ``AWS_REGION`` in ``env``
      2. ``AWS_DEFAULT_REGION`` in ``env``
      3. the region botocore reports for the current session (e.g. from
         ``~/.aws/config`` or an SSO profile)
      4. ``us-east-1`` (hard fallback)

    Step 3 matters for users who configure their region through a named
    profile rather than environment variables: without it, live model
    discovery would always run against ``us-east-1`` regardless of the
    user's actual region.

    Args:
        env: Mapping to read the two environment variables from. Defaults to
            ``os.environ`` when ``None``.

    Returns:
        The resolved region name; never empty.
    """
    env = env if env is not None else os.environ

    # Only the explicit variables are considered here. The hard fallback must
    # NOT be part of this expression, otherwise it is always truthy and the
    # botocore lookup below can never run.
    explicit = (
        env.get("AWS_REGION", "").strip()
        or env.get("AWS_DEFAULT_REGION", "").strip()
    )
    if explicit:
        return explicit

    try:
        # Function-scope import: botocore is optional for this lookup.
        import botocore.session

        region = botocore.session.get_session().get_config_variable("region")
        if region:
            return region
    except Exception:
        # Deliberate best effort: a missing botocore or a broken AWS profile
        # must not prevent falling back to the default region.
        pass

    return "us-east-1"
|
||||
|
||||
|
||||
def bedrock_model_ids_or_none() -> Optional[List[str]]:
    """Return live-discovered Bedrock model IDs, or ``None``.

    Runs ``discover_bedrock_models`` for the region chosen by
    ``resolve_bedrock_region`` and collects the ``"id"`` of every entry.
    ``None`` is returned when discovery raises, when an entry lacks an
    ``"id"``, or when discovery yields nothing; callers are expected to fall
    back to the static curated list in that case.

    The helper exists so the discover → extract-ids → fallback sequence is
    written once instead of being repeated in ``provider_model_ids`` and in
    sections 2 and 3 of ``list_authenticated_providers``.
    """
    try:
        models = discover_bedrock_models(resolve_bedrock_region())
        # Extraction stays inside the try so a malformed entry also maps to None.
        model_ids = [model["id"] for model in models] if models else None
    except Exception:
        model_ids = None
    return model_ids
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -608,7 +608,7 @@ class CopilotACPClient:
|
||||
end = start + limit if isinstance(limit, int) and limit > 0 else None
|
||||
content = "".join(lines[start:end])
|
||||
if content:
|
||||
content = redact_sensitive_text(content)
|
||||
content = redact_sensitive_text(content, force=True)
|
||||
response = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": message_id,
|
||||
|
||||
+118
-1
@@ -7,7 +7,6 @@ import random
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, fields, replace
|
||||
from datetime import datetime
|
||||
@@ -456,6 +455,70 @@ class CredentialPool:
|
||||
logger.debug("Failed to sync from credentials file: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_codex_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
|
||||
"""Sync a Codex device_code pool entry from auth.json if tokens differ.
|
||||
|
||||
When a Codex OAuth access token expires (or the ChatGPT account hits
|
||||
its 5h/weekly quota), the pool entry gets marked ``STATUS_EXHAUSTED``
|
||||
with a ``last_error_reset_at`` that can be many hours in the future.
|
||||
Meanwhile the user may run ``hermes model`` / ``hermes auth`` which
|
||||
performs a fresh device-code login and writes new tokens to
|
||||
``auth.json`` under ``_auth_store_lock``. Without this sync the pool
|
||||
entry stays frozen until ``last_error_reset_at`` elapses — even
|
||||
though fresh credentials are sitting on disk — and every request
|
||||
fails with "no available entries (all exhausted or empty)".
|
||||
|
||||
Mirrors the Nous/Anthropic resync paths above. Only applies to
|
||||
device_code-sourced entries; env/API-key-sourced entries have no
|
||||
auth.json shadow to sync from.
|
||||
"""
|
||||
if self.provider != "openai-codex" or entry.source != "device_code":
|
||||
return entry
|
||||
try:
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
state = _load_provider_state(auth_store, "openai-codex")
|
||||
if not isinstance(state, dict):
|
||||
return entry
|
||||
tokens = state.get("tokens")
|
||||
if not isinstance(tokens, dict):
|
||||
return entry
|
||||
store_access = tokens.get("access_token", "")
|
||||
store_refresh = tokens.get("refresh_token", "")
|
||||
# Adopt auth.json tokens when either side differs. Codex refresh
|
||||
# tokens are single-use too, so a fresh refresh_token from
|
||||
# another process means our entry's pair is consumed/stale.
|
||||
entry_access = entry.access_token or ""
|
||||
entry_refresh = entry.refresh_token or ""
|
||||
if store_access and (
|
||||
store_access != entry_access
|
||||
or (store_refresh and store_refresh != entry_refresh)
|
||||
):
|
||||
logger.debug(
|
||||
"Pool entry %s: syncing Codex tokens from auth.json "
|
||||
"(refreshed by another process)",
|
||||
entry.id,
|
||||
)
|
||||
field_updates: Dict[str, Any] = {
|
||||
"access_token": store_access,
|
||||
"refresh_token": store_refresh or entry.refresh_token,
|
||||
"last_status": None,
|
||||
"last_status_at": None,
|
||||
"last_error_code": None,
|
||||
"last_error_reason": None,
|
||||
"last_error_message": None,
|
||||
"last_error_reset_at": None,
|
||||
}
|
||||
if state.get("last_refresh"):
|
||||
field_updates["last_refresh"] = state["last_refresh"]
|
||||
updated = replace(entry, **field_updates)
|
||||
self._replace_entry(entry, updated)
|
||||
self._persist()
|
||||
return updated
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to sync Codex entry from auth.json: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
|
||||
"""Sync a Nous pool entry from auth.json if tokens differ.
|
||||
|
||||
@@ -788,6 +851,18 @@ class CredentialPool:
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
# For openai-codex entries, same pattern: the user may have
|
||||
# re-authed via `hermes model` / `hermes auth` after a 429/401,
|
||||
# leaving fresh tokens on disk while the pool entry is still
|
||||
# frozen behind last_error_reset_at (can be hours in the
|
||||
# future for ChatGPT weekly windows).
|
||||
if (self.provider == "openai-codex"
|
||||
and entry.source == "device_code"
|
||||
and entry.last_status == STATUS_EXHAUSTED):
|
||||
synced = self._sync_codex_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
if entry.last_status == STATUS_EXHAUSTED:
|
||||
exhausted_until = _exhausted_until(entry)
|
||||
if exhausted_until is not None and now < exhausted_until:
|
||||
@@ -1224,6 +1299,48 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
except Exception as exc:
|
||||
logger.debug("Qwen OAuth token seed failed: %s", exc)
|
||||
|
||||
elif provider == "minimax-oauth":
|
||||
# MiniMax OAuth tokens live in ~/.hermes/auth.json providers.minimax-oauth.
|
||||
# Seed the pool so `/auth list` reflects the logged-in state and the
|
||||
# standard `hermes auth remove minimax-oauth <N>` flow works.
|
||||
# Use refresh_if_expiring=False equivalent: resolve_minimax_oauth_runtime_credentials
|
||||
# always refreshes on expiry, so instead read raw state here to avoid
|
||||
# surprise network calls during provider discovery.
|
||||
try:
|
||||
from hermes_cli.auth import get_provider_auth_state
|
||||
state = get_provider_auth_state("minimax-oauth")
|
||||
if state and state.get("access_token"):
|
||||
source_name = "oauth"
|
||||
if not _is_suppressed(provider, source_name):
|
||||
active_sources.add(source_name)
|
||||
expires_at_ms = None
|
||||
try:
|
||||
from datetime import datetime as _dt
|
||||
raw = state.get("expires_at", "")
|
||||
if raw:
|
||||
expires_at_ms = int(_dt.fromisoformat(raw).timestamp() * 1000)
|
||||
except Exception:
|
||||
expires_at_ms = None
|
||||
base_url = str(state.get("inference_base_url", "") or "").rstrip("/")
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
provider,
|
||||
source_name,
|
||||
{
|
||||
"source": source_name,
|
||||
"auth_type": AUTH_TYPE_OAUTH,
|
||||
"access_token": state["access_token"],
|
||||
"refresh_token": state.get("refresh_token"),
|
||||
"expires_at_ms": expires_at_ms,
|
||||
"base_url": base_url,
|
||||
"label": state.get("label", "") or label_from_token(
|
||||
state.get("access_token", ""), source_name
|
||||
),
|
||||
},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("MiniMax OAuth token seed failed: %s", exc)
|
||||
|
||||
elif provider == "openai-codex":
|
||||
# Respect user suppression — `hermes auth remove openai-codex` marks
|
||||
# the device_code source as suppressed so it won't be re-seeded from
|
||||
|
||||
@@ -47,7 +47,6 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
|
||||
@@ -253,6 +252,19 @@ def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
|
||||
return result
|
||||
|
||||
|
||||
def _remove_minimax_oauth(provider: str, removed) -> RemovalResult:
    """Clear MiniMax OAuth state stored under auth.json providers.minimax-oauth.

    Follows the same approach as the Nous removal step: a single-source OAuth
    state with refresh tokens. Suppressing the `oauth` source is what keeps
    the pool reseed path (_seed_from_singletons) from immediately undoing
    the removal.

    ``removed`` is accepted for signature parity with the other removal
    steps and is not consulted here.
    """
    outcome = RemovalResult()
    store_cleared = _clear_auth_store_provider(provider)
    if store_cleared:
        outcome.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
    return outcome
|
||||
|
||||
|
||||
def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
|
||||
"""Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
|
||||
|
||||
@@ -390,6 +402,11 @@ def _register_all_sources() -> None:
|
||||
remove_fn=_remove_qwen_cli,
|
||||
description="~/.qwen/oauth_creds.json",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="minimax-oauth", source_id="oauth",
|
||||
remove_fn=_remove_minimax_oauth,
|
||||
description="auth.json providers.minimax-oauth",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="*", source_id="config:",
|
||||
match_fn=lambda src: src.startswith("config:") or src == "model_config",
|
||||
|
||||
+1315
File diff suppressed because it is too large
Load Diff
@@ -54,6 +54,7 @@ class FailoverReason(enum.Enum):
|
||||
# Provider-specific
|
||||
thinking_signature = "thinking_signature" # Anthropic thinking block sig invalid
|
||||
long_context_tier = "long_context_tier" # Anthropic "extra usage" tier gate
|
||||
oauth_long_context_beta_forbidden = "oauth_long_context_beta_forbidden" # Anthropic OAuth subscription rejects 1M context beta — disable beta and retry
|
||||
|
||||
# Catch-all
|
||||
unknown = "unknown" # Unclassifiable — retry with backoff
|
||||
@@ -91,6 +92,7 @@ class ClassifiedError:
|
||||
_BILLING_PATTERNS = [
|
||||
"insufficient credits",
|
||||
"insufficient_quota",
|
||||
"insufficient balance",
|
||||
"credit balance",
|
||||
"credits have been exhausted",
|
||||
"top up your credits",
|
||||
@@ -449,6 +451,25 @@ def classify_api_error(
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Anthropic OAuth subscription rejects the 1M-context beta header.
|
||||
# Observed error body: "The long context beta is not yet available for
|
||||
# this subscription." Returned as HTTP 400 from native Anthropic when
|
||||
# the subscription doesn't include 1M context, even though the request
|
||||
# carries ``anthropic-beta: context-1m-2025-08-07``. The recovery path
|
||||
# in run_agent.py rebuilds the Anthropic client with the beta stripped
|
||||
# and retries once. Pattern is narrow enough that it won't collide with
|
||||
# the 429 tier-gate pattern above (different status, different phrase).
|
||||
if (
|
||||
status_code == 400
|
||||
and "long context beta" in error_msg
|
||||
and "not yet available" in error_msg
|
||||
):
|
||||
return _result(
|
||||
FailoverReason.oauth_long_context_beta_forbidden,
|
||||
retryable=True,
|
||||
should_compress=False,
|
||||
)
|
||||
|
||||
# ── 2. HTTP status code classification ──────────────────────────
|
||||
|
||||
if status_code is not None:
|
||||
|
||||
@@ -30,7 +30,6 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from types import SimpleNamespace
|
||||
@@ -42,7 +41,6 @@ from agent import google_oauth
|
||||
from agent.gemini_schema import sanitize_gemini_tool_parameters
|
||||
from agent.google_code_assist import (
|
||||
CODE_ASSIST_ENDPOINT,
|
||||
FREE_TIER_ID,
|
||||
CodeAssistError,
|
||||
ProjectContext,
|
||||
resolve_project_context,
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any, Dict
|
||||
|
||||
# Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema``
|
||||
# object, which is only a subset of OpenAPI 3.0 / JSON Schema. Strip fields
|
||||
|
||||
@@ -29,7 +29,6 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
@@ -49,14 +49,13 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import secrets
|
||||
import socket
|
||||
import stat
|
||||
import threading
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from dataclasses import dataclass, field
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
@@ -98,6 +97,7 @@ _DEFAULT_CLIENT_SECRET = f"GOCSPX-{_PUBLIC_CLIENT_SECRET_SUFFIX}"
|
||||
|
||||
# Regex patterns for fallback scraping from an installed gemini-cli.
|
||||
import re as _re
|
||||
from utils import atomic_replace
|
||||
_CLIENT_ID_PATTERN = _re.compile(
|
||||
r"OAUTH_CLIENT_ID\s*=\s*['\"]([0-9]+-[a-z0-9]+\.apps\.googleusercontent\.com)['\"]"
|
||||
)
|
||||
@@ -499,7 +499,7 @@ def save_credentials(creds: GoogleCredentials) -> Path:
|
||||
fh.flush()
|
||||
os.fsync(fh.fileno())
|
||||
os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR)
|
||||
os.replace(tmp_path, path)
|
||||
atomic_replace(tmp_path, path)
|
||||
finally:
|
||||
try:
|
||||
if tmp_path.exists():
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
"""LM Studio reasoning-effort resolution shared by the chat-completions
|
||||
transport and run_agent's iteration-limit summary path.
|
||||
|
||||
LM Studio publishes per-model ``capabilities.reasoning.allowed_options`` (e.g.
|
||||
``["off","on"]`` for toggle-style models, ``["off","minimal","low"]`` for
|
||||
graduated models). We map the user's ``reasoning_config`` onto LM Studio's
|
||||
OpenAI-compatible vocabulary, then clamp against the model's allowed set so
|
||||
the server doesn't 400 on an unsupported effort.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import List, Optional
|
||||
|
||||
# LM Studio accepts these top-level reasoning_effort values via its
# OpenAI-compatible chat.completions endpoint.
_LM_VALID_EFFORTS = {"none", "minimal", "low", "medium", "high", "xhigh"}

# Toggle-style models publish allowed_options as ["off","on"] in /api/v1/models.
# Map them onto the OpenAI-compatible request vocabulary.
_LM_EFFORT_ALIASES = {"off": "none", "on": "medium"}


def _normalize_lm_effort(value: object) -> Optional[str]:
    """Trim, lower-case and alias-map one effort token; ``None`` for non-strings."""
    if not isinstance(value, str):
        return None
    token = value.strip().lower()
    return _LM_EFFORT_ALIASES.get(token, token)


def resolve_lmstudio_effort(
    reasoning_config: Optional[dict],
    allowed_options: Optional[List[str]],
) -> Optional[str]:
    """Return the ``reasoning_effort`` string to send to LM Studio, or ``None``.

    ``None`` means "omit the field": the user picked a level the model can't
    honor, so let LM Studio fall back to the model's declared default rather
    than silently substituting a different effort. When ``allowed_options`` is
    falsy (probe failed), skip clamping and send the resolved effort anyway.

    Args:
        reasoning_config: User reasoning settings. ``{"enabled": False}``
            resolves to ``"none"``; otherwise ``"effort"`` is used when it
            names a valid level (aliases ``"off"``/``"on"`` accepted).
            Anything else, including a missing or non-string ``effort``,
            resolves to the ``"medium"`` default.
        allowed_options: The model's published allowed options, or a falsy
            value when unknown.

    Returns:
        The effort string to send, or ``None`` when the resolved effort is
        not among ``allowed_options``.
    """
    effort = "medium"
    if reasoning_config and isinstance(reasoning_config, dict):
        if reasoning_config.get("enabled") is False:
            effort = "none"
        else:
            # A non-string effort (e.g. a YAML boolean) normalizes to None and
            # keeps the default instead of raising on ``.strip()``.
            requested = _normalize_lm_effort(reasoning_config.get("effort"))
            if requested in _LM_VALID_EFFORTS:
                effort = requested
    if allowed_options:
        # Normalize the server's options the same way as the user's request so
        # case or stray whitespace cannot cause a spurious clamp to None.
        allowed = {_normalize_lm_effort(opt) for opt in allowed_options}
        if effort not in allowed:
            return None
    return effort
|
||||
+35
-1
@@ -28,7 +28,6 @@ Usage in run_agent.py:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import inspect
|
||||
@@ -403,6 +402,41 @@ class MemoryManager:
|
||||
provider.name, e,
|
||||
)
|
||||
|
||||
def on_session_switch(
    self,
    new_session_id: str,
    *,
    parent_session_id: str = "",
    reset: bool = False,
    **kwargs,
) -> None:
    """Tell every registered provider that the agent's session_id changed.

    Per the hook contract this is used for ``/resume``, ``/branch``,
    ``/reset``, ``/new``, and context compression — paths that reassign
    ``AIAgent.session_id`` while the providers stay alive.

    Each provider's ``on_session_switch`` receives ``new_session_id`` plus
    ``parent_session_id``, ``reset`` and any extra keyword arguments, so it
    can refresh cached per-session state. A failure in one provider is
    logged at debug level and does not stop the remaining providers from
    being notified. An empty ``new_session_id`` is ignored.
    See ``MemoryProvider.on_session_switch`` for the full contract.
    """
    if new_session_id:
        forwarded = {
            "parent_session_id": parent_session_id,
            "reset": reset,
            **kwargs,
        }
        for member in self._providers:
            try:
                member.on_session_switch(new_session_id, **forwarded)
            except Exception as err:
                logger.debug(
                    "Memory provider '%s' on_session_switch failed: %s",
                    member.name, err,
                )
|
||||
|
||||
def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
|
||||
"""Notify all providers before context compression.
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ Lifecycle (called by MemoryManager, wired in run_agent.py):
|
||||
Optional hooks (override to opt in):
|
||||
on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
|
||||
on_session_end(messages) — end-of-session extraction
|
||||
on_session_switch(new_session_id, **kwargs) — mid-process session_id rotation
|
||||
on_pre_compress(messages) -> str — extract before context compression
|
||||
on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
|
||||
on_delegation(task, result, **kwargs) — parent-side observation of subagent work
|
||||
@@ -160,6 +161,45 @@ class MemoryProvider(ABC):
|
||||
(CLI exit, /reset, gateway session expiry).
|
||||
"""
|
||||
|
||||
def on_session_switch(
    self,
    new_session_id: str,
    *,
    parent_session_id: str = "",
    reset: bool = False,
    **kwargs,
) -> None:
    """Hook invoked when the agent's session_id rotates mid-process.

    Triggered by ``/resume``, ``/branch``, ``/reset``, ``/new`` (CLI), their
    gateway equivalents, and context compression: every path that assigns a
    new ``AIAgent.session_id`` while keeping this provider alive.

    A provider that caches per-session state in ``initialize()``
    (``_session_id``, ``_document_id``, accumulated turn buffers, counters)
    should update or reset it here, so later writes are recorded against the
    correct session.

    Parameters
    ----------
    new_session_id:
        The session_id the agent has just moved to.
    parent_session_id:
        The previous session_id when it is meaningful: the fork source for
        ``/branch``, the continued session for context compression, or the
        session being left for ``/resume``. Empty string when there is no
        lineage.
    reset:
        ``True`` for a genuinely new conversation (``/reset`` / ``/new``);
        providers should then flush accumulated per-session buffers such as
        ``_session_turns`` or ``_turn_counter``. ``False`` for ``/resume`` /
        ``/branch`` / compression, where the logical conversation carries on
        under the new id.

    The base implementation does nothing, which keeps existing providers
    working unchanged.
    """
|
||||
|
||||
def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
|
||||
"""Called before context compression discards old messages.
|
||||
|
||||
|
||||
+17
-12
@@ -46,12 +46,13 @@ def _resolve_requests_verify() -> bool | str:
|
||||
# are preserved so the full model name reaches cache lookups and server queries.
|
||||
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
|
||||
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
|
||||
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-oauth", "minimax-cn", "anthropic", "deepseek",
|
||||
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
|
||||
"qwen-oauth",
|
||||
"xiaomi",
|
||||
"arcee",
|
||||
"gmi",
|
||||
"tencent-tokenhub",
|
||||
"custom", "local",
|
||||
# Common aliases
|
||||
"google", "google-gemini", "google-ai-studio",
|
||||
@@ -60,6 +61,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"ollama",
|
||||
"stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
|
||||
"mimo", "xiaomi-mimo",
|
||||
"tencent", "tokenhub", "tencent-cloud", "tencentmaas",
|
||||
"arcee-ai", "arceeai",
|
||||
"gmi-cloud", "gmicloud",
|
||||
"xai", "x-ai", "x.ai", "grok",
|
||||
@@ -208,6 +210,8 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"grok": 131072, # catch-all (grok-beta, unknown grok-*)
|
||||
# Kimi
|
||||
"kimi": 262144,
|
||||
# Tencent — Hy3 Preview (Hunyuan) with 256K context window
|
||||
"hy3-preview": 256000,
|
||||
# Nemotron — NVIDIA's open-weights series (128K context across all sizes)
|
||||
"nemotron": 131072,
|
||||
# Arcee
|
||||
@@ -310,6 +314,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
||||
"api.xiaomimimo.com": "xiaomi",
|
||||
"xiaomimimo.com": "xiaomi",
|
||||
"api.gmi-serving.com": "gmi",
|
||||
"tokenhub.tencentmaas.com": "tencent-tokenhub",
|
||||
"ollama.com": "ollama-cloud",
|
||||
}
|
||||
|
||||
@@ -620,8 +625,6 @@ def fetch_endpoint_model_metadata(
|
||||
if isinstance(ctx, int) and ctx > 0:
|
||||
context_length = ctx
|
||||
break
|
||||
if context_length is None:
|
||||
context_length = _extract_context_length(model)
|
||||
if context_length is not None:
|
||||
entry["context_length"] = context_length
|
||||
|
||||
@@ -1011,10 +1014,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
|
||||
ctx = cfg.get("context_length")
|
||||
if ctx and isinstance(ctx, (int, float)):
|
||||
return int(ctx)
|
||||
# Fall back to max_context_length (theoretical model max)
|
||||
ctx = m.get("max_context_length") or m.get("context_length")
|
||||
if ctx and isinstance(ctx, (int, float)):
|
||||
return int(ctx)
|
||||
break
|
||||
|
||||
# LM Studio / vLLM / llama.cpp: try /v1/models/{model}
|
||||
resp = client.get(f"{server_url}/v1/models/{model}")
|
||||
@@ -1247,7 +1247,7 @@ def get_model_context_length(
|
||||
6. Nous suffix-match via OpenRouter cache
|
||||
7. models.dev registry lookup (provider-aware)
|
||||
8. Thin hardcoded defaults (broad family patterns)
|
||||
9. Default fallback (128K)
|
||||
9. Default fallback (256K)
|
||||
"""
|
||||
# 0. Explicit config override — user knows best
|
||||
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
|
||||
@@ -1276,7 +1276,10 @@ def get_model_context_length(
|
||||
model = _strip_provider_prefix(model)
|
||||
|
||||
# 1. Check persistent cache (model+provider)
|
||||
if base_url:
|
||||
# LM Studio is excluded — its loaded context length is transient (the
|
||||
# user can reload the model with a different context_length at any time
|
||||
# via /api/v1/models/load), so a stale cached value would mask reloads.
|
||||
if base_url and provider != "lmstudio":
|
||||
cached = get_cached_context_length(model, base_url)
|
||||
if cached is not None:
|
||||
# Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds
|
||||
@@ -1329,7 +1332,8 @@ def get_model_context_length(
|
||||
if is_local_endpoint(base_url):
|
||||
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
|
||||
if local_ctx and local_ctx > 0:
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
if provider != "lmstudio":
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
return local_ctx
|
||||
logger.info(
|
||||
"Could not detect context length for model %r at %s — "
|
||||
@@ -1419,10 +1423,11 @@ def get_model_context_length(
|
||||
if base_url and is_local_endpoint(base_url):
|
||||
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
|
||||
if local_ctx and local_ctx > 0:
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
if provider != "lmstudio":
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
return local_ctx
|
||||
|
||||
# 10. Default fallback — 128K
|
||||
# 10. Default fallback — 256K
|
||||
return DEFAULT_FALLBACK_CONTEXT
|
||||
|
||||
|
||||
|
||||
@@ -149,6 +149,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
|
||||
"stepfun": "stepfun",
|
||||
"kimi-coding-cn": "kimi-for-coding",
|
||||
"minimax": "minimax",
|
||||
"minimax-oauth": "minimax",
|
||||
"minimax-cn": "minimax-cn",
|
||||
"deepseek": "deepseek",
|
||||
"alibaba": "alibaba",
|
||||
|
||||
@@ -18,6 +18,7 @@ import os
|
||||
import tempfile
|
||||
import time
|
||||
from typing import Any, Mapping, Optional
|
||||
from utils import atomic_replace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -118,7 +119,7 @@ def record_nous_rate_limit(
|
||||
try:
|
||||
with os.fdopen(fd, "w") as f:
|
||||
json.dump(state, f)
|
||||
os.replace(tmp_path, path)
|
||||
atomic_replace(tmp_path, path)
|
||||
except Exception:
|
||||
# Clean up temp file on failure
|
||||
try:
|
||||
|
||||
+11
-9
@@ -98,17 +98,19 @@ def tool_progress_hint_cli() -> str:
|
||||
def openclaw_residue_hint_cli() -> str:
    """Banner shown the first time Hermes starts and finds ``~/.openclaw/``.

    Points users at ``hermes claw migrate`` (non-destructive port of config,
    memory, and skills) first. ``hermes claw cleanup`` is mentioned as the
    follow-up step for users who have already migrated and want to archive
    the old directory — with a warning that archiving breaks OpenClaw.

    Returns:
        The multi-line banner text. Only the current wording is returned; the
        superseded "Heads up" message is not part of the banner.
    """
    return (
        "A legacy OpenClaw directory was detected at ~/.openclaw/.\n"
        "To port your config, memory, and skills over to Hermes, run "
        "`hermes claw migrate`.\n"
        "If you've already migrated and want to archive the old directory, "
        "run `hermes claw cleanup` (renames it to ~/.openclaw.pre-migration — "
        "OpenClaw will stop working after this).\n"
        "This tip only shows once."
    )
|
||||
|
||||
|
||||
|
||||
@@ -310,6 +310,10 @@ PLATFORM_HINTS = {
|
||||
"Standard markdown is automatically converted to Telegram format. "
|
||||
"Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
|
||||
"`inline code`, ```code blocks```, [links](url), and ## headers. "
|
||||
"Telegram has NO table syntax — prefer bullet lists or labeled "
|
||||
"key: value pairs over pipe tables (any tables you do emit are "
|
||||
"auto-rewritten into row-group bullets, which you can produce "
|
||||
"directly for cleaner output). "
|
||||
"You can send media files natively: to deliver a file to the user, "
|
||||
"include MEDIA:/absolute/path/to/file in your response. Images "
|
||||
"(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
|
||||
|
||||
+62
-8
@@ -56,8 +56,12 @@ _SENSITIVE_BODY_KEYS = frozenset({
|
||||
})
|
||||
|
||||
# Snapshot at import time so runtime env mutations (e.g. LLM-generated
|
||||
# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
|
||||
_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
|
||||
# `export HERMES_REDACT_SECRETS=true`) cannot enable/disable redaction
|
||||
# mid-session. OFF by default — user must opt in via
|
||||
# `security.redact_secrets: true` in config.yaml (bridged to this env var
|
||||
# in hermes_cli/main.py and gateway/run.py) or `HERMES_REDACT_SECRETS=true`
|
||||
# in ~/.hermes/.env.
|
||||
_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("1", "true", "yes", "on")
|
||||
|
||||
# Known API key prefixes -- match the prefix + contiguous token chars
|
||||
_PREFIX_PATTERNS = [
|
||||
@@ -180,11 +184,59 @@ _PREFIX_RE = re.compile(
|
||||
)
|
||||
|
||||
|
||||
def mask_secret(
|
||||
value: str,
|
||||
*,
|
||||
head: int = 4,
|
||||
tail: int = 4,
|
||||
floor: int = 12,
|
||||
placeholder: str = "***",
|
||||
empty: str = "",
|
||||
) -> str:
|
||||
"""Mask a secret for display, preserving ``head`` and ``tail`` characters.
|
||||
|
||||
Canonical helper for display-time redaction across Hermes — used by
|
||||
``hermes config``, ``hermes status``, ``hermes dump``, and anywhere
|
||||
a secret needs to be shown truncated for debuggability while still
|
||||
keeping the bulk hidden.
|
||||
|
||||
Args:
|
||||
value: The secret to mask. ``None``/empty returns ``empty``.
|
||||
head: Leading characters to preserve. Default 4.
|
||||
tail: Trailing characters to preserve. Default 4.
|
||||
floor: Values shorter than ``head + tail + floor_margin`` are
|
||||
fully masked (returns ``placeholder``). Default 12 —
|
||||
matches the existing config/status/dump convention.
|
||||
placeholder: Value returned for too-short inputs. Default ``"***"``.
|
||||
empty: Value returned when ``value`` is falsy (None, ""). The
|
||||
caller can override this to e.g. ``color("(not set)",
|
||||
Colors.DIM)`` for user-facing display.
|
||||
|
||||
Examples:
|
||||
>>> mask_secret("sk-proj-abcdef1234567890")
|
||||
'sk-p...7890'
|
||||
>>> mask_secret("short") # fully masked
|
||||
'***'
|
||||
>>> mask_secret("") # empty default
|
||||
''
|
||||
>>> mask_secret("", empty="(not set)") # empty override
|
||||
'(not set)'
|
||||
>>> mask_secret("long-token", head=6, tail=4, floor=18)
|
||||
'***'
|
||||
"""
|
||||
if not value:
|
||||
return empty
|
||||
if len(value) < floor:
|
||||
return placeholder
|
||||
return f"{value[:head]}...{value[-tail:]}"
|
||||
|
||||
|
||||
def _mask_token(token: str) -> str:
|
||||
"""Mask a token, preserving prefix for long tokens."""
|
||||
if len(token) < 18:
|
||||
"""Mask a log token — conservative 18-char floor, preserves 6 prefix / 4 suffix."""
|
||||
# Empty input: historically this returned "***" rather than "". Preserve.
|
||||
if not token:
|
||||
return "***"
|
||||
return f"{token[:6]}...{token[-4:]}"
|
||||
return mask_secret(token, head=6, tail=4, floor=18)
|
||||
|
||||
|
||||
def _redact_query_string(query: str) -> str:
|
||||
@@ -253,11 +305,13 @@ def _redact_form_body(text: str) -> str:
|
||||
return _redact_query_string(text.strip())
|
||||
|
||||
|
||||
def redact_sensitive_text(text: str) -> str:
|
||||
def redact_sensitive_text(text: str, *, force: bool = False) -> str:
|
||||
"""Apply all redaction patterns to a block of text.
|
||||
|
||||
Safe to call on any string -- non-matching text passes through unchanged.
|
||||
Disabled when security.redact_secrets is false in config.yaml.
|
||||
Disabled by default — enable via security.redact_secrets: true in config.yaml.
|
||||
Set force=True for safety boundaries that must never return raw secrets
|
||||
regardless of the user's global logging redaction preference.
|
||||
"""
|
||||
if text is None:
|
||||
return None
|
||||
@@ -265,7 +319,7 @@ def redact_sensitive_text(text: str) -> str:
|
||||
text = str(text)
|
||||
if not text:
|
||||
return text
|
||||
if not _REDACT_ENABLED:
|
||||
if not (force or _REDACT_ENABLED):
|
||||
return text
|
||||
|
||||
# Known prefixes (sk-, ghp_, etc.)
|
||||
|
||||
@@ -76,6 +76,7 @@ except ImportError: # pragma: no cover
|
||||
fcntl = None # type: ignore[assignment]
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from utils import atomic_replace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -568,7 +569,7 @@ def save_allowlist(data: Dict[str, Any]) -> None:
|
||||
try:
|
||||
with os.fdopen(fd, "w") as fh:
|
||||
fh.write(json.dumps(data, indent=2, sort_keys=True))
|
||||
os.replace(tmp_path, p)
|
||||
atomic_replace(tmp_path, p)
|
||||
except Exception:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
|
||||
+82
-1
@@ -234,7 +234,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
|
||||
for scan_dir in dirs_to_scan:
|
||||
for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
|
||||
if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
|
||||
if any(part in ('.git', '.github', '.hub', '.archive') for part in skill_md.parts):
|
||||
continue
|
||||
try:
|
||||
content = skill_md.read_text(encoding='utf-8')
|
||||
@@ -284,6 +284,71 @@ def get_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
return _skill_commands
|
||||
|
||||
|
||||
def reload_skills() -> Dict[str, Any]:
    """Rescan skills on disk and report how the slash-command map changed.

    Walks ``~/.hermes/skills/`` plus any ``skills.external_dirs`` again (via
    ``scan_skill_commands``) so that ``agent.skill_commands._skill_commands``
    matches what is currently on disk.

    The skills system-prompt cache is deliberately left alone. Skills are
    reached by name (``/skill-name``, ``skills_list``, ``skill_view``), so
    they do not have to appear in the system prompt to be usable, and leaving
    the prompt untouched keeps prefix caching valid across a
    ``/reload-skills`` call.

    Returns:
        A dict shaped like::

            {
                "added":     [{"name": str, "description": str}, ...],
                "removed":   [{"name": str, "description": str}, ...],
                "unchanged": [names present both before and after],
                "total":     number of skills after the rescan,
                "commands":  number of /slash-skill entries after the rescan,
            }

        ``description`` is the ``description:`` value taken from the
        skill's SKILL.md frontmatter as stored in the command map.
    """

    def _describe(commands: Dict[str, Dict[str, Any]]) -> Dict[str, str]:
        # Bare skill name (leading "/" characters stripped) -> description.
        # A missing/None entry or description collapses to "".
        return {
            key.lstrip("/"): (meta or {}).get("description") or ""
            for key, meta in commands.items()
        }

    # Capture the old state first: scan_skill_commands() rebuilds the
    # module-level cache, so the previous contents are gone afterwards.
    previous = _describe(_skill_commands)
    rescanned = scan_skill_commands()
    current = _describe(rescanned)

    previous_names = set(previous)
    current_names = set(current)

    return {
        "added": [
            {"name": name, "description": current[name]}
            for name in sorted(current_names - previous_names)
        ],
        # Removed skills no longer exist on disk, so their descriptions can
        # only come from the pre-rescan snapshot.
        "removed": [
            {"name": name, "description": previous[name]}
            for name in sorted(previous_names - current_names)
        ],
        "unchanged": sorted(current_names & previous_names),
        "total": len(current),
        "commands": len(rescanned),
    }
|
||||
|
||||
|
||||
def resolve_skill_command_key(command: str) -> Optional[str]:
|
||||
"""Resolve a user-typed /command to its canonical skill_cmds key.
|
||||
|
||||
@@ -328,6 +393,14 @@ def build_skill_invocation_message(
|
||||
return f"[Failed to load skill: {skill_info['name']}]"
|
||||
|
||||
loaded_skill, skill_dir, skill_name = loaded
|
||||
|
||||
# Track active usage for Curator lifecycle management (#17782)
|
||||
try:
|
||||
from tools.skill_usage import bump_use
|
||||
bump_use(skill_name)
|
||||
except Exception:
|
||||
pass # Non-critical — skill invocation proceeds regardless
|
||||
|
||||
activation_note = (
|
||||
f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want '
|
||||
"you to follow its instructions. The full skill content is loaded below.]"
|
||||
@@ -367,6 +440,14 @@ def build_preloaded_skills_prompt(
|
||||
continue
|
||||
|
||||
loaded_skill, skill_dir, skill_name = loaded
|
||||
|
||||
# Track active usage for Curator lifecycle management (#17782)
|
||||
try:
|
||||
from tools.skill_usage import bump_use
|
||||
bump_use(skill_name)
|
||||
except Exception:
|
||||
pass # Non-critical
|
||||
|
||||
activation_note = (
|
||||
f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill '
|
||||
"preloaded. Treat its instructions as active guidance for the duration of this "
|
||||
|
||||
+11
-3
@@ -24,7 +24,7 @@ PLATFORM_MAP = {
|
||||
"windows": "win32",
|
||||
}
|
||||
|
||||
EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub"))
|
||||
EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))
|
||||
|
||||
# ── Lazy YAML loader ─────────────────────────────────────────────────────
|
||||
|
||||
@@ -200,6 +200,9 @@ def get_external_skills_dirs() -> List[Path]:
|
||||
if not isinstance(raw_dirs, list):
|
||||
return []
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
hermes_home = get_hermes_home()
|
||||
local_skills = get_skills_dir().resolve()
|
||||
seen: Set[Path] = set()
|
||||
result: List[Path] = []
|
||||
@@ -210,7 +213,12 @@ def get_external_skills_dirs() -> List[Path]:
|
||||
continue
|
||||
# Expand ~ and environment variables
|
||||
expanded = os.path.expanduser(os.path.expandvars(entry))
|
||||
p = Path(expanded).resolve()
|
||||
p = Path(expanded)
|
||||
# Resolve relative paths against HERMES_HOME, not cwd
|
||||
if not p.is_absolute():
|
||||
p = (hermes_home / p).resolve()
|
||||
else:
|
||||
p = p.resolve()
|
||||
if p == local_skills:
|
||||
continue
|
||||
if p in seen:
|
||||
@@ -432,7 +440,7 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
|
||||
def iter_skill_index_files(skills_dir: Path, filename: str):
|
||||
"""Walk skills_dir yielding sorted paths matching *filename*.
|
||||
|
||||
Excludes ``.git``, ``.github``, ``.hub`` directories.
|
||||
Excludes ``.git``, ``.github``, ``.hub``, ``.archive`` directories.
|
||||
"""
|
||||
matches = []
|
||||
for root, dirs, files in os.walk(skills_dir, followlinks=True):
|
||||
|
||||
@@ -30,10 +30,12 @@ def generate_title(
|
||||
assistant_response: str,
|
||||
timeout: float = 30.0,
|
||||
failure_callback: Optional[FailureCallback] = None,
|
||||
main_runtime: dict = None,
|
||||
) -> Optional[str]:
|
||||
"""Generate a session title from the first exchange.
|
||||
|
||||
Uses the auxiliary LLM client (cheapest/fastest available model).
|
||||
Uses the main runtime's model when available, falling back to the
|
||||
auxiliary LLM client (cheapest/fastest available model).
|
||||
Returns the title string or None on failure.
|
||||
|
||||
``failure_callback`` is invoked with ``(task, exception)`` when the
|
||||
@@ -57,6 +59,7 @@ def generate_title(
|
||||
max_tokens=500,
|
||||
temperature=0.3,
|
||||
timeout=timeout,
|
||||
main_runtime=main_runtime,
|
||||
)
|
||||
title = (response.choices[0].message.content or "").strip()
|
||||
# Clean up: remove quotes, trailing punctuation, prefixes like "Title: "
|
||||
@@ -86,6 +89,7 @@ def auto_title_session(
|
||||
user_message: str,
|
||||
assistant_response: str,
|
||||
failure_callback: Optional[FailureCallback] = None,
|
||||
main_runtime: dict = None,
|
||||
) -> None:
|
||||
"""Generate and set a session title if one doesn't already exist.
|
||||
|
||||
@@ -107,7 +111,7 @@ def auto_title_session(
|
||||
return
|
||||
|
||||
title = generate_title(
|
||||
user_message, assistant_response, failure_callback=failure_callback
|
||||
user_message, assistant_response, failure_callback=failure_callback, main_runtime=main_runtime
|
||||
)
|
||||
if not title:
|
||||
return
|
||||
@@ -126,6 +130,7 @@ def maybe_auto_title(
|
||||
assistant_response: str,
|
||||
conversation_history: list,
|
||||
failure_callback: Optional[FailureCallback] = None,
|
||||
main_runtime: dict = None,
|
||||
) -> None:
|
||||
"""Fire-and-forget title generation after the first exchange.
|
||||
|
||||
@@ -147,7 +152,7 @@ def maybe_auto_title(
|
||||
thread = threading.Thread(
|
||||
target=auto_title_session,
|
||||
args=(session_db, session_id, user_message, assistant_response),
|
||||
kwargs={"failure_callback": failure_callback},
|
||||
kwargs={"failure_callback": failure_callback, "main_runtime": main_runtime},
|
||||
daemon=True,
|
||||
name="auto-title",
|
||||
)
|
||||
|
||||
@@ -58,6 +58,7 @@ class AnthropicTransport(ProviderTransport):
|
||||
context_length: int | None
|
||||
base_url: str | None
|
||||
fast_mode: bool
|
||||
drop_context_1m_beta: bool
|
||||
"""
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
|
||||
@@ -73,6 +74,7 @@ class AnthropicTransport(ProviderTransport):
|
||||
context_length=params.get("context_length"),
|
||||
base_url=params.get("base_url"),
|
||||
fast_mode=params.get("fast_mode", False),
|
||||
drop_context_1m_beta=params.get("drop_context_1m_beta", False),
|
||||
)
|
||||
|
||||
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
|
||||
|
||||
@@ -12,12 +12,93 @@ reasoning configuration, temperature handling, and extra_body assembly.
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.lmstudio_reasoning import resolve_lmstudio_effort
|
||||
from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
|
||||
from agent.prompt_builder import DEVELOPER_ROLE_MODELS
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
|
||||
|
||||
def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
|
||||
"""Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig."""
|
||||
if reasoning_config is None or not isinstance(reasoning_config, dict):
|
||||
return None
|
||||
|
||||
normalized_model = (model or "").strip().lower()
|
||||
if normalized_model.startswith("google/"):
|
||||
normalized_model = normalized_model.split("/", 1)[1]
|
||||
|
||||
# ``thinking_config`` is a Gemini-only request parameter. The same
|
||||
# ``gemini`` provider also serves Gemma (and historically PaLM/Bard);
|
||||
# those reject the field with HTTP 400 "Unknown name 'thinking_config':
|
||||
# Cannot find field" — including the polite ``{"includeThoughts": False}``
|
||||
# form. Omit the field entirely on non-Gemini models. (#17426)
|
||||
if not normalized_model.startswith("gemini"):
|
||||
return None
|
||||
|
||||
if reasoning_config.get("enabled") is False:
|
||||
# Gemini can hide thought parts even when internal thinking still
|
||||
# happens; omit thinkingLevel to avoid model-specific validation quirks.
|
||||
return {"includeThoughts": False}
|
||||
|
||||
effort = str(reasoning_config.get("effort", "medium") or "medium").strip().lower()
|
||||
if effort == "none":
|
||||
return {"includeThoughts": False}
|
||||
|
||||
thinking_config: Dict[str, Any] = {"includeThoughts": True}
|
||||
|
||||
# Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes'
|
||||
# coarse effort levels. ``includeThoughts`` alone is enough to surface
|
||||
# thought parts without risking request validation errors.
|
||||
if normalized_model.startswith("gemini-2.5-"):
|
||||
return thinking_config
|
||||
|
||||
if effort not in {"minimal", "low", "medium", "high", "xhigh"}:
|
||||
effort = "medium"
|
||||
|
||||
# Gemini 3 Flash documents low/medium/high thinking levels; Gemini 3 Pro
|
||||
# is stricter (low/high). Clamp Hermes' wider effort set to what each
|
||||
# family accepts so we never forward an undocumented level verbatim.
|
||||
if normalized_model.startswith(("gemini-3", "gemini-3.1")):
|
||||
if "flash" in normalized_model:
|
||||
if effort in {"minimal", "low"}:
|
||||
thinking_config["thinkingLevel"] = "low"
|
||||
elif effort in {"high", "xhigh"}:
|
||||
thinking_config["thinkingLevel"] = "high"
|
||||
else:
|
||||
thinking_config["thinkingLevel"] = "medium"
|
||||
elif "pro" in normalized_model:
|
||||
thinking_config["thinkingLevel"] = (
|
||||
"high" if effort in {"high", "xhigh"} else "low"
|
||||
)
|
||||
|
||||
return thinking_config
|
||||
|
||||
|
||||
def _snake_case_gemini_thinking_config(config: dict | None) -> dict | None:
|
||||
"""Convert Gemini thinking config keys to the OpenAI-compat field names."""
|
||||
if not isinstance(config, dict) or not config:
|
||||
return None
|
||||
|
||||
translated: Dict[str, Any] = {}
|
||||
if isinstance(config.get("includeThoughts"), bool):
|
||||
translated["include_thoughts"] = config["includeThoughts"]
|
||||
if isinstance(config.get("thinkingLevel"), str) and config["thinkingLevel"].strip():
|
||||
translated["thinking_level"] = config["thinkingLevel"].strip().lower()
|
||||
if isinstance(config.get("thinkingBudget"), (int, float)):
|
||||
translated["thinking_budget"] = int(config["thinkingBudget"])
|
||||
return translated or None
|
||||
|
||||
|
||||
def _is_gemini_openai_compat_base_url(base_url: Any) -> bool:
|
||||
normalized = str(base_url or "").strip().rstrip("/").lower()
|
||||
if not normalized:
|
||||
return False
|
||||
if "generativelanguage.googleapis.com" not in normalized:
|
||||
return False
|
||||
return normalized.endswith("/openai")
|
||||
|
||||
|
||||
class ChatCompletionsTransport(ProviderTransport):
|
||||
"""Transport for api_mode='chat_completions'.
|
||||
|
||||
@@ -101,6 +182,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
is_github_models: bool
|
||||
is_nvidia_nim: bool
|
||||
is_kimi: bool
|
||||
is_lmstudio: bool
|
||||
is_custom_provider: bool
|
||||
ollama_num_ctx: int | None
|
||||
# Provider routing
|
||||
@@ -114,6 +196,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
# Reasoning
|
||||
supports_reasoning: bool
|
||||
github_reasoning_extra: dict | None
|
||||
lmstudio_reasoning_options: list[str] | None # raw allowed_options from /api/v1/models
|
||||
# Claude on OpenRouter/Nous max output
|
||||
anthropic_max_output: int | None
|
||||
# Extra
|
||||
@@ -188,6 +271,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
anthropic_max_out = params.get("anthropic_max_output")
|
||||
is_nvidia_nim = params.get("is_nvidia_nim", False)
|
||||
is_kimi = params.get("is_kimi", False)
|
||||
is_tokenhub = params.get("is_tokenhub", False)
|
||||
reasoning_config = params.get("reasoning_config")
|
||||
|
||||
if ephemeral is not None and max_tokens_fn:
|
||||
@@ -219,12 +303,41 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
_kimi_effort = _e
|
||||
api_kwargs["reasoning_effort"] = _kimi_effort
|
||||
|
||||
# Tencent TokenHub: top-level reasoning_effort (unless thinking disabled)
|
||||
if is_tokenhub:
|
||||
_tokenhub_thinking_off = bool(
|
||||
reasoning_config
|
||||
and isinstance(reasoning_config, dict)
|
||||
and reasoning_config.get("enabled") is False
|
||||
)
|
||||
if not _tokenhub_thinking_off:
|
||||
_tokenhub_effort = "high"
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
_e = (reasoning_config.get("effort") or "").strip().lower()
|
||||
if _e in ("low", "medium", "high"):
|
||||
_tokenhub_effort = _e
|
||||
api_kwargs["reasoning_effort"] = _tokenhub_effort
|
||||
|
||||
# LM Studio: top-level reasoning_effort. Only emit when the model
|
||||
# declares reasoning support via /api/v1/models capabilities (gated
|
||||
# upstream by params["supports_reasoning"]). resolve_lmstudio_effort
|
||||
# is shared with run_agent's summary path so both stay in sync.
|
||||
if params.get("is_lmstudio", False) and params.get("supports_reasoning", False):
|
||||
_lm_effort = resolve_lmstudio_effort(
|
||||
reasoning_config,
|
||||
params.get("lmstudio_reasoning_options"),
|
||||
)
|
||||
if _lm_effort is not None:
|
||||
api_kwargs["reasoning_effort"] = _lm_effort
|
||||
|
||||
# extra_body assembly
|
||||
extra_body: Dict[str, Any] = {}
|
||||
|
||||
is_openrouter = params.get("is_openrouter", False)
|
||||
is_nous = params.get("is_nous", False)
|
||||
is_github_models = params.get("is_github_models", False)
|
||||
provider_name = str(params.get("provider_name") or "").strip().lower()
|
||||
base_url = params.get("base_url")
|
||||
|
||||
provider_prefs = params.get("provider_preferences")
|
||||
if provider_prefs and is_openrouter:
|
||||
@@ -240,8 +353,9 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
"type": "enabled" if _kimi_thinking_enabled else "disabled",
|
||||
}
|
||||
|
||||
# Reasoning
|
||||
if params.get("supports_reasoning", False):
|
||||
# Reasoning. LM Studio is handled above via top-level reasoning_effort,
|
||||
# so skip emitting extra_body.reasoning for it.
|
||||
if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False):
|
||||
if is_github_models:
|
||||
gh_reasoning = params.get("github_reasoning_extra")
|
||||
if gh_reasoning is not None:
|
||||
@@ -277,6 +391,23 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
if is_qwen:
|
||||
extra_body["vl_high_resolution_images"] = True
|
||||
|
||||
if provider_name == "gemini":
|
||||
raw_thinking_config = _build_gemini_thinking_config(model, reasoning_config)
|
||||
if _is_gemini_openai_compat_base_url(base_url):
|
||||
thinking_config = _snake_case_gemini_thinking_config(raw_thinking_config)
|
||||
if thinking_config:
|
||||
openai_compat_extra = extra_body.get("extra_body", {})
|
||||
google_extra = openai_compat_extra.get("google", {})
|
||||
google_extra["thinking_config"] = thinking_config
|
||||
openai_compat_extra["google"] = google_extra
|
||||
extra_body["extra_body"] = openai_compat_extra
|
||||
elif raw_thinking_config:
|
||||
extra_body["thinking_config"] = raw_thinking_config
|
||||
elif provider_name == "google-gemini-cli":
|
||||
thinking_config = _build_gemini_thinking_config(model, reasoning_config)
|
||||
if thinking_config:
|
||||
extra_body["thinking_config"] = thinking_config
|
||||
|
||||
# Merge any pre-built extra_body additions
|
||||
additions = params.get("extra_body_additions")
|
||||
if additions:
|
||||
|
||||
@@ -8,7 +8,7 @@ streaming, or the _run_codex_stream() call path.
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
from agent.transports.types import NormalizedResponse, ToolCall
|
||||
|
||||
|
||||
class ResponsesApiTransport(ProviderTransport):
|
||||
@@ -151,8 +151,6 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
"""Normalize Codex Responses API response to NormalizedResponse."""
|
||||
from agent.codex_responses_adapter import (
|
||||
_normalize_codex_response,
|
||||
_extract_responses_message_text,
|
||||
_extract_responses_reasoning_text,
|
||||
)
|
||||
|
||||
# _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
|
||||
|
||||
@@ -359,6 +359,25 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
|
||||
source_url="https://aws.amazon.com/bedrock/pricing/",
|
||||
pricing_version="bedrock-pricing-2026-04",
|
||||
),
|
||||
# MiniMax
|
||||
(
|
||||
"minimax",
|
||||
"minimax-m2.7",
|
||||
): PricingEntry(
|
||||
input_cost_per_million=Decimal("0.30"),
|
||||
output_cost_per_million=Decimal("1.20"),
|
||||
source="official_docs_snapshot",
|
||||
pricing_version="minimax-pricing-2026-04",
|
||||
),
|
||||
(
|
||||
"minimax-cn",
|
||||
"minimax-m2.7",
|
||||
): PricingEntry(
|
||||
input_cost_per_million=Decimal("0.30"),
|
||||
output_cost_per_million=Decimal("1.20"),
|
||||
source="official_docs_snapshot",
|
||||
pricing_version="minimax-pricing-2026-04",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -400,6 +419,8 @@ def resolve_billing_route(
|
||||
return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
|
||||
if provider_name == "openai":
|
||||
return BillingRoute(provider="openai", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
|
||||
if provider_name in {"minimax", "minimax-cn"}:
|
||||
return BillingRoute(provider=provider_name, model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
|
||||
if provider_name in {"custom", "local"} or (base and "localhost" in base):
|
||||
return BillingRoute(provider=provider_name or "custom", model=model, base_url=base_url or "", billing_mode="unknown")
|
||||
return BillingRoute(provider=provider_name or "unknown", model=model.split("/")[-1] if model else "", base_url=base_url or "", billing_mode="unknown")
|
||||
|
||||
+14
-8
@@ -30,14 +30,13 @@ model:
|
||||
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
|
||||
# "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY)
|
||||
# "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
|
||||
# "lmstudio" - LM Studio local server (optional: LM_API_KEY, defaults to http://127.0.0.1:1234/v1)
|
||||
#
|
||||
# Local servers (LM Studio, Ollama, vLLM, llama.cpp):
|
||||
# "custom" - Any OpenAI-compatible endpoint. Set base_url below.
|
||||
# Aliases: "lmstudio", "ollama", "vllm", "llamacpp" all map to "custom".
|
||||
# Example for LM Studio:
|
||||
# provider: "lmstudio"
|
||||
# base_url: "http://localhost:1234/v1"
|
||||
# No API key needed — local servers typically ignore auth.
|
||||
# "custom" - Any other OpenAI-compatible endpoint. Set base_url below.
|
||||
# Aliases: "ollama", "vllm", "llamacpp" all map to "custom".
|
||||
# LM Studio is first-class and uses provider: "lmstudio".
|
||||
# It works with both no-auth and auth-enabled server modes.
|
||||
#
|
||||
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
|
||||
provider: "auto"
|
||||
@@ -181,6 +180,11 @@ terminal:
|
||||
# lifetime_seconds: 300
|
||||
# docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
# docker_mount_cwd_to_workspace: true # Explicit opt-in: mount your launch cwd into /workspace
|
||||
# # Optional: run the container as your host user's uid:gid so files written
|
||||
# # into bind-mounted dirs are owned by you, not root. Drops SETUID/SETGID
|
||||
# # caps too since no gosu privilege drop is needed. Leave off if your
|
||||
# # chosen docker_image expects to start as root.
|
||||
# docker_run_as_host_user: true
|
||||
# # Optional: explicitly forward selected env vars into Docker.
|
||||
# # These values come from your current shell first, then ~/.hermes/.env.
|
||||
# # Warning: anything forwarded here is visible to commands run in the container.
|
||||
@@ -566,7 +570,7 @@ agent:
|
||||
# - A preset like "hermes-cli" or "hermes-telegram" (curated tool set)
|
||||
# - A list of individual toolsets to compose your own (see list below)
|
||||
#
|
||||
# Supported platform keys: cli, telegram, discord, whatsapp, slack, qqbot
|
||||
# Supported platform keys: cli, telegram, discord, whatsapp, slack, qqbot, teams
|
||||
#
|
||||
# Examples:
|
||||
#
|
||||
@@ -596,6 +600,7 @@ agent:
|
||||
# signal: hermes-signal (same as telegram)
|
||||
# homeassistant: hermes-homeassistant (same as telegram)
|
||||
# qqbot: hermes-qqbot (same as telegram)
|
||||
# teams: hermes-teams (same as telegram)
|
||||
#
|
||||
platform_toolsets:
|
||||
cli: [hermes-cli]
|
||||
@@ -607,6 +612,7 @@ platform_toolsets:
|
||||
homeassistant: [hermes-homeassistant]
|
||||
qqbot: [hermes-qqbot]
|
||||
yuanbao: [hermes-yuanbao]
|
||||
teams: [hermes-teams]
|
||||
|
||||
# =============================================================================
|
||||
# Gateway Platform Settings
|
||||
@@ -928,7 +934,7 @@ display:
|
||||
# agent_name: "My Agent" # Banner title and branding
|
||||
# welcome: "Welcome message" # Shown at CLI startup
|
||||
# response_label: " ⚔ Agent " # Response box header label
|
||||
# prompt_symbol: "⚔ ❯ " # Prompt symbol
|
||||
# prompt_symbol: "⚔" # Prompt symbol (bare token; renderers add trailing space)
|
||||
# tool_prefix: "╎" # Tool output line prefix (default: ┊)
|
||||
#
|
||||
skin: default
|
||||
|
||||
@@ -69,7 +69,9 @@ from agent.usage_pricing import (
|
||||
format_duration_compact,
|
||||
format_token_count_compact,
|
||||
)
|
||||
from agent.account_usage import fetch_account_usage, render_account_usage_lines
|
||||
# NOTE: `from agent.account_usage import ...` is deliberately NOT at module
|
||||
# top — it transitively pulls the OpenAI SDK chain (~230 ms cold) and is only
|
||||
# needed when the user runs `/limits`. Lazy-imported inside the handler below.
|
||||
from hermes_cli.banner import _format_context_length, format_banner_version_label
|
||||
|
||||
_COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏")
|
||||
@@ -78,6 +80,11 @@ _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧
|
||||
# Load .env from ~/.hermes/.env first, then project root as dev fallback.
|
||||
# User-managed env files should override stale shell exports on restart.
|
||||
from hermes_constants import get_hermes_home, display_hermes_home
|
||||
from hermes_cli.browser_connect import (
|
||||
DEFAULT_BROWSER_CDP_URL,
|
||||
manual_chrome_debug_command,
|
||||
try_launch_chrome_debug,
|
||||
)
|
||||
from hermes_cli.env_loader import load_hermes_dotenv
|
||||
from utils import base_url_host_matches
|
||||
|
||||
@@ -238,65 +245,6 @@ def _parse_service_tier_config(raw: str) -> str | None:
|
||||
logger.warning("Unknown service_tier '%s', ignoring", raw)
|
||||
return None
|
||||
|
||||
|
||||
|
||||
def _get_chrome_debug_candidates(system: str) -> list[str]:
|
||||
"""Return likely browser executables for local CDP auto-launch."""
|
||||
candidates: list[str] = []
|
||||
seen: set[str] = set()
|
||||
|
||||
def _add_candidate(path: str | None) -> None:
|
||||
if not path:
|
||||
return
|
||||
normalized = os.path.normcase(os.path.normpath(path))
|
||||
if normalized in seen:
|
||||
return
|
||||
if os.path.isfile(path):
|
||||
candidates.append(path)
|
||||
seen.add(normalized)
|
||||
|
||||
def _add_from_path(*names: str) -> None:
|
||||
for name in names:
|
||||
_add_candidate(shutil.which(name))
|
||||
|
||||
if system == "Darwin":
|
||||
for app in (
|
||||
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
|
||||
"/Applications/Chromium.app/Contents/MacOS/Chromium",
|
||||
"/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
|
||||
"/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
|
||||
):
|
||||
_add_candidate(app)
|
||||
elif system == "Windows":
|
||||
_add_from_path(
|
||||
"chrome.exe", "msedge.exe", "brave.exe", "chromium.exe",
|
||||
"chrome", "msedge", "brave", "chromium",
|
||||
)
|
||||
|
||||
for base in (
|
||||
os.environ.get("ProgramFiles"),
|
||||
os.environ.get("ProgramFiles(x86)"),
|
||||
os.environ.get("LOCALAPPDATA"),
|
||||
):
|
||||
if not base:
|
||||
continue
|
||||
for parts in (
|
||||
("Google", "Chrome", "Application", "chrome.exe"),
|
||||
("Chromium", "Application", "chrome.exe"),
|
||||
("Chromium", "Application", "chromium.exe"),
|
||||
("BraveSoftware", "Brave-Browser", "Application", "brave.exe"),
|
||||
("Microsoft", "Edge", "Application", "msedge.exe"),
|
||||
):
|
||||
_add_candidate(os.path.join(base, *parts))
|
||||
else:
|
||||
_add_from_path(
|
||||
"google-chrome", "google-chrome-stable", "chromium-browser",
|
||||
"chromium", "brave-browser", "microsoft-edge",
|
||||
)
|
||||
|
||||
return candidates
|
||||
|
||||
|
||||
def load_cli_config() -> Dict[str, Any]:
|
||||
"""
|
||||
Load CLI configuration from config files.
|
||||
@@ -549,18 +497,20 @@ def load_cli_config() -> Dict[str, Any]:
|
||||
"singularity_image": "TERMINAL_SINGULARITY_IMAGE",
|
||||
"modal_image": "TERMINAL_MODAL_IMAGE",
|
||||
"daytona_image": "TERMINAL_DAYTONA_IMAGE",
|
||||
"vercel_runtime": "TERMINAL_VERCEL_RUNTIME",
|
||||
# SSH config
|
||||
"ssh_host": "TERMINAL_SSH_HOST",
|
||||
"ssh_user": "TERMINAL_SSH_USER",
|
||||
"ssh_port": "TERMINAL_SSH_PORT",
|
||||
"ssh_key": "TERMINAL_SSH_KEY",
|
||||
# Container resource config (docker, singularity, modal, daytona -- ignored for local/ssh)
|
||||
# Container resource config (docker, singularity, modal, daytona, vercel_sandbox -- ignored for local/ssh)
|
||||
"container_cpu": "TERMINAL_CONTAINER_CPU",
|
||||
"container_memory": "TERMINAL_CONTAINER_MEMORY",
|
||||
"container_disk": "TERMINAL_CONTAINER_DISK",
|
||||
"container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
|
||||
"docker_volumes": "TERMINAL_DOCKER_VOLUMES",
|
||||
"docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
|
||||
"docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
|
||||
"sandbox_dir": "TERMINAL_SANDBOX_DIR",
|
||||
# Persistent shell (non-local backends)
|
||||
"persistent_shell": "TERMINAL_PERSISTENT_SHELL",
|
||||
@@ -1590,9 +1540,29 @@ def _strip_leaked_bracketed_paste_wrappers(text: str) -> str:
|
||||
# that appears when the ESC byte was stripped by a prior filter.
|
||||
_DSR_CPR_ESC_RE = re.compile(r"\x1b\[\d+;\d+R")
|
||||
_DSR_CPR_VISIBLE_RE = re.compile(r"\^\[\[\d+;\d+R")
|
||||
_SGR_MOUSE_ESC_RE = re.compile(r"\x1b\[<\d+;\d+;\d+[Mm]")
|
||||
_SGR_MOUSE_VISIBLE_RE = re.compile(r"\^\[\[<\d+;\d+;\d+[Mm]")
|
||||
# Some terminals/filters can drop ESC and literal "^[[", leaving only
|
||||
# "<btn;col;rowM" fragments in the buffer. Keep this broad on purpose:
|
||||
# these fragments are extremely unlikely to be intentional user input, and
|
||||
# stripping them is better than sending corrupted prompts.
|
||||
_SGR_MOUSE_BARE_RE = re.compile(r"<\d+;\d+;\d+[Mm]")
|
||||
_TERMINAL_INPUT_MODE_RESET_SEQ = (
|
||||
"\x1b[?1006l" # disable SGR mouse
|
||||
"\x1b[?1003l" # disable any-motion tracking
|
||||
"\x1b[?1002l" # disable button-motion tracking
|
||||
"\x1b[?1000l" # disable click tracking
|
||||
"\x1b[?1004l" # disable focus events
|
||||
"\x1b[?2004l" # disable bracketed paste
|
||||
"\x1b[?1049l" # leave alt screen (if stuck there)
|
||||
"\x1b[<u" # pop kitty keyboard mode
|
||||
"\x1b[>4m" # reset modifyOtherKeys
|
||||
"\x1b[0m" # reset text attributes
|
||||
"\x1b[?25h" # ensure cursor visible
|
||||
)
|
||||
|
||||
|
||||
def _strip_leaked_terminal_responses(text: str) -> str:
|
||||
def _strip_leaked_terminal_responses_with_meta(text: str) -> tuple[str, bool]:
|
||||
"""Strip leaked terminal control-response sequences from user input.
|
||||
|
||||
Covers Cursor Position Report (CPR / DSR) responses — ``ESC[<row>;<col>R``
|
||||
@@ -1602,12 +1572,43 @@ def _strip_leaked_terminal_responses(text: str) -> str:
|
||||
(resize storms, multiplexer focus changes, slow PTYs) the response
|
||||
lands in the input buffer as literal text and corrupts what the user
|
||||
typed.
|
||||
|
||||
Also strips leaked SGR mouse-report fragments (``ESC[<...M/m`` and
|
||||
degraded visible forms). Returns ``(cleaned_text, had_mouse_reports)``
|
||||
so callers can trigger an in-place terminal mode recovery when needed.
|
||||
"""
|
||||
if not text:
|
||||
return text
|
||||
text = _DSR_CPR_ESC_RE.sub("", text)
|
||||
text = _DSR_CPR_VISIBLE_RE.sub("", text)
|
||||
return text
|
||||
return text, False
|
||||
|
||||
has_esc = "\x1b[" in text
|
||||
has_visible = "^[" in text
|
||||
has_bare_mouse = "<" in text and ";" in text and ("M" in text or "m" in text)
|
||||
if not (has_esc or has_visible or has_bare_mouse):
|
||||
return text, False
|
||||
|
||||
had_mouse_reports = False
|
||||
|
||||
if has_esc:
|
||||
text = _DSR_CPR_ESC_RE.sub("", text)
|
||||
text, count = _SGR_MOUSE_ESC_RE.subn("", text)
|
||||
had_mouse_reports = had_mouse_reports or count > 0
|
||||
|
||||
if has_visible:
|
||||
text = _DSR_CPR_VISIBLE_RE.sub("", text)
|
||||
text, count = _SGR_MOUSE_VISIBLE_RE.subn("", text)
|
||||
had_mouse_reports = had_mouse_reports or count > 0
|
||||
|
||||
if has_bare_mouse:
|
||||
text, count = _SGR_MOUSE_BARE_RE.subn("", text)
|
||||
had_mouse_reports = had_mouse_reports or count > 0
|
||||
|
||||
return text, had_mouse_reports
|
||||
|
||||
|
||||
def _strip_leaked_terminal_responses(text: str) -> str:
    """Return only the cleaned text from the ``_with_meta`` variant.

    Kept for callers that do not need the mouse-report flag; all stripping
    is delegated to ``_strip_leaked_terminal_responses_with_meta``.
    """
    stripped_text, _had_mouse_reports = _strip_leaked_terminal_responses_with_meta(text)
    return stripped_text
|
||||
|
||||
|
||||
def _collect_query_images(query: str | None, image_arg: str | None = None) -> tuple[str, list[Path]]:
|
||||
@@ -1981,6 +1982,8 @@ class HermesCLI:
|
||||
self._stream_box_opened = False # True once the response box header is printed
|
||||
self._reasoning_preview_buf = "" # Coalesce tiny reasoning chunks for [thinking] output
|
||||
self._pending_edit_snapshots = {}
|
||||
self._last_input_mode_recovery = 0.0
|
||||
self._input_mode_recovery_notice_shown = False
|
||||
|
||||
# Configuration - priority: CLI args > env vars > config file
|
||||
# Model comes from: CLI arg or config.yaml (single source of truth).
|
||||
@@ -3157,6 +3160,8 @@ class HermesCLI:
|
||||
return "Processing skills command..."
|
||||
if cmd_lower == "/reload-mcp":
|
||||
return "Reloading MCP servers..."
|
||||
if cmd_lower == "/reload-skills" or cmd_lower == "/reload_skills":
|
||||
return "Reloading skills..."
|
||||
if cmd_lower.startswith("/browser"):
|
||||
return "Configuring browser..."
|
||||
return "Processing command..."
|
||||
@@ -4170,6 +4175,37 @@ class HermesCLI:
|
||||
sys.stdout.write(seq)
|
||||
sys.stdout.flush()
|
||||
|
||||
def _recover_terminal_input_modes(self, *, reason: str) -> None:
    """Best-effort terminal reset after leaked mouse reports are detected.

    Writes ``_TERMINAL_INPUT_MODE_RESET_SEQ`` to the application's output
    object when one is available (preferring ``write_raw`` over ``write``),
    otherwise to ``sys.stdout``.  Calls arriving less than 0.5 seconds after
    the previous recovery are ignored.  Any error while writing aborts the
    recovery silently.  The user-facing notice is printed only the first
    time a recovery succeeds.

    Args:
        reason: Short description of where the leak was seen; only logged.
    """
    stamp = time.monotonic()
    if stamp - self._last_input_mode_recovery < 0.5:
        # Throttle so a terminal flooding reports cannot make us thrash.
        return
    self._last_input_mode_recovery = stamp

    app = getattr(self, "_app", None)
    sink = getattr(app, "output", None) if app else None
    try:
        # Pick the destination and its write method first, then emit once.
        if sink and hasattr(sink, "write_raw"):
            target, method = sink, "write_raw"
        elif sink and hasattr(sink, "write"):
            target, method = sink, "write"
        else:
            target, method = sys.stdout, "write"
        getattr(target, method)(_TERMINAL_INPUT_MODE_RESET_SEQ)
        target.flush()
    except Exception:
        return

    logger.warning("Recovered terminal input modes after leak: %s", reason)
    if self._input_mode_recovery_notice_shown:
        return
    self._input_mode_recovery_notice_shown = True
    _cprint(
        f" {_DIM}Recovered terminal input modes after leaked mouse reports. "
        f"If this repeats, run /new or restart this tab.{_RST}"
    )
|
||||
|
||||
def _handle_copy_command(self, cmd_original: str) -> None:
|
||||
"""Handle /copy [number] — copy assistant output to clipboard."""
|
||||
parts = cmd_original.split(maxsplit=1)
|
||||
@@ -4860,6 +4896,22 @@ class HermesCLI:
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
# Notify memory providers that session_id rotated to a fresh
|
||||
# conversation. reset=True signals providers to flush accumulated
|
||||
# per-session state (_session_turns, _turn_counter, _document_id).
|
||||
# Fires BEFORE the plugin on_session_reset hook (shell hooks only
|
||||
# see the new id; Python providers see the transition). See #6672.
|
||||
try:
|
||||
_mm = getattr(self.agent, "_memory_manager", None)
|
||||
if _mm is not None:
|
||||
_mm.on_session_switch(
|
||||
self.session_id,
|
||||
parent_session_id=old_session_id or "",
|
||||
reset=True,
|
||||
reason="new_session",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
self._notify_session_boundary("on_session_reset")
|
||||
|
||||
if not silent:
|
||||
@@ -4912,6 +4964,7 @@ class HermesCLI:
|
||||
_cprint(" Already on that session.")
|
||||
return
|
||||
|
||||
old_session_id = self.session_id
|
||||
# End current session
|
||||
try:
|
||||
self._session_db.end_session(self.session_id, "resumed_other")
|
||||
@@ -4949,6 +5002,22 @@ class HermesCLI:
|
||||
if hasattr(self.agent, "_invalidate_system_prompt"):
|
||||
self.agent._invalidate_system_prompt()
|
||||
|
||||
# Notify memory providers that session_id rotated to a resumed
|
||||
# session. reset=False — the provider's accumulated state is
|
||||
# still valid; it just needs to target the new session_id for
|
||||
# subsequent writes. See #6672.
|
||||
try:
|
||||
_mm = getattr(self.agent, "_memory_manager", None)
|
||||
if _mm is not None:
|
||||
_mm.on_session_switch(
|
||||
target_id,
|
||||
parent_session_id=old_session_id or "",
|
||||
reset=False,
|
||||
reason="resume",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
title_part = f" \"{session_meta['title']}\"" if session_meta.get("title") else ""
|
||||
msg_count = len([m for m in self.conversation_history if m.get("role") == "user"])
|
||||
if self.conversation_history:
|
||||
@@ -5069,6 +5138,22 @@ class HermesCLI:
|
||||
if hasattr(self.agent, "_invalidate_system_prompt"):
|
||||
self.agent._invalidate_system_prompt()
|
||||
|
||||
# Notify memory providers that session_id forked to a new branch.
|
||||
# reset=False — the branched session carries the transcript
|
||||
# forward, so provider state tracks the lineage. parent_session_id
|
||||
# links the branch back to the original. See #6672.
|
||||
try:
|
||||
_mm = getattr(self.agent, "_memory_manager", None)
|
||||
if _mm is not None:
|
||||
_mm.on_session_switch(
|
||||
new_session_id,
|
||||
parent_session_id=parent_session_id or "",
|
||||
reset=False,
|
||||
reason="branch",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
msg_count = len([m for m in self.conversation_history if m.get("role") == "user"])
|
||||
_cprint(
|
||||
f" ⑂ Branched session \"{branch_title}\""
|
||||
@@ -5327,6 +5412,7 @@ class HermesCLI:
|
||||
base_url=result.base_url or self.base_url or "",
|
||||
api_key=result.api_key or self.api_key or "",
|
||||
model_info=mi,
|
||||
config_context_length=getattr(self.agent, "_config_context_length", None) if self.agent else None,
|
||||
)
|
||||
if ctx:
|
||||
_cprint(f" Context: {ctx:,} tokens")
|
||||
@@ -5457,6 +5543,8 @@ class HermesCLI:
|
||||
try:
|
||||
providers = list_authenticated_providers(
|
||||
current_provider=self.provider or "",
|
||||
current_base_url=self.base_url or "",
|
||||
current_model=self.model or "",
|
||||
user_providers=user_provs,
|
||||
custom_providers=custom_provs,
|
||||
max_models=50,
|
||||
@@ -5551,6 +5639,7 @@ class HermesCLI:
|
||||
base_url=result.base_url or self.base_url or "",
|
||||
api_key=result.api_key or self.api_key or "",
|
||||
model_info=mi,
|
||||
config_context_length=getattr(self.agent, "_config_context_length", None) if self.agent else None,
|
||||
)
|
||||
if ctx:
|
||||
_cprint(f" Context: {ctx:,} tokens")
|
||||
@@ -5975,7 +6064,29 @@ class HermesCLI:
|
||||
|
||||
print(f"(._.) Unknown cron command: {subcommand}")
|
||||
print(" Available: list, add, edit, pause, resume, run, remove")
|
||||
|
||||
|
||||
def _handle_curator_command(self, cmd: str):
|
||||
"""Handle /curator slash command.
|
||||
|
||||
Delegates to hermes_cli.curator so the CLI and the `hermes curator`
|
||||
subcommand share the same handler set.
|
||||
"""
|
||||
import shlex
|
||||
|
||||
tokens = shlex.split(cmd)[1:] if cmd else []
|
||||
if not tokens:
|
||||
tokens = ["status"]
|
||||
|
||||
try:
|
||||
from hermes_cli.curator import cli_main
|
||||
cli_main(tokens)
|
||||
except SystemExit:
|
||||
# argparse calls sys.exit() on --help or errors; swallow so we
|
||||
# don't kill the interactive session.
|
||||
pass
|
||||
except Exception as exc:
|
||||
print(f"(._.) curator: {exc}")
|
||||
|
||||
def _handle_skills_command(self, cmd: str):
|
||||
"""Handle /skills slash command — delegates to hermes_cli.skills_hub."""
|
||||
from hermes_cli.skills_hub import handle_skills_slash
|
||||
@@ -6219,6 +6330,8 @@ class HermesCLI:
|
||||
self.save_conversation()
|
||||
elif canonical == "cron":
|
||||
self._handle_cron_command(cmd_original)
|
||||
elif canonical == "curator":
|
||||
self._handle_curator_command(cmd_original)
|
||||
elif canonical == "skills":
|
||||
with self._busy_command(self._slow_command_status(cmd_original)):
|
||||
self._handle_skills_command(cmd_original)
|
||||
@@ -6232,6 +6345,8 @@ class HermesCLI:
|
||||
self._console_print(f" Status bar {state}")
|
||||
elif canonical == "verbose":
|
||||
self._toggle_verbose()
|
||||
elif canonical == "footer":
|
||||
self._handle_footer_command(cmd_original)
|
||||
elif canonical == "yolo":
|
||||
self._toggle_yolo()
|
||||
elif canonical == "reasoning":
|
||||
@@ -6257,8 +6372,13 @@ class HermesCLI:
|
||||
count = reload_env()
|
||||
print(f" Reloaded .env ({count} var(s) updated)")
|
||||
elif canonical == "reload-mcp":
|
||||
# Interactive reload: confirm first (unless the user has opted out).
|
||||
# The auto-reload path (file watcher) calls _reload_mcp directly
|
||||
# without this confirmation.
|
||||
self._confirm_and_reload_mcp(cmd_original)
|
||||
elif canonical == "reload-skills":
|
||||
with self._busy_command(self._slow_command_status(cmd_original)):
|
||||
self._reload_mcp()
|
||||
self._reload_skills()
|
||||
elif canonical == "browser":
|
||||
self._handle_browser_command(cmd_original)
|
||||
elif canonical == "plugins":
|
||||
@@ -6600,34 +6720,7 @@ class HermesCLI:
|
||||
|
||||
Returns True if a launch command was executed (doesn't guarantee success).
|
||||
"""
|
||||
import subprocess as _sp
|
||||
|
||||
candidates = _get_chrome_debug_candidates(system)
|
||||
|
||||
if not candidates:
|
||||
return False
|
||||
|
||||
# Dedicated profile dir so debug Chrome won't collide with normal Chrome
|
||||
data_dir = str(_hermes_home / "chrome-debug")
|
||||
os.makedirs(data_dir, exist_ok=True)
|
||||
|
||||
chrome = candidates[0]
|
||||
try:
|
||||
_sp.Popen(
|
||||
[
|
||||
chrome,
|
||||
f"--remote-debugging-port={port}",
|
||||
f"--user-data-dir={data_dir}",
|
||||
"--no-first-run",
|
||||
"--no-default-browser-check",
|
||||
],
|
||||
stdout=_sp.DEVNULL,
|
||||
stderr=_sp.DEVNULL,
|
||||
start_new_session=True, # detach from terminal
|
||||
)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
return try_launch_chrome_debug(port, system)
|
||||
|
||||
def _handle_browser_command(self, cmd: str):
|
||||
"""Handle /browser connect|disconnect|status — manage live Chrome CDP connection."""
|
||||
@@ -6636,13 +6729,44 @@ class HermesCLI:
|
||||
parts = cmd.strip().split(None, 1)
|
||||
sub = parts[1].lower().strip() if len(parts) > 1 else "status"
|
||||
|
||||
_DEFAULT_CDP = "http://127.0.0.1:9222"
|
||||
_DEFAULT_CDP = DEFAULT_BROWSER_CDP_URL
|
||||
current = os.environ.get("BROWSER_CDP_URL", "").strip()
|
||||
|
||||
if sub.startswith("connect"):
|
||||
# Optionally accept a custom CDP URL: /browser connect ws://host:port
|
||||
connect_parts = cmd.strip().split(None, 2) # ["/browser", "connect", "ws://..."]
|
||||
cdp_url = connect_parts[2].strip() if len(connect_parts) > 2 else _DEFAULT_CDP
|
||||
parsed_cdp = urlparse(cdp_url if "://" in cdp_url else f"http://{cdp_url}")
|
||||
if parsed_cdp.scheme not in {"http", "https", "ws", "wss"}:
|
||||
print()
|
||||
print(
|
||||
f" ⚠ Unsupported browser url scheme: {parsed_cdp.scheme or '(missing)'} "
|
||||
"(expected one of: http, https, ws, wss)"
|
||||
)
|
||||
print()
|
||||
return
|
||||
try:
|
||||
_port = parsed_cdp.port or (443 if parsed_cdp.scheme in {"https", "wss"} else 80)
|
||||
except ValueError:
|
||||
print()
|
||||
print(f" ⚠ Invalid port in browser url: {cdp_url}")
|
||||
print()
|
||||
return
|
||||
if not parsed_cdp.hostname:
|
||||
print()
|
||||
print(f" ⚠ Missing host in browser url: {cdp_url}")
|
||||
print()
|
||||
return
|
||||
_host = parsed_cdp.hostname
|
||||
if parsed_cdp.path.startswith("/devtools/browser/"):
|
||||
cdp_url = parsed_cdp.geturl()
|
||||
else:
|
||||
cdp_url = parsed_cdp._replace(
|
||||
path="",
|
||||
params="",
|
||||
query="",
|
||||
fragment="",
|
||||
).geturl()
|
||||
|
||||
# Clear any existing browser sessions so the next tool call uses the new backend
|
||||
try:
|
||||
@@ -6653,20 +6777,13 @@ class HermesCLI:
|
||||
|
||||
print()
|
||||
|
||||
# Extract port for connectivity checks
|
||||
_port = 9222
|
||||
try:
|
||||
_port = int(cdp_url.rsplit(":", 1)[-1].split("/")[0])
|
||||
except (ValueError, IndexError):
|
||||
pass
|
||||
|
||||
# Check if Chrome is already listening on the debug port
|
||||
import socket
|
||||
_already_open = False
|
||||
try:
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
s.settimeout(1)
|
||||
s.connect(("127.0.0.1", _port))
|
||||
s.connect((_host, _port))
|
||||
s.close()
|
||||
_already_open = True
|
||||
except (OSError, socket.timeout):
|
||||
@@ -6684,7 +6801,7 @@ class HermesCLI:
|
||||
try:
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
s.settimeout(1)
|
||||
s.connect(("127.0.0.1", _port))
|
||||
s.connect((_host, _port))
|
||||
s.close()
|
||||
_already_open = True
|
||||
break
|
||||
@@ -6697,33 +6814,22 @@ class HermesCLI:
|
||||
print(" Try again in a few seconds — the debug instance may still be starting")
|
||||
else:
|
||||
print(" ⚠ Could not auto-launch Chrome")
|
||||
# Show manual instructions as fallback
|
||||
_data_dir = str(_hermes_home / "chrome-debug")
|
||||
sys_name = _plat.system()
|
||||
if sys_name == "Darwin":
|
||||
chrome_cmd = (
|
||||
'open -a "Google Chrome" --args'
|
||||
f" --remote-debugging-port=9222"
|
||||
f' --user-data-dir="{_data_dir}"'
|
||||
" --no-first-run --no-default-browser-check"
|
||||
)
|
||||
elif sys_name == "Windows":
|
||||
chrome_cmd = (
|
||||
f'chrome.exe --remote-debugging-port=9222'
|
||||
f' --user-data-dir="{_data_dir}"'
|
||||
f" --no-first-run --no-default-browser-check"
|
||||
)
|
||||
chrome_cmd = manual_chrome_debug_command(_port, sys_name)
|
||||
if chrome_cmd:
|
||||
print(f" Launch Chrome manually:")
|
||||
print(f" {chrome_cmd}")
|
||||
else:
|
||||
chrome_cmd = (
|
||||
f"google-chrome --remote-debugging-port=9222"
|
||||
f' --user-data-dir="{_data_dir}"'
|
||||
f" --no-first-run --no-default-browser-check"
|
||||
)
|
||||
print(f" Launch Chrome manually:")
|
||||
print(f" {chrome_cmd}")
|
||||
print(" No Chrome/Chromium executable found in this environment")
|
||||
else:
|
||||
print(f" ⚠ Port {_port} is not reachable at {cdp_url}")
|
||||
|
||||
if not _already_open:
|
||||
print()
|
||||
print("Browser not connected — start Chrome with remote debugging and retry /browser connect")
|
||||
print()
|
||||
return
|
||||
|
||||
os.environ["BROWSER_CDP_URL"] = cdp_url
|
||||
# Eagerly start the CDP supervisor so pending_dialogs + frame_tree
|
||||
# show up in the next browser_snapshot. No-op if already started.
|
||||
@@ -6859,6 +6965,58 @@ class HermesCLI:
|
||||
if self._apply_tui_skin_style():
|
||||
print(" Prompt + TUI colors updated.")
|
||||
|
||||
def _handle_footer_command(self, cmd_original: str) -> None:
    """Toggle or inspect ``display.runtime_footer.enabled`` from the CLI.

    Usage:
        /footer            → toggle
        /footer on|off     → explicit
        /footer status     → show current state

    Args:
        cmd_original: The slash-command line as typed; everything after the
            first whitespace-separated token is treated as the argument.
    """
    from hermes_cli.config import load_config
    from hermes_cli.colors import Colors as _Colors

    # Extract the (optional) argument; any parsing problem means "no argument".
    try:
        pieces = (cmd_original or "").strip().split(None, 1)
        arg = pieces[1].strip().lower() if len(pieces) > 1 else ""
    except Exception:
        arg = ""

    cfg = load_config() or {}
    display_cfg = cfg.get("display") or {}
    footer_cfg = display_cfg.get("runtime_footer") or {}
    current = bool(footer_cfg.get("enabled", False))
    fields = footer_cfg.get("fields") or ["model", "context_pct", "cwd"]

    if arg in ("status", "?"):
        state = "ON" if current else "OFF"
        _cprint(
            f" {_Colors.BOLD}Runtime footer:{_Colors.RESET} {state}\n"
            f" Fields: {', '.join(fields)}"
        )
        return

    if arg == "":
        # Bare /footer flips whatever is currently configured.
        new_state = not current
    elif arg in ("on", "enable", "true", "1"):
        new_state = True
    elif arg in ("off", "disable", "false", "0"):
        new_state = False
    else:
        _cprint(" Usage: /footer [on|off|status]")
        return

    if not save_config_value("display.runtime_footer.enabled", new_state):
        _cprint(" Failed to save runtime_footer setting to config.yaml")
        return

    if new_state:
        state = f"{_Colors.GREEN}ON{_Colors.RESET}"
    else:
        state = f"{_Colors.DIM}OFF{_Colors.RESET}"
    _cprint(f" Runtime footer: {state}")
|
||||
|
||||
def _toggle_verbose(self):
|
||||
"""Cycle tool progress mode: off → new → all → verbose → off."""
|
||||
cycle = ["off", "new", "all", "verbose"]
|
||||
@@ -7099,9 +7257,15 @@ class HermesCLI:
|
||||
else:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
|
||||
|
||||
# Pass None as system_message so _compress_context rebuilds
|
||||
# the system prompt from scratch via _build_system_prompt(None).
|
||||
# Passing _cached_system_prompt caused duplication because
|
||||
# _build_system_prompt appends system_message to prompt_parts
|
||||
# which already contain the agent identity — resulting in the
|
||||
# identity block appearing twice (issue #15281).
|
||||
compressed, _ = self.agent._compress_context(
|
||||
original_history,
|
||||
self.agent._cached_system_prompt or "",
|
||||
None,
|
||||
approx_tokens=approx_tokens,
|
||||
focus_topic=focus_topic or None,
|
||||
)
|
||||
@@ -7225,6 +7389,8 @@ class HermesCLI:
|
||||
provider = getattr(agent, "provider", None) or getattr(self, "provider", None)
|
||||
base_url = getattr(agent, "base_url", None) or getattr(self, "base_url", None)
|
||||
api_key = getattr(agent, "api_key", None) or getattr(self, "api_key", None)
|
||||
# Lazy import — pulls the OpenAI SDK chain, only needed here.
|
||||
from agent.account_usage import fetch_account_usage, render_account_usage_lines
|
||||
account_snapshot = None
|
||||
if provider:
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as _pool:
|
||||
@@ -7339,6 +7505,77 @@ class HermesCLI:
|
||||
if _reload_thread.is_alive():
|
||||
print(" ⚠️ MCP reload timed out (30s). Some servers may not have reconnected.")
|
||||
|
||||
def _confirm_and_reload_mcp(self, cmd_original: str = "") -> None:
    """Interactive /reload-mcp — ask for confirmation, then reload.

    Reloading MCP tools invalidates the provider prompt cache for the
    active session (tool schemas are baked into the system prompt), so the
    next message re-sends full input tokens — which can be expensive on
    long-context or high-reasoning models.

    The user picks one of three options: Approve Once, Always Approve
    (persists ``approvals.mcp_reload_confirm: false`` so later reloads skip
    this prompt), or Cancel.  The prompt is gated by
    ``approvals.mcp_reload_confirm`` and shown by default.

    Args:
        cmd_original: The slash-command line as typed; used only to pick the
            busy-status text shown while the reload runs.
    """
    # Gate check — honours an earlier "Always Approve".  Any failure while
    # reading the config falls back to asking.
    confirm_required = True
    try:
        cfg = load_cli_config()
        approvals = cfg.get("approvals") if isinstance(cfg, dict) else None
        if isinstance(approvals, dict):
            confirm_required = bool(approvals.get("mcp_reload_confirm", True))
    except Exception:
        confirm_required = True

    if not confirm_required:
        with self._busy_command(self._slow_command_status(cmd_original)):
            self._reload_mcp()
        return

    # Show the warning as plain output; the answer is typed into the
    # composer via a single-line prompt.
    banner = (
        "",
        "⚠️ /reload-mcp — Prompt cache invalidation warning",
        "",
        " Reloading MCP servers rebuilds the tool set for this session and",
        " invalidates the provider prompt cache. The next message will",
        " re-send full input tokens (can be expensive on long-context or",
        " high-reasoning models).",
        "",
        " [1] Approve Once — reload now",
        " [2] Always Approve — reload now and silence this prompt permanently",
        " [3] Cancel — leave MCP tools unchanged",
        "",
    )
    for line in banner:
        print(line)

    raw = self._prompt_text_input("Choice [1/2/3]: ")
    if raw is None:
        print("🟡 /reload-mcp cancelled (no input).")
        return

    # Map every accepted spelling onto one of the three canonical choices.
    aliases = {}
    for word in ("1", "once", "approve", "yes", "y", "ok"):
        aliases[word] = "once"
    for word in ("2", "always", "remember"):
        aliases[word] = "always"
    for word in ("3", "cancel", "nevermind", "no", "n", ""):
        aliases[word] = "cancel"

    choice = aliases.get(raw.strip().lower())
    if choice is None:
        print(f"🟡 Unrecognized choice '{raw}'. /reload-mcp cancelled.")
        return

    if choice == "cancel":
        print("🟡 /reload-mcp cancelled. MCP tools unchanged.")
        return

    if choice == "always":
        persisted = save_config_value("approvals.mcp_reload_confirm", False)
        if persisted:
            print("🔒 Future /reload-mcp calls will run without confirmation.")
            print(" Re-enable via `approvals.mcp_reload_confirm: true` in config.yaml.")
        else:
            print("⚠️ Couldn't persist opt-out — reloading once.")

    with self._busy_command(self._slow_command_status(cmd_original)):
        self._reload_mcp()
|
||||
|
||||
def _reload_mcp(self):
|
||||
"""Reload MCP servers: disconnect all, re-read config.yaml, reconnect.
|
||||
|
||||
@@ -7424,6 +7661,78 @@ class HermesCLI:
|
||||
except Exception as e:
|
||||
print(f" ❌ MCP reload failed: {e}")
|
||||
|
||||
def _reload_skills(self) -> None:
    """Rescan the skills directory and queue a note for the next user turn.

    Skills are invoked at runtime (``/skill-name``, ``skills_list``,
    ``skill_view``) and do not have to live in the system prompt, so this
    does NOT clear the prompt cache.  The method rescans the slash-command
    map, prints the diff for the user and — only when skills were added or
    removed — stores a one-shot note in ``_pending_skills_reload_note``.
    Nothing is appended to the conversation history here, which keeps
    message alternation and prompt caching intact.

    Any exception raised along the way is reported on stdout and swallowed.
    """
    try:
        from agent.skill_commands import reload_skills

        if not self._command_running:
            print("🔄 Reloading skills...")

        outcome = reload_skills()
        added = outcome.get("added", [])      # [{"name", "description"}, ...]
        removed = outcome.get("removed", [])  # [{"name", "description"}, ...]
        total = outcome.get("total", 0)

        if not (added or removed):
            print(" No new skills detected.")
            print(f" 📚 {total} skill(s) available")
            return

        def _render(entry: dict) -> str:
            # One catalog line per skill; the description is optional.
            name = entry.get("name", "")
            description = entry.get("description", "")
            if description:
                return f" - {name}: {description}"
            return f" - {name}"

        # User-facing diff.
        for heading, entries in (
            (" ➕ Added Skills:", added),
            (" ➖ Removed Skills:", removed),
        ):
            if entries:
                print(heading)
                for entry in entries:
                    print(f" {_render(entry)}")
        print(f" 📚 {total} skill(s) available")

        # One-shot note for the NEXT user turn, following the same pending-note
        # pattern as ``_pending_model_switch_note``.  Entries use the same
        # `` - name: description`` shape as the printed diff.
        note_lines = ["[USER INITIATED SKILLS RELOAD:"]
        for heading, entries in (
            ("Added Skills:", added),
            ("Removed Skills:", removed),
        ):
            if entries:
                note_lines.append("")
                note_lines.append(heading)
                note_lines.extend(_render(entry) for entry in entries)
        note_lines.append("")
        note_lines.append("Use skills_list to see the updated catalog.]")
        self._pending_skills_reload_note = "\n".join(note_lines)

    except Exception as e:
        print(f" ❌ Skills reload failed: {e}")
|
||||
|
||||
# ====================================================================
|
||||
# Tool-call generation indicator (shown during streaming)
|
||||
# ====================================================================
|
||||
@@ -8505,7 +8814,8 @@ class HermesCLI:
|
||||
from agent.context_references import preprocess_context_references
|
||||
from agent.model_metadata import get_model_context_length
|
||||
_ctx_len = get_model_context_length(
|
||||
self.model, base_url=self.base_url or "", api_key=self.api_key or "")
|
||||
self.model, base_url=self.base_url or "", api_key=self.api_key or "",
|
||||
config_context_length=getattr(self.agent, "_config_context_length", None) if self.agent else None)
|
||||
_ctx_result = preprocess_context_references(
|
||||
message, cwd=os.getcwd(), context_length=_ctx_len)
|
||||
if _ctx_result.expanded or _ctx_result.blocked:
|
||||
@@ -8632,6 +8942,13 @@ class HermesCLI:
|
||||
if _msn:
|
||||
agent_message = _msn + "\n\n" + agent_message
|
||||
self._pending_model_switch_note = None
|
||||
# Prepend pending /reload-skills note so the model sees which
|
||||
# skills were added/removed before handling this turn. Same
|
||||
# one-shot queue pattern as the model-switch note above.
|
||||
_srn = getattr(self, '_pending_skills_reload_note', None)
|
||||
if _srn:
|
||||
agent_message = _srn + "\n\n" + agent_message
|
||||
self._pending_skills_reload_note = None
|
||||
try:
|
||||
result = self.agent.run_conversation(
|
||||
user_message=agent_message,
|
||||
@@ -8814,6 +9131,13 @@ class HermesCLI:
|
||||
response,
|
||||
self.conversation_history,
|
||||
failure_callback=_title_failure_cb,
|
||||
main_runtime={
|
||||
"model": self.model,
|
||||
"provider": self.provider,
|
||||
"base_url": self.base_url,
|
||||
"api_key": self.api_key,
|
||||
"api_mode": self.api_mode,
|
||||
},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
@@ -9271,6 +9595,21 @@ class HermesCLI:
|
||||
self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
|
||||
except Exception:
|
||||
pass # Tips are non-critical — never break startup
|
||||
|
||||
# Curator — kick off a background skill-maintenance pass on startup
|
||||
# if the schedule says we're due. Runs in a daemon thread so it
|
||||
# never blocks the interactive loop. Best-effort; any failure is
|
||||
# swallowed to avoid breaking session startup.
|
||||
try:
|
||||
from agent.curator import maybe_run_curator
|
||||
maybe_run_curator(
|
||||
idle_for_seconds=float("inf"), # CLI startup = fully idle
|
||||
on_summary=lambda msg: self._console_print(
|
||||
f"[dim #6b7684]💾 {msg}[/]"
|
||||
),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
if self.preloaded_skills and not self._startup_skills_line_shown:
|
||||
skills_label = ", ".join(self.preloaded_skills)
|
||||
self._console_print(
|
||||
@@ -9940,7 +10279,9 @@ class HermesCLI:
|
||||
# so the 5-line collapse threshold and display are consistent.
|
||||
pasted_text = pasted_text.replace('\r\n', '\n').replace('\r', '\n')
|
||||
pasted_text = _strip_leaked_bracketed_paste_wrappers(pasted_text)
|
||||
pasted_text = _strip_leaked_terminal_responses(pasted_text)
|
||||
pasted_text, _had_mouse_reports = _strip_leaked_terminal_responses_with_meta(pasted_text)
|
||||
if _had_mouse_reports:
|
||||
self._recover_terminal_input_modes(reason="mouse reports leaked into bracketed paste payload")
|
||||
if _should_auto_attach_clipboard_image_on_paste(pasted_text) and self._try_attach_clipboard_image():
|
||||
event.app.invalidate()
|
||||
if pasted_text:
|
||||
@@ -10094,7 +10435,9 @@ class HermesCLI:
|
||||
event so it never triggers this.
|
||||
"""
|
||||
text = _strip_leaked_bracketed_paste_wrappers(buf.text)
|
||||
text = _strip_leaked_terminal_responses(text)
|
||||
text, _had_mouse_reports = _strip_leaked_terminal_responses_with_meta(text)
|
||||
if _had_mouse_reports:
|
||||
self._recover_terminal_input_modes(reason="mouse reports leaked into prompt buffer")
|
||||
if text != buf.text:
|
||||
cursor = min(buf.cursor_position, len(text))
|
||||
_paste_just_collapsed[0] = True
|
||||
@@ -10847,7 +11190,9 @@ class HermesCLI:
|
||||
|
||||
if isinstance(user_input, str):
|
||||
user_input = _strip_leaked_bracketed_paste_wrappers(user_input)
|
||||
user_input = _strip_leaked_terminal_responses(user_input)
|
||||
user_input, _had_mouse_reports = _strip_leaked_terminal_responses_with_meta(user_input)
|
||||
if _had_mouse_reports:
|
||||
self._recover_terminal_input_modes(reason="mouse reports leaked into submitted input")
|
||||
|
||||
# Check for commands — but detect dragged/pasted file paths first.
|
||||
# See _detect_file_drop() for details.
|
||||
|
||||
+15
-6
@@ -21,6 +21,7 @@ from typing import Optional, Dict, List, Any, Union
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from hermes_time import now as _hermes_now
|
||||
from utils import atomic_replace
|
||||
|
||||
try:
|
||||
from croniter import croniter
|
||||
@@ -312,13 +313,21 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None
|
||||
elif schedule["kind"] == "cron":
|
||||
if not HAS_CRONITER:
|
||||
logger.warning(
|
||||
"Cannot compute next run for cron schedule %r: 'croniter' "
|
||||
"is not installed. Install the 'cron' extra (pip install "
|
||||
"'hermes-agent[cron]') to re-enable recurring cron jobs.",
|
||||
"Cannot compute next run for cron schedule %r: 'croniter' is "
|
||||
"not installed. croniter is a core dependency as of v0.9.x; "
|
||||
"reinstall hermes-agent or run 'pip install croniter' in your "
|
||||
"runtime env.",
|
||||
schedule.get("expr"),
|
||||
)
|
||||
return None
|
||||
cron = croniter(schedule["expr"], now)
|
||||
# Use last_run_at as the croniter base when available, consistent
|
||||
# with interval jobs. This ensures that after a crash/restart,
|
||||
# the next run is anchored to the actual last execution time
|
||||
# rather than to an arbitrary restart time.
|
||||
base_time = now
|
||||
if last_run_at:
|
||||
base_time = _ensure_aware(datetime.fromisoformat(last_run_at))
|
||||
cron = croniter(schedule["expr"], base_time)
|
||||
next_run = cron.get_next(datetime)
|
||||
return next_run.isoformat()
|
||||
|
||||
@@ -367,7 +376,7 @@ def save_jobs(jobs: List[Dict[str, Any]]):
|
||||
json.dump({"jobs": jobs, "updated_at": _hermes_now().isoformat()}, f, indent=2)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_path, JOBS_FILE)
|
||||
atomic_replace(tmp_path, JOBS_FILE)
|
||||
_secure_file(JOBS_FILE)
|
||||
except BaseException:
|
||||
try:
|
||||
@@ -863,7 +872,7 @@ def save_job_output(job_id: str, output: str):
|
||||
f.write(output)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_path, output_file)
|
||||
atomic_replace(tmp_path, output_file)
|
||||
_secure_file(output_file)
|
||||
except BaseException:
|
||||
try:
|
||||
|
||||
+107
-44
@@ -198,7 +198,9 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
|
||||
if resolved:
|
||||
parsed_chat_id, parsed_thread_id, resolved_is_explicit = _parse_target_ref(platform_key, resolved)
|
||||
if resolved_is_explicit:
|
||||
chat_id, thread_id = parsed_chat_id, parsed_thread_id
|
||||
chat_id = parsed_chat_id
|
||||
if parsed_thread_id is not None:
|
||||
thread_id = parsed_thread_id
|
||||
else:
|
||||
chat_id = resolved
|
||||
except Exception:
|
||||
@@ -231,12 +233,32 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
|
||||
}
|
||||
|
||||
|
||||
def _normalize_deliver_value(deliver) -> str:
|
||||
"""Normalize a stored/submitted ``deliver`` value to its canonical string form.
|
||||
|
||||
The contract is that ``deliver`` is a string (``"local"``, ``"origin"``,
|
||||
``"telegram"``, ``"telegram:-1001:17"``, or comma-separated combinations).
|
||||
Historically some callers — MCP clients passing an array, direct edits of
|
||||
``jobs.json``, or stale code paths — have stored a list/tuple like
|
||||
``["telegram"]``. ``str(["telegram"])`` would serialize to the literal
|
||||
string ``"['telegram']"``, which is not a known platform and fails
|
||||
resolution silently. Flatten lists/tuples into a comma-separated string
|
||||
so both forms work. Returns ``"local"`` for anything falsy.
|
||||
"""
|
||||
if deliver is None or deliver == "":
|
||||
return "local"
|
||||
if isinstance(deliver, (list, tuple)):
|
||||
parts = [str(p).strip() for p in deliver if str(p).strip()]
|
||||
return ",".join(parts) if parts else "local"
|
||||
return str(deliver)
|
||||
|
||||
|
||||
def _resolve_delivery_targets(job: dict) -> List[dict]:
|
||||
"""Resolve all concrete auto-delivery targets for a cron job (supports comma-separated deliver)."""
|
||||
deliver = job.get("deliver", "local")
|
||||
deliver = _normalize_deliver_value(job.get("deliver", "local"))
|
||||
if deliver == "local":
|
||||
return []
|
||||
parts = [p.strip() for p in str(deliver).split(",") if p.strip()]
|
||||
parts = [p.strip() for p in deliver.split(",") if p.strip()]
|
||||
seen = set()
|
||||
targets = []
|
||||
for part in parts:
|
||||
@@ -255,13 +277,21 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
|
||||
return targets[0] if targets else None
|
||||
|
||||
|
||||
# Media extension sets — keep in sync with gateway/platforms/base.py:_process_message_background
|
||||
_AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a'})
|
||||
# Media extension sets — audio routing is centralized in gateway.platforms.base
|
||||
# via should_send_media_as_audio() so Telegram-specific rules stay in one place.
|
||||
_VIDEO_EXTS = frozenset({'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'})
|
||||
_IMAGE_EXTS = frozenset({'.jpg', '.jpeg', '.png', '.webp', '.gif'})
|
||||
|
||||
|
||||
def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata: dict | None, loop, job: dict) -> None:
|
||||
def _send_media_via_adapter(
|
||||
adapter,
|
||||
chat_id: str,
|
||||
media_files: list,
|
||||
metadata: dict | None,
|
||||
loop,
|
||||
job: dict,
|
||||
platform=None,
|
||||
) -> None:
|
||||
"""Send extracted MEDIA files as native platform attachments via a live adapter.
|
||||
|
||||
Routes each file to the appropriate adapter method (send_voice, send_image_file,
|
||||
@@ -270,10 +300,13 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
from gateway.platforms.base import should_send_media_as_audio
|
||||
|
||||
for media_path, _is_voice in media_files:
|
||||
try:
|
||||
ext = Path(media_path).suffix.lower()
|
||||
if ext in _AUDIO_EXTS:
|
||||
route_platform = platform if platform is not None else getattr(adapter, "platform", None)
|
||||
if should_send_media_as_audio(route_platform, ext, is_voice=_is_voice):
|
||||
coro = adapter.send_voice(chat_id=chat_id, audio_path=media_path, metadata=metadata)
|
||||
elif ext in _VIDEO_EXTS:
|
||||
coro = adapter.send_video(chat_id=chat_id, video_path=media_path, metadata=metadata)
|
||||
@@ -319,27 +352,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
from tools.send_message_tool import _send_to_platform
|
||||
from gateway.config import load_gateway_config, Platform
|
||||
|
||||
platform_map = {
|
||||
"telegram": Platform.TELEGRAM,
|
||||
"discord": Platform.DISCORD,
|
||||
"slack": Platform.SLACK,
|
||||
"whatsapp": Platform.WHATSAPP,
|
||||
"signal": Platform.SIGNAL,
|
||||
"matrix": Platform.MATRIX,
|
||||
"mattermost": Platform.MATTERMOST,
|
||||
"homeassistant": Platform.HOMEASSISTANT,
|
||||
"dingtalk": Platform.DINGTALK,
|
||||
"feishu": Platform.FEISHU,
|
||||
"wecom": Platform.WECOM,
|
||||
"wecom_callback": Platform.WECOM_CALLBACK,
|
||||
"weixin": Platform.WEIXIN,
|
||||
"email": Platform.EMAIL,
|
||||
"sms": Platform.SMS,
|
||||
"bluebubbles": Platform.BLUEBUBBLES,
|
||||
"qqbot": Platform.QQBOT,
|
||||
"yuanbao": Platform.YUANBAO,
|
||||
}
|
||||
|
||||
# Optionally wrap the content with a header/footer so the user knows this
|
||||
# is a cron delivery. Wrapping is on by default; set cron.wrap_response: false
|
||||
# in config.yaml for clean output.
|
||||
@@ -396,13 +408,23 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
job["id"], platform_name, chat_id, thread_id,
|
||||
)
|
||||
|
||||
platform = platform_map.get(platform_name.lower())
|
||||
if not platform:
|
||||
# Built-in names resolve to their enum member; plugin platform names
|
||||
# create dynamic members via Platform._missing_().
|
||||
try:
|
||||
platform = Platform(platform_name.lower())
|
||||
except (ValueError, KeyError):
|
||||
msg = f"unknown platform '{platform_name}'"
|
||||
logger.warning("Job '%s': %s", job["id"], msg)
|
||||
delivery_errors.append(msg)
|
||||
continue
|
||||
|
||||
pconfig = config.platforms.get(platform)
|
||||
if not pconfig or not pconfig.enabled:
|
||||
msg = f"platform '{platform_name}' not configured/enabled"
|
||||
logger.warning("Job '%s': %s", job["id"], msg)
|
||||
delivery_errors.append(msg)
|
||||
continue
|
||||
|
||||
# Prefer the live adapter when the gateway is running — this supports E2EE
|
||||
# rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt.
|
||||
runtime_adapter = (adapters or {}).get(platform)
|
||||
@@ -433,7 +455,15 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
|
||||
# Send extracted media files as native attachments via the live adapter
|
||||
if adapter_ok and media_files:
|
||||
_send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job)
|
||||
_send_media_via_adapter(
|
||||
runtime_adapter,
|
||||
chat_id,
|
||||
media_files,
|
||||
send_metadata,
|
||||
loop,
|
||||
job,
|
||||
platform=platform,
|
||||
)
|
||||
|
||||
if adapter_ok:
|
||||
logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id)
|
||||
@@ -445,13 +475,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
)
|
||||
|
||||
if not delivered:
|
||||
pconfig = config.platforms.get(platform)
|
||||
if not pconfig or not pconfig.enabled:
|
||||
msg = f"platform '{platform_name}' not configured/enabled"
|
||||
logger.warning("Job '%s': %s", job["id"], msg)
|
||||
delivery_errors.append(msg)
|
||||
continue
|
||||
|
||||
# Standalone path: run the async send in a fresh event loop (safe from any thread)
|
||||
coro = _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)
|
||||
try:
|
||||
@@ -838,6 +861,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
chat_id=str(origin["chat_id"]) if origin else "",
|
||||
chat_name=origin.get("chat_name", "") if origin else "",
|
||||
)
|
||||
_cron_delivery_vars = (
|
||||
"HERMES_CRON_AUTO_DELIVER_PLATFORM",
|
||||
"HERMES_CRON_AUTO_DELIVER_CHAT_ID",
|
||||
"HERMES_CRON_AUTO_DELIVER_THREAD_ID",
|
||||
)
|
||||
for _var_name in _cron_delivery_vars:
|
||||
_VAR_MAP[_var_name].set("")
|
||||
|
||||
# Per-job working directory. When set (and validated at create/update
|
||||
# time), we point TERMINAL_CWD at it so:
|
||||
@@ -876,8 +906,11 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
if delivery_target:
|
||||
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"])
|
||||
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"]))
|
||||
if delivery_target.get("thread_id") is not None:
|
||||
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"]))
|
||||
_VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(
|
||||
""
|
||||
if delivery_target.get("thread_id") is None
|
||||
else str(delivery_target["thread_id"])
|
||||
)
|
||||
|
||||
model = job.get("model") or os.getenv("HERMES_MODEL") or ""
|
||||
|
||||
@@ -1011,10 +1044,12 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
|
||||
disabled_toolsets=["cronjob", "messaging", "clarify"],
|
||||
quiet_mode=True,
|
||||
# When a workdir is configured, inject AGENTS.md / CLAUDE.md /
|
||||
# .cursorrules from that directory; otherwise preserve the old
|
||||
# behaviour (don't inject SOUL.md/AGENTS.md from the scheduler cwd).
|
||||
# Cron jobs should always inherit the user's SOUL.md identity from
|
||||
# HERMES_HOME. When a workdir is configured, also inject project
|
||||
# context files (AGENTS.md / CLAUDE.md / .cursorrules) from there.
|
||||
# Without a workdir, keep cwd context discovery disabled.
|
||||
skip_context_files=not bool(_job_workdir),
|
||||
load_soul_identity=True,
|
||||
skip_memory=True, # Cron system prompts would corrupt user representations
|
||||
platform="cron",
|
||||
session_id=_cron_session_id,
|
||||
@@ -1029,7 +1064,18 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
#
|
||||
# Uses the agent's built-in activity tracker (updated by
|
||||
# _touch_activity() on every tool call, API call, and stream delta).
|
||||
_cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
|
||||
_raw_cron_timeout = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
|
||||
if _raw_cron_timeout:
|
||||
try:
|
||||
_cron_timeout = float(_raw_cron_timeout)
|
||||
except (ValueError, TypeError):
|
||||
logger.warning(
|
||||
"Invalid HERMES_CRON_TIMEOUT=%r; using default 600s",
|
||||
_raw_cron_timeout,
|
||||
)
|
||||
_cron_timeout = 600.0
|
||||
else:
|
||||
_cron_timeout = 600.0
|
||||
_cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
|
||||
_POLL_INTERVAL = 5.0
|
||||
_cron_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
|
||||
@@ -1104,6 +1150,21 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
f"agent.run_conversation returned {type(result).__name__} instead of dict: {result!r}"
|
||||
)
|
||||
|
||||
# If the agent itself reported failure (e.g. all retries exhausted on
|
||||
# API errors, model abort, mid-run interrupt), do not silently mark the
|
||||
# job as successful. run_agent populates `failed=True`/`completed=False`
|
||||
# on these paths and may put the error into `final_response`, which
|
||||
# would otherwise be delivered as if it were the agent's reply and the
|
||||
# job's `last_status` set to "ok". Raise so the except handler below
|
||||
# builds the proper failure tuple. (issue #17855)
|
||||
if result.get("failed") is True or result.get("completed") is False:
|
||||
_err_text = (
|
||||
result.get("error")
|
||||
or (result.get("final_response") or "").strip()
|
||||
or "agent reported failure"
|
||||
)
|
||||
raise RuntimeError(_err_text)
|
||||
|
||||
final_response = result.get("final_response", "") or ""
|
||||
# Strip leaked placeholder text that upstream may inject on empty completions.
|
||||
if final_response.strip() == "(No response generated)":
|
||||
@@ -1163,6 +1224,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
os.environ["TERMINAL_CWD"] = _prior_terminal_cwd
|
||||
# Clean up ContextVar session/delivery state for this job.
|
||||
clear_session_vars(_ctx_tokens)
|
||||
for _var_name in _cron_delivery_vars:
|
||||
_VAR_MAP[_var_name].set("")
|
||||
if _session_db:
|
||||
try:
|
||||
_session_db.end_session(_cron_session_id, "cron_complete")
|
||||
|
||||
@@ -34,6 +34,13 @@ services:
|
||||
# uncomment BOTH lines (API_SERVER_KEY is mandatory for auth):
|
||||
# - API_SERVER_HOST=0.0.0.0
|
||||
# - API_SERVER_KEY=${API_SERVER_KEY}
|
||||
# Microsoft Teams — uncomment and fill in to enable Teams gateway.
|
||||
# Register your bot at https://dev.botframework.com/ to get these values.
|
||||
# - TEAMS_CLIENT_ID=${TEAMS_CLIENT_ID}
|
||||
# - TEAMS_CLIENT_SECRET=${TEAMS_CLIENT_SECRET}
|
||||
# - TEAMS_TENANT_ID=${TEAMS_TENANT_ID}
|
||||
# - TEAMS_ALLOWED_USERS=${TEAMS_ALLOWED_USERS}
|
||||
# - TEAMS_PORT=3978
|
||||
command: ["gateway", "run"]
|
||||
|
||||
dashboard:
|
||||
|
||||
@@ -1,85 +0,0 @@
|
||||
"""Built-in boot-md hook — run ~/.hermes/BOOT.md on gateway startup.
|
||||
|
||||
This hook is always registered. It silently skips if no BOOT.md exists.
|
||||
To activate, create ``~/.hermes/BOOT.md`` with instructions for the
|
||||
agent to execute on every gateway restart.
|
||||
|
||||
Example BOOT.md::
|
||||
|
||||
# Startup Checklist
|
||||
|
||||
1. Check if any cron jobs failed overnight
|
||||
2. Send a status update to Discord #general
|
||||
3. If there are errors in /opt/app/deploy.log, summarize them
|
||||
|
||||
The agent runs in a background thread so it doesn't block gateway
|
||||
startup. If nothing needs attention, it replies with [SILENT] to
|
||||
suppress delivery.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import threading
|
||||
|
||||
logger = logging.getLogger("hooks.boot-md")
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
HERMES_HOME = get_hermes_home()
|
||||
BOOT_FILE = HERMES_HOME / "BOOT.md"
|
||||
|
||||
|
||||
def _build_boot_prompt(content: str) -> str:
|
||||
"""Wrap BOOT.md content in a system-level instruction."""
|
||||
return (
|
||||
"You are running a startup boot checklist. Follow the BOOT.md "
|
||||
"instructions below exactly.\n\n"
|
||||
"---\n"
|
||||
f"{content}\n"
|
||||
"---\n\n"
|
||||
"Execute each instruction. If you need to send a message to a "
|
||||
"platform, use the send_message tool.\n"
|
||||
"If nothing needs attention and there is nothing to report, "
|
||||
"reply with ONLY: [SILENT]"
|
||||
)
|
||||
|
||||
|
||||
def _run_boot_agent(content: str) -> None:
    """Execute the boot instructions in a one-shot agent session and log the result.

    The agent is imported lazily inside the ``try`` so that an import failure
    is handled like any other error: it is logged and never raised to the
    caller.
    """
    try:
        from run_agent import AIAgent

        boot_prompt = _build_boot_prompt(content)
        boot_agent = AIAgent(
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
            max_iterations=20,
        )
        outcome = boot_agent.run_conversation(boot_prompt)
        reply = outcome.get("final_response", "")

        # An empty reply or one containing the [SILENT] marker means there is
        # nothing worth surfacing in the log beyond completion.
        if not reply or "[SILENT]" in reply:
            logger.info("boot-md completed (nothing to report)")
        else:
            logger.info("boot-md completed: %s", reply[:200])
    except Exception as e:
        logger.error("boot-md agent failed: %s", e)
|
||||
|
||||
|
||||
async def handle(event_type: str, context: dict) -> None:
    """Gateway startup handler: run BOOT.md when it exists and is non-empty.

    ``event_type`` and ``context`` are accepted for the hook signature but are
    not read here. The agent run is handed to a daemon thread so this
    coroutine returns without waiting for it.
    """
    boot_text = (
        BOOT_FILE.read_text(encoding="utf-8").strip() if BOOT_FILE.exists() else ""
    )
    if not boot_text:
        # Missing or blank BOOT.md: silently skip.
        return

    logger.info("Running BOOT.md (%d chars)", len(boot_text))

    # Run in a background thread so we don't block gateway startup.
    threading.Thread(
        target=_run_boot_agent,
        args=(boot_text,),
        name="boot-md",
        daemon=True,
    ).start()
|
||||
@@ -86,6 +86,16 @@ async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
|
||||
continue
|
||||
platforms[plat_name] = _build_from_sessions(plat_name)
|
||||
|
||||
# Include plugin-registered platforms (dynamic enum members aren't in
|
||||
# Platform.__members__, so the loop above misses them).
|
||||
try:
|
||||
from gateway.platform_registry import platform_registry
|
||||
for entry in platform_registry.plugin_entries():
|
||||
if entry.name not in _SKIP_SESSION_DISCOVERY and entry.name not in platforms:
|
||||
platforms[entry.name] = _build_from_sessions(entry.name)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
directory = {
|
||||
"updated_at": datetime.now().isoformat(),
|
||||
"platforms": platforms,
|
||||
|
||||
+193
-60
@@ -13,7 +13,7 @@ import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Optional, Any
|
||||
from typing import Dict, List, Optional, Any, Callable
|
||||
from enum import Enum
|
||||
|
||||
from hermes_cli.config import get_hermes_home
|
||||
@@ -45,8 +45,19 @@ def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> st
|
||||
return default
|
||||
|
||||
|
||||
# Module-level cache for bundled platform plugin names (lives outside the
|
||||
# enum so it doesn't become an accidental enum member).
|
||||
_Platform__bundled_plugin_names: Optional[set] = None
|
||||
|
||||
|
||||
class Platform(Enum):
|
||||
"""Supported messaging platforms."""
|
||||
"""Supported messaging platforms.
|
||||
|
||||
Built-in platforms have explicit members. Plugin platforms use dynamic
|
||||
members created on-demand by ``_missing_()`` so that
|
||||
``Platform("irc")`` works without modifying this enum. Dynamic members
|
||||
are cached in ``_value2member_map_`` for identity-stable comparisons.
|
||||
"""
|
||||
LOCAL = "local"
|
||||
TELEGRAM = "telegram"
|
||||
DISCORD = "discord"
|
||||
@@ -68,6 +79,76 @@ class Platform(Enum):
|
||||
BLUEBUBBLES = "bluebubbles"
|
||||
QQBOT = "qqbot"
|
||||
YUANBAO = "yuanbao"
|
||||
@classmethod
def _missing_(cls, value):
    """Resolve an unknown platform value to a dynamic pseudo-member, or ``None``.

    Only non-blank strings are considered; the value is stripped and
    lower-cased before lookup. A pseudo-member is created when the normalized
    name is either a bundled plugin platform (found by
    ``_scan_bundled_plugin_platforms``) or reported as registered by
    ``platform_registry``. Created members are stored in
    ``_value2member_map_`` and ``_member_map_`` so repeated lookups return the
    same object. Any other value yields ``None``.
    """
    global _Platform__bundled_plugin_names

    if not isinstance(value, str):
        return None
    key = value.strip().lower()
    if not key:
        return None

    # Another lookup may already have created the member.
    if key in cls._value2member_map_:
        return cls._value2member_map_[key]

    def _make_member(name):
        # Build the pseudo-member by hand and record it in both enum maps.
        member = object.__new__(cls)
        member._value_ = name
        member._name_ = name.upper().replace("-", "_").replace(" ", "_")
        cls._value2member_map_[name] = member
        cls._member_map_[member._name_] = member
        return member

    # Bundled plugin platforms: the filesystem scan is done once and cached
    # at module level.
    if _Platform__bundled_plugin_names is None:
        _Platform__bundled_plugin_names = cls._scan_bundled_plugin_platforms()
    if key in _Platform__bundled_plugin_names:
        return _make_member(key)

    # Runtime-registered plugins. Any failure here (import error, registry
    # error, member creation error) is swallowed and treated as "unknown".
    try:
        from gateway.platform_registry import platform_registry

        if platform_registry.is_registered(key):
            return _make_member(key)
    except Exception:
        pass

    return None
|
||||
|
||||
@classmethod
|
||||
def _scan_bundled_plugin_platforms(cls) -> set:
|
||||
"""Return names of bundled platform plugins under ``plugins/platforms/``."""
|
||||
names: set = set()
|
||||
try:
|
||||
platforms_dir = Path(__file__).parent.parent / "plugins" / "platforms"
|
||||
if platforms_dir.is_dir():
|
||||
for child in platforms_dir.iterdir():
|
||||
if (
|
||||
child.is_dir()
|
||||
and (child / "__init__.py").exists()
|
||||
and (
|
||||
(child / "plugin.yaml").exists()
|
||||
or (child / "plugin.yml").exists()
|
||||
)
|
||||
):
|
||||
names.add(child.name.lower())
|
||||
except Exception:
|
||||
pass
|
||||
return names
|
||||
|
||||
|
||||
# Snapshot of built-in platform values before any dynamic _missing_ lookups.
|
||||
# Used to distinguish real platforms from arbitrary strings.
|
||||
_BUILTIN_PLATFORM_VALUES = frozenset(m.value for m in Platform.__members__.values())
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -231,6 +312,44 @@ class StreamingConfig:
|
||||
)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Built-in platform connection checkers
|
||||
# -----------------------------------------------------------------------------
|
||||
# Each callable receives a ``PlatformConfig`` and returns ``True`` when the
|
||||
# platform is sufficiently configured to be considered "connected". Platforms
|
||||
# that rely on the generic ``token or api_key`` check (Telegram, Discord,
|
||||
# Slack, Matrix, Mattermost, HomeAssistant) do not need an entry here.
|
||||
def _extra_has_all(*keys):
    """Build a checker that is True when every named ``cfg.extra`` key is truthy."""
    return lambda cfg: all(cfg.extra.get(key) for key in keys)


def _extra_has_any(*keys):
    """Build a checker that is True when at least one named ``cfg.extra`` key is truthy."""
    return lambda cfg: any(cfg.extra.get(key) for key in keys)


def _always_connected(cfg):
    """Checker for platforms that count as connected whenever they are enabled."""
    return True


# Maps a platform to a callable that takes its ``PlatformConfig`` and returns
# whether the platform is configured well enough to count as connected.
_PLATFORM_CONNECTED_CHECKERS: dict[Platform, Callable[[PlatformConfig], bool]] = {
    Platform.WEIXIN: lambda cfg: bool(
        cfg.extra.get("account_id") and (cfg.token or cfg.extra.get("token"))
    ),
    Platform.WHATSAPP: _always_connected,  # bridge handles auth
    Platform.SIGNAL: _extra_has_all("http_url"),
    Platform.EMAIL: _extra_has_all("address"),
    Platform.SMS: lambda cfg: bool(os.getenv("TWILIO_ACCOUNT_SID")),
    Platform.API_SERVER: _always_connected,
    Platform.WEBHOOK: _always_connected,
    Platform.FEISHU: _extra_has_all("app_id"),
    Platform.WECOM: _extra_has_all("bot_id"),
    Platform.WECOM_CALLBACK: _extra_has_any("corp_id", "apps"),
    Platform.BLUEBUBBLES: _extra_has_all("server_url", "password"),
    Platform.QQBOT: _extra_has_all("app_id", "client_secret"),
    Platform.YUANBAO: _extra_has_all("app_id", "app_secret"),
    # DingTalk credentials may come from config.extra or from the environment.
    Platform.DINGTALK: lambda cfg: bool(
        (cfg.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID"))
        and (cfg.extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET"))
    ),
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class GatewayConfig:
|
||||
"""
|
||||
@@ -284,61 +403,43 @@ class GatewayConfig:
|
||||
for platform, config in self.platforms.items():
|
||||
if not config.enabled:
|
||||
continue
|
||||
# Weixin requires both a token and an account_id
|
||||
if platform == Platform.WEIXIN:
|
||||
if config.extra.get("account_id") and (config.token or config.extra.get("token")):
|
||||
connected.append(platform)
|
||||
continue
|
||||
# Platforms that use token/api_key auth
|
||||
if config.token or config.api_key:
|
||||
if self._is_platform_connected(platform, config):
|
||||
connected.append(platform)
|
||||
# WhatsApp uses enabled flag only (bridge handles auth)
|
||||
elif platform == Platform.WHATSAPP:
|
||||
connected.append(platform)
|
||||
# Signal uses extra dict for config (http_url + account)
|
||||
elif platform == Platform.SIGNAL and config.extra.get("http_url"):
|
||||
connected.append(platform)
|
||||
# Email uses extra dict for config (address + imap_host + smtp_host)
|
||||
elif platform == Platform.EMAIL and config.extra.get("address"):
|
||||
connected.append(platform)
|
||||
# SMS uses api_key (Twilio auth token) — SID checked via env
|
||||
elif platform == Platform.SMS and os.getenv("TWILIO_ACCOUNT_SID"):
|
||||
connected.append(platform)
|
||||
# API Server uses enabled flag only (no token needed)
|
||||
elif platform == Platform.API_SERVER:
|
||||
connected.append(platform)
|
||||
# Webhook uses enabled flag only (secrets are per-route)
|
||||
elif platform == Platform.WEBHOOK:
|
||||
connected.append(platform)
|
||||
# Feishu uses extra dict for app credentials
|
||||
elif platform == Platform.FEISHU and config.extra.get("app_id"):
|
||||
connected.append(platform)
|
||||
# WeCom bot mode uses extra dict for bot credentials
|
||||
elif platform == Platform.WECOM and config.extra.get("bot_id"):
|
||||
connected.append(platform)
|
||||
# WeCom callback mode uses corp_id or apps list
|
||||
elif platform == Platform.WECOM_CALLBACK and (
|
||||
config.extra.get("corp_id") or config.extra.get("apps")
|
||||
):
|
||||
connected.append(platform)
|
||||
# BlueBubbles uses extra dict for local server config
|
||||
elif platform == Platform.BLUEBUBBLES and config.extra.get("server_url") and config.extra.get("password"):
|
||||
connected.append(platform)
|
||||
# QQBot uses extra dict for app credentials
|
||||
elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"):
|
||||
connected.append(platform)
|
||||
# Yuanbao uses extra dict for app credentials
|
||||
elif platform == Platform.YUANBAO and config.extra.get("app_id") and config.extra.get("app_secret"):
|
||||
connected.append(platform)
|
||||
# DingTalk uses client_id/client_secret from config.extra or env vars
|
||||
elif platform == Platform.DINGTALK and (
|
||||
config.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID")
|
||||
) and (
|
||||
config.extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET")
|
||||
):
|
||||
connected.append(platform)
|
||||
|
||||
return connected
|
||||
|
||||
def _is_platform_connected(self, platform: Platform, config: PlatformConfig) -> bool:
    """Decide whether one platform is configured well enough to count as connected.

    Checks run in this order: the Weixin special case, the generic
    token/api_key test, the built-in checker table, and finally the plugin
    registry. Errors from the registry lookup are swallowed and result in
    ``False``.
    """
    # Weixin is handled before the generic token test so a token alone is not
    # enough: an account_id is required as well.
    if platform == Platform.WEIXIN:
        if not config.extra.get("account_id"):
            return False
        return bool(config.token or config.extra.get("token"))

    # Generic token/api_key auth covers Telegram, Discord, Slack, etc.
    if config.token or config.api_key:
        return True

    # Built-in platform-specific check.
    builtin_check = _PLATFORM_CONNECTED_CHECKERS.get(platform)
    if builtin_check is not None:
        return builtin_check(config)

    # Plugin-registered platforms: prefer is_connected, then validate_config;
    # an entry that provides neither counts as connected.
    try:
        from gateway.platform_registry import platform_registry

        entry = platform_registry.get(platform.value)
        if entry:
            probe = entry.is_connected
            if probe is None:
                probe = entry.validate_config
            return True if probe is None else probe(config)
    except Exception:
        pass  # Registry not yet initialised during early import

    return False
|
||||
|
||||
def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
|
||||
"""Get the home channel for a platform."""
|
||||
@@ -714,11 +815,21 @@ def load_gateway_config() -> GatewayConfig:
|
||||
os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
|
||||
if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
|
||||
os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
|
||||
if "group_allowed_chats" in telegram_cfg and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
|
||||
gac = telegram_cfg["group_allowed_chats"]
|
||||
if isinstance(gac, list):
|
||||
gac = ",".join(str(v) for v in gac)
|
||||
os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(gac)
|
||||
allowed_users = telegram_cfg.get("allow_from")
|
||||
if allowed_users is not None and not os.getenv("TELEGRAM_ALLOWED_USERS"):
|
||||
if isinstance(allowed_users, list):
|
||||
allowed_users = ",".join(str(v) for v in allowed_users)
|
||||
os.environ["TELEGRAM_ALLOWED_USERS"] = str(allowed_users)
|
||||
group_allowed_users = telegram_cfg.get("group_allow_from")
|
||||
if group_allowed_users is not None and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
|
||||
if isinstance(group_allowed_users, list):
|
||||
group_allowed_users = ",".join(str(v) for v in group_allowed_users)
|
||||
os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(group_allowed_users)
|
||||
group_allowed_chats = telegram_cfg.get("group_allowed_chats")
|
||||
if group_allowed_chats is not None and not os.getenv("TELEGRAM_GROUP_ALLOWED_CHATS"):
|
||||
if isinstance(group_allowed_chats, list):
|
||||
group_allowed_chats = ",".join(str(v) for v in group_allowed_chats)
|
||||
os.environ["TELEGRAM_GROUP_ALLOWED_CHATS"] = str(group_allowed_chats)
|
||||
if "disable_link_previews" in telegram_cfg:
|
||||
plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
|
||||
if not isinstance(plat_data, dict):
|
||||
@@ -1371,3 +1482,25 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
config.default_reset_policy.at_hour = int(reset_hour)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
# Registry-driven enable for plugin platforms. Built-ins have explicit
|
||||
# blocks above; plugins expose check_fn() which is the single source of
|
||||
# truth for "are my env vars set?". When it returns True, ensure the
|
||||
# platform is enabled so start() will create its adapter.
|
||||
try:
|
||||
from hermes_cli.plugins import discover_plugins
|
||||
discover_plugins() # idempotent
|
||||
from gateway.platform_registry import platform_registry
|
||||
for entry in platform_registry.plugin_entries():
|
||||
try:
|
||||
if not entry.check_fn():
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.debug("check_fn for %s raised: %s", entry.name, e)
|
||||
continue
|
||||
platform = Platform(entry.name)
|
||||
if platform not in config.platforms:
|
||||
config.platforms[platform] = PlatformConfig()
|
||||
config.platforms[platform].enabled = True
|
||||
except Exception as e:
|
||||
logger.debug("Plugin platform enable pass failed: %s", e)
|
||||
|
||||
+22
-15
@@ -21,6 +21,7 @@ Errors in hooks are caught and logged but never block the main pipeline.
|
||||
|
||||
import asyncio
|
||||
import importlib.util
|
||||
import sys
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
import yaml
|
||||
@@ -52,19 +53,13 @@ class HookRegistry:
|
||||
return list(self._loaded_hooks)
|
||||
|
||||
def _register_builtin_hooks(self) -> None:
|
||||
"""Register built-in hooks that are always active."""
|
||||
try:
|
||||
from gateway.builtin_hooks.boot_md import handle as boot_md_handle
|
||||
"""Register built-in hooks that are always active.
|
||||
|
||||
self._handlers.setdefault("gateway:startup", []).append(boot_md_handle)
|
||||
self._loaded_hooks.append({
|
||||
"name": "boot-md",
|
||||
"description": "Run ~/.hermes/BOOT.md on gateway startup",
|
||||
"events": ["gateway:startup"],
|
||||
"path": "(builtin)",
|
||||
})
|
||||
except Exception as e:
|
||||
print(f"[hooks] Could not load built-in boot-md hook: {e}", flush=True)
|
||||
Currently empty — no shipped built-in hooks. Kept as the extension
|
||||
point for future always-on gateway hooks so they drop in without
|
||||
re-plumbing discover_and_load().
|
||||
"""
|
||||
return
|
||||
|
||||
def discover_and_load(self) -> None:
|
||||
"""
|
||||
@@ -103,16 +98,28 @@ class HookRegistry:
|
||||
print(f"[hooks] Skipping {hook_name}: no events declared", flush=True)
|
||||
continue
|
||||
|
||||
# Dynamically load the handler module
|
||||
# Dynamically load the handler module.
|
||||
# Register in sys.modules BEFORE exec_module so Pydantic /
|
||||
# dataclasses / typing introspection can resolve forward
|
||||
# references (triggered by `from __future__ import annotations`
|
||||
# in the handler). Without this, a handler that declares a
|
||||
# Pydantic BaseModel for webhook/event payloads fails at first
|
||||
# dispatch with "TypeAdapter ... is not fully defined".
|
||||
module_name = f"hermes_hook_{hook_name}"
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
f"hermes_hook_{hook_name}", handler_path
|
||||
module_name, handler_path
|
||||
)
|
||||
if spec is None or spec.loader is None:
|
||||
print(f"[hooks] Skipping {hook_name}: could not load handler.py", flush=True)
|
||||
continue
|
||||
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(module)
|
||||
sys.modules[module_name] = module
|
||||
try:
|
||||
spec.loader.exec_module(module)
|
||||
except Exception:
|
||||
sys.modules.pop(module_name, None)
|
||||
raise
|
||||
|
||||
handle_fn = getattr(module, "handle", None)
|
||||
if handle_fn is None:
|
||||
|
||||
+2
-1
@@ -28,6 +28,7 @@ from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from hermes_constants import get_hermes_dir
|
||||
from utils import atomic_replace
|
||||
|
||||
|
||||
# Unambiguous alphabet -- excludes 0/O, 1/I to prevent confusion
|
||||
@@ -59,7 +60,7 @@ def _secure_write(path: Path, data: str) -> None:
|
||||
f.write(data)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_path, str(path))
|
||||
atomic_replace(tmp_path, path)
|
||||
try:
|
||||
os.chmod(path, 0o600)
|
||||
except OSError:
|
||||
|
||||
@@ -0,0 +1,212 @@
|
||||
"""
|
||||
Platform Adapter Registry
|
||||
|
||||
Allows platform adapters (built-in and plugin) to self-register so the gateway
|
||||
can discover and instantiate them without hardcoded if/elif chains.
|
||||
|
||||
Built-in adapters continue to use the existing if/elif in _create_adapter()
|
||||
for now. Plugin adapters register here via PluginContext.register_platform()
|
||||
and are looked up first -- if nothing is found the gateway falls through to
|
||||
the legacy code path.
|
||||
|
||||
Usage (plugin side):
|
||||
|
||||
from gateway.platform_registry import platform_registry, PlatformEntry
|
||||
|
||||
platform_registry.register(PlatformEntry(
|
||||
name="irc",
|
||||
label="IRC",
|
||||
adapter_factory=lambda cfg: IRCAdapter(cfg),
|
||||
check_fn=check_requirements,
|
||||
validate_config=lambda cfg: bool(cfg.extra.get("server")),
|
||||
required_env=["IRC_SERVER"],
|
||||
install_hint="pip install irc",
|
||||
))
|
||||
|
||||
Usage (gateway side):
|
||||
|
||||
adapter = platform_registry.create_adapter("irc", platform_config)
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class PlatformEntry:
    """Describe one platform adapter: how to build it and how to present it.

    The first four fields are required and positional; every other field
    has a default so callers only need to spell out what differs.
    """

    # Platform identifier as written in config.yaml (e.g. "irc", "viber").
    name: str

    # Display label for humans (e.g. "IRC", "Viber").
    label: str

    # Callable that takes a PlatformConfig and returns an adapter instance.
    # A factory (rather than a bare class) leaves room for custom init such
    # as extra kwargs or wrapping construction in try/except.
    adapter_factory: Callable[[Any], Any]

    # Zero-argument probe: True when the platform's dependencies are present.
    check_fn: Callable[[], bool]

    # Optional predicate over a PlatformConfig: is it properly configured?
    # When None, config validation is skipped and the adapter is left to
    # fail at connect() time with a descriptive error.
    validate_config: Optional[Callable[[Any], bool]] = None

    # Optional predicate over a PlatformConfig: is the platform
    # connected/enabled?  Used by ``GatewayConfig.get_connected_platforms()``
    # and the setup UI status.  When None, falls back to ``validate_config``
    # or ``check_fn``.
    is_connected: Optional[Callable[[Any], bool]] = None

    # Environment variables the platform needs (shown by ``hermes setup``).
    required_env: list[str] = field(default_factory=list)

    # Hint shown when check_fn returns False.
    install_hint: str = ""

    # Optional interactive setup function, signature ``() -> None``
    # (prompts the user, saves env vars).  When None, falls back to
    # _setup_standard_platform (needs token_var + vars) or a generic
    # "set these env vars" display.
    setup_fn: Optional[Callable[[], None]] = None

    # Origin of the entry: "builtin" or "plugin".
    source: str = "plugin"

    # Name of the plugin manifest that registered this entry (empty for
    # built-ins).  Used by ``hermes gateway setup`` to auto-enable the
    # owning plugin when the user configures its platform.
    plugin_name: str = ""

    # ── Auth env var names (for _is_user_authorized integration) ──
    # E.g. "IRC_ALLOWED_USERS" — checked for comma-separated user IDs.
    allowed_users_env: str = ""
    # E.g. "IRC_ALLOW_ALL_USERS" — if truthy, all users are authorized.
    allow_all_env: str = ""

    # ── Message limits ──
    # Maximum message length for smart-chunking; 0 means no limit.
    max_message_length: int = 0

    # ── Privacy ──
    # When True, session descriptions redact PII (phone numbers, etc.).
    pii_safe: bool = False

    # ── Display ──
    # Emoji for CLI/gateway display (e.g. "💬").
    emoji: str = "🔌"

    # Whether this platform should appear in _UPDATE_ALLOWED_PLATFORMS
    # (i.e. whether the /update command is allowed from this platform).
    allow_update_command: bool = True

    # ── LLM guidance ──
    # Platform hint injected into the system prompt (e.g. "You are on IRC.
    # Do not use markdown.").  Empty string means no hint.
    platform_hint: str = ""
|
||||
|
||||
|
||||
class PlatformRegistry:
    """Central registry of platform adapters, keyed by platform name.

    Thread-safe for reads (dict lookups are atomic under GIL).
    Writes happen at startup during sequential discovery.
    """

    def __init__(self) -> None:
        # Maps PlatformEntry.name -> PlatformEntry.
        self._entries: dict[str, "PlatformEntry"] = {}

    def register(self, entry: "PlatformEntry") -> None:
        """Register a platform adapter entry.

        If an entry with the same name exists, it is replaced (last writer
        wins -- this lets plugins override built-in adapters if desired).
        The replacement is logged at INFO level with both sources.
        """
        if entry.name in self._entries:
            prev = self._entries[entry.name]
            logger.info(
                "Platform '%s' re-registered (was %s, now %s)",
                entry.name,
                prev.source,
                entry.source,
            )
        self._entries[entry.name] = entry
        logger.debug("Registered platform adapter: %s (%s)", entry.name, entry.source)

    def unregister(self, name: str) -> bool:
        """Remove a platform entry. Returns True if it existed."""
        return self._entries.pop(name, None) is not None

    def get(self, name: str) -> Optional["PlatformEntry"]:
        """Look up a platform entry by name; None when not registered."""
        return self._entries.get(name)

    def all_entries(self) -> list["PlatformEntry"]:
        """Return a new list holding every registered platform entry."""
        return list(self._entries.values())

    def plugin_entries(self) -> list["PlatformEntry"]:
        """Return only the entries whose ``source`` is ``"plugin"``."""
        return [e for e in self._entries.values() if e.source == "plugin"]

    def is_registered(self, name: str) -> bool:
        """Return True when an entry is registered under *name*."""
        return name in self._entries

    def create_adapter(self, name: str, config: Any) -> Optional[Any]:
        """Create an adapter instance for the given platform name.

        Never raises for plugin-side failures; every failure is logged and
        reported as ``None``.

        Returns None if:
        - No entry registered for *name*
        - check_fn() returns False (missing deps) or raises
        - validate_config() returns False (misconfigured) or raises
        - The factory raises an exception
        """
        entry = self._entries.get(name)
        if entry is None:
            return None

        # check_fn is plugin-supplied code, just like validate_config and
        # adapter_factory below.  Guard it the same way so a raising
        # dependency probe is reported as "unavailable" instead of escaping
        # to the caller.
        try:
            requirements_met = entry.check_fn()
        except Exception as e:
            logger.warning(
                "Platform '%s' requirements check error: %s",
                entry.label,
                e,
            )
            return None

        if not requirements_met:
            hint = f" ({entry.install_hint})" if entry.install_hint else ""
            logger.warning(
                "Platform '%s' requirements not met%s",
                entry.label,
                hint,
            )
            return None

        if entry.validate_config is not None:
            try:
                if not entry.validate_config(config):
                    logger.warning(
                        "Platform '%s' config validation failed",
                        entry.label,
                    )
                    return None
            except Exception as e:
                logger.warning(
                    "Platform '%s' config validation error: %s",
                    entry.label,
                    e,
                )
                return None

        try:
            adapter = entry.adapter_factory(config)
            return adapter
        except Exception as e:
            logger.error(
                "Failed to create adapter for platform '%s': %s",
                entry.label,
                e,
                exc_info=True,
            )
            return None


# Module-level singleton
platform_registry = PlatformRegistry()
|
||||
@@ -1,9 +1,30 @@
|
||||
# Adding a New Messaging Platform
|
||||
|
||||
Checklist for integrating a new messaging platform into the Hermes gateway.
|
||||
Use this as a reference when building a new adapter — every item here is a
|
||||
real integration point that exists in the codebase. Missing any of them will
|
||||
cause broken functionality, missing features, or inconsistent behavior.
|
||||
There are two ways to add a platform to the Hermes gateway:
|
||||
|
||||
## Plugin Path (Recommended for Community/Third-Party)
|
||||
|
||||
Create a plugin directory in `~/.hermes/plugins/` with a `PLUGIN.yaml` and
|
||||
`adapter.py`. The adapter inherits from `BasePlatformAdapter` and registers
|
||||
via `ctx.register_platform()` in the `register(ctx)` entry point. This
|
||||
requires **zero changes to core Hermes code**.
|
||||
|
||||
The plugin system automatically handles: adapter creation, config parsing,
|
||||
user authorization, cron delivery, send_message routing, system prompt hints,
|
||||
status display, gateway setup, and more.
|
||||
|
||||
See `plugins/platforms/irc/` for a complete reference implementation, and
|
||||
`website/docs/developer-guide/adding-platform-adapters.md` for the full
|
||||
plugin guide with code examples.
|
||||
|
||||
---
|
||||
|
||||
## Built-in Path (Core Contributors Only)
|
||||
|
||||
Checklist for integrating a platform directly into the Hermes core.
|
||||
Use this as a reference when building a built-in adapter — every item here
|
||||
is a real integration point. Missing any of them will cause broken
|
||||
functionality, missing features, or inconsistent behavior.
|
||||
|
||||
---
|
||||
|
||||
|
||||
+207
-47
@@ -7,7 +7,9 @@ Exposes an HTTP server with endpoints:
|
||||
- GET /v1/responses/{response_id} — Retrieve a stored response
|
||||
- DELETE /v1/responses/{response_id} — Delete a stored response
|
||||
- GET /v1/models — lists hermes-agent as an available model
|
||||
- GET /v1/capabilities — machine-readable API capabilities for external UIs
|
||||
- POST /v1/runs — start a run, returns run_id immediately (202)
|
||||
- GET /v1/runs/{run_id} — retrieve current run status
|
||||
- GET /v1/runs/{run_id}/events — SSE stream of structured lifecycle events
|
||||
- POST /v1/runs/{run_id}/stop — interrupt a running agent
|
||||
- GET /health — health check
|
||||
@@ -590,6 +592,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
# Active run agent/task references for stop support
|
||||
self._active_run_agents: Dict[str, Any] = {}
|
||||
self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
|
||||
# Pollable run status for dashboards and external control-plane UIs.
|
||||
self._run_statuses: Dict[str, Dict[str, Any]] = {}
|
||||
self._session_db: Optional[Any] = None # Lazy-init SessionDB for session continuity
|
||||
|
||||
@staticmethod
|
||||
@@ -808,6 +812,51 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
],
|
||||
})
|
||||
|
||||
async def _handle_capabilities(self, request: "web.Request") -> "web.Response":
    """GET /v1/capabilities — advertise the stable API surface.

    Responds with a JSON document listing the model name, whether bearer
    auth is required, the feature flags, and the method/path of each
    advertised endpoint, so external UIs and orchestrators can discover
    the contract instead of scraping docs or assuming every Hermes version
    exposes the same endpoints.

    Returns the auth error response unchanged when ``_check_auth`` rejects
    the request.
    """
    rejection = self._check_auth(request)
    if rejection:
        return rejection

    model_name = self._model_name
    auth_section = {
        "type": "bearer",
        # Auth is only enforced when an API key is configured.
        "required": bool(self._api_key),
    }
    feature_section = {
        "chat_completions": True,
        "chat_completions_streaming": True,
        "responses_api": True,
        "responses_streaming": True,
        "run_submission": True,
        "run_status": True,
        "run_events_sse": True,
        "run_stop": True,
        "tool_progress_events": True,
        "session_continuity_header": "X-Hermes-Session-Id",
        "cors": bool(self._cors_origins),
    }
    # (key, HTTP method, path) — expanded into the "endpoints" mapping below.
    route_table = (
        ("health", "GET", "/health"),
        ("health_detailed", "GET", "/health/detailed"),
        ("models", "GET", "/v1/models"),
        ("chat_completions", "POST", "/v1/chat/completions"),
        ("responses", "POST", "/v1/responses"),
        ("runs", "POST", "/v1/runs"),
        ("run_status", "GET", "/v1/runs/{run_id}"),
        ("run_events", "GET", "/v1/runs/{run_id}/events"),
        ("run_stop", "POST", "/v1/runs/{run_id}/stop"),
    )
    endpoint_section = {
        key: {"method": verb, "path": path}
        for key, verb, path in route_table
    }

    return web.json_response({
        "object": "hermes.api_server.capabilities",
        "platform": "hermes-agent",
        "model": model_name,
        "auth": auth_section,
        "features": feature_section,
        "endpoints": endpoint_section,
    })
|
||||
|
||||
async def _handle_chat_completions(self, request: "web.Request") -> "web.Response":
|
||||
"""POST /v1/chat/completions — OpenAI Chat Completions format."""
|
||||
auth_err = self._check_auth(request)
|
||||
@@ -932,39 +981,62 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if delta is not None:
|
||||
_stream_q.put(delta)
|
||||
|
||||
def _on_tool_progress(event_type, name, preview, args, **kwargs):
|
||||
"""Send tool progress as a separate SSE event.
|
||||
# Track which tool_call_ids we've emitted a "running" lifecycle
|
||||
# event for, so a "completed" event without a matching "running"
|
||||
# (e.g. internal/filtered tools) is silently dropped instead of
|
||||
# producing an orphaned event clients can't correlate.
|
||||
_started_tool_call_ids: set[str] = set()
|
||||
|
||||
Previously, progress markers like ``⏰ list`` were injected
|
||||
directly into ``delta.content``. OpenAI-compatible frontends
|
||||
(Open WebUI, LobeChat, …) store ``delta.content`` verbatim as
|
||||
the assistant message and send it back on subsequent requests.
|
||||
After enough turns the model learns to *emit* the markers as
|
||||
plain text instead of issuing real tool calls — silently
|
||||
hallucinating tool results. See #6972.
|
||||
def _on_tool_start(tool_call_id, function_name, function_args):
|
||||
"""Emit ``hermes.tool.progress`` with ``status: running``.
|
||||
|
||||
The fix: push a tagged tuple ``("__tool_progress__", payload)``
|
||||
onto the stream queue. The SSE writer emits it as a custom
|
||||
``event: hermes.tool.progress`` line that compliant frontends
|
||||
can render for UX but will *not* persist into conversation
|
||||
history. Clients that don't understand the custom event type
|
||||
silently ignore it per the SSE specification.
|
||||
Replaces the old ``tool_progress_callback("tool.started",
|
||||
...)`` emit so SSE consumers receive a single event per
|
||||
tool start, carrying both the legacy ``tool``/``emoji``/
|
||||
``label`` payload (for #6972 frontends) and the new
|
||||
``toolCallId``/``status`` correlation fields (#16588).
|
||||
|
||||
Skips tools whose names start with ``_`` so internal
|
||||
events (``_thinking``, …) stay off the wire — matching
|
||||
the prior ``_on_tool_progress`` filter exactly.
|
||||
"""
|
||||
if event_type != "tool.started":
|
||||
if not tool_call_id or function_name.startswith("_"):
|
||||
return
|
||||
if name.startswith("_"):
|
||||
return
|
||||
from agent.display import get_tool_emoji
|
||||
emoji = get_tool_emoji(name)
|
||||
label = preview or name
|
||||
_started_tool_call_ids.add(tool_call_id)
|
||||
from agent.display import build_tool_preview, get_tool_emoji
|
||||
label = build_tool_preview(function_name, function_args) or function_name
|
||||
_stream_q.put(("__tool_progress__", {
|
||||
"tool": name,
|
||||
"emoji": emoji,
|
||||
"tool": function_name,
|
||||
"emoji": get_tool_emoji(function_name),
|
||||
"label": label,
|
||||
"toolCallId": tool_call_id,
|
||||
"status": "running",
|
||||
}))
|
||||
|
||||
def _on_tool_complete(tool_call_id, function_name, function_args, function_result):
|
||||
"""Emit the matching ``status: completed`` event.
|
||||
|
||||
Dropped if the start was filtered (internal tool, missing
|
||||
id, or never seen) so clients never get an orphaned
|
||||
``completed`` they can't correlate to a prior ``running``.
|
||||
"""
|
||||
if not tool_call_id or tool_call_id not in _started_tool_call_ids:
|
||||
return
|
||||
_started_tool_call_ids.discard(tool_call_id)
|
||||
_stream_q.put(("__tool_progress__", {
|
||||
"tool": function_name,
|
||||
"toolCallId": tool_call_id,
|
||||
"status": "completed",
|
||||
}))
|
||||
|
||||
# Start agent in background. agent_ref is a mutable container
|
||||
# so the SSE writer can interrupt the agent on client disconnect.
|
||||
#
|
||||
# ``tool_progress_callback`` is intentionally not wired here:
|
||||
# it would duplicate every emit because ``run_agent`` fires it
|
||||
# side-by-side with ``tool_start_callback``/``tool_complete_callback``.
|
||||
# The structured callbacks are strictly richer (they carry the
|
||||
# tool_call id), so they own the chat-completions SSE channel.
|
||||
agent_ref = [None]
|
||||
agent_task = asyncio.ensure_future(self._run_agent(
|
||||
user_message=user_message,
|
||||
@@ -972,7 +1044,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
ephemeral_system_prompt=system_prompt,
|
||||
session_id=session_id,
|
||||
stream_delta_callback=_on_delta,
|
||||
tool_progress_callback=_on_tool_progress,
|
||||
tool_start_callback=_on_tool_start,
|
||||
tool_complete_callback=_on_tool_complete,
|
||||
agent_ref=agent_ref,
|
||||
))
|
||||
|
||||
@@ -1087,7 +1160,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
Tagged tuples ``("__tool_progress__", payload)`` are sent
|
||||
as a custom ``event: hermes.tool.progress`` SSE event so
|
||||
frontends can display them without storing the markers in
|
||||
conversation history. See #6972.
|
||||
conversation history. See #6972 for the original event,
|
||||
#16588 for the ``toolCallId``/``status`` lifecycle fields.
|
||||
"""
|
||||
if isinstance(item, tuple) and len(item) == 2 and item[0] == "__tool_progress__":
|
||||
event_data = json.dumps(item[1])
|
||||
@@ -2297,10 +2371,31 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
_MAX_CONCURRENT_RUNS = 10 # Prevent unbounded resource allocation
|
||||
_RUN_STREAM_TTL = 300 # seconds before orphaned runs are swept
|
||||
_RUN_STATUS_TTL = 3600 # seconds to retain terminal run status for polling
|
||||
|
||||
def _set_run_status(self, run_id: str, status: str, **fields: Any) -> Dict[str, Any]:
|
||||
"""Update pollable run status without exposing private agent objects."""
|
||||
now = time.time()
|
||||
current = self._run_statuses.get(run_id, {})
|
||||
current.update({
|
||||
"object": "hermes.run",
|
||||
"run_id": run_id,
|
||||
"status": status,
|
||||
"updated_at": now,
|
||||
})
|
||||
current.setdefault("created_at", fields.pop("created_at", now))
|
||||
current.update(fields)
|
||||
self._run_statuses[run_id] = current
|
||||
return current
|
||||
|
||||
def _make_run_event_callback(self, run_id: str, loop: "asyncio.AbstractEventLoop"):
|
||||
"""Return a tool_progress_callback that pushes structured events to the run's SSE queue."""
|
||||
def _push(event: Dict[str, Any]) -> None:
|
||||
self._set_run_status(
|
||||
run_id,
|
||||
self._run_statuses.get(run_id, {}).get("status", "running"),
|
||||
last_event=event.get("event"),
|
||||
)
|
||||
q = self._run_streams.get(run_id)
|
||||
if q is None:
|
||||
return
|
||||
@@ -2365,28 +2460,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if not user_message:
|
||||
return web.json_response(_openai_error("No user message found in input"), status=400)
|
||||
|
||||
run_id = f"run_{uuid.uuid4().hex}"
|
||||
loop = asyncio.get_running_loop()
|
||||
q: "asyncio.Queue[Optional[Dict]]" = asyncio.Queue()
|
||||
self._run_streams[run_id] = q
|
||||
self._run_streams_created[run_id] = time.time()
|
||||
|
||||
event_cb = self._make_run_event_callback(run_id, loop)
|
||||
|
||||
# Also wire stream_delta_callback so message.delta events flow through
|
||||
def _text_cb(delta: Optional[str]) -> None:
|
||||
if delta is None:
|
||||
return
|
||||
try:
|
||||
loop.call_soon_threadsafe(q.put_nowait, {
|
||||
"event": "message.delta",
|
||||
"run_id": run_id,
|
||||
"timestamp": time.time(),
|
||||
"delta": delta,
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
instructions = body.get("instructions")
|
||||
previous_response_id = body.get("previous_response_id")
|
||||
|
||||
@@ -2434,11 +2507,42 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
)
|
||||
conversation_history.append({"role": msg["role"], "content": str(content)})
|
||||
|
||||
run_id = f"run_{uuid.uuid4().hex}"
|
||||
session_id = body.get("session_id") or stored_session_id or run_id
|
||||
ephemeral_system_prompt = instructions
|
||||
loop = asyncio.get_running_loop()
|
||||
q: "asyncio.Queue[Optional[Dict]]" = asyncio.Queue()
|
||||
created_at = time.time()
|
||||
self._run_streams[run_id] = q
|
||||
self._run_streams_created[run_id] = created_at
|
||||
|
||||
event_cb = self._make_run_event_callback(run_id, loop)
|
||||
|
||||
# Also wire stream_delta_callback so message.delta events flow through.
|
||||
def _text_cb(delta: Optional[str]) -> None:
|
||||
if delta is None:
|
||||
return
|
||||
try:
|
||||
loop.call_soon_threadsafe(q.put_nowait, {
|
||||
"event": "message.delta",
|
||||
"run_id": run_id,
|
||||
"timestamp": time.time(),
|
||||
"delta": delta,
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
self._set_run_status(
|
||||
run_id,
|
||||
"queued",
|
||||
created_at=created_at,
|
||||
session_id=session_id,
|
||||
model=body.get("model", self._model_name),
|
||||
)
|
||||
|
||||
async def _run_and_close():
|
||||
try:
|
||||
self._set_run_status(run_id, "running")
|
||||
agent = self._create_agent(
|
||||
ephemeral_system_prompt=ephemeral_system_prompt,
|
||||
session_id=session_id,
|
||||
@@ -2468,8 +2572,36 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"output": final_response,
|
||||
"usage": usage,
|
||||
})
|
||||
self._set_run_status(
|
||||
run_id,
|
||||
"completed",
|
||||
output=final_response,
|
||||
usage=usage,
|
||||
last_event="run.completed",
|
||||
)
|
||||
except asyncio.CancelledError:
|
||||
self._set_run_status(
|
||||
run_id,
|
||||
"cancelled",
|
||||
last_event="run.cancelled",
|
||||
)
|
||||
try:
|
||||
q.put_nowait({
|
||||
"event": "run.cancelled",
|
||||
"run_id": run_id,
|
||||
"timestamp": time.time(),
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.exception("[api_server] run %s failed", run_id)
|
||||
self._set_run_status(
|
||||
run_id,
|
||||
"failed",
|
||||
error=str(exc),
|
||||
last_event="run.failed",
|
||||
)
|
||||
try:
|
||||
q.put_nowait({
|
||||
"event": "run.failed",
|
||||
@@ -2499,6 +2631,21 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
return web.json_response({"run_id": run_id, "status": "started"}, status=202)
|
||||
|
||||
async def _handle_get_run(self, request: "web.Request") -> "web.Response":
    """GET /v1/runs/{run_id} — return pollable run status for external UIs.

    Responds with the stored status record as JSON, with a 404
    ``run_not_found`` error when no record exists for the id, or with the
    auth error response when ``_check_auth`` rejects the request.
    """
    rejection = self._check_auth(request)
    if rejection:
        return rejection

    run_id = request.match_info["run_id"]
    record = self._run_statuses.get(run_id)
    if record is not None:
        return web.json_response(record)

    not_found = _openai_error(f"Run not found: {run_id}", code="run_not_found")
    return web.json_response(not_found, status=404)
|
||||
|
||||
async def _handle_run_events(self, request: "web.Request") -> "web.StreamResponse":
|
||||
"""GET /v1/runs/{run_id}/events — SSE stream of structured agent lifecycle events."""
|
||||
auth_err = self._check_auth(request)
|
||||
@@ -2561,6 +2708,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
if agent is None and task is None:
|
||||
return web.json_response(_openai_error(f"Run not found: {run_id}", code="run_not_found"), status=404)
|
||||
|
||||
self._set_run_status(run_id, "stopping", last_event="run.stopping")
|
||||
|
||||
if agent is not None:
|
||||
try:
|
||||
agent.interrupt("Stop requested via API")
|
||||
@@ -2603,6 +2752,15 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
self._active_run_agents.pop(run_id, None)
|
||||
self._active_run_tasks.pop(run_id, None)
|
||||
|
||||
stale_statuses = [
|
||||
run_id
|
||||
for run_id, status in list(self._run_statuses.items())
|
||||
if status.get("status") in {"completed", "failed", "cancelled"}
|
||||
and now - float(status.get("updated_at", 0) or 0) > self._RUN_STATUS_TTL
|
||||
]
|
||||
for run_id in stale_statuses:
|
||||
self._run_statuses.pop(run_id, None)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# BasePlatformAdapter interface
|
||||
# ------------------------------------------------------------------
|
||||
@@ -2621,6 +2779,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
self._app.router.add_get("/health/detailed", self._handle_health_detailed)
|
||||
self._app.router.add_get("/v1/health", self._handle_health)
|
||||
self._app.router.add_get("/v1/models", self._handle_models)
|
||||
self._app.router.add_get("/v1/capabilities", self._handle_capabilities)
|
||||
self._app.router.add_post("/v1/chat/completions", self._handle_chat_completions)
|
||||
self._app.router.add_post("/v1/responses", self._handle_responses)
|
||||
self._app.router.add_get("/v1/responses/{response_id}", self._handle_get_response)
|
||||
@@ -2636,6 +2795,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
self._app.router.add_post("/api/jobs/{job_id}/run", self._handle_run_job)
|
||||
# Structured event streaming
|
||||
self._app.router.add_post("/v1/runs", self._handle_runs)
|
||||
self._app.router.add_get("/v1/runs/{run_id}", self._handle_get_run)
|
||||
self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
|
||||
self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
|
||||
# Start background sweep to clean up orphaned (unconsumed) run streams
|
||||
|
||||
+353
-94
@@ -23,6 +23,45 @@ from utils import normalize_proxy_url
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Audio file extensions Hermes recognizes for native audio delivery.
# Kept in sync with tools/send_message_tool.py and cron/scheduler.py via
# should_send_media_as_audio() below.
_AUDIO_EXTS = frozenset((".ogg", ".opus", ".mp3", ".wav", ".m4a", ".flac"))
# Telegram's Bot API sendAudio only accepts MP3 / M4A.  Other audio formats
# either need to go through sendVoice (Opus/OGG) or must be delivered as a
# regular document.
_TELEGRAM_AUDIO_ATTACHMENT_EXTS = frozenset((".mp3", ".m4a"))
_TELEGRAM_VOICE_EXTS = frozenset((".ogg", ".opus"))


def _platform_name(platform) -> str:
    """Return the lowercase name for a Platform enum member or raw string.

    Objects exposing ``.value`` contribute that value; anything else is
    used as-is.  Falsy inputs (``None``, ``""``) yield ``""``.
    """
    raw = getattr(platform, "value", platform)
    if not raw:
        return ""
    return str(raw).lower()


def should_send_media_as_audio(platform, ext: str, is_voice: bool = False) -> bool:
    """Decide whether a media file goes through the platform's audio sender.

    *ext* is compared case-insensitively and must include the leading dot;
    unrecognized (or empty) extensions are never sent as audio.

    Telegram is the only special case: MP3/M4A go out as audio
    attachments, Opus/OGG only when the caller passed ``is_voice=True``
    (so a regular Opus attachment is not turned into a voice bubble), and
    every other recognized format returns ``False`` so it is delivered as
    a document.  On all other platforms every recognized audio extension
    is sent as audio.
    """
    suffix = ext.lower() if ext else ""
    if suffix not in _AUDIO_EXTS:
        return False

    if _platform_name(platform) != "telegram":
        return True

    if suffix in _TELEGRAM_VOICE_EXTS:
        return is_voice
    return suffix in _TELEGRAM_AUDIO_ATTACHMENT_EXTS
|
||||
|
||||
|
||||
def utf16_len(s: str) -> int:
|
||||
"""Count UTF-16 code units in *s*.
|
||||
@@ -307,9 +346,14 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
|
||||
"""Build kwargs for standalone ``aiohttp.ClientSession`` with proxy.
|
||||
|
||||
Returns ``(session_kwargs, request_kwargs)`` where:
|
||||
- SOCKS → ``({"connector": ProxyConnector(...)}, {})``
|
||||
- HTTP → ``({}, {"proxy": url})``
|
||||
- None → ``({}, {})``
|
||||
- With aiohttp-socks → ``({"connector": ProxyConnector(...)}, {})``
|
||||
for *all* proxy schemes (SOCKS **and** HTTP/HTTPS).
|
||||
- HTTP without aiohttp-socks → ``({}, {"proxy": url})``.
|
||||
- None → ``({}, {})``.
|
||||
|
||||
Prefer the connector path: it works transparently with libraries
|
||||
(like mautrix) that call ``session.request()`` without forwarding
|
||||
per-request ``proxy=`` kwargs.
|
||||
|
||||
Usage::
|
||||
|
||||
@@ -320,20 +364,20 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
|
||||
"""
|
||||
if not proxy_url:
|
||||
return {}, {}
|
||||
if proxy_url.lower().startswith("socks"):
|
||||
try:
|
||||
from aiohttp_socks import ProxyConnector
|
||||
try:
|
||||
from aiohttp_socks import ProxyConnector
|
||||
|
||||
connector = ProxyConnector.from_url(proxy_url, rdns=True)
|
||||
return {"connector": connector}, {}
|
||||
except ImportError:
|
||||
connector = ProxyConnector.from_url(proxy_url, rdns=True)
|
||||
return {"connector": connector}, {}
|
||||
except ImportError:
|
||||
if proxy_url.lower().startswith("socks"):
|
||||
logger.warning(
|
||||
"aiohttp_socks not installed — SOCKS proxy %s ignored. "
|
||||
"Run: pip install aiohttp-socks",
|
||||
proxy_url,
|
||||
)
|
||||
return {}, {}
|
||||
return {}, {"proxy": proxy_url}
|
||||
return {}, {"proxy": proxy_url}
|
||||
|
||||
|
||||
def is_host_excluded_by_no_proxy(hostname: str, no_proxy_value: str | None = None) -> bool:
|
||||
@@ -902,6 +946,41 @@ class MessageEvent:
|
||||
return args
|
||||
|
||||
|
||||
# Whole-message, case-insensitive phrases treated as a gateway restart request.
_PLAINTEXT_GATEWAY_RESTART_PATTERNS: tuple[re.Pattern[str], ...] = tuple(
    re.compile(expression, re.IGNORECASE)
    for expression in (
        r"^(?:please\s+)?restart\s+(?:the\s+)?gateway[.!?\s]*$",
        r"^(?:please\s+)?restart\s+(?:the\s+)?hermes\s+gateway[.!?\s]*$",
        r"^(?:please\s+)?restart\s+hermes[.!?\s]*$",
    )
)


def coerce_plaintext_gateway_command(event: "MessageEvent") -> None:
    """Rewrite a tiny set of DM plaintext admin phrases into slash commands.

    When *event* is a TEXT message from a ``dm`` chat whose stripped text
    matches one of the restart phrases, ``event.text`` is replaced in
    place with ``/restart``.  This keeps high-impact operational phrases
    like ``restart gateway`` out of the LLM/tool path, where they can
    trigger a self-restart from inside the running agent and leave the
    gateway stuck in ``draining`` while it waits for that same agent.

    Scope is intentionally narrow: messages that already start with ``/``,
    non-text messages and non-DM chats (group chats keep natural-language
    semantics) are left untouched.  Best-effort by design: any exception
    raised while inspecting the event is swallowed and the event is left
    as it was.
    """
    try:
        if event is None:
            return
        if event.message_type != MessageType.TEXT:
            return

        stripped = (event.text or "").strip()
        if not stripped:
            return
        if stripped.startswith("/"):
            return

        origin = getattr(event, "source", None)
        if getattr(origin, "chat_type", None) != "dm":
            return

        if any(rx.match(stripped) for rx in _PLAINTEXT_GATEWAY_RESTART_PATTERNS):
            event.text = "/restart"
    except Exception:
        return
|
||||
|
||||
|
||||
@dataclass
|
||||
class SendResult:
|
||||
"""Result of sending a message."""
|
||||
@@ -1375,6 +1454,41 @@ class BasePlatformAdapter(ABC):
|
||||
"""
|
||||
return False
|
||||
|
||||
async def send_slash_confirm(
    self,
    chat_id: str,
    title: str,
    message: str,
    session_key: str,
    confirm_id: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
    """Default handler for the three-option slash-command confirmation prompt.

    The gateway's generic slash-confirm primitive
    (``GatewayRunner._request_slash_confirm``) calls this for commands whose
    side effect is non-destructive but expensive enough that the user should
    acknowledge it explicitly. The current caller is ``/reload-mcp``, which
    invalidates the provider prompt cache.

    Adapters for platforms with inline buttons (Telegram, Discord, Slack,
    Matrix, Feishu) should override this and render three buttons:
    Approve Once / Always Approve / Cancel. Their button callbacks MUST be
    routed back through the gateway by calling
    ``GatewayRunner._resolve_slash_confirm(confirm_id, choice)``, with
    ``choice`` being ``"once"``, ``"always"`` or ``"cancel"``.

    Adapters without a button UI keep this default. The unsuccessful result
    makes the gateway fall through to its text fallback, which sends
    ``message`` as plain text and intercepts the next ``/approve`` /
    ``/always`` / ``/cancel`` reply.

    ``confirm_id`` is a short string generated by the gateway; an overriding
    adapter stores it next to whatever platform-specific state it needs to
    route the callback (e.g. Telegram's ``_approval_state`` dict).

    Returns:
        Always an unsuccessful ``SendResult`` carrying ``"Not supported"``.
    """
    # No arguments are inspected here: the base adapter has no button UI.
    unsupported = SendResult(success=False, error="Not supported")
    return unsupported
|
||||
|
||||
async def send_typing(self, chat_id: str, metadata=None) -> None:
|
||||
"""
|
||||
Send a typing indicator.
|
||||
@@ -1391,7 +1505,64 @@ class BasePlatformAdapter(ABC):
|
||||
Default is a no-op for platforms with one-shot typing indicators.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
async def send_multiple_images(
    self,
    chat_id: str,
    images: List[Tuple[str, str]],
    metadata: Optional[Dict[str, Any]] = None,
    human_delay: float = 0.0,
) -> None:
    """Deliver a batch of images one at a time.

    Each entry of ``images`` is a ``(url, alt_text)`` pair whose first element
    is an ``http(s)://`` or ``file://`` URI.

    This default implementation has no native batching. It walks the list and
    dispatches every item on its own:

    * ``file://`` URIs are percent-decoded and sent via ``send_image_file``;
    * URLs flagged by ``_is_animation_url`` go through ``send_animation``;
    * everything else is sent with ``send_image``.

    A failure on one image (an unsuccessful result or a raised exception) is
    logged and does not stop the remaining images. When ``human_delay`` is
    positive, the loop sleeps for that long before every image.

    Subclasses can override this to bundle the batch into a single native API
    call (e.g. Signal's multi-attachment RPC).
    """
    from urllib.parse import unquote as _decode_path

    for image_url, alt_text in images:
        if human_delay > 0:
            await asyncio.sleep(human_delay)
        try:
            logger.info(
                "[%s] Sending image: %s (alt=%s)",
                self.name,
                safe_url_for_log(image_url),
                alt_text[:30] if alt_text else "",
            )
            # Empty alt text is normalised to "no caption".
            caption = alt_text or None
            if image_url.startswith("file://"):
                pending = self.send_image_file(
                    chat_id=chat_id,
                    image_path=_decode_path(image_url[7:]),
                    caption=caption,
                    metadata=metadata,
                )
            elif self._is_animation_url(image_url):
                pending = self.send_animation(
                    chat_id=chat_id,
                    animation_url=image_url,
                    caption=caption,
                    metadata=metadata,
                )
            else:
                pending = self.send_image(
                    chat_id=chat_id,
                    image_url=image_url,
                    caption=caption,
                    metadata=metadata,
                )
            outcome = await pending
            if not outcome.success:
                logger.error("[%s] Failed to send image: %s", self.name, outcome.error)
        except Exception as img_err:
            logger.error("[%s] Error sending image: %s", self.name, img_err, exc_info=True)
|
||||
|
||||
async def send_image(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -1600,7 +1771,7 @@ class BasePlatformAdapter(ABC):
|
||||
# Extract MEDIA:<path> tags, allowing optional whitespace after the colon
|
||||
# and quoted/backticked paths for LLM-formatted outputs.
|
||||
media_pattern = re.compile(
|
||||
r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
|
||||
r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
|
||||
)
|
||||
for match in media_pattern.finditer(content):
|
||||
path = match.group("path").strip()
|
||||
@@ -1740,11 +1911,19 @@ class BasePlatformAdapter(ABC):
|
||||
if stop_event is None:
|
||||
await asyncio.sleep(interval)
|
||||
continue
|
||||
try:
|
||||
await asyncio.wait_for(stop_event.wait(), timeout=interval)
|
||||
except asyncio.TimeoutError:
|
||||
continue
|
||||
return
|
||||
loop = asyncio.get_running_loop()
|
||||
deadline = loop.time() + interval
|
||||
while not stop_event.is_set():
|
||||
remaining = deadline - loop.time()
|
||||
if remaining <= 0:
|
||||
break
|
||||
# Poll instead of wait_for(stop_event.wait()). Cancelling
|
||||
# wait_for while it owns the inner Event.wait task can leave
|
||||
# shutdown paths stuck awaiting the typing task on Python
|
||||
# 3.11/pytest-asyncio; sleep cancellation is immediate.
|
||||
await asyncio.sleep(min(0.25, remaining))
|
||||
if stop_event.is_set():
|
||||
return
|
||||
except asyncio.CancelledError:
|
||||
pass # Normal cancellation when handler completes
|
||||
finally:
|
||||
@@ -2077,6 +2256,12 @@ class BasePlatformAdapter(ABC):
|
||||
``release_guard=False`` keeps the adapter-level session guard in place
|
||||
so reset-like commands can finish atomically before follow-up messages
|
||||
are allowed to start a fresh background task.
|
||||
|
||||
Bounded by a 5s timeout so a wedged finally block in the cancelled
|
||||
task (typing-task cleanup, on_processing_complete hook, etc.) can't
|
||||
stall the calling dispatch coroutine — particularly under pytest-
|
||||
asyncio where the event loop's cancellation-propagation semantics
|
||||
differ subtly from a bare ``asyncio.run`` harness.
|
||||
"""
|
||||
task = self._session_tasks.pop(session_key, None)
|
||||
if task is not None and not task.done():
|
||||
@@ -2088,9 +2273,15 @@ class BasePlatformAdapter(ABC):
|
||||
self._expected_cancelled_tasks.add(task)
|
||||
task.cancel()
|
||||
try:
|
||||
await task
|
||||
await asyncio.wait_for(asyncio.shield(task), timeout=5.0)
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(
|
||||
"[%s] Cancelled task for %s did not exit within 5s; "
|
||||
"unblocking dispatch and letting the task unwind in the background",
|
||||
self.name, session_key,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"[%s] Session cancellation raised while unwinding %s",
|
||||
@@ -2188,6 +2379,8 @@ class BasePlatformAdapter(ABC):
|
||||
"""
|
||||
if not self._message_handler:
|
||||
return
|
||||
|
||||
coerce_plaintext_gateway_command(event)
|
||||
|
||||
session_key = build_session_key(
|
||||
event.source,
|
||||
@@ -2340,6 +2533,16 @@ class BasePlatformAdapter(ABC):
|
||||
**_keep_typing_kwargs,
|
||||
)
|
||||
)
|
||||
|
||||
async def _stop_typing_task() -> None:
|
||||
typing_task.cancel()
|
||||
try:
|
||||
await asyncio.wait_for(asyncio.shield(typing_task), timeout=0.5)
|
||||
except (asyncio.CancelledError, asyncio.TimeoutError):
|
||||
# Cancellation cleanup must not block adapter shutdown. The
|
||||
# typing task is already cancelled; if the parent task is also
|
||||
# cancelling, let this message-processing task unwind now.
|
||||
pass
|
||||
|
||||
try:
|
||||
await self._run_processing_hook("on_processing_start", event)
|
||||
@@ -2441,47 +2644,57 @@ class BasePlatformAdapter(ABC):
|
||||
# Send extracted images as native attachments
|
||||
if images:
|
||||
logger.info("[%s] Extracted %d image(s) to send as attachments", self.name, len(images))
|
||||
for image_url, alt_text in images:
|
||||
if human_delay > 0:
|
||||
await asyncio.sleep(human_delay)
|
||||
try:
|
||||
logger.info(
|
||||
"[%s] Sending image: %s (alt=%s)",
|
||||
self.name,
|
||||
safe_url_for_log(image_url),
|
||||
alt_text[:30] if alt_text else "",
|
||||
await self.send_multiple_images(
|
||||
chat_id=event.source.chat_id,
|
||||
images=images,
|
||||
metadata=_thread_metadata,
|
||||
human_delay=human_delay,
|
||||
)
|
||||
# Route animated GIFs through send_animation for proper playback
|
||||
if self._is_animation_url(image_url):
|
||||
img_result = await self.send_animation(
|
||||
chat_id=event.source.chat_id,
|
||||
animation_url=image_url,
|
||||
caption=alt_text if alt_text else None,
|
||||
metadata=_thread_metadata,
|
||||
)
|
||||
else:
|
||||
img_result = await self.send_image(
|
||||
chat_id=event.source.chat_id,
|
||||
image_url=image_url,
|
||||
caption=alt_text if alt_text else None,
|
||||
metadata=_thread_metadata,
|
||||
)
|
||||
if not img_result.success:
|
||||
logger.error("[%s] Failed to send image: %s", self.name, img_result.error)
|
||||
except Exception as img_err:
|
||||
logger.error("[%s] Error sending image: %s", self.name, img_err, exc_info=True)
|
||||
except Exception as batch_err:
|
||||
logger.warning("[%s] Error batching images: %s", self.name, batch_err, exc_info=True)
|
||||
|
||||
|
||||
# Send extracted media files — route by file type
|
||||
_AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'}
|
||||
_VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}
|
||||
_IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}
|
||||
|
||||
# Partition images out of media_files + local_files so they
|
||||
# can be sent as a single batch (Signal RPC)
|
||||
from urllib.parse import quote as _quote
|
||||
_image_paths: list = []
|
||||
_non_image_media: list = []
|
||||
for media_path, is_voice in media_files:
|
||||
_ext = Path(media_path).suffix.lower()
|
||||
if _ext in _IMAGE_EXTS and not is_voice:
|
||||
_image_paths.append(media_path)
|
||||
else:
|
||||
_non_image_media.append((media_path, is_voice))
|
||||
_non_image_local: list = []
|
||||
for file_path in local_files:
|
||||
if Path(file_path).suffix.lower() in _IMAGE_EXTS:
|
||||
_image_paths.append(file_path)
|
||||
else:
|
||||
_non_image_local.append(file_path)
|
||||
|
||||
if _image_paths:
|
||||
try:
|
||||
_batch = [(f"file://{_quote(p)}", "") for p in _image_paths]
|
||||
await self.send_multiple_images(
|
||||
chat_id=event.source.chat_id,
|
||||
images=_batch,
|
||||
metadata=_thread_metadata,
|
||||
human_delay=human_delay,
|
||||
)
|
||||
except Exception as batch_err:
|
||||
logger.warning("[%s] Error batching images: %s", self.name, batch_err, exc_info=True)
|
||||
|
||||
for media_path, is_voice in _non_image_media:
|
||||
if human_delay > 0:
|
||||
await asyncio.sleep(human_delay)
|
||||
try:
|
||||
ext = Path(media_path).suffix.lower()
|
||||
if ext in _AUDIO_EXTS:
|
||||
if should_send_media_as_audio(self.platform, ext, is_voice=is_voice):
|
||||
media_result = await self.send_voice(
|
||||
chat_id=event.source.chat_id,
|
||||
audio_path=media_path,
|
||||
@@ -2493,12 +2706,6 @@ class BasePlatformAdapter(ABC):
|
||||
video_path=media_path,
|
||||
metadata=_thread_metadata,
|
||||
)
|
||||
elif ext in _IMAGE_EXTS:
|
||||
media_result = await self.send_image_file(
|
||||
chat_id=event.source.chat_id,
|
||||
image_path=media_path,
|
||||
metadata=_thread_metadata,
|
||||
)
|
||||
else:
|
||||
media_result = await self.send_document(
|
||||
chat_id=event.source.chat_id,
|
||||
@@ -2511,19 +2718,13 @@ class BasePlatformAdapter(ABC):
|
||||
except Exception as media_err:
|
||||
logger.warning("[%s] Error sending media: %s", self.name, media_err)
|
||||
|
||||
# Send auto-detected local files as native attachments
|
||||
for file_path in local_files:
|
||||
# Send auto-detected local non-image files as native attachments
|
||||
for file_path in _non_image_local:
|
||||
if human_delay > 0:
|
||||
await asyncio.sleep(human_delay)
|
||||
try:
|
||||
ext = Path(file_path).suffix.lower()
|
||||
if ext in _IMAGE_EXTS:
|
||||
await self.send_image_file(
|
||||
chat_id=event.source.chat_id,
|
||||
image_path=file_path,
|
||||
metadata=_thread_metadata,
|
||||
)
|
||||
elif ext in _VIDEO_EXTS:
|
||||
if ext in _VIDEO_EXTS:
|
||||
await self.send_video(
|
||||
chat_id=event.source.chat_id,
|
||||
video_path=file_path,
|
||||
@@ -2562,14 +2763,28 @@ class BasePlatformAdapter(ABC):
|
||||
_active = self._active_sessions.get(session_key)
|
||||
if _active is not None:
|
||||
_active.clear()
|
||||
typing_task.cancel()
|
||||
await _stop_typing_task()
|
||||
# Spawn a fresh task for the pending message instead of
|
||||
# recursing. Issue #17758: `await
|
||||
# self._process_message_background(...)` here grew the
|
||||
# call stack one frame per chained follow-up, and under
|
||||
# sustained pending-queue activity the C stack would
|
||||
# exhaust at ~2000 frames and SIGSEGV the process.
|
||||
# Mirror the late-arrival drain pattern below: hand off
|
||||
# to a new task and return so this frame can unwind.
|
||||
drain_task = asyncio.create_task(
|
||||
self._process_message_background(pending_event, session_key)
|
||||
)
|
||||
# Hand ownership of the session to the drain task so
|
||||
# stale-lock detection keeps working while it runs.
|
||||
self._session_tasks[session_key] = drain_task
|
||||
try:
|
||||
await typing_task
|
||||
except asyncio.CancelledError:
|
||||
self._background_tasks.add(drain_task)
|
||||
drain_task.add_done_callback(self._background_tasks.discard)
|
||||
except TypeError:
|
||||
# Tests stub create_task() with non-hashable sentinels; tolerate.
|
||||
pass
|
||||
# Process pending message in new background task
|
||||
await self._process_message_background(pending_event, session_key)
|
||||
return # Already cleaned up
|
||||
return # Drain task owns the session now.
|
||||
|
||||
except asyncio.CancelledError:
|
||||
current_task = asyncio.current_task()
|
||||
@@ -2614,11 +2829,7 @@ class BasePlatformAdapter(ABC):
|
||||
except Exception:
|
||||
pass
|
||||
# Stop typing indicator
|
||||
typing_task.cancel()
|
||||
try:
|
||||
await typing_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
await _stop_typing_task()
|
||||
# Also cancel any platform-level persistent typing tasks (e.g. Discord)
|
||||
# that may have been recreated by _keep_typing after the last stop_typing()
|
||||
try:
|
||||
@@ -2635,25 +2846,41 @@ class BasePlatformAdapter(ABC):
|
||||
# dropped (user never gets a reply).
|
||||
late_pending = self._pending_messages.pop(session_key, None)
|
||||
if late_pending is not None:
|
||||
logger.debug(
|
||||
"[%s] Late-arrival pending message during cleanup — spawning drain task",
|
||||
self.name,
|
||||
)
|
||||
_active = self._active_sessions.get(session_key)
|
||||
if _active is not None:
|
||||
_active.clear()
|
||||
drain_task = asyncio.create_task(
|
||||
self._process_message_background(late_pending, session_key)
|
||||
)
|
||||
# Hand ownership of the session to the drain task so stale-lock
|
||||
# detection keeps working while it runs.
|
||||
self._session_tasks[session_key] = drain_task
|
||||
try:
|
||||
self._background_tasks.add(drain_task)
|
||||
drain_task.add_done_callback(self._background_tasks.discard)
|
||||
except TypeError:
|
||||
# Tests stub create_task() with non-hashable sentinels; tolerate.
|
||||
pass
|
||||
current_task = asyncio.current_task()
|
||||
existing_task = self._session_tasks.get(session_key)
|
||||
if (
|
||||
existing_task is not None
|
||||
and existing_task is not current_task
|
||||
):
|
||||
# The in-band drain (or an earlier late-arrival drain)
|
||||
# already spawned a follow-up task that owns this
|
||||
# session. Re-queue the late-arrival event so that
|
||||
# task picks it up — avoids spawning two concurrent
|
||||
# _process_message_background tasks for the same key
|
||||
# (#17758 follow-up: prevents the create_task path
|
||||
# from racing with itself across the in-band/finally
|
||||
# boundary).
|
||||
self._pending_messages[session_key] = late_pending
|
||||
else:
|
||||
logger.debug(
|
||||
"[%s] Late-arrival pending message during cleanup — spawning drain task",
|
||||
self.name,
|
||||
)
|
||||
_active = self._active_sessions.get(session_key)
|
||||
if _active is not None:
|
||||
_active.clear()
|
||||
drain_task = asyncio.create_task(
|
||||
self._process_message_background(late_pending, session_key)
|
||||
)
|
||||
# Hand ownership of the session to the drain task so stale-lock
|
||||
# detection keeps working while it runs.
|
||||
self._session_tasks[session_key] = drain_task
|
||||
try:
|
||||
self._background_tasks.add(drain_task)
|
||||
drain_task.add_done_callback(self._background_tasks.discard)
|
||||
except TypeError:
|
||||
# Tests stub create_task() with non-hashable sentinels; tolerate.
|
||||
pass
|
||||
# Leave _active_sessions[session_key] populated — the drain
|
||||
# task's own lifecycle will clean it up.
|
||||
else:
|
||||
@@ -2661,16 +2888,34 @@ class BasePlatformAdapter(ABC):
|
||||
# reset-like command that already swapped in its own
|
||||
# command_guard (and cancelled us) can't be accidentally
|
||||
# cleared by our unwind. The command owns the session now.
|
||||
#
|
||||
# The owner-check also covers the in-band drain handoff
|
||||
# above: when we spawned a drain_task and transferred
|
||||
# ownership via ``_session_tasks[session_key] = drain_task``,
|
||||
# ``_session_tasks.get(session_key) is current_task`` is
|
||||
# False, so we leave _active_sessions populated. Without
|
||||
# this guard, the drain task picks up the same
|
||||
# interrupt_event in its own _process_message_background
|
||||
# entry, _release_session_guard's guard-match succeeds,
|
||||
# and we'd delete the entry while the drain task is still
|
||||
# running — letting a concurrent inbound message pass
|
||||
# the Level-1 guard and spawn a second handler for the
|
||||
# same session.
|
||||
current_task = asyncio.current_task()
|
||||
if current_task is not None and self._session_tasks.get(session_key) is current_task:
|
||||
del self._session_tasks[session_key]
|
||||
self._release_session_guard(session_key, guard=interrupt_event)
|
||||
self._release_session_guard(session_key, guard=interrupt_event)
|
||||
|
||||
async def cancel_background_tasks(self) -> None:
|
||||
"""Cancel any in-flight background message-processing tasks.
|
||||
|
||||
Used during gateway shutdown/replacement so active sessions from the old
|
||||
process do not keep running after adapters are being torn down.
|
||||
|
||||
Each cancelled task is awaited with a 5s bound so a wedged finally
|
||||
(typing-task cleanup, on_processing_complete hook) can't stall the
|
||||
whole shutdown path. Stragglers are released from our tracking and
|
||||
allowed to finish unwinding on their own.
|
||||
"""
|
||||
# Loop until no new tasks appear. Without this, a message
|
||||
# arriving during the `await asyncio.gather` below would spawn
|
||||
@@ -2689,7 +2934,21 @@ class BasePlatformAdapter(ABC):
|
||||
for task in tasks:
|
||||
self._expected_cancelled_tasks.add(task)
|
||||
task.cancel()
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
try:
|
||||
await asyncio.wait_for(
|
||||
asyncio.gather(
|
||||
*(asyncio.shield(t) for t in tasks),
|
||||
return_exceptions=True,
|
||||
),
|
||||
timeout=5.0,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(
|
||||
"[%s] %d background task(s) did not exit within 5s; "
|
||||
"releasing tracking and letting them unwind in the background",
|
||||
self.name, len([t for t in tasks if not t.done()]),
|
||||
)
|
||||
break
|
||||
# Loop: late-arrival tasks spawned during the gather above
|
||||
# will be in self._background_tasks now. Re-check.
|
||||
self._background_tasks.clear()
|
||||
|
||||
@@ -18,7 +18,7 @@ import tempfile
|
||||
import threading
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from typing import Callable, Dict, Optional, Any
|
||||
from typing import Callable, Dict, List, Optional, Any, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -305,7 +305,7 @@ class VoiceReceiver:
|
||||
encrypted = bytes(payload_with_nonce[:-4])
|
||||
|
||||
try:
|
||||
import nacl.secret # noqa: delayed import – only in voice path
|
||||
import nacl.secret # noqa: E402 — delayed import, only in voice path
|
||||
box = nacl.secret.Aead(self._secret_key)
|
||||
decrypted = box.decrypt(encrypted, header, bytes(nonce))
|
||||
except Exception as e:
|
||||
@@ -813,7 +813,14 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
|
||||
return
|
||||
|
||||
summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
|
||||
# Discord's per-app command-management bucket is ~5 writes / 20 s,
|
||||
# so a mass-prune-plus-upsert reconcile (e.g. 77 orphans + 30
|
||||
# desired = 107 writes) takes several minutes of forced waits.
|
||||
# A flat 30 s budget blew up reliably under bucket pressure and
|
||||
# left slash commands broken for ~60 min until the bucket fully
|
||||
# recovered. Use a wide ceiling; the cap still guards against a
|
||||
# true hang. (#16713)
|
||||
summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=600)
|
||||
logger.info(
|
||||
"[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
|
||||
self.name,
|
||||
@@ -825,7 +832,11 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
summary["deleted"],
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning("[%s] Slash command sync timed out after 30s", self.name)
|
||||
logger.warning(
|
||||
"[%s] Slash command sync timed out — Discord rate-limit bucket "
|
||||
"may be saturated; will retry on next reconnect",
|
||||
self.name,
|
||||
)
|
||||
except asyncio.CancelledError:
|
||||
raise
|
||||
except Exception as e: # pragma: no cover - defensive logging
|
||||
@@ -1332,6 +1343,134 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
msg = await channel.send(content=caption if caption else None, file=file)
|
||||
return SendResult(success=True, message_id=str(msg.id))
|
||||
|
||||
async def send_multiple_images(
    self,
    chat_id: str,
    images: List[Tuple[str, str]],
    metadata: Optional[Dict[str, Any]] = None,
    human_delay: float = 0.0,
) -> None:
    """Post a batch of images as multi-attachment Discord messages.

    The batch is cut into groups of at most 10 (the per-message attachment
    limit this adapter assumes for Discord) and each group is sent as one
    message. Remote images are downloaded into memory and uploaded as
    attachments so they render inline instead of as bare links; ``file://``
    entries are attached straight from disk. Missing local files, unsafe
    URLs and failed downloads are logged and skipped.

    Only the first non-empty alt text of a group is used, as the message
    body. If building or sending a group raises, that whole group is
    re-sent through the base class's per-image loop. The same base-class
    fallback handles the entire batch when the channel cannot be resolved.

    ``human_delay`` (seconds), when positive, is slept between groups but
    not before the first one.
    """
    if not self._client or not images:
        return

    try:
        import discord as _discord_mod
        import io as _io
        from urllib.parse import unquote as _unquote
    except Exception:  # pragma: no cover
        await super().send_multiple_images(chat_id, images, metadata, human_delay)
        return

    try:
        numeric_id = int(chat_id)
        # Prefer the client's cached channel; hit the API only on a miss.
        target = self._client.get_channel(numeric_id) or await self._client.fetch_channel(numeric_id)
        if not target:
            logger.warning("[%s] Channel %s not found for multi-image send", self.name, chat_id)
            return
    except Exception as resolve_err:
        logger.warning("[%s] Failed to resolve channel for multi-image send: %s", self.name, resolve_err)
        await super().send_multiple_images(chat_id, images, metadata, human_delay)
        return

    per_message = 10
    batches = [images[start:start + per_message] for start in range(0, len(images), per_message)]
    batch_total = len(batches)

    for batch_no, batch in enumerate(batches, start=1):
        if human_delay > 0 and batch_no > 1:
            await asyncio.sleep(human_delay)

        attachments: List[Any] = []
        lead_caption = None
        http = None  # created lazily, one session per group
        try:
            for image_url, alt_text in batch:
                # Remember the first caption even if its image ends up skipped.
                if alt_text and lead_caption is None:
                    lead_caption = alt_text

                if image_url.startswith("file://"):
                    local_path = _unquote(image_url[7:])
                    if os.path.exists(local_path):
                        attachments.append(
                            _discord_mod.File(local_path, filename=os.path.basename(local_path))
                        )
                    else:
                        logger.warning("[%s] Skipping missing image: %s", self.name, local_path)
                    continue

                if not is_safe_url(image_url):
                    logger.warning("[%s] Blocked unsafe image URL in batch", self.name)
                    continue

                # Download to BytesIO so the image renders inline.
                try:
                    import aiohttp as _aiohttp
                    from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
                    session_kwargs, request_kwargs = proxy_kwargs_for_aiohttp(
                        resolve_proxy_url(platform_env_var="DISCORD_PROXY")
                    )
                    if http is None:
                        http = _aiohttp.ClientSession(**session_kwargs)
                    async with http.get(
                        image_url, timeout=_aiohttp.ClientTimeout(total=30), **request_kwargs,
                    ) as resp:
                        if resp.status != 200:
                            logger.warning(
                                "[%s] Failed to download image (HTTP %d) in batch: %s",
                                self.name, resp.status, image_url[:80],
                            )
                            continue
                        payload = await resp.read()
                        content_type = resp.headers.get("content-type", "image/png")
                        # First marker found in the content type wins; default is png.
                        ext = next(
                            (
                                suffix
                                for marker, suffix in (
                                    ("jpeg", "jpg"),
                                    ("jpg", "jpg"),
                                    ("gif", "gif"),
                                    ("webp", "webp"),
                                )
                                if marker in content_type
                            ),
                            "png",
                        )
                        attachments.append(
                            _discord_mod.File(_io.BytesIO(payload), filename=f"image_{len(attachments)}.{ext}")
                        )
                except Exception as dl_err:
                    logger.warning("[%s] Download failed for %s: %s", self.name, image_url[:80], dl_err)

            if not attachments:
                continue

            # Discord has a single message body for the whole group.
            content = lead_caption
            logger.info(
                "[%s] Sending %d image(s) as single Discord message (chunk %d/%d)",
                self.name, len(attachments), batch_no, batch_total,
            )

            if self._is_forum_parent(target):
                await self._forum_post_file(
                    target,
                    content=(content or "").strip(),
                    files=attachments,
                )
            else:
                await target.send(content=content, files=attachments)
        except Exception as send_err:
            logger.warning(
                "[%s] Multi-image Discord send failed (chunk %d/%d), falling back to per-image: %s",
                self.name, batch_no, batch_total, send_err,
                exc_info=True,
            )
            await super().send_multiple_images(chat_id, batch, metadata, human_delay=human_delay)
        finally:
            if http is not None:
                try:
                    await http.close()
                except Exception:
                    pass
|
||||
|
||||
async def play_tts(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -2259,6 +2398,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
async def slash_reload_mcp(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, "/reload-mcp")
|
||||
|
||||
@tree.command(name="reload-skills", description="Re-scan ~/.hermes/skills/ for new or removed skills")
|
||||
async def slash_reload_skills(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, "/reload-skills")
|
||||
|
||||
@tree.command(name="voice", description="Toggle voice reply mode")
|
||||
@discord.app_commands.describe(mode="Voice mode: on, off, tts, channel, leave, or status")
|
||||
@discord.app_commands.choices(mode=[
|
||||
@@ -2895,6 +3038,43 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_slash_confirm(
    self, chat_id: str, title: str, message: str, session_key: str,
    confirm_id: str, metadata: Optional[dict] = None,
) -> SendResult:
    """Post a slash-command confirmation prompt with three buttons.

    The prompt is an orange embed (``title``, defaulting to "Confirm", plus
    ``message`` as the description) with a ``SlashConfirmView`` attached.
    It goes to the thread named by ``metadata["thread_id"]`` when that is
    set, otherwise to ``chat_id``.

    Returns:
        A successful ``SendResult`` with the posted message's id, or an
        unsuccessful one ("Not connected" when there is no client, else the
        text of whatever exception was raised).
    """
    if not (self._client and DISCORD_AVAILABLE):
        return SendResult(success=False, error="Not connected")

    try:
        thread_id = metadata.get("thread_id") if metadata else None
        target_id = thread_id if thread_id else chat_id

        # Prefer the client's cached channel; hit the API only on a miss.
        channel = self._client.get_channel(int(target_id)) or await self._client.fetch_channel(int(target_id))

        # Embed description limit is 4096; message usually fits easily.
        max_desc = 4088
        if len(message) <= max_desc:
            body = message
        else:
            body = message[: max_desc - 3] + "..."

        prompt_embed = discord.Embed(
            title=title or "Confirm",
            description=body,
            color=discord.Color.orange(),
        )
        buttons = SlashConfirmView(
            session_key=session_key,
            confirm_id=confirm_id,
            allowed_user_ids=self._allowed_user_ids,
        )

        posted = await channel.send(embed=prompt_embed, view=buttons)
        return SendResult(success=True, message_id=str(posted.id))
    except Exception as exc:
        return SendResult(success=False, error=str(exc))
|
||||
|
||||
async def send_update_prompt(
|
||||
self, chat_id: str, prompt: str, default: str = "",
|
||||
session_key: str = "",
|
||||
@@ -3628,6 +3808,103 @@ if DISCORD_AVAILABLE:
|
||||
for child in self.children:
|
||||
child.disabled = True
|
||||
|
||||
class SlashConfirmView(discord.ui.View):
|
||||
"""Three-button view for generic slash-command confirmations.
|
||||
|
||||
Used by ``/reload-mcp`` and any future slash command routed through
|
||||
``GatewayRunner._request_slash_confirm``. Buttons map to the
|
||||
gateway's three choices:
|
||||
|
||||
* "Approve Once" → ``choice="once"``
|
||||
* "Always Approve" → ``choice="always"``
|
||||
* "Cancel" → ``choice="cancel"``
|
||||
|
||||
Clicking calls the module-level
|
||||
``tools.slash_confirm.resolve(session_key, confirm_id, choice)``
|
||||
which runs the handler the runner stored for this ``session_key``.
|
||||
Only users in the adapter's allowlist can click. Times out after
|
||||
5 minutes (matches the gateway primitive's timeout).
|
||||
"""
|
||||
|
||||
def __init__(self, session_key: str, confirm_id: str, allowed_user_ids: set):
|
||||
super().__init__(timeout=300)
|
||||
self.session_key = session_key
|
||||
self.confirm_id = confirm_id
|
||||
self.allowed_user_ids = allowed_user_ids
|
||||
self.resolved = False
|
||||
|
||||
def _check_auth(self, interaction: discord.Interaction) -> bool:
|
||||
if not self.allowed_user_ids:
|
||||
return True
|
||||
return str(interaction.user.id) in self.allowed_user_ids
|
||||
|
||||
async def _resolve(
    self,
    interaction: discord.Interaction,
    choice: str,
    color: discord.Color,
    label: str,
):
    """Handle a button click: validate it, update the message, resolve the prompt.

    Args:
        interaction: The Discord interaction produced by the click.
        choice: Value forwarded to ``tools.slash_confirm.resolve``.
        color: Color applied to the prompt embed, if the message has one.
        label: Footer prefix, rendered as ``"<label> by <display name>"``.
    """
    # Reject repeat clicks first, then unauthorized users; both get an
    # ephemeral reply and leave the prompt untouched.
    rejection = None
    if self.resolved:
        rejection = "This prompt has already been resolved~"
    elif not self._check_auth(interaction):
        rejection = "You're not authorized to answer this prompt~"
    if rejection is not None:
        await interaction.response.send_message(rejection, ephemeral=True)
        return

    # Set before the first await so a concurrent click hits the guard above.
    self.resolved = True

    embeds = interaction.message.embeds
    embed = embeds[0] if embeds else None
    if embed:
        embed.color = color
        embed.set_footer(text=f"{label} by {interaction.user.display_name}")

    for button in self.children:
        button.disabled = True

    await interaction.response.edit_message(embed=embed, view=self)

    # Resolve via the module-level primitive. If the handler
    # returns a follow-up message, post it in the same channel.
    try:
        from tools import slash_confirm as _slash_confirm_mod

        followup_text = await _slash_confirm_mod.resolve(
            self.session_key, self.confirm_id, choice,
        )
        if followup_text:
            await interaction.followup.send(followup_text)
        logger.info(
            "Discord button resolved slash-confirm for session %s "
            "(choice=%s, user=%s)",
            self.session_key, choice, interaction.user.display_name,
        )
    except Exception as exc:
        logger.error("Discord slash-confirm resolve failed: %s", exc, exc_info=True)
|
||||
|
||||
@discord.ui.button(label="Approve Once", style=discord.ButtonStyle.green)
async def approve_once(
    self,
    interaction: discord.Interaction,
    button: discord.ui.Button,
):
    """Resolve the prompt with ``choice="once"`` and a green embed."""
    accent = discord.Color.green()
    await self._resolve(interaction, "once", accent, "Approved once")
|
||||
|
||||
@discord.ui.button(label="Always Approve", style=discord.ButtonStyle.blurple)
async def approve_always(
    self,
    interaction: discord.Interaction,
    button: discord.ui.Button,
):
    """Resolve the prompt with ``choice="always"`` and a purple embed."""
    accent = discord.Color.purple()
    await self._resolve(interaction, "always", accent, "Always approved")
|
||||
|
||||
@discord.ui.button(label="Cancel", style=discord.ButtonStyle.red)
async def cancel(
    self,
    interaction: discord.Interaction,
    button: discord.ui.Button,
):
    """Resolve the prompt with ``choice="cancel"`` and a greyple embed."""
    accent = discord.Color.greyple()
    await self._resolve(interaction, "cancel", accent, "Cancelled")
|
||||
|
||||
async def on_timeout(self):
    """Lock the view once its timeout elapses.

    Marks the prompt as resolved and disables every child component on
    this view object. This method does not itself edit the Discord message.
    """
    self.resolved = True
    for component in self.children:
        component.disabled = True
|
||||
|
||||
class UpdatePromptView(discord.ui.View):
|
||||
"""Interactive Yes/No buttons for ``hermes update`` prompts.
|
||||
|
||||
|
||||
+108
-1
@@ -31,7 +31,7 @@ from email.mime.base import MIMEBase
|
||||
from email.utils import formatdate
|
||||
from email import encoders
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
@@ -540,6 +540,113 @@ class EmailAdapter(BasePlatformAdapter):
|
||||
text += f"\n\nImage: {image_url}"
|
||||
return await self.send(chat_id, text.strip(), reply_to)
|
||||
|
||||
async def send_multiple_images(
    self,
    chat_id: str,
    images: List[Tuple[str, str]],
    metadata: Optional[Dict[str, Any]] = None,
    human_delay: float = 0.0,
) -> None:
    """Deliver a batch of images in one email.

    Each entry of *images* is an ``(image_url, alt_text)`` pair. Non-empty
    alt text becomes a paragraph of the body. ``file://`` URLs that exist
    on disk are attached; missing ones are logged and skipped. Any other
    URL is not downloaded, only linked in the body as ``Image: <url>``
    (parity with ``send_image``).

    If sending fails, the error is logged and the base-class
    ``send_multiple_images`` is used as a fallback.
    """
    if not images:
        return

    from urllib.parse import unquote as _unquote

    text_chunks: List[str] = []
    attachments: List[str] = []
    for url, caption in images:
        if caption:
            text_chunks.append(caption)
        if not url.startswith("file://"):
            # Remote URLs just get linked in the body (parity with send_image)
            text_chunks.append(f"Image: {url}")
            continue
        # Drop the 7-character "file://" prefix and undo percent-encoding.
        candidate = _unquote(url[7:])
        if not Path(candidate).exists():
            logger.warning("[Email] Skipping missing image: %s", candidate)
            continue
        attachments.append(candidate)

    # Nothing to attach and nothing to say: do not send an empty email.
    if not (attachments or text_chunks):
        return

    body = "\n\n".join(text_chunks)

    try:
        # The SMTP helper is blocking, so run it in the default executor.
        await asyncio.get_running_loop().run_in_executor(
            None,
            self._send_email_with_attachments,
            chat_id,
            body,
            attachments,
        )
    except Exception as e:
        logger.error("[Email] Multi-image send failed, falling back: %s", e, exc_info=True)
        await super().send_multiple_images(chat_id, images, metadata, human_delay)
|
||||
|
||||
def _send_email_with_attachments(
    self,
    to_addr: str,
    body: str,
    file_paths: List[str],
) -> str:
    """Send one email with several file attachments via SMTP.

    The message is threaded onto the stored conversation for *to_addr*
    when ``self._thread_context`` has one: the subject is prefixed with
    ``Re:`` and ``In-Reply-To`` / ``References`` point at the original
    message ID.

    Args:
        to_addr: Recipient address; also the key into the thread context.
        body: Plain-text body. Omitted from the message when empty.
        file_paths: Local files to attach. A file that cannot be read is
            logged and skipped rather than aborting the send.

    Returns:
        The generated ``Message-ID`` of the sent email.

    Raises:
        Exception: Errors from the SMTP connection, STARTTLS, login or
            send propagate to the caller.
    """
    msg = MIMEMultipart()
    msg["From"] = self._address
    msg["To"] = to_addr

    ctx = self._thread_context.get(to_addr, {})
    subject = ctx.get("subject", "Hermes Agent")
    if not subject.startswith("Re:"):
        subject = f"Re: {subject}"
    msg["Subject"] = subject

    original_msg_id = ctx.get("message_id")
    if original_msg_id:
        msg["In-Reply-To"] = original_msg_id
        msg["References"] = original_msg_id

    msg["Date"] = formatdate(localtime=True)
    msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
    msg["Message-ID"] = msg_id

    if body:
        msg.attach(MIMEText(body, "plain", "utf-8"))

    for file_path in file_paths:
        p = Path(file_path)
        try:
            with open(p, "rb") as f:
                part = MIMEBase("application", "octet-stream")
                part.set_payload(f.read())
            encoders.encode_base64(part)
            # Pass the filename as a header parameter so the email package
            # quotes/encodes it. Interpolating it raw produces a malformed
            # header for names with spaces, ';' or non-ASCII characters.
            part.add_header("Content-Disposition", "attachment", filename=p.name)
            msg.attach(part)
        except Exception as e:
            logger.warning("[Email] Failed to attach %s: %s", file_path, e)

    smtp = smtplib.SMTP(self._smtp_host, self._smtp_port, timeout=30)
    try:
        smtp.starttls(context=ssl.create_default_context())
        smtp.login(self._address, self._password)
        smtp.send_message(msg)
    finally:
        # Prefer a clean QUIT; if that fails, just drop the socket.
        try:
            smtp.quit()
        except Exception:
            smtp.close()

    logger.info("[Email] Sent multi-attachment email to %s (%d files)", to_addr, len(file_paths))
    return msg_id
|
||||
|
||||
async def send_document(
|
||||
self,
|
||||
chat_id: str,
|
||||
|
||||
@@ -974,7 +974,6 @@ def build_whole_comment_prompt(
|
||||
|
||||
def _resolve_model_and_runtime() -> Tuple[str, dict]:
|
||||
"""Resolve model and provider credentials, same as gateway message handling."""
|
||||
import os
|
||||
from gateway.run import _load_gateway_config, _resolve_gateway_model
|
||||
|
||||
user_config = _load_gateway_config()
|
||||
|
||||
@@ -11,10 +11,10 @@ import logging
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Dict, Optional
|
||||
from typing import TYPE_CHECKING, Dict
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from gateway.platforms.base import BasePlatformAdapter, MessageEvent
|
||||
from gateway.platforms.base import MessageEvent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
+420
-44
@@ -11,6 +11,7 @@ Environment variables:
|
||||
MATRIX_PASSWORD Password (alternative to access token)
|
||||
MATRIX_ENCRYPTION Set "true" to enable E2EE
|
||||
MATRIX_DEVICE_ID Stable device ID for E2EE persistence across restarts
|
||||
MATRIX_PROXY HTTP(S) or SOCKS proxy URL for Matrix traffic
|
||||
MATRIX_ALLOWED_USERS Comma-separated Matrix user IDs (@user:server)
|
||||
MATRIX_HOME_ROOM Room ID for cron/notification delivery
|
||||
MATRIX_REACTIONS Set "false" to disable processing lifecycle reactions
|
||||
@@ -18,6 +19,7 @@ Environment variables:
|
||||
MATRIX_REQUIRE_MENTION Require @mention in rooms (default: true)
|
||||
MATRIX_FREE_RESPONSE_ROOMS Comma-separated room IDs exempt from mention requirement
|
||||
MATRIX_AUTO_THREAD Auto-create threads for room messages (default: true)
|
||||
MATRIX_DM_AUTO_THREAD Auto-create threads for DM messages (default: false)
|
||||
MATRIX_RECOVERY_KEY Recovery key for cross-signing verification after device key rotation
|
||||
MATRIX_DM_MENTION_THREADS Create a thread when bot is @mentioned in a DM (default: false)
|
||||
"""
|
||||
@@ -30,6 +32,8 @@ import mimetypes
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
from html import escape as _html_escape
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Set
|
||||
@@ -95,11 +99,25 @@ from gateway.platforms.base import (
|
||||
MessageType,
|
||||
ProcessingOutcome,
|
||||
SendResult,
|
||||
resolve_proxy_url,
|
||||
proxy_kwargs_for_aiohttp,
|
||||
)
|
||||
from gateway.platforms.helpers import ThreadParticipationTracker
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class _MatrixApprovalPrompt:
|
||||
"""Tracks a pending Matrix reaction-based exec approval prompt."""
|
||||
|
||||
def __init__(self, session_key: str, chat_id: str, message_id: str, resolved: bool = False):
|
||||
self.session_key = session_key
|
||||
self.chat_id = chat_id
|
||||
self.message_id = message_id
|
||||
self.resolved = resolved
|
||||
self.bot_reaction_events: dict[str, str] = {} # emoji -> event_id
|
||||
|
||||
# Matrix message size limit (4000 chars practical, spec has no hard limit
|
||||
# but clients render poorly above this).
|
||||
MAX_MESSAGE_LENGTH = 4000
|
||||
@@ -114,11 +132,85 @@ _CRYPTO_DB_PATH = _STORE_DIR / "crypto.db"
|
||||
# Grace period: ignore messages older than this many seconds before startup.
|
||||
_STARTUP_GRACE_SECONDS = 5
|
||||
|
||||
# Matches a full Matrix user ID of the form ``@localpart:server`` with an
# optional ``:port`` suffix, captured as group 1. The negative lookbehind
# rejects matches directly preceded by a word character or ``/``.
_OUTBOUND_MENTION_RE = re.compile(
    r"(?<![\w/])(@[0-9A-Za-z._=/-]+:[0-9A-Za-z.-]+(?::\d+)?)"
)

# Install instruction text for the optional E2EE dependencies.
_E2EE_INSTALL_HINT = (
    "Install with: pip install 'mautrix[encryption]' (requires libolm C library)"
)

# Lower-case file suffixes treated as image filenames when ``mimetypes``
# does not recognise the name (see _looks_like_matrix_image_filename).
_MATRIX_IMAGE_FILENAME_EXTS = frozenset({
    ".jpg",
    ".jpeg",
    ".png",
    ".gif",
    ".webp",
    ".bmp",
    ".svg",
    ".heic",
    ".heif",
    ".avif",
})
|
||||
|
||||
|
||||
def _looks_like_matrix_image_filename(text: str) -> bool:
|
||||
"""Return True when Matrix image body text is probably just a transport filename.
|
||||
|
||||
Matrix ``m.image`` events commonly populate ``content.body`` with the uploaded
|
||||
filename when the user did not add a caption. Treating that raw filename as
|
||||
user-authored text confuses downstream vision enrichment.
|
||||
"""
|
||||
candidate = str(text or "").strip()
|
||||
if not candidate or "\n" in candidate or candidate.endswith("/"):
|
||||
return False
|
||||
|
||||
name = Path(candidate).name
|
||||
if not name or name != candidate:
|
||||
return False
|
||||
|
||||
suffix = Path(name).suffix.lower()
|
||||
if not suffix:
|
||||
return False
|
||||
|
||||
guessed_type, _ = mimetypes.guess_type(name)
|
||||
if guessed_type and guessed_type.startswith("image/"):
|
||||
return True
|
||||
return suffix in _MATRIX_IMAGE_FILENAME_EXTS
|
||||
|
||||
|
||||
def _create_matrix_session(proxy_url: str | None):
    """Build an ``aiohttp.ClientSession`` whose proxy covers *every* request.

    mautrix's ``HTTPAPI._send()`` calls ``session.request()`` without
    forwarding per-request ``proxy=`` kwargs, so the proxy has to be set on
    the session itself:

    * HTTP(S) proxy: aiohttp's session-level ``proxy=`` parameter.
    * SOCKS proxy: ``aiohttp_socks.ProxyConnector`` (connector-level). If
      ``aiohttp_socks`` cannot be imported, a warning is logged and the
      proxy is ignored.
    * No proxy (or ignored SOCKS proxy): ``trust_env=True`` so the standard
      ``HTTP_PROXY`` / ``HTTPS_PROXY`` env vars are honoured.
    """
    import aiohttp

    if proxy_url:
        scheme = proxy_url.split("://")[0].lower()
        if not scheme.startswith("socks"):
            return aiohttp.ClientSession(proxy=proxy_url)

        try:
            from aiohttp_socks import ProxyConnector

            return aiohttp.ClientSession(
                connector=ProxyConnector.from_url(proxy_url, rdns=True),
            )
        except ImportError:
            logger.warning(
                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
                "Run: pip install aiohttp-socks",
                proxy_url,
            )

    return aiohttp.ClientSession(trust_env=True)
|
||||
|
||||
|
||||
def _check_e2ee_deps() -> bool:
|
||||
"""Return True if mautrix E2EE dependencies (python-olm) are available."""
|
||||
@@ -260,6 +352,9 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
"1",
|
||||
"yes",
|
||||
)
|
||||
self._dm_auto_thread: bool = os.getenv(
|
||||
"MATRIX_DM_AUTO_THREAD", "false"
|
||||
).lower() in ("true", "1", "yes")
|
||||
self._dm_mention_threads: bool = os.getenv(
|
||||
"MATRIX_DM_MENTION_THREADS", "false"
|
||||
).lower() in ("true", "1", "yes")
|
||||
@@ -270,6 +365,11 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
).lower() not in ("false", "0", "no")
|
||||
self._pending_reactions: dict[tuple[str, str], str] = {}
|
||||
|
||||
# Proxy support — resolve once at init, reuse for all HTTP traffic.
|
||||
self._proxy_url: str | None = resolve_proxy_url(platform_env_var="MATRIX_PROXY")
|
||||
if self._proxy_url:
|
||||
logger.info("Matrix: proxy configured — %s", self._proxy_url)
|
||||
|
||||
# Text batching: merge rapid successive messages (Telegram-style).
|
||||
# Matrix clients split long messages around 4000 chars.
|
||||
self._text_batch_delay_seconds = float(
|
||||
@@ -281,6 +381,18 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
self._pending_text_batches: Dict[str, MessageEvent] = {}
|
||||
self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
|
||||
|
||||
# Matrix reaction-based dangerous command approvals.
|
||||
self._approval_reaction_map = {
|
||||
"✅": "once",
|
||||
"❎": "deny",
|
||||
}
|
||||
self._approval_prompts_by_event: Dict[str, _MatrixApprovalPrompt] = {}
|
||||
self._approval_prompt_by_session: Dict[str, str] = {}
|
||||
allowed_users_raw = os.getenv("MATRIX_ALLOWED_USERS", "")
|
||||
self._allowed_user_ids: Set[str] = {
|
||||
u.strip() for u in allowed_users_raw.split(",") if u.strip()
|
||||
}
|
||||
|
||||
def _is_duplicate_event(self, event_id) -> bool:
|
||||
"""Return True if this event was already processed. Tracks the ID otherwise."""
|
||||
if not event_id:
|
||||
@@ -326,7 +438,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
)
|
||||
return False
|
||||
except Exception as exc:
|
||||
logger.error("Matrix: post-upload key verification failed: %s", exc)
|
||||
logger.error("Matrix: post-upload key verification failed: %s", exc, exc_info=True)
|
||||
return False
|
||||
return True
|
||||
|
||||
@@ -342,6 +454,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
logger.error(
|
||||
"Matrix: cannot verify device keys on server: %s — refusing E2EE",
|
||||
exc,
|
||||
exc_info=True,
|
||||
)
|
||||
return False
|
||||
|
||||
@@ -356,7 +469,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
try:
|
||||
await olm.share_keys()
|
||||
except Exception as exc:
|
||||
logger.error("Matrix: failed to re-upload device keys: %s", exc)
|
||||
logger.error("Matrix: failed to re-upload device keys: %s", exc, exc_info=True)
|
||||
return False
|
||||
return await self._reverify_keys_after_upload(client, local_ed25519)
|
||||
|
||||
@@ -396,6 +509,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
"Try generating a new access token to get a fresh device.",
|
||||
client.device_id,
|
||||
exc,
|
||||
exc_info=True,
|
||||
)
|
||||
return False
|
||||
return await self._reverify_keys_after_upload(client, local_ed25519)
|
||||
@@ -420,9 +534,11 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
_STORE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Create the HTTP API layer.
|
||||
client_session = _create_matrix_session(self._proxy_url)
|
||||
api = HTTPAPI(
|
||||
base_url=self._homeserver,
|
||||
token=self._access_token or "",
|
||||
client_session=client_session,
|
||||
)
|
||||
|
||||
# Create the client.
|
||||
@@ -465,6 +581,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
logger.error(
|
||||
"Matrix: whoami failed — check MATRIX_ACCESS_TOKEN and MATRIX_HOMESERVER: %s",
|
||||
exc,
|
||||
exc_info=True,
|
||||
)
|
||||
await api.session.close()
|
||||
return False
|
||||
@@ -607,6 +724,44 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
logger.warning(
|
||||
"Matrix: recovery key verification failed: %s", exc
|
||||
)
|
||||
else:
|
||||
# No recovery key — bootstrap cross-signing if the bot
|
||||
# has none yet. Without this, Element shows "Encrypted
|
||||
# by a device not verified by its owner" on every
|
||||
# message from this bot, indefinitely. mautrix's
|
||||
# generate_recovery_key does the full flow: generates
|
||||
# MSK/SSK/USK, uploads private keys to SSSS, publishes
|
||||
# public keys to the homeserver, and signs the current
|
||||
# device with the new SSK. Some homeservers require UIA
|
||||
# for /keys/device_signing/upload — those will need an
|
||||
# alternate path; Continuwuity and Synapse-with-shared-
|
||||
# secret accept the unauthenticated upload.
|
||||
try:
|
||||
own_xsign = await olm.get_own_cross_signing_public_keys()
|
||||
except Exception as exc:
|
||||
own_xsign = None
|
||||
logger.warning(
|
||||
"Matrix: cross-signing key lookup failed: %s", exc
|
||||
)
|
||||
if own_xsign is None:
|
||||
try:
|
||||
new_recovery_key = await olm.generate_recovery_key()
|
||||
logger.warning(
|
||||
"Matrix: bootstrapped cross-signing for %s. "
|
||||
"SAVE THIS RECOVERY KEY — set "
|
||||
"MATRIX_RECOVERY_KEY for future restarts so "
|
||||
"the bot can re-sign its device after key "
|
||||
"rotation: %s",
|
||||
client.mxid,
|
||||
new_recovery_key,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Matrix: cross-signing bootstrap failed "
|
||||
"(non-fatal — Element will show 'not "
|
||||
"verified by its owner'): %s",
|
||||
exc,
|
||||
)
|
||||
|
||||
client.crypto = olm
|
||||
logger.info(
|
||||
@@ -664,6 +819,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
await asyncio.gather(*tasks)
|
||||
except Exception as exc:
|
||||
logger.warning("Matrix: initial sync event dispatch error: %s", exc)
|
||||
await self._join_pending_invites(sync_data)
|
||||
else:
|
||||
logger.warning(
|
||||
"Matrix: initial sync returned unexpected type %s",
|
||||
@@ -727,17 +883,8 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
chunks = self.truncate_message(formatted, MAX_MESSAGE_LENGTH)
|
||||
|
||||
last_event_id = None
|
||||
for chunk in chunks:
|
||||
msg_content: Dict[str, Any] = {
|
||||
"msgtype": "m.text",
|
||||
"body": chunk,
|
||||
}
|
||||
|
||||
# Convert markdown to HTML for rich rendering.
|
||||
html = self._markdown_to_html(chunk)
|
||||
if html and html != chunk:
|
||||
msg_content["format"] = "org.matrix.custom.html"
|
||||
msg_content["formatted_body"] = html
|
||||
for i, chunk in enumerate(chunks):
|
||||
msg_content = self._build_text_message_content(chunk)
|
||||
|
||||
# Reply-to support.
|
||||
if reply_to:
|
||||
@@ -844,25 +991,21 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
"""Edit an existing message (via m.replace)."""
|
||||
|
||||
formatted = self.format_message(content)
|
||||
new_content = self._build_text_message_content(formatted)
|
||||
msg_content: Dict[str, Any] = {
|
||||
"msgtype": "m.text",
|
||||
"body": f"* {formatted}",
|
||||
"m.new_content": {
|
||||
"msgtype": "m.text",
|
||||
"body": formatted,
|
||||
},
|
||||
"m.relates_to": {
|
||||
"rel_type": "m.replace",
|
||||
"event_id": message_id,
|
||||
},
|
||||
"m.new_content": new_content,
|
||||
}
|
||||
|
||||
html = self._markdown_to_html(formatted)
|
||||
if html and html != formatted:
|
||||
msg_content["m.new_content"]["format"] = "org.matrix.custom.html"
|
||||
msg_content["m.new_content"]["formatted_body"] = html
|
||||
if "m.mentions" in new_content:
|
||||
msg_content["m.mentions"] = new_content["m.mentions"]
|
||||
if "formatted_body" in new_content:
|
||||
msg_content["format"] = "org.matrix.custom.html"
|
||||
msg_content["formatted_body"] = f"* {html}"
|
||||
msg_content["formatted_body"] = f'* {new_content["formatted_body"]}'
|
||||
msg_content["m.relates_to"] = {
|
||||
"rel_type": "m.replace",
|
||||
"event_id": message_id,
|
||||
}
|
||||
|
||||
try:
|
||||
event_id = await self._client.send_message_event(
|
||||
@@ -895,10 +1038,12 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
# Try aiohttp first (always available), fall back to httpx
|
||||
try:
|
||||
import aiohttp as _aiohttp
|
||||
|
||||
async with _aiohttp.ClientSession(trust_env=True) as http:
|
||||
_sess_kw, _req_kw = proxy_kwargs_for_aiohttp(self._proxy_url)
|
||||
async with _aiohttp.ClientSession(**_sess_kw) as http:
|
||||
async with http.get(
|
||||
image_url, timeout=_aiohttp.ClientTimeout(total=30)
|
||||
image_url,
|
||||
timeout=_aiohttp.ClientTimeout(total=30),
|
||||
**_req_kw,
|
||||
) as resp:
|
||||
resp.raise_for_status()
|
||||
data = await resp.read()
|
||||
@@ -908,8 +1053,10 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
)
|
||||
except ImportError:
|
||||
import httpx
|
||||
|
||||
async with httpx.AsyncClient() as http:
|
||||
_httpx_kw: dict = {}
|
||||
if self._proxy_url:
|
||||
_httpx_kw["proxy"] = self._proxy_url
|
||||
async with httpx.AsyncClient(**_httpx_kw) as http:
|
||||
resp = await http.get(image_url, follow_redirects=True, timeout=30)
|
||||
resp.raise_for_status()
|
||||
data = resp.content
|
||||
@@ -984,6 +1131,56 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
chat_id, video_path, "m.video", caption, reply_to, metadata=metadata
|
||||
)
|
||||
|
||||
async def send_exec_approval(
    self,
    chat_id: str,
    command: str,
    session_key: str,
    description: str = "dangerous command",
    metadata: Optional[dict] = None,
) -> SendResult:
    """Post a dangerous-command approval prompt and seed its reactions.

    Sends a message showing the command (truncated to 2000 characters)
    and the reason, registers the prompt under both its message ID and
    its session key (replacing any earlier prompt for the same session),
    then adds the bot's own ✅ / ❎ reactions to the message.

    Returns:
        The ``SendResult`` of the prompt message. If sending failed or
        produced no message ID, it is returned without registering a prompt.
    """
    if not self._client:
        return SendResult(success=False, error="Not connected")

    if len(command) > 2000:
        cmd_preview = command[:2000] + "..."
    else:
        cmd_preview = command

    text = (
        "⚠️ **Dangerous command requires approval**\n"
        f"```\n{cmd_preview}\n```\n"
        f"Reason: {description}\n\n"
        "Reply `/approve` to execute, `/approve session` to approve this pattern for the session, "
        "`/approve always` to approve permanently, or `/deny` to cancel.\n\n"
        "You can also click the reaction to approve:\n"
        "✅ = /approve\n"
        "❎ = /deny"
    )

    sent = await self.send(chat_id, text, metadata=metadata)
    if not (sent.success and sent.message_id):
        return sent

    prompt = _MatrixApprovalPrompt(
        session_key=session_key,
        chat_id=chat_id,
        message_id=sent.message_id,
    )

    # Only one live prompt per session: forget the previous one, if any.
    stale_event = self._approval_prompt_by_session.get(session_key)
    if stale_event:
        self._approval_prompts_by_event.pop(stale_event, None)
    self._approval_prompts_by_event[sent.message_id] = prompt
    self._approval_prompt_by_session[session_key] = sent.message_id

    for emoji in ("✅", "❎"):
        try:
            seeded = await self._send_reaction(chat_id, sent.message_id, emoji)
            # Save the bot's reaction event_id for later cleanup
            if seeded:
                prompt.bot_reaction_events[emoji] = str(seeded)
        except Exception as exc:
            # Best-effort: the prompt still works via the /approve commands.
            logger.debug("Matrix: failed to add approval reaction %s: %s", emoji, exc)

    return sent
|
||||
|
||||
def format_message(self, content: str) -> str:
|
||||
"""Pass-through — Matrix supports standard Markdown natively."""
|
||||
# Strip image markdown; media is uploaded separately.
|
||||
@@ -1115,9 +1312,15 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
next_batch = await client.sync_store.get_next_batch()
|
||||
while not self._closing:
|
||||
try:
|
||||
sync_data = await client.sync(
|
||||
since=next_batch,
|
||||
timeout=30000,
|
||||
# Wrap in asyncio.wait_for to guard against TCP-level hangs
|
||||
# that the Matrix long-poll timeout cannot catch. Long-poll
|
||||
# is 30s, so 45s gives 15s slack for network drain.
|
||||
sync_data = await asyncio.wait_for(
|
||||
client.sync(
|
||||
since=next_batch,
|
||||
timeout=30000,
|
||||
),
|
||||
timeout=45.0,
|
||||
)
|
||||
|
||||
# nio returns SyncError objects (not exceptions) for auth
|
||||
@@ -1153,6 +1356,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
await asyncio.gather(*tasks)
|
||||
except Exception as exc:
|
||||
logger.warning("Matrix: sync event dispatch error: %s", exc)
|
||||
await self._join_pending_invites(sync_data)
|
||||
|
||||
except asyncio.CancelledError:
|
||||
return
|
||||
@@ -1239,6 +1443,15 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
room_id = str(getattr(event, "room_id", ""))
|
||||
sender = str(getattr(event, "sender", ""))
|
||||
|
||||
# Diagnostic: confirm the callback is firing at all when DEBUG is on.
|
||||
# Helps users troubleshoot silent inbound issues like #5819, #7914, #12614.
|
||||
logger.debug(
|
||||
"Matrix: callback fired — event %s from %s in %s",
|
||||
getattr(event, "event_id", "?"),
|
||||
sender,
|
||||
room_id,
|
||||
)
|
||||
|
||||
# Ignore own messages (case-insensitive; also drops when our own
|
||||
# user_id hasn't been resolved yet — see _is_self_sender docstring
|
||||
# and issue #15763).
|
||||
@@ -1350,6 +1563,12 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
in_bot_thread = bool(thread_id and thread_id in self._threads)
|
||||
if self._require_mention and not is_free_room and not in_bot_thread:
|
||||
if not is_mentioned:
|
||||
logger.debug(
|
||||
"Matrix: ignoring message %s in %s — no @mention "
|
||||
"(set MATRIX_REQUIRE_MENTION=false to disable)",
|
||||
event_id,
|
||||
room_id,
|
||||
)
|
||||
return None
|
||||
|
||||
# DM mention-thread.
|
||||
@@ -1362,7 +1581,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
body = self._strip_mention(body)
|
||||
|
||||
# Auto-thread.
|
||||
if not is_dm and not thread_id and self._auto_thread:
|
||||
if not thread_id and ((not is_dm and self._auto_thread) or (is_dm and self._dm_auto_thread)):
|
||||
thread_id = event_id
|
||||
self._threads.mark(thread_id)
|
||||
|
||||
@@ -1604,6 +1823,9 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
return
|
||||
body, is_dm, chat_type, thread_id, display_name, source = ctx
|
||||
|
||||
if msgtype == "m.image" and _looks_like_matrix_image_filename(body):
|
||||
body = ""
|
||||
|
||||
allow_http_fallback = bool(http_url) and not is_encrypted_media
|
||||
media_urls = (
|
||||
[cached_path]
|
||||
@@ -1633,13 +1855,35 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
"Matrix: invited to %s — joining",
|
||||
room_id,
|
||||
)
|
||||
await self._join_room_by_id(room_id)
|
||||
|
||||
async def _join_room_by_id(self, room_id: str) -> bool:
|
||||
"""Join a room by ID and refresh local caches on success."""
|
||||
if not room_id:
|
||||
return False
|
||||
if room_id in self._joined_rooms:
|
||||
return True
|
||||
try:
|
||||
await self._client.join_room(RoomID(room_id))
|
||||
self._joined_rooms.add(room_id)
|
||||
logger.info("Matrix: joined %s", room_id)
|
||||
await self._refresh_dm_cache()
|
||||
return True
|
||||
except Exception as exc:
|
||||
logger.warning("Matrix: error joining %s: %s", room_id, exc)
|
||||
return False
|
||||
|
||||
async def _join_pending_invites(self, sync_data: Dict[str, Any]) -> None:
|
||||
"""Join rooms still present in rooms.invite after sync processing."""
|
||||
rooms = sync_data.get("rooms", {}) if isinstance(sync_data, dict) else {}
|
||||
invites = rooms.get("invite", {})
|
||||
if not isinstance(invites, dict):
|
||||
return
|
||||
for room_id in invites:
|
||||
if room_id in self._joined_rooms:
|
||||
continue
|
||||
logger.info("Matrix: reconciling pending invite for %s", room_id)
|
||||
await self._join_room_by_id(str(room_id))
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Reactions (send, receive, processing lifecycle)
|
||||
@@ -1754,6 +1998,51 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
room_id,
|
||||
)
|
||||
|
||||
# Check if this reaction resolves a pending approval prompt.
|
||||
prompt = self._approval_prompts_by_event.get(reacts_to)
|
||||
if prompt and not prompt.resolved:
|
||||
if room_id != prompt.chat_id:
|
||||
return
|
||||
if self._allowed_user_ids and sender not in self._allowed_user_ids:
|
||||
logger.info(
|
||||
"Matrix: ignoring approval reaction from unauthorized user %s on %s",
|
||||
sender, reacts_to,
|
||||
)
|
||||
return
|
||||
choice = self._approval_reaction_map.get(key)
|
||||
if not choice:
|
||||
return
|
||||
try:
|
||||
from tools.approval import resolve_gateway_approval
|
||||
|
||||
count = resolve_gateway_approval(prompt.session_key, choice)
|
||||
if count:
|
||||
prompt.resolved = True
|
||||
self._approval_prompts_by_event.pop(reacts_to, None)
|
||||
self._approval_prompt_by_session.pop(prompt.session_key, None)
|
||||
logger.info(
|
||||
"Matrix reaction resolved %d approval(s) for session %s "
|
||||
"(choice=%s, user=%s)",
|
||||
count, prompt.session_key, choice, sender,
|
||||
)
|
||||
# Redact bot's seed reactions, leaving only the user's
|
||||
await self._redact_bot_approval_reactions(room_id, prompt)
|
||||
except Exception as exc:
|
||||
logger.error("Failed to resolve gateway approval from Matrix reaction: %s", exc)
|
||||
|
||||
async def _redact_bot_approval_reactions(
|
||||
self,
|
||||
room_id: str,
|
||||
prompt: "_MatrixApprovalPrompt",
|
||||
) -> None:
|
||||
"""Redact the bot's seed ✅/❎ reactions, leaving only the user's reaction."""
|
||||
for emoji, evt_id in prompt.bot_reaction_events.items():
|
||||
try:
|
||||
await self.redact_message(room_id, evt_id, "approval resolved")
|
||||
logger.debug("Matrix: redacted bot reaction %s (%s)", emoji, evt_id)
|
||||
except Exception as exc:
|
||||
logger.debug("Matrix: failed to redact bot reaction %s: %s", emoji, exc)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Text message aggregation (handles Matrix client-side splits)
|
||||
# ------------------------------------------------------------------
|
||||
@@ -1979,11 +2268,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
if not self._client or not text:
|
||||
return SendResult(success=False, error="No client or empty text")
|
||||
|
||||
msg_content: Dict[str, Any] = {"msgtype": msgtype, "body": text}
|
||||
html = self._markdown_to_html(text)
|
||||
if html and html != text:
|
||||
msg_content["format"] = "org.matrix.custom.html"
|
||||
msg_content["formatted_body"] = html
|
||||
msg_content = self._build_text_message_content(text, msgtype=msgtype)
|
||||
|
||||
try:
|
||||
event_id = await self._client.send_message_event(
|
||||
@@ -2046,6 +2331,77 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
# Mention detection helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _build_text_message_content(self, text: str, msgtype: str = "m.text") -> Dict[str, Any]:
|
||||
"""Build Matrix text content with HTML and outbound mention metadata."""
|
||||
msg_content: Dict[str, Any] = {"msgtype": msgtype, "body": text}
|
||||
mention_user_ids = self._extract_outbound_mentions(text)
|
||||
if mention_user_ids:
|
||||
msg_content["m.mentions"] = {"user_ids": mention_user_ids}
|
||||
|
||||
html_source = self._inject_outbound_mention_links(text)
|
||||
html = self._markdown_to_html(html_source)
|
||||
if html and html != text:
|
||||
msg_content["format"] = "org.matrix.custom.html"
|
||||
msg_content["formatted_body"] = html
|
||||
|
||||
return msg_content
|
||||
|
||||
def _extract_outbound_mentions(self, text: str) -> list[str]:
    """Collect the distinct Matrix user IDs mentioned in outbound text.

    Mentions inside protected markdown regions (as masked by
    ``_protect_outbound_mention_regions``) are ignored. IDs are returned
    in order of first appearance.
    """
    masked, _ = self._protect_outbound_mention_regions(text)
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(
        dict.fromkeys(hit.group(1) for hit in _OUTBOUND_MENTION_RE.finditer(masked))
    )
|
||||
|
||||
def _inject_outbound_mention_links(self, text: str) -> str:
    """Wrap outbound Matrix mentions in markdown links outside protected regions.

    Regions returned as placeholders by ``_protect_outbound_mention_regions``
    are masked first, every remaining mention is rewritten to
    ``[<id>](https://matrix.to/#/<id>)``, and the masked regions are then
    put back verbatim.

    Args:
        text: Outbound message text (may be empty).

    Returns:
        The text with mentions linked; empty/falsy input is returned as-is.
    """
    if not text:
        return text

    protected, placeholders = self._protect_outbound_mention_regions(text)

    linked = _OUTBOUND_MENTION_RE.sub(
        lambda match: f"[{match.group(1)}](https://matrix.to/#/{match.group(1)})",
        protected,
    )

    # Restore newest-first: a placeholder created by a later protection pass
    # (e.g. a markdown link) can contain a placeholder from an earlier pass
    # (e.g. inline code inside the link text).  Restoring in ascending order
    # would leave that inner token unresolved in the final output.
    for idx in range(len(placeholders) - 1, -1, -1):
        linked = linked.replace(f"\x00MENTION_PROTECTED{idx}\x00", placeholders[idx])

    return linked
|
||||
|
||||
def _protect_outbound_mention_regions(self, text: str) -> tuple[str, list[str]]:
|
||||
"""Protect markdown regions where outbound mentions should stay literal."""
|
||||
placeholders: list[str] = []
|
||||
|
||||
def _protect(fragment: str) -> str:
|
||||
idx = len(placeholders)
|
||||
placeholders.append(fragment)
|
||||
return f"\x00MENTION_PROTECTED{idx}\x00"
|
||||
|
||||
protected = re.sub(
|
||||
r"```[\s\S]*?```",
|
||||
lambda match: _protect(match.group(0)),
|
||||
text or "",
|
||||
)
|
||||
protected = re.sub(
|
||||
r"`[^`\n]+`",
|
||||
lambda match: _protect(match.group(0)),
|
||||
protected,
|
||||
)
|
||||
protected = re.sub(
|
||||
r"\[[^\]]+\]\([^)]+\)",
|
||||
lambda match: _protect(match.group(0)),
|
||||
protected,
|
||||
)
|
||||
|
||||
return protected, placeholders
|
||||
|
||||
def _is_bot_mentioned(
|
||||
self,
|
||||
body: str,
|
||||
@@ -2080,13 +2436,33 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
return False
|
||||
|
||||
def _strip_mention(self, body: str) -> str:
|
||||
"""Strip the bot's full MXID (``@user:server``) from *body*.
|
||||
"""Remove explicit bot mentions from message body.
|
||||
|
||||
The bare localpart is intentionally *not* stripped — it would
|
||||
mangle file paths like ``/home/hermes/media/file.png``.
|
||||
Important: only strip explicit mention tokens (``@user:server`` or
|
||||
``@localpart``). Do NOT strip bare words matching the bot localpart,
|
||||
otherwise normal phrases like "Hermes Agent" become "Agent".
|
||||
"""
|
||||
if not body:
|
||||
return ""
|
||||
|
||||
# Strip explicit full MXID mentions.
|
||||
if self._user_id:
|
||||
body = body.replace(self._user_id, "")
|
||||
|
||||
# Strip explicit @localpart mentions only (not bare localpart words).
|
||||
if self._user_id and ":" in self._user_id:
|
||||
localpart = self._user_id.split(":")[0].lstrip("@")
|
||||
if localpart:
|
||||
body = re.sub(
|
||||
r'(?<![\w])@' + re.escape(localpart) + r'\b',
|
||||
'',
|
||||
body,
|
||||
flags=re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Normalize spacing after mention removal.
|
||||
body = re.sub(r'[ \t]{2,}', ' ', body)
|
||||
body = re.sub(r'\s+([,.;:!?])', r'\1', body)
|
||||
return body.strip()
|
||||
|
||||
async def _get_display_name(self, room_id: str, user_id: str) -> str:
|
||||
|
||||
@@ -19,7 +19,7 @@ import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.helpers import MessageDeduplicator
|
||||
@@ -412,7 +412,6 @@ class MattermostAdapter(BasePlatformAdapter):
|
||||
|
||||
import aiohttp
|
||||
|
||||
last_exc = None
|
||||
file_data = None
|
||||
ct = "application/octet-stream"
|
||||
fname = url.rsplit("/", 1)[-1].split("?")[0] or f"{kind}.png"
|
||||
@@ -497,6 +496,100 @@ class MattermostAdapter(BasePlatformAdapter):
|
||||
return SendResult(success=False, error="Failed to post with file")
|
||||
return SendResult(success=True, message_id=data["id"])
|
||||
|
||||
async def send_multiple_images(
    self,
    chat_id: str,
    images: List[Tuple[str, str]],
    metadata: Optional[Dict[str, Any]] = None,
    human_delay: float = 0.0,
) -> None:
    """Send a batch of images as Mattermost posts with multiple attachments.

    Images are processed in chunks of five ``file_ids`` per post.  Each
    image is uploaded individually via ``_upload_file``; one post per chunk
    then references every uploaded file.  Images that are missing, unsafe
    or fail to download are skipped with a warning.  The post message is
    built from the alt texts of the images that were actually uploaded.

    If creating the post fails, or anything in the chunk raises, that chunk
    is handed to the base-class ``send_multiple_images`` exactly once.

    Args:
        chat_id: Target channel ID.
        images: ``(image_url, alt_text)`` pairs; ``file://`` URLs are read
            from disk, anything else is downloaded.
        metadata: Passed through to the base-class fallback.
        human_delay: Seconds to sleep between chunks (not before the first).
    """
    if not images:
        return

    import mimetypes
    import aiohttp
    from urllib.parse import unquote as _unquote

    CHUNK = 5  # Mattermost post file_ids cap
    chunks = [images[i:i + CHUNK] for i in range(0, len(images), CHUNK)]

    for chunk_idx, chunk in enumerate(chunks):
        if human_delay > 0 and chunk_idx > 0:
            await asyncio.sleep(human_delay)

        file_ids: List[str] = []
        caption_parts: List[str] = []
        needs_fallback = False
        try:
            for image_url, alt_text in chunk:
                if image_url.startswith("file://"):
                    local_path = _unquote(image_url[7:])
                    p = Path(local_path)
                    if not p.exists():
                        logger.warning("Mattermost: skipping missing image %s", local_path)
                        continue
                    fname = p.name
                    ct = mimetypes.guess_type(fname)[0] or "image/png"
                    file_data = p.read_bytes()
                else:
                    from tools.url_safety import is_safe_url
                    if not is_safe_url(image_url):
                        logger.warning("Mattermost: blocked unsafe image URL in batch")
                        continue
                    try:
                        async with self._session.get(
                            image_url, timeout=aiohttp.ClientTimeout(total=30)
                        ) as resp:
                            if resp.status >= 400:
                                logger.warning(
                                    "Mattermost: failed to download image (HTTP %d): %s",
                                    resp.status, image_url[:80],
                                )
                                continue
                            file_data = await resp.read()
                            ct = resp.content_type or "image/png"
                    except Exception as dl_err:
                        logger.warning("Mattermost: download failed for %s: %s", image_url[:80], dl_err)
                        continue
                    fname = image_url.rsplit("/", 1)[-1].split("?")[0] or f"image_{len(file_ids)}.png"

                fid = await self._upload_file(chat_id, file_data, fname, ct)
                if fid:
                    file_ids.append(fid)
                    # Caption only images that made it into the post, so the
                    # message never describes an attachment that is absent.
                    if alt_text:
                        caption_parts.append(alt_text)

            if not file_ids:
                continue

            payload: Dict[str, Any] = {
                "channel_id": chat_id,
                "message": "\n".join(caption_parts),
                "file_ids": file_ids,
            }
            logger.info(
                "Mattermost: sending %d image(s) as single post (chunk %d/%d)",
                len(file_ids), chunk_idx + 1, len(chunks),
            )
            data = await self._api_post("posts", payload)
            if not data or "id" not in data:
                logger.warning("Mattermost: multi-image post failed, falling back")
                needs_fallback = True
        except Exception as e:
            logger.warning(
                "Mattermost: multi-image send failed (chunk %d/%d), falling back: %s",
                chunk_idx + 1, len(chunks), e, exc_info=True,
            )
            needs_fallback = True

        # The fallback runs outside the try block: an exception raised by the
        # fallback itself must not trigger a second fallback for the same
        # chunk (which could send the same images twice).
        if needs_fallback:
            await super().send_multiple_images(chat_id, chunk, metadata, human_delay=human_delay)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# WebSocket
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@@ -976,6 +976,18 @@ class QQAdapter(BasePlatformAdapter):
|
||||
if not channel_id:
|
||||
return
|
||||
|
||||
# Apply group_policy ACL — guild channels are group-like contexts.
|
||||
# Without this check any member of any guild the bot is in could
|
||||
# bypass the configured allowlist.
|
||||
guild_id = str(d.get("guild_id", ""))
|
||||
author_id = str(author.get("id", ""))
|
||||
if not self._is_group_allowed(guild_id or channel_id, author_id):
|
||||
logger.debug(
|
||||
"[%s] Guild message blocked by ACL: channel=%s user=%s",
|
||||
self._log_tag, channel_id, author_id,
|
||||
)
|
||||
return
|
||||
|
||||
member = d.get("member") if isinstance(d.get("member"), dict) else {}
|
||||
nick = str(member.get("nick", "")) or str(author.get("username", ""))
|
||||
|
||||
@@ -1032,6 +1044,17 @@ class QQAdapter(BasePlatformAdapter):
|
||||
if not guild_id:
|
||||
return
|
||||
|
||||
# Apply dm_policy ACL — guild DMs were previously unauthenticated.
|
||||
# Without this check any member of any guild the bot is in could
|
||||
# bypass the configured allowlist via direct messages.
|
||||
author_id = str(author.get("id", ""))
|
||||
if not self._is_dm_allowed(author_id):
|
||||
logger.debug(
|
||||
"[%s] Guild DM blocked by ACL: guild=%s user=%s",
|
||||
self._log_tag, guild_id, author_id,
|
||||
)
|
||||
return
|
||||
|
||||
text = content
|
||||
att_result = await self._process_attachments(d.get("attachments"))
|
||||
image_urls = att_result["image_urls"]
|
||||
@@ -1957,7 +1980,7 @@ class QQAdapter(BasePlatformAdapter):
|
||||
self, openid: str, content: str, reply_to: Optional[str] = None
|
||||
) -> SendResult:
|
||||
"""Send text to a C2C user via REST API."""
|
||||
msg_seq = self._next_msg_seq(reply_to or openid)
|
||||
self._next_msg_seq(reply_to or openid)
|
||||
body = self._build_text_body(content, reply_to)
|
||||
if reply_to:
|
||||
body["msg_id"] = reply_to
|
||||
@@ -1970,7 +1993,7 @@ class QQAdapter(BasePlatformAdapter):
|
||||
self, group_openid: str, content: str, reply_to: Optional[str] = None
|
||||
) -> SendResult:
|
||||
"""Send text to a group via REST API."""
|
||||
msg_seq = self._next_msg_seq(reply_to or group_openid)
|
||||
self._next_msg_seq(reply_to or group_openid)
|
||||
body = self._build_text_body(content, reply_to)
|
||||
if reply_to:
|
||||
body["msg_id"] = reply_to
|
||||
@@ -2135,11 +2158,6 @@ class QQAdapter(BasePlatformAdapter):
|
||||
|
||||
# Route
|
||||
chat_type = self._guess_chat_type(chat_id)
|
||||
target_path = (
|
||||
f"/v2/users/{chat_id}/files"
|
||||
if chat_type == "c2c"
|
||||
else f"/v2/groups/{chat_id}/files"
|
||||
)
|
||||
|
||||
if chat_type == "guild":
|
||||
# Guild channels don't support native media upload in the same way
|
||||
|
||||
+490
-12
@@ -21,7 +21,7 @@ import time
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from urllib.parse import quote, unquote
|
||||
|
||||
import httpx
|
||||
@@ -31,6 +31,7 @@ from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
MessageType,
|
||||
ProcessingOutcome,
|
||||
SendResult,
|
||||
cache_image_from_bytes,
|
||||
cache_audio_from_bytes,
|
||||
@@ -38,6 +39,17 @@ from gateway.platforms.base import (
|
||||
cache_image_from_url,
|
||||
)
|
||||
from gateway.platforms.helpers import redact_phone
|
||||
from gateway.platforms.signal_rate_limit import (
|
||||
SIGNAL_BATCH_PACING_NOTICE_THRESHOLD,
|
||||
SIGNAL_MAX_ATTACHMENTS_PER_MSG,
|
||||
SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
|
||||
SignalRateLimitError,
|
||||
_extract_retry_after_seconds,
|
||||
_format_wait,
|
||||
_is_signal_rate_limit_error,
|
||||
_signal_send_timeout,
|
||||
get_scheduler,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -52,6 +64,7 @@ SSE_RETRY_DELAY_MAX = 60.0
|
||||
HEALTH_CHECK_INTERVAL = 30.0 # seconds between health checks
|
||||
HEALTH_CHECK_STALE_THRESHOLD = 120.0 # seconds without SSE activity before concern
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -162,6 +175,10 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
"""Signal messenger adapter using signal-cli HTTP daemon."""
|
||||
|
||||
platform = Platform.SIGNAL
|
||||
# Signal has no real edit API for already-sent messages. Mark it explicitly
|
||||
# so streaming suppresses the visible cursor instead of leaving a stale tofu
|
||||
# square behind in chat clients when edit attempts fail.
|
||||
SUPPORTS_MESSAGE_EDITING = False
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.SIGNAL)
|
||||
@@ -488,6 +505,11 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
if text and mentions:
|
||||
text = _render_mentions(text, mentions)
|
||||
|
||||
# Extract quote (reply-to) context from Signal dataMessage
|
||||
quote_data = data_message.get("quote") or {}
|
||||
reply_to_id = str(quote_data.get("id")) if quote_data.get("id") else None
|
||||
reply_to_text = quote_data.get("text")
|
||||
|
||||
# Process attachments
|
||||
attachments_data = data_message.get("attachments", [])
|
||||
media_urls = []
|
||||
@@ -541,7 +563,9 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
else:
|
||||
timestamp = datetime.now(tz=timezone.utc)
|
||||
|
||||
# Build and dispatch event
|
||||
# Build and dispatch event.
|
||||
# Store raw envelope data in raw_message so on_processing_start/complete
|
||||
# can extract targetAuthor + targetTimestamp for sendReaction.
|
||||
event = MessageEvent(
|
||||
source=source,
|
||||
text=text or "",
|
||||
@@ -549,6 +573,9 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
media_urls=media_urls,
|
||||
media_types=media_types,
|
||||
timestamp=timestamp,
|
||||
raw_message={"sender": sender, "timestamp_ms": ts_ms},
|
||||
reply_to_message_id=reply_to_id,
|
||||
reply_to_text=reply_to_text,
|
||||
)
|
||||
|
||||
logger.debug("Signal: message from %s in %s: %s",
|
||||
@@ -659,6 +686,8 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
rpc_id: str = None,
|
||||
*,
|
||||
log_failures: bool = True,
|
||||
raise_on_rate_limit: bool = False,
|
||||
timeout: float = 30.0,
|
||||
) -> Any:
|
||||
"""Send a JSON-RPC 2.0 request to signal-cli daemon.
|
||||
|
||||
@@ -667,6 +696,11 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
repeated NETWORK_FAILURE spam for unreachable recipients while
|
||||
still preserving visibility for the first occurrence and for
|
||||
unrelated RPCs.
|
||||
|
||||
When ``raise_on_rate_limit=True``, a Signal ``[429]`` /
|
||||
``RateLimitException`` response raises ``SignalRateLimitError``
|
||||
instead of being swallowed — lets callers (multi-attachment send)
|
||||
opt into backoff-retry without changing default behaviour.
|
||||
"""
|
||||
if not self.client:
|
||||
logger.warning("Signal: RPC called but client not connected")
|
||||
@@ -686,20 +720,28 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
resp = await self.client.post(
|
||||
f"{self.http_url}/api/v1/rpc",
|
||||
json=payload,
|
||||
timeout=30.0,
|
||||
timeout=timeout,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
if "error" in data:
|
||||
err = data["error"]
|
||||
if raise_on_rate_limit:
|
||||
if _is_signal_rate_limit_error(err):
|
||||
err_msg = str(err.get("message", "")) if isinstance(err, dict) else str(err)
|
||||
retry_after = _extract_retry_after_seconds(err)
|
||||
raise SignalRateLimitError(err_msg, retry_after=retry_after)
|
||||
if log_failures:
|
||||
logger.warning("Signal RPC error (%s): %s", method, data["error"])
|
||||
logger.warning("Signal RPC error (%s): %s", method, err)
|
||||
else:
|
||||
logger.debug("Signal RPC error (%s): %s", method, data["error"])
|
||||
logger.debug("Signal RPC error (%s): %s", method, err)
|
||||
return None
|
||||
|
||||
return data.get("result")
|
||||
|
||||
except SignalRateLimitError:
|
||||
raise
|
||||
except Exception as e:
|
||||
if log_failures:
|
||||
logger.warning("Signal RPC %s failed: %s", method, e)
|
||||
@@ -707,6 +749,159 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
logger.debug("Signal RPC %s failed: %s", method, e)
|
||||
return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Formatting — markdown → Signal body ranges
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _markdown_to_signal(text: str) -> tuple:
|
||||
"""Convert markdown to plain text + Signal textStyles list.
|
||||
|
||||
Signal doesn't render markdown. Instead it uses ``bodyRanges``
|
||||
(exposed by signal-cli as ``textStyle`` / ``textStyles`` params)
|
||||
with the format ``start:length:STYLE``.
|
||||
|
||||
Positions are measured in **UTF-16 code units** (not Python code
|
||||
points) because that's what the Signal protocol uses.
|
||||
|
||||
Supported styles: BOLD, ITALIC, STRIKETHROUGH, MONOSPACE.
|
||||
(Signal's SPOILER style is not currently mapped — no standard
|
||||
markdown syntax for it; would need ``||spoiler||`` parsing.)
|
||||
|
||||
Returns ``(plain_text, styles_list)`` where *styles_list* may be
|
||||
empty if there's nothing to format.
|
||||
"""
|
||||
import re
|
||||
|
||||
def _utf16_len(s: str) -> int:
|
||||
"""Length of *s* in UTF-16 code units."""
|
||||
return len(s.encode("utf-16-le")) // 2
|
||||
|
||||
# Pre-process: normalize whitespace before any position tracking
|
||||
# so later operations don't invalidate recorded offsets.
|
||||
text = re.sub(r"\n{3,}", "\n\n", text)
|
||||
text = text.strip()
|
||||
|
||||
styles: list = []
|
||||
|
||||
# --- Phase 1: fenced code blocks ```...``` → MONOSPACE ---
|
||||
_CB = re.compile(r"```[a-zA-Z0-9_+-]*\n?(.*?)```", re.DOTALL)
|
||||
while m := _CB.search(text):
|
||||
inner = m.group(1).rstrip("\n")
|
||||
start = m.start()
|
||||
text = text[: m.start()] + inner + text[m.end() :]
|
||||
styles.append((start, len(inner), "MONOSPACE"))
|
||||
|
||||
# --- Phase 2: heading markers # Foo → Foo (BOLD) ---
|
||||
_HEADING = re.compile(r"^#{1,6}\s+", re.MULTILINE)
|
||||
new_text = ""
|
||||
last_end = 0
|
||||
for m in _HEADING.finditer(text):
|
||||
new_text += text[last_end : m.start()]
|
||||
last_end = m.end()
|
||||
eol = text.find("\n", m.end())
|
||||
if eol == -1:
|
||||
eol = len(text)
|
||||
heading_text = text[m.end() : eol]
|
||||
start = len(new_text)
|
||||
new_text += heading_text
|
||||
styles.append((start, len(heading_text), "BOLD"))
|
||||
last_end = eol
|
||||
new_text += text[last_end:]
|
||||
text = new_text
|
||||
|
||||
# --- Phase 3: inline patterns (single-pass to avoid offset drift) ---
|
||||
# The old code processed each pattern sequentially, stripping markers
|
||||
# and recording positions per-pass. Later passes shifted text without
|
||||
# adjusting earlier positions → bold/italic landed mid-word.
|
||||
#
|
||||
# Fix: collect ALL non-overlapping matches first, then strip every
|
||||
# marker in one pass so positions are computed against the final text.
|
||||
_PATTERNS = [
|
||||
(re.compile(r"\*\*(.+?)\*\*", re.DOTALL), "BOLD"),
|
||||
(re.compile(r"__(.+?)__", re.DOTALL), "BOLD"),
|
||||
(re.compile(r"~~(.+?)~~", re.DOTALL), "STRIKETHROUGH"),
|
||||
(re.compile(r"`(.+?)`"), "MONOSPACE"),
|
||||
(re.compile(r"(?<!\*)\*(?!\*| )(.+?)(?<!\*)\*(?!\*)"), "ITALIC"),
|
||||
(re.compile(r"(?<!\w)_(?!_)(.+?)(?<!_)_(?!\w)"), "ITALIC"),
|
||||
]
|
||||
|
||||
# Collect all non-overlapping matches (earlier patterns win ties).
|
||||
all_matches: list = [] # (start, end, g1_start, g1_end, style)
|
||||
occupied: list = [] # (start, end) intervals already claimed
|
||||
for pat, style in _PATTERNS:
|
||||
for m in pat.finditer(text):
|
||||
ms, me = m.start(), m.end()
|
||||
if not any(ms < oe and me > os for os, oe in occupied):
|
||||
all_matches.append((ms, me, m.start(1), m.end(1), style))
|
||||
occupied.append((ms, me))
|
||||
all_matches.sort()
|
||||
|
||||
# Build removal list so we can adjust Phase 1/2 styles.
|
||||
# Each match removes its prefix markers (start..g1_start) and
|
||||
# suffix markers (g1_end..end).
|
||||
removals: list = [] # (position, length) sorted
|
||||
for ms, me, g1s, g1e, _ in all_matches:
|
||||
if g1s > ms:
|
||||
removals.append((ms, g1s - ms))
|
||||
if me > g1e:
|
||||
removals.append((g1e, me - g1e))
|
||||
removals.sort()
|
||||
|
||||
# Adjust Phase 1/2 styles for characters about to be removed.
|
||||
def _adj(pos: int) -> int:
|
||||
shift = 0
|
||||
for rp, rl in removals:
|
||||
if rp < pos:
|
||||
shift += min(rl, pos - rp)
|
||||
else:
|
||||
break
|
||||
return pos - shift
|
||||
|
||||
adjusted_prior: list = []
|
||||
for s, l, st in styles:
|
||||
ns = _adj(s)
|
||||
ne = _adj(s + l)
|
||||
if ne > ns:
|
||||
adjusted_prior.append((ns, ne - ns, st))
|
||||
|
||||
# Strip all inline markers in one pass → positions are correct.
|
||||
result = ""
|
||||
last_end = 0
|
||||
inline_styles: list = []
|
||||
for ms, me, g1s, g1e, sty in all_matches:
|
||||
result += text[last_end:ms]
|
||||
pos = len(result)
|
||||
inner = text[g1s:g1e]
|
||||
result += inner
|
||||
inline_styles.append((pos, len(inner), sty))
|
||||
last_end = me
|
||||
result += text[last_end:]
|
||||
text = result
|
||||
|
||||
styles = adjusted_prior + inline_styles
|
||||
|
||||
# Convert code-point offsets → UTF-16 code-unit offsets
|
||||
style_strings = []
|
||||
for cp_start, cp_len, stype in sorted(styles):
|
||||
# Safety: skip any out-of-bounds styles
|
||||
if cp_start < 0 or cp_start + cp_len > len(text):
|
||||
continue
|
||||
u16_start = _utf16_len(text[:cp_start])
|
||||
u16_len = _utf16_len(text[cp_start : cp_start + cp_len])
|
||||
style_strings.append(f"{u16_start}:{u16_len}:{stype}")
|
||||
|
||||
return text, style_strings
|
||||
|
||||
def format_message(self, content: str) -> str:
    """Return *content* unchanged.

    This hook exists for the base-class send path.  Rich formatting for
    Signal is produced in ``send()`` via ``_markdown_to_signal()``, so no
    transformation is applied here.
    """
    # Intentional pass-through: nothing is stripped or rewritten.
    return content
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Sending
|
||||
# ------------------------------------------------------------------
|
||||
@@ -718,14 +913,22 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send a text message."""
|
||||
"""Send a text message with native Signal formatting."""
|
||||
await self._stop_typing_indicator(chat_id)
|
||||
|
||||
plain_text, text_styles = self._markdown_to_signal(content)
|
||||
|
||||
params: Dict[str, Any] = {
|
||||
"account": self.account,
|
||||
"message": content,
|
||||
"message": plain_text,
|
||||
}
|
||||
|
||||
if text_styles:
|
||||
if len(text_styles) == 1:
|
||||
params["textStyle"] = text_styles[0]
|
||||
else:
|
||||
params["textStyles"] = text_styles
|
||||
|
||||
if chat_id.startswith("group:"):
|
||||
params["groupId"] = chat_id[6:]
|
||||
else:
|
||||
@@ -735,11 +938,10 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
|
||||
if result is not None:
|
||||
self._track_sent_timestamp(result)
|
||||
# Use the timestamp from the RPC result as a pseudo message_id.
|
||||
# Signal doesn't have real message IDs, but the stream consumer
|
||||
# needs a truthy value to follow its edit→fallback path correctly.
|
||||
_msg_id = str(result.get("timestamp", "")) if isinstance(result, dict) else None
|
||||
return SendResult(success=True, message_id=_msg_id or None)
|
||||
# Signal has no editable message identifier. Returning None keeps the
|
||||
# stream consumer on the non-edit fallback path instead of pretending
|
||||
# future edits can remove an in-progress cursor from the chat thread.
|
||||
return SendResult(success=True, message_id=None)
|
||||
return SendResult(success=False, error="RPC send failed")
|
||||
|
||||
def _track_sent_timestamp(self, rpc_result) -> None:
|
||||
@@ -803,6 +1005,178 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
self._typing_failures.pop(chat_id, None)
|
||||
self._typing_skip_until.pop(chat_id, None)
|
||||
|
||||
async def send_multiple_images(
    self,
    chat_id: str,
    images: List[Tuple[str, str]],
    metadata: Optional[Dict[str, Any]] = None,
    human_delay: float = 0.0,
) -> None:
    """Send a batch of images via chunked Signal RPC calls.

    Per-image alt texts are dropped — every ``send`` RPC issued here
    carries an empty message body. Bad images (download failure, missing
    file, oversize) are skipped with a warning so one bad URL doesn't
    lose the rest of the batch. ``metadata`` and ``human_delay`` are not
    used: pacing between batches is delegated to the rate-limit
    scheduler returned by ``get_scheduler()``.

    Args:
        chat_id: Recipient identifier, or ``"group:<id>"`` for a group.
        images: ``(image_url, alt_text)`` pairs; ``file://`` URLs are used
            as local paths, anything else goes through
            ``cache_image_from_url``.
        metadata: Unused.
        human_delay: Unused.
    """
    if not images:
        return

    scheduler = get_scheduler()
    logger.info(
        "Signal send_multiple_images: received %d image(s) for %s — "
        "scheduler state: %s",
        len(images), chat_id[:30], scheduler.state(),
    )

    await self._stop_typing_indicator(chat_id)

    # --- Stage 1: resolve every image to a local file path -------------
    attachments: List[str] = []
    skipped_download = 0
    skipped_missing = 0
    skipped_oversize = 0
    for image_url, _alt_text in images:
        if image_url.startswith("file://"):
            # Strip the 7-character "file://" scheme and undo URL quoting.
            file_path = unquote(image_url[7:])
        else:
            try:
                file_path = await cache_image_from_url(image_url)
            except Exception as e:
                logger.warning("Signal: failed to download image %s: %s", image_url, e)
                skipped_download += 1
                continue

        if not file_path or not Path(file_path).exists():
            logger.warning("Signal: image file not found for %s", image_url)
            skipped_missing += 1
            continue

        file_size = Path(file_path).stat().st_size
        if file_size > SIGNAL_MAX_ATTACHMENT_SIZE:
            logger.warning(
                "Signal: image too large (%d bytes), skipping %s", file_size, image_url
            )
            skipped_oversize += 1
            continue

        attachments.append(file_path)

    if not attachments:
        logger.error(
            "Signal: no valid images in batch of %d "
            "(download=%d missing=%d oversize=%d)",
            len(images), skipped_download, skipped_missing, skipped_oversize,
        )
        return

    logger.info(
        "Signal send_multiple_images: %d/%d images valid, sending in chunks",
        len(attachments), len(images),
    )

    # --- Stage 2: build the RPC params shared by every batch -----------
    base_params: Dict[str, Any] = {
        "account": self.account,
        "message": "",
    }
    if chat_id.startswith("group:"):
        base_params["groupId"] = chat_id[6:]
    else:
        base_params["recipient"] = [await self._resolve_recipient(chat_id)]

    # Split into batches of at most SIGNAL_MAX_ATTACHMENTS_PER_MSG files.
    att_batches = [
        attachments[i:i + SIGNAL_MAX_ATTACHMENTS_PER_MSG]
        for i in range(0, len(attachments), SIGNAL_MAX_ATTACHMENTS_PER_MSG)
    ]

    # --- Stage 3: send each batch, retrying up to the attempt cap ------
    for idx, att_batch in enumerate(att_batches):
        n = len(att_batch)
        estimated = scheduler.estimate_wait(n)
        logger.debug(
            "Signal batch %d/%d: %d attachments, estimated wait=%.1fs",
            idx + 1, len(att_batches), n, estimated,
        )
        # Tell the user when the expected pause reaches the notice threshold.
        if estimated >= SIGNAL_BATCH_PACING_NOTICE_THRESHOLD:
            await self._notify_batch_pacing(
                chat_id, idx + 1, len(att_batches), estimated
            )

        params = dict(base_params, attachments=att_batch)
        send_timeout = _signal_send_timeout(n)

        for attempt in range(1, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS + 1):
            # Every attempt (including retries) goes through the scheduler.
            await scheduler.acquire(n)
            try:
                _rpc_t0 = time.monotonic()
                result = await self._rpc(
                    "send", params, raise_on_rate_limit=True, timeout=send_timeout,
                )
                _rpc_duration = time.monotonic() - _rpc_t0
                if result is not None:
                    self._track_sent_timestamp(result)
                    await scheduler.report_rpc_duration(_rpc_duration, n)
                    logger.info(
                        "Signal batch %d/%d: %d attachments sent in %.1fs "
                        "(attempt %d/%d)",
                        idx + 1, len(att_batches), n, _rpc_duration,
                        attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
                    )
                else:
                    # A None result is treated as "batch not accepted": the
                    # RPC duration is not reported to the scheduler.
                    logger.error(
                        "Signal: RPC send failed for batch %d/%d (%d attachments, "
                        "attempt %d/%d, rpc_duration=%.1fs)",
                        idx + 1, len(att_batches), n,
                        attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
                        _rpc_duration,
                    )
                    # Retry non-rate-limit failures with exponential backoff
                    # (2**attempt seconds) while attempts remain.
                    if attempt < SIGNAL_RATE_LIMIT_MAX_ATTEMPTS:
                        backoff = 2.0 ** attempt
                        logger.info(
                            "Signal: retrying batch %d/%d after %.1fs backoff",
                            idx + 1, len(att_batches), backoff,
                        )
                        await asyncio.sleep(backoff)
                        continue
                # Success, or failure on the last attempt: move on to the
                # next batch.
                break
            except SignalRateLimitError as e:
                # Hand the server's retry hint to the scheduler; the next
                # acquire() call is what delays the retry (no sleep here).
                scheduler.feedback(e.retry_after, n)
                if attempt >= SIGNAL_RATE_LIMIT_MAX_ATTEMPTS:
                    logger.error(
                        "Signal: rate-limit retries exhausted on batch %d/%d "
                        "(%d attachments lost, server retry_after=%s)",
                        idx + 1, len(att_batches), n,
                        f"{e.retry_after:.0f}s" if e.retry_after else "unknown",
                    )
                    break
                logger.warning(
                    "Signal: rate-limited on batch %d/%d "
                    "(attempt %d/%d, server retry_after=%s); "
                    "scheduler will pace the retry",
                    idx + 1, len(att_batches),
                    attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
                    f"{e.retry_after:.0f}s" if e.retry_after else "unknown",
                )
|
||||
|
||||
async def _notify_batch_pacing(
    self,
    chat_id: str,
    next_batch_idx: int,
    total_batches: int,
    wait_s: float,
) -> None:
    """Tell the user that the next image batch is delayed by pacing.

    Best-effort: any exception while formatting or sending the notice is
    logged as a warning and swallowed.

    Args:
        chat_id: Chat to notify.
        next_batch_idx: 1-based index of the batch about to be sent.
        total_batches: Total number of batches.
        wait_s: Expected wait in seconds, rendered via ``_format_wait``.
    """
    try:
        notice = (
            f"(More images coming — pausing ~{_format_wait(wait_s)} "
            f"for Signal rate limit, batch {next_batch_idx}/{total_batches}.)"
        )
        await self.send(chat_id, notice)
    except Exception as e:
        logger.warning("Signal: failed to send pacing notice: %s", e)
|
||||
|
||||
async def send_image(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -963,6 +1337,110 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
_keep_typing finally block to clean up platform-level typing tasks."""
|
||||
await self._stop_typing_indicator(chat_id)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Reactions
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def send_reaction(
    self,
    chat_id: str,
    emoji: str,
    target_author: str,
    target_timestamp: int,
) -> bool:
    """React to a specific message through the ``sendReaction`` RPC.

    Args:
        chat_id: The chat (phone number or "group:<id>")
        emoji: Reaction emoji string (e.g. "👀", "✅")
        target_author: Phone number / UUID of the message author
        target_timestamp: Signal timestamp (ms) of the message to react to

    Returns:
        ``True`` when the RPC returned a result, ``False`` otherwise.
    """
    # Groups are addressed by id; everything else as a one-element
    # recipient list holding chat_id as given.
    if chat_id.startswith("group:"):
        destination: Dict[str, Any] = {"groupId": chat_id[6:]}
    else:
        destination = {"recipient": [chat_id]}

    params: Dict[str, Any] = {
        "account": self.account,
        "emoji": emoji,
        "targetAuthor": target_author,
        "targetTimestamp": target_timestamp,
        **destination,
    }

    if await self._rpc("sendReaction", params) is None:
        logger.debug("Signal: sendReaction failed (chat=%s, emoji=%s)", chat_id[:20], emoji)
        return False
    return True
|
||||
|
||||
async def remove_reaction(
    self,
    chat_id: str,
    target_author: str,
    target_timestamp: int,
) -> bool:
    """Withdraw a reaction: ``sendReaction`` with an empty emoji and ``remove=True``.

    Args:
        chat_id: The chat (phone number or "group:<id>").
        target_author: Author of the message the reaction is attached to.
        target_timestamp: Signal timestamp (ms) of that message.

    Returns:
        ``True`` when the RPC returned a result, ``False`` otherwise.
    """
    if chat_id.startswith("group:"):
        destination: Dict[str, Any] = {"groupId": chat_id[6:]}
    else:
        destination = {"recipient": [chat_id]}

    params: Dict[str, Any] = {
        "account": self.account,
        "emoji": "",
        "targetAuthor": target_author,
        "targetTimestamp": target_timestamp,
        "remove": True,
        **destination,
    }

    outcome = await self._rpc("sendReaction", params)
    return outcome is not None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Processing Lifecycle Hooks (reactions as progress indicators)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _extract_reaction_target(self, event: MessageEvent) -> Optional[tuple]:
|
||||
"""Extract (target_author, target_timestamp) from a MessageEvent.
|
||||
|
||||
Returns None if the event doesn't carry the raw Signal envelope data
|
||||
needed for sendReaction.
|
||||
"""
|
||||
raw = event.raw_message
|
||||
if not isinstance(raw, dict):
|
||||
return None
|
||||
author = raw.get("sender")
|
||||
ts = raw.get("timestamp_ms")
|
||||
if not author or not ts:
|
||||
return None
|
||||
return (author, ts)
|
||||
|
||||
async def on_processing_start(self, event: MessageEvent) -> None:
    """Mark the incoming message with 👀 to signal that work has started.

    Does nothing when the event carries no usable reaction target.
    """
    target = self._extract_reaction_target(event)
    if not target:
        return
    await self.send_reaction(event.source.chat_id, "👀", *target)
|
||||
|
||||
async def on_processing_complete(self, event: MessageEvent, outcome: "ProcessingOutcome") -> None:
    """Replace the in-progress 👀 reaction with a terminal one.

    SUCCESS yields ✅ and FAILURE yields ❌. A CANCELLED outcome returns
    immediately, leaving whatever reaction is present untouched (the
    original note says this matches the Telegram adapter). Any other
    outcome removes the in-progress reaction without adding a new one.
    Nothing happens when the event has no usable reaction target.
    """
    if outcome == ProcessingOutcome.CANCELLED:
        return

    target = self._extract_reaction_target(event)
    if not target:
        return

    chat_id = event.source.chat_id

    # Clear the progress marker first, then add the final marker (if any).
    await self.remove_reaction(chat_id, *target)

    final_emoji = None
    if outcome == ProcessingOutcome.SUCCESS:
        final_emoji = "✅"
    elif outcome == ProcessingOutcome.FAILURE:
        final_emoji = "❌"

    if final_emoji is not None:
        await self.send_reaction(chat_id, final_emoji, *target)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Chat Info
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@@ -0,0 +1,369 @@
|
||||
"""
|
||||
Signal attachment rate-limit scheduler.
|
||||
|
||||
Process-wide token-bucket simulator that mirrors the per-account
|
||||
attachment rate limit signal-cli/Signal-Server enforce. Producers
|
||||
(``SignalAdapter.send_multiple_images`` and the ``send_message`` tool's
|
||||
Signal path) call ``acquire(n)`` before an attachment send; on a 429
|
||||
they call ``feedback(retry_after, n)`` so the model recalibrates from
|
||||
the server's authoritative hint.
|
||||
|
||||
The scheduler serializes concurrent calls through an ``asyncio.Lock``,
|
||||
giving FIFO fairness across agent sessions sharing one signal-cli
|
||||
daemon.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from typing import Any, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Per-message attachment cap (source: Signal-{Android,Desktop} source code).
SIGNAL_MAX_ATTACHMENTS_PER_MSG = 32

# Server-side token-bucket capacity for attachment rate limiting.
SIGNAL_RATE_LIMIT_BUCKET_CAPACITY = 50

# Fallback token refill interval, used for signal-cli < v0.14.3.
SIGNAL_RATE_LIMIT_DEFAULT_RETRY_AFTER = 4

# Total attempts per send: the initial attempt plus one retry.
SIGNAL_RATE_LIMIT_MAX_ATTEMPTS = 2

# When the estimated wait exceeds this many seconds, tell the user about the delay.
SIGNAL_BATCH_PACING_NOTICE_THRESHOLD = 10.0

# signal-cli (v0.14.3+) JSON-RPC error code for RateLimitException.
SIGNAL_RPC_ERROR_RATELIMIT = -5
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Errors
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class SignalRateLimitError(Exception):
    """Signals a rate-limit response from signal-cli.

    Per the original note, ``SignalAdapter._rpc`` raises this only for
    callers that opted in with ``raise_on_rate_limit=True``.

    Attributes are documented inline below; ``retry_after`` is the
    server-supplied per-token Retry-After in seconds (available on
    signal-cli ≥ v0.14.3) and is None when the running version does not
    expose it.
    """

    def __init__(self, message: str, retry_after: Optional[float] = None) -> None:
        # Seconds per token as reported by the server, or None when unknown.
        self.retry_after = retry_after
        super().__init__(message)
|
||||
|
||||
|
||||
class SignalSchedulerError(Exception):
    """Raised when the attachment scheduler is asked for something it cannot satisfy."""
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Detection helpers — used to fish a 429 out of signal-cli's various error
|
||||
# shapes (typed code, [429] substring, libsignal-net RetryLaterException
|
||||
# leaked through AttachmentInvalidException).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# "Retry after 4 seconds" / "retry after 4 second" — libsignal-net's
|
||||
# RetryLaterException string form, surfaced when 429s hit during
|
||||
# attachment upload (signal-cli wraps these as AttachmentInvalidException
|
||||
# rather than RateLimitException, so the typed path doesn't fire).
|
||||
_RETRY_AFTER_RE = re.compile(r"Retry after (\d+(?:\.\d+)?)\s*second", re.IGNORECASE)
|
||||
|
||||
|
||||
def _extract_retry_after_seconds(err: Any) -> Optional[float]:
|
||||
"""Pull the per-token Retry-After window from a signal-cli rate-limit error.
|
||||
|
||||
Tries two sources, in order:
|
||||
1. ``error.data.response.results[*].retryAfterSeconds`` — the
|
||||
structured field signal-cli ≥ v0.14.3 surfaces for plain
|
||||
RateLimitException.
|
||||
2. ``"Retry after N seconds"`` parsed out of the message — covers
|
||||
libsignal-net's RetryLaterException that gets wrapped as
|
||||
AttachmentInvalidException during attachment upload, where the
|
||||
structured field stays null.
|
||||
|
||||
Returns None when neither yields a value.
|
||||
"""
|
||||
msg = ""
|
||||
if isinstance(err, dict):
|
||||
data = err.get("data") or {}
|
||||
response = data.get("response") or {}
|
||||
results = response.get("results") or []
|
||||
candidates = [
|
||||
r.get("retryAfterSeconds") for r in results
|
||||
if isinstance(r, dict) and r.get("retryAfterSeconds")
|
||||
]
|
||||
if candidates:
|
||||
return float(max(candidates))
|
||||
msg = str(err.get("message", ""))
|
||||
else:
|
||||
msg = str(err)
|
||||
match = _RETRY_AFTER_RE.search(msg)
|
||||
return float(match.group(1)) if match else None
|
||||
|
||||
|
||||
def _is_signal_rate_limit_error(err: Any) -> bool:
|
||||
"""True if a signal-cli RPC error reflects a rate-limit failure.
|
||||
|
||||
Matches three layers:
|
||||
- typed ``RATELIMIT_ERROR`` code (signal-cli ≥ v0.14.3, plain
|
||||
RateLimitException)
|
||||
- legacy ``[429] / RateLimitException`` substrings
|
||||
- libsignal-net's ``RetryLaterException`` / ``Retry after N seconds``
|
||||
surfaced inside ``AttachmentInvalidException`` when the rate
|
||||
limit is hit during attachment upload — signal-cli never re-tags
|
||||
these as RateLimitException, so substring is the only signal.
|
||||
"""
|
||||
if isinstance(err, dict) and err.get("code") == SIGNAL_RPC_ERROR_RATELIMIT:
|
||||
return True
|
||||
|
||||
message = (
|
||||
str(err.get("message", ""))
|
||||
if isinstance(err, dict)
|
||||
else str(err)
|
||||
)
|
||||
msg_lower = message.lower()
|
||||
return (
|
||||
"[429]" in message
|
||||
or "ratelimit" in msg_lower
|
||||
or "retrylaterexception" in msg_lower
|
||||
or "retry after" in msg_lower
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Misc helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _format_wait(seconds: float) -> str:
|
||||
"""Human-friendly wait label for user-facing pacing notices."""
|
||||
s = max(0.0, seconds)
|
||||
if s < 90:
|
||||
return f"{int(round(s))}s"
|
||||
return f"{max(1, int(round(s / 60)))} min"
|
||||
|
||||
|
||||
def _signal_send_timeout(num_attachments: int) -> float:
|
||||
"""HTTP timeout for a Signal ``send`` RPC.
|
||||
|
||||
signal-cli uploads attachments serially during the call, so the
|
||||
server-side time scales with batch size. Default 30s is fine for
|
||||
text-only sends but truncates large attachment batches mid-upload —
|
||||
we then log a phantom failure even though signal-cli completes the
|
||||
send a few seconds later. Scale at 5s/attachment with a 60s floor.
|
||||
"""
|
||||
if num_attachments <= 0:
|
||||
return 30.0
|
||||
return max(60.0, 5.0 * num_attachments)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scheduler
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class SignalAttachmentScheduler:
    """Process-wide token-bucket model of Signal's attachment rate limit.

    The bucket starts full with ``capacity`` tokens (default 50, which the
    original note says matches Signal's server-side bucket size). One
    attachment costs one token. Tokens come back at ``refill_rate`` tokens
    per second; the rate starts from the default per-token interval
    (1 token / 4 seconds) and is recalibrated by ``feedback`` whenever the
    server reports a per-token Retry-After alongside a 429.

    State changes made by ``acquire`` and ``report_rpc_duration`` are
    guarded by an ``asyncio.Lock``.
    """

    def __init__(
        self,
        capacity: float = float(SIGNAL_RATE_LIMIT_BUCKET_CAPACITY),
        default_retry_after: float = float(SIGNAL_RATE_LIMIT_DEFAULT_RETRY_AFTER),
    ) -> None:
        """Create a full bucket.

        Args:
            capacity: Maximum number of tokens the bucket can hold.
            default_retry_after: Seconds needed to regain one token until
                the server tells us otherwise.
        """
        bucket_size = float(capacity)
        self.capacity = bucket_size
        self.tokens = bucket_size
        self.refill_rate = 1.0 / float(default_retry_after)
        self.last_refill = time.monotonic()
        self._lock = asyncio.Lock()

    # ------------------------------------------------------------------
    # Internals
    # ------------------------------------------------------------------

    def _projected_tokens(self) -> float:
        """Return the token count as of now without mutating any state."""
        idle = time.monotonic() - self.last_refill
        current = self.tokens
        if idle > 0 and current < self.capacity:
            return min(self.capacity, current + idle * self.refill_rate)
        return current

    def _refill(self) -> None:
        """Credit tokens earned since ``last_refill`` and move the marker to now."""
        current_time = time.monotonic()
        gap = current_time - self.last_refill
        if gap > 0 and self.tokens < self.capacity:
            replenished = self.tokens + gap * self.refill_rate
            self.tokens = min(self.capacity, replenished)
        self.last_refill = current_time

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def estimate_wait(self, n: int) -> float:
        """Estimate how many seconds remain until ``n`` tokens are available.

        Intended for deciding whether to show a pacing notice before
        calling ``acquire``. Takes no lock and changes no state, so the
        value can be slightly off when other callers are active.

        Returns:
            0.0 when enough tokens are already projected to be available,
            otherwise the shortfall divided by the refill rate.
        """
        shortfall = n - self._projected_tokens()
        if shortfall > 0:
            return shortfall / self.refill_rate
        return 0.0

    async def acquire(self, n: int) -> float:
        """Wait until at least ``n`` tokens are available.

        Tokens are **not** deducted here: the bucket only models
        server-side capacity. Call ``report_rpc_duration()`` once the RPC
        finishes to record the spend. As the original note admits, this is
        approximate when many callers wait for large uploads at once; the
        Signal server remains the ground truth and its rate-limit errors
        drive retries.

        The lock is held only while inspecting the bucket and is released
        before sleeping, so other callers can interleave. After each sleep
        the bucket is re-checked.

        Args:
            n: Number of tokens (attachments) about to be sent.

        Returns:
            Total seconds slept; 0.0 when ``n`` is not positive.

        Raises:
            SignalSchedulerError: ``n`` exceeds the bucket capacity and
                could therefore never be satisfied.
        """
        if n <= 0:
            return 0.0
        if n > self.capacity:
            raise SignalSchedulerError(
                f"Signal scheduler was called requesting {n} tokens "
                f"(max is {self.capacity})",
            )

        slept = 0.0
        announced = False  # becomes True once the initial pause has been logged
        while True:
            async with self._lock:
                self._refill()
                if self.tokens >= n:
                    if announced or slept > 0:
                        logger.debug(
                            "Signal scheduler: tokens sufficient for %d "
                            "(remaining=%.1f, total_slept=%.1fs)",
                            n, self.tokens, slept,
                        )
                    return slept

                shortfall = n - self.tokens
                pause = shortfall / self.refill_rate
                if not announced:
                    logger.info(
                        "Signal scheduler: pausing %.1fs for %d tokens "
                        "(available=%.1f, deficit=%.1f, refill=%.4f/s ≈ %.1fs/token)",
                        pause, n, self.tokens, shortfall,
                        self.refill_rate, 1.0 / self.refill_rate,
                    )
                    announced = True

            # Sleep outside the lock so other callers are not blocked.
            await asyncio.sleep(pause)
            slept += pause

    async def report_rpc_duration(self, rpc_duration: float, n_attachments: int) -> None:
        """Record that an attachment-send RPC has just finished.

        Subtracts ``n_attachments`` tokens (never going below zero) and
        restarts the refill clock at the current time, so no refill is
        credited for the time the upload took. The original note explains
        why: the server checks the bucket at RPC start and does not refill
        while processing, and crediting upload time caused drift that
        eventually produced 429s.

        Args:
            rpc_duration: How long the RPC took, in seconds (used for
                logging only).
            n_attachments: Number of attachments sent; non-positive values
                make this a no-op.
        """
        if n_attachments <= 0:
            return

        # Only long, large sends are worth an INFO line.
        noteworthy = rpc_duration > 10 and n_attachments > 5
        level = logging.INFO if noteworthy else logging.DEBUG

        async with self._lock:
            stamp = time.monotonic()
            before = self.tokens
            self.tokens = max(0.0, before - float(n_attachments))
            self.last_refill = stamp
            logger.log(
                level,
                "Signal scheduler: RPC for %d att took %.1fs — "
                "tokens %.1f → %.1f (deducted=%d, no upload refill credited, refill=%.4fs⁻¹)",
                n_attachments, rpc_duration,
                before, self.tokens,
                n_attachments, self.refill_rate,
            )

    def feedback(self, retry_after: Optional[float], n_attempted: int) -> None:
        """Apply what the server told us after a 429.

        A positive ``retry_after`` is taken as the per-*token* refill
        window and replaces the current refill rate. Regardless of whether
        a hint was supplied, the bucket is emptied and the refill clock is
        restarted.

        Args:
            retry_after: Seconds per token reported by the server, or None
                when signal-cli (older than v0.14.3) did not surface it.
            n_attempted: Number of tokens the failed send asked for; not
                used by this implementation.
        """
        if retry_after and retry_after > 0:
            calibrated = 1.0 / float(retry_after)
            if calibrated != self.refill_rate:
                logger.info(
                    "Signal scheduler: calibrating refill_rate to %.4f tokens/sec "
                    "(server retry_after=%.1fs per token)",
                    calibrated, retry_after,
                )
            self.refill_rate = calibrated

        self.tokens = 0.0
        self.last_refill = time.monotonic()

    def state(self) -> dict:
        """Return a snapshot of the bucket for diagnostic logging.

        Read-only: the token count is projected to the current time but
        neither ``tokens`` nor ``last_refill`` is modified.
        """
        rate = self.refill_rate
        seconds_per_token = round(1.0 / rate, 1) if rate > 0 else float("inf")
        return {
            "tokens": round(self._projected_tokens(), 1),
            "capacity": int(self.capacity),
            "refill_rate": round(rate, 4),
            "refill_seconds_per_token": seconds_per_token,
        }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Process-wide singleton
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_scheduler: Optional[SignalAttachmentScheduler] = None
|
||||
|
||||
|
||||
def get_scheduler() -> SignalAttachmentScheduler:
    """Return the shared scheduler, building and caching it on the first call."""
    global _scheduler
    if _scheduler is not None:
        return _scheduler

    # First access: build the instance, cache it, then announce its settings.
    _scheduler = SignalAttachmentScheduler()
    logger.info(
        "Signal scheduler: created (capacity=%d tokens, refill=%.4f/s ≈ %.1fs/token)",
        int(_scheduler.capacity),
        _scheduler.refill_rate,
        1.0 / _scheduler.refill_rate,
    )
    return _scheduler
|
||||
|
||||
|
||||
def _reset_scheduler() -> None:
|
||||
"""Drop the cached scheduler so the next ``get_scheduler`` call
|
||||
builds a fresh one. Test-only — never call from production paths."""
|
||||
global _scheduler
|
||||
_scheduler = None
|
||||
@@ -514,6 +514,15 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
):
|
||||
self._app.action(_action_id)(self._handle_approval_action)
|
||||
|
||||
# Register Block Kit action handlers for slash-confirm buttons
|
||||
# (generic three-option prompts; see tools/slash_confirm.py).
|
||||
for _action_id in (
|
||||
"hermes_confirm_once",
|
||||
"hermes_confirm_always",
|
||||
"hermes_confirm_cancel",
|
||||
):
|
||||
self._app.action(_action_id)(self._handle_slash_confirm_action)
|
||||
|
||||
# Start Socket Mode handler in background
|
||||
self._handler = AsyncSocketModeHandler(self._app, app_token, proxy=proxy_url)
|
||||
_apply_slack_proxy(self._handler.client, proxy_url)
|
||||
@@ -783,6 +792,111 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
|
||||
raise last_exc
|
||||
|
||||
async def send_multiple_images(
    self,
    chat_id: str,
    images: List[Tuple[str, str]],
    metadata: Optional[Dict[str, Any]] = None,
    human_delay: float = 0.0,
) -> None:
    """Post a batch of images to Slack as grouped file uploads.

    Images are sent through ``files_upload_v2`` using its ``file_uploads``
    parameter, so each group of up to 10 images appears under a single
    ``initial_comment`` instead of as separate messages. The comment is
    the newline-joined alt texts of the group.

    Args:
        chat_id: Slack channel to post into.
        images: ``(image_url, alt_text)`` pairs. ``file://`` URLs are read
            from disk; other URLs are downloaded after passing the URL
            safety check.
        metadata: Optional metadata used to resolve the target thread.
        human_delay: Seconds to sleep between consecutive groups.

    Images that are missing, blocked, or fail to download are skipped with
    a warning. If the optional dependencies cannot be imported, or a
    group's upload raises, the base-class per-image sender is used instead
    (for the whole batch or for that group respectively).
    """
    if not self._app or not images:
        return

    try:
        import httpx as _httpx
        from urllib.parse import unquote as _unquote
        from tools.url_safety import is_safe_url as _is_safe_url
    except Exception:
        await super().send_multiple_images(chat_id, images, metadata, human_delay)
        return

    thread_ts = self._resolve_thread_ts(None, metadata)

    # 10 uploads per call (the original note attributes this to a Slack server-side cap).
    group_size = 10
    groups = [images[start:start + group_size] for start in range(0, len(images), group_size)]
    group_count = len(groups)

    for position, group in enumerate(groups, start=1):
        if position > 1 and human_delay > 0:
            await asyncio.sleep(human_delay)

        uploads: List[Dict[str, Any]] = []
        captions: List[str] = []
        try:
            # NOTE(review): redirects are followed, but only the initial URL
            # goes through the safety check — confirm that is acceptable.
            async with _httpx.AsyncClient(timeout=30.0, follow_redirects=True) as downloader:
                for image_url, alt_text in group:
                    # NOTE(review): the caption is kept even if the image is
                    # skipped below — confirm this is intended.
                    if alt_text:
                        captions.append(alt_text)

                    if image_url.startswith("file://"):
                        local_path = _unquote(image_url[7:])
                        if os.path.exists(local_path):
                            uploads.append({
                                "file": local_path,
                                "filename": os.path.basename(local_path),
                            })
                        else:
                            logger.warning("[Slack] Skipping missing image: %s", local_path)
                        continue

                    if not _is_safe_url(image_url):
                        logger.warning("[Slack] Blocked unsafe image URL in batch")
                        continue

                    try:
                        response = await downloader.get(image_url)
                        response.raise_for_status()
                        content_type = response.headers.get("content-type", "")
                        # Derive the file extension from the content type; default to PNG.
                        ext = "png"
                        for marker, suffix in (
                            ("jpeg", "jpg"),
                            ("jpg", "jpg"),
                            ("gif", "gif"),
                            ("webp", "webp"),
                        ):
                            if marker in content_type:
                                ext = suffix
                                break
                        uploads.append({
                            "content": response.content,
                            "filename": f"image_{len(uploads)}.{ext}",
                        })
                    except Exception as dl_err:
                        logger.warning(
                            "[Slack] Download failed for %s: %s",
                            safe_url_for_log(image_url), dl_err,
                        )

            if not uploads:
                continue

            logger.info(
                "[Slack] Sending %d image(s) in single files_upload_v2 (chunk %d/%d)",
                len(uploads), position, group_count,
            )
            await self._get_client(chat_id).files_upload_v2(
                channel=chat_id,
                file_uploads=uploads,
                initial_comment="\n".join(captions),
                thread_ts=thread_ts,
            )
            self._record_uploaded_file_thread(chat_id, thread_ts)
        except Exception as e:
            logger.warning(
                "[Slack] Multi-image files_upload_v2 failed (chunk %d/%d), falling back to per-image: %s",
                position, group_count, e,
                exc_info=True,
            )
            await super().send_multiple_images(chat_id, group, metadata, human_delay=human_delay)
|
||||
|
||||
def _record_uploaded_file_thread(self, chat_id: str, thread_ts: Optional[str]) -> None:
|
||||
"""Treat successful file uploads as bot participation in a thread."""
|
||||
if not thread_ts:
|
||||
@@ -1931,6 +2045,168 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
logger.error("[Slack] send_exec_approval failed: %s", e, exc_info=True)
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_slash_confirm(
    self, chat_id: str, title: str, message: str, session_key: str,
    confirm_id: str, metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
    """Post a Block Kit prompt offering "Approve Once", "Always Approve" and "Cancel".

    Args:
        chat_id: Slack channel to post into.
        title: Prompt heading; "Confirm" is used when empty.
        message: Prompt body; truncated to 2900 characters plus "..." when longer.
        session_key: Session the confirmation belongs to.
        confirm_id: Identifier of this confirmation request.
        metadata: Optional metadata used to resolve the target thread.

    Returns:
        A successful ``SendResult`` carrying the posted message's ``ts``,
        or a failed one when the adapter is not connected or posting raises.
    """
    if not self._app:
        return SendResult(success=False, error="Not connected")

    try:
        if len(message) > 2900:
            body = message[:2900] + "..."
        else:
            body = message
        thread_ts = self._resolve_thread_ts(None, metadata)
        heading = title or "Confirm"

        # Every button carries "session_key|confirm_id" so the click handler
        # can resolve the request without extra bookkeeping.
        value = f"{session_key}|{confirm_id}"

        button_specs = (
            ("Approve Once", "primary", "hermes_confirm_once"),
            ("Always Approve", None, "hermes_confirm_always"),
            ("Cancel", "danger", "hermes_confirm_cancel"),
        )
        buttons: List[Dict[str, Any]] = []
        for label, style, action_id in button_specs:
            element: Dict[str, Any] = {
                "type": "button",
                "text": {"type": "plain_text", "text": label},
            }
            if style is not None:
                element["style"] = style
            element["action_id"] = action_id
            element["value"] = value
            buttons.append(element)

        blocks = [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"*{heading}*\n\n{body}",
                },
            },
            {
                "type": "actions",
                "elements": buttons,
            },
        ]

        kwargs: Dict[str, Any] = {
            "channel": chat_id,
            "text": f"{heading}: {body[:100]}",
            "blocks": blocks,
        }
        if thread_ts:
            kwargs["thread_ts"] = thread_ts

        result = await self._get_client(chat_id).chat_postMessage(**kwargs)
        return SendResult(success=True, message_id=result.get("ts", ""), raw_response=result)
    except Exception as e:
        logger.error("[Slack] send_slash_confirm failed: %s", e, exc_info=True)
        return SendResult(success=False, error=str(e))
|
||||
|
||||
async def _handle_slash_confirm_action(self, ack, body, action) -> None:
    """React to a click on one of the slash-confirm Block Kit buttons.

    Steps, in order:

    1. Acknowledge the interaction.
    2. If ``SLACK_ALLOWED_USERS`` is set, ignore clicks from users that are
       not listed (``*`` allows everyone).
    3. Split the button value back into ``session_key`` and ``confirm_id``.
    4. Replace the prompt's buttons with a line stating the decision.
    5. Resolve the confirmation through ``tools.slash_confirm.resolve`` and
       post any text it returns into the same thread.

    Failures while updating the message or resolving are logged, not raised.
    """
    await ack()

    action_id = action.get("action_id", "")
    value = action.get("value", "")
    message = body.get("message", {})
    msg_ts = message.get("ts", "")
    channel_id = body.get("channel", {}).get("id", "")
    clicker = body.get("user", {})
    user_name = clicker.get("name", "unknown")
    user_id = clicker.get("id", "")

    # Authorization — reuse the exec-approval allowlist.
    allowed_csv = os.getenv("SLACK_ALLOWED_USERS", "").strip()
    if allowed_csv:
        allowed_ids = {entry.strip() for entry in allowed_csv.split(",") if entry.strip()}
        authorized = "*" in allowed_ids or user_id in allowed_ids
        if not authorized:
            logger.warning(
                "[Slack] Unauthorized slash-confirm click by %s (%s) — ignoring",
                user_name, user_id,
            )
            return

    # The button value is "session_key|confirm_id".
    if "|" not in value:
        logger.warning("[Slack] Malformed slash-confirm value: %s", value)
        return
    session_key, confirm_id = value.split("|", 1)

    # Unknown action ids are treated as a cancel.
    choice = {
        "hermes_confirm_once": "once",
        "hermes_confirm_always": "always",
        "hermes_confirm_cancel": "cancel",
    }.get(action_id, "cancel")

    decision_text = {
        "once": f"✅ Approved once by {user_name}",
        "always": f"🔒 Always approved by {user_name}",
        "cancel": f"❌ Cancelled by {user_name}",
    }.get(choice, f"Resolved by {user_name}")

    # Keep the original prompt text (first section block) so the decision
    # is shown with its context.
    original_text = next(
        (
            block.get("text", {}).get("text", "")
            for block in message.get("blocks", [])
            if block.get("type") == "section"
        ),
        "",
    )

    updated_blocks = [
        {
            "type": "section",
            "text": {
                "type": "mrkdwn",
                "text": original_text or "Confirmation prompt",
            },
        },
        {
            "type": "context",
            "elements": [
                {"type": "mrkdwn", "text": decision_text},
            ],
        },
    ]

    try:
        await self._get_client(channel_id).chat_update(
            channel=channel_id,
            ts=msg_ts,
            text=decision_text,
            blocks=updated_blocks,
        )
    except Exception as e:
        logger.warning("[Slack] Failed to update slash-confirm message: %s", e)

    # Resolve via the module-level primitive and post any follow-up.
    try:
        from tools import slash_confirm as _slash_confirm_mod
        result_text = await _slash_confirm_mod.resolve(session_key, confirm_id, choice)
        if result_text:
            post_kwargs: Dict[str, Any] = {
                "channel": channel_id,
                "text": result_text,
            }
            # Reply in the prompt's thread, or start one under the prompt itself.
            thread_ts = message.get("thread_ts") or msg_ts
            if thread_ts:
                post_kwargs["thread_ts"] = thread_ts
            await self._get_client(channel_id).chat_postMessage(**post_kwargs)
        logger.info(
            "Slack button resolved slash-confirm for session %s (choice=%s, user=%s)",
            session_key, choice, user_name,
        )
    except Exception as exc:
        logger.error("Failed to resolve slash-confirm from Slack button: %s", exc, exc_info=True)
|
||||
|
||||
async def _handle_approval_action(self, ack, body, action) -> None:
|
||||
"""Handle an approval button click from Block Kit."""
|
||||
await ack()
|
||||
|
||||
+338
-30
@@ -84,6 +84,7 @@ from gateway.platforms.telegram_network import (
|
||||
discover_fallback_ips,
|
||||
parse_fallback_ip_env,
|
||||
)
|
||||
from utils import atomic_replace
|
||||
|
||||
|
||||
def check_telegram_requirements() -> bool:
|
||||
@@ -122,12 +123,12 @@ def _strip_mdv2(text: str) -> str:
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Markdown table → code block conversion
|
||||
# Markdown table → Telegram-friendly row groups
|
||||
# ---------------------------------------------------------------------------
|
||||
# Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
|
||||
# so pipe tables render as noisy backslash-pipe text with no alignment.
|
||||
# Wrapping the table in a fenced code block makes Telegram render it as
|
||||
# monospace preformatted text with columns intact.
|
||||
# Reformatting each row into a bold heading plus bullet list keeps the content
|
||||
# readable on mobile clients while preserving the source data.
|
||||
|
||||
# Matches a GFM table delimiter row: optional outer pipes, cells containing
|
||||
# only dashes (with optional leading/trailing colons for alignment) separated
|
||||
@@ -144,13 +145,49 @@ def _is_table_row(line: str) -> bool:
|
||||
return bool(stripped) and '|' in stripped
|
||||
|
||||
|
||||
def _split_markdown_table_row(line: str) -> list[str]:
|
||||
"""Split a simple GFM table row into stripped cell values."""
|
||||
stripped = line.strip()
|
||||
if stripped.startswith("|"):
|
||||
stripped = stripped[1:]
|
||||
if stripped.endswith("|"):
|
||||
stripped = stripped[:-1]
|
||||
return [cell.strip() for cell in stripped.split("|")]
|
||||
|
||||
|
||||
def _render_table_block_for_telegram(table_block: list[str]) -> str:
|
||||
"""Render a detected GFM table as Telegram-friendly row groups."""
|
||||
if len(table_block) < 3:
|
||||
return "\n".join(table_block)
|
||||
|
||||
headers = _split_markdown_table_row(table_block[0])
|
||||
if len(headers) < 2:
|
||||
return "\n".join(table_block)
|
||||
|
||||
rendered_rows: list[str] = []
|
||||
for index, row in enumerate(table_block[2:], start=1):
|
||||
cells = _split_markdown_table_row(row)
|
||||
if len(cells) < len(headers):
|
||||
cells.extend([""] * (len(headers) - len(cells)))
|
||||
elif len(cells) > len(headers):
|
||||
cells = cells[: len(headers)]
|
||||
|
||||
heading = next((cell for cell in cells if cell), f"Row {index}")
|
||||
rendered_rows.append(f"**{heading}**")
|
||||
rendered_rows.extend(
|
||||
f"• {header}: {value}" for header, value in zip(headers, cells)
|
||||
)
|
||||
|
||||
return "\n\n".join(rendered_rows)
|
||||
|
||||
|
||||
def _wrap_markdown_tables(text: str) -> str:
|
||||
"""Wrap GFM-style pipe tables in ``` fences so Telegram renders them.
|
||||
"""Rewrite GFM-style pipe tables into Telegram-friendly bullet groups.
|
||||
|
||||
Detected by a row containing '|' immediately followed by a delimiter
|
||||
row matching :data:`_TABLE_SEPARATOR_RE`. Subsequent pipe-containing
|
||||
non-blank lines are consumed as the table body and included in the
|
||||
wrapped block. Tables inside existing fenced code blocks are left
|
||||
non-blank lines are consumed as the table body and rewritten as
|
||||
per-row bullet groups. Tables inside existing fenced code blocks are left
|
||||
alone.
|
||||
"""
|
||||
if '|' not in text or '-' not in text:
|
||||
@@ -187,9 +224,7 @@ def _wrap_markdown_tables(text: str) -> str:
|
||||
while j < len(lines) and _is_table_row(lines[j]):
|
||||
table_block.append(lines[j])
|
||||
j += 1
|
||||
out.append('```')
|
||||
out.extend(table_block)
|
||||
out.append('```')
|
||||
out.append(_render_table_block_for_telegram(table_block))
|
||||
i = j
|
||||
continue
|
||||
|
||||
@@ -202,14 +237,14 @@ def _wrap_markdown_tables(text: str) -> str:
|
||||
class TelegramAdapter(BasePlatformAdapter):
|
||||
"""
|
||||
Telegram bot adapter.
|
||||
|
||||
|
||||
Handles:
|
||||
- Receiving messages from users and groups
|
||||
- Sending responses with Telegram markdown
|
||||
- Forum topics (thread_id support)
|
||||
- Media messages
|
||||
"""
|
||||
|
||||
|
||||
# Telegram message limits
|
||||
MAX_MESSAGE_LENGTH = 4096
|
||||
# Threshold for detecting Telegram client-side message splits.
|
||||
@@ -217,7 +252,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
_SPLIT_THRESHOLD = 4000
|
||||
MEDIA_GROUP_WAIT_SECONDS = 0.8
|
||||
_GENERAL_TOPIC_THREAD_ID = "1"
|
||||
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
super().__init__(config, Platform.TELEGRAM)
|
||||
self._app: Optional[Application] = None
|
||||
@@ -251,6 +286,9 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
self._model_picker_state: Dict[str, dict] = {}
|
||||
# Approval button state: message_id → session_key
|
||||
self._approval_state: Dict[int, str] = {}
|
||||
# Slash-confirm button state: confirm_id → session_key (for /reload-mcp
|
||||
# and any other slash-confirm prompts; see GatewayRunner._request_slash_confirm).
|
||||
self._slash_confirm_state: Dict[str, str] = {}
|
||||
|
||||
@staticmethod
|
||||
def _is_callback_user_authorized(user_id: str) -> bool:
|
||||
@@ -334,6 +372,49 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
return {"link_preview_options": LinkPreviewOptions(is_disabled=True)}
|
||||
return {"disable_web_page_preview": True}
|
||||
|
||||
async def _drain_polling_connections(self) -> None:
    """Recycle the httpx connection pool behind ``getUpdates`` polling.

    Network failures (notably behind proxies such as sing-box) can leave
    httpx connections half-closed while they still hold pool slots.  After
    several reconnect cycles the pool is exhausted and polling fails with
    ``Pool timeout: All connections in the connection pool are occupied.``

    Only ``_request[0]`` (the get-updates request) is shut down and
    re-initialized.  ``_request[1]`` (the general request) is never
    touched, so in-flight ``send_message`` / ``edit_message`` calls are
    not interrupted.

    Implementation note: ``Bot._request`` is a private PTB 22.x tuple
    ``(get_updates_request, general_request)``; there is no public
    accessor for the polling request.  Re-check this when moving to
    PTB 23+.

    Every step is best-effort: failures are logged at debug level and
    never propagate to the caller.
    """
    app = self._app
    if not (app and app.bot):
        # Nothing to drain before the application/bot exists.
        return

    try:
        # Private attribute access; see the implementation note above.
        get_updates_request = app.bot._request[0]  # noqa: SLF001
    except Exception:
        return

    # Shut the pool down first; a failure here must not block the
    # re-initialize attempt below.
    try:
        await get_updates_request.shutdown()
    except Exception:
        logger.debug(
            "[%s] Polling request shutdown failed (non-fatal)",
            self.name, exc_info=True,
        )

    try:
        await get_updates_request.initialize()
        logger.debug(
            "[%s] Polling request pool drained before reconnect", self.name
        )
    except Exception:
        logger.debug(
            "[%s] Polling request re-initialize failed (non-fatal)",
            self.name, exc_info=True,
        )
|
||||
|
||||
async def _handle_polling_network_error(self, error: Exception) -> None:
|
||||
"""Reconnect polling after a transient network interruption.
|
||||
|
||||
@@ -379,6 +460,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
await self._drain_polling_connections()
|
||||
|
||||
try:
|
||||
await self._app.updater.start_polling(
|
||||
allowed_updates=Update.ALL_TYPES,
|
||||
@@ -426,6 +509,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
except Exception:
|
||||
pass
|
||||
await asyncio.sleep(RETRY_DELAY)
|
||||
await self._drain_polling_connections()
|
||||
try:
|
||||
await self._app.updater.start_polling(
|
||||
allowed_updates=Update.ALL_TYPES,
|
||||
@@ -554,7 +638,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
_yaml.dump(config, f, default_flow_style=False, sort_keys=False)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_path, config_path)
|
||||
atomic_replace(tmp_path, config_path)
|
||||
except BaseException:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
@@ -913,7 +997,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
self._set_fatal_error("telegram_connect_error", message, retryable=True)
|
||||
logger.error("[%s] Failed to connect to Telegram: %s", self.name, e, exc_info=True)
|
||||
return False
|
||||
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
"""Stop polling/webhook, cancel pending album flushes, and disconnect."""
|
||||
pending_media_group_tasks = list(self._media_group_tasks.values())
|
||||
@@ -1330,6 +1414,48 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
logger.warning("[%s] send_exec_approval failed: %s", self.name, e)
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_slash_confirm(
    self, chat_id: str, title: str, message: str, session_key: str,
    confirm_id: str, metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
    """Send a slash-command confirmation prompt with three inline buttons.

    The prompt offers "Approve Once", "Always Approve" and "Cancel"; each
    button's callback data has the form ``sc:<choice>:<confirm_id>``.
    After a successful send, ``confirm_id`` is mapped to ``session_key``
    in ``self._slash_confirm_state``.

    ``title`` is accepted for interface compatibility but is not used by
    this method.  Returns a failed ``SendResult`` when the bot is not
    connected or when anything in the send path raises.
    """
    if not self._bot:
        return SendResult(success=False, error="Not connected")

    try:
        # The body is sent with ParseMode.MARKDOWN (the gateway primitive
        # already formats it); overly long bodies are cut at 3800 chars.
        body = message
        if len(body) > 3800:
            body = body[:3800] + "..."

        approve_row = [
            InlineKeyboardButton("✅ Approve Once", callback_data=f"sc:once:{confirm_id}"),
            InlineKeyboardButton("🔒 Always Approve", callback_data=f"sc:always:{confirm_id}"),
        ]
        cancel_row = [
            InlineKeyboardButton("❌ Cancel", callback_data=f"sc:cancel:{confirm_id}"),
        ]
        markup = InlineKeyboardMarkup([approve_row, cancel_row])

        source_thread = self._metadata_thread_id(metadata)
        send_args: Dict[str, Any] = {
            "chat_id": int(chat_id),
            "text": body,
            "parse_mode": ParseMode.MARKDOWN,
            "reply_markup": markup,
        }
        send_args.update(self._link_preview_kwargs())

        # Only pass a thread id when the helper resolves one.
        target_thread = self._message_thread_id_for_send(source_thread)
        if target_thread is not None:
            send_args["message_thread_id"] = target_thread

        sent = await self._bot.send_message(**send_args)
        self._slash_confirm_state[confirm_id] = session_key
        return SendResult(success=True, message_id=str(sent.message_id))
    except Exception as e:
        logger.warning("[%s] send_slash_confirm failed: %s", self.name, e)
        return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_model_picker(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -1698,6 +1824,68 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
logger.error("Failed to resolve gateway approval from Telegram button: %s", exc)
|
||||
return
|
||||
|
||||
# --- Slash-confirm callbacks (sc:choice:confirm_id) ---
|
||||
if data.startswith("sc:"):
|
||||
parts = data.split(":", 2)
|
||||
if len(parts) == 3:
|
||||
choice = parts[1] # once, always, cancel
|
||||
confirm_id = parts[2]
|
||||
|
||||
caller_id = str(getattr(query.from_user, "id", ""))
|
||||
if not self._is_callback_user_authorized(caller_id):
|
||||
await query.answer(text="⛔ You are not authorized to answer this prompt.")
|
||||
return
|
||||
|
||||
session_key = self._slash_confirm_state.pop(confirm_id, None)
|
||||
if not session_key:
|
||||
await query.answer(text="This prompt has already been resolved.")
|
||||
return
|
||||
|
||||
label_map = {
|
||||
"once": "✅ Approved once",
|
||||
"always": "🔒 Always approve",
|
||||
"cancel": "❌ Cancelled",
|
||||
}
|
||||
user_display = getattr(query.from_user, "first_name", "User")
|
||||
label = label_map.get(choice, "Resolved")
|
||||
|
||||
await query.answer(text=label)
|
||||
|
||||
try:
|
||||
await query.edit_message_text(
|
||||
text=f"{label} by {user_display}",
|
||||
parse_mode=ParseMode.MARKDOWN,
|
||||
reply_markup=None,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Resolve via the module-level primitive. The runner stored
|
||||
# a handler keyed by session_key; we run it on the event
|
||||
# loop and (if it returns a string) send it as a follow-up
|
||||
# message in the same chat.
|
||||
try:
|
||||
from tools import slash_confirm as _slash_confirm_mod
|
||||
result_text = await _slash_confirm_mod.resolve(
|
||||
session_key, confirm_id, choice,
|
||||
)
|
||||
if result_text and query.message:
|
||||
# Inherit the prompt message's thread so the reply
|
||||
# lands in the same supergroup topic / reply chain.
|
||||
thread_id = getattr(query.message, "message_thread_id", None)
|
||||
send_kwargs: Dict[str, Any] = {
|
||||
"chat_id": int(query.message.chat_id),
|
||||
"text": result_text,
|
||||
"parse_mode": ParseMode.MARKDOWN,
|
||||
**self._link_preview_kwargs(),
|
||||
}
|
||||
if thread_id is not None:
|
||||
send_kwargs["message_thread_id"] = thread_id
|
||||
await self._bot.send_message(**send_kwargs)
|
||||
except Exception as exc:
|
||||
logger.error("[%s] slash-confirm callback failed: %s", self.name, exc, exc_info=True)
|
||||
return
|
||||
|
||||
# --- Update prompt callbacks ---
|
||||
if not data.startswith("update_prompt:"):
|
||||
return
|
||||
@@ -1763,8 +1951,9 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
|
||||
|
||||
with open(audio_path, "rb") as audio_file:
|
||||
# .ogg files -> send as voice (round playable bubble)
|
||||
if audio_path.endswith((".ogg", ".opus")):
|
||||
ext = os.path.splitext(audio_path)[1].lower()
|
||||
# .ogg / .opus files -> send as voice (round playable bubble)
|
||||
if ext in (".ogg", ".opus"):
|
||||
_voice_thread = self._metadata_thread_id(metadata)
|
||||
msg = await self._bot.send_voice(
|
||||
chat_id=int(chat_id),
|
||||
@@ -1773,8 +1962,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
reply_to_message_id=int(reply_to) if reply_to else None,
|
||||
message_thread_id=self._message_thread_id_for_send(_voice_thread),
|
||||
)
|
||||
else:
|
||||
# .mp3 and others -> send as audio file
|
||||
elif ext in (".mp3", ".m4a"):
|
||||
# Telegram's Bot API sendAudio only accepts MP3 / M4A.
|
||||
_audio_thread = self._metadata_thread_id(metadata)
|
||||
msg = await self._bot.send_audio(
|
||||
chat_id=int(chat_id),
|
||||
@@ -1783,6 +1972,16 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
reply_to_message_id=int(reply_to) if reply_to else None,
|
||||
message_thread_id=self._message_thread_id_for_send(_audio_thread),
|
||||
)
|
||||
else:
|
||||
# Formats Telegram can't play natively (.wav, .flac, ...)
|
||||
# — fall back to document delivery instead of raising.
|
||||
return await self.send_document(
|
||||
chat_id=chat_id,
|
||||
file_path=audio_path,
|
||||
caption=caption,
|
||||
reply_to=reply_to,
|
||||
metadata=metadata,
|
||||
)
|
||||
return SendResult(success=True, message_id=str(msg.message_id))
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
@@ -1792,7 +1991,118 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
exc_info=True,
|
||||
)
|
||||
return await super().send_voice(chat_id, audio_path, caption, reply_to)
|
||||
|
||||
|
||||
async def send_multiple_images(
    self,
    chat_id: str,
    images: List[tuple],
    metadata: Optional[Dict[str, Any]] = None,
    human_delay: float = 0.0,
) -> None:
    """Send a batch of images natively via Telegram's media group API.

    ``images`` is a list of ``(image_url, alt_text)`` pairs; ``alt_text``
    (truncated to 1024 characters) becomes the caption.

    Telegram's ``send_media_group`` bundles 2 to 10 photos into a single
    album, so larger batches are chunked into groups of 10.  Remote
    animated GIFs cannot go into a media group (they require
    ``send_animation``), so they are peeled off and sent through the base
    adapter's per-image path.

    URL-based photos go into the group directly; ``file://`` entries are
    opened as byte streams and closed again after each chunk.  Missing
    local files are skipped with a warning.

    A chunk that ends up with a single usable photo is sent through the
    base adapter's per-image path directly, because a one-item media
    group is rejected by Telegram.  If ``send_media_group`` fails, that
    chunk falls back to the same per-image path.

    ``human_delay`` (seconds) is slept between consecutive chunks and is
    forwarded to the base implementation.
    """
    if not self._bot or not images:
        return

    try:
        from telegram import InputMediaPhoto
    except Exception as exc:  # pragma: no cover - missing SDK
        logger.warning(
            "[%s] InputMediaPhoto unavailable, falling back to per-image send: %s",
            self.name, exc,
        )
        await super().send_multiple_images(chat_id, images, metadata, human_delay)
        return

    # Peel off animations — they need send_animation, not send_media_group
    animations: List[tuple] = []
    photos: List[tuple] = []
    for image_url, alt_text in images:
        if not image_url.startswith("file://") and self._is_animation_url(image_url):
            animations.append((image_url, alt_text))
        else:
            photos.append((image_url, alt_text))

    # Animations: route through the base default (per-image send_animation)
    if animations:
        await super().send_multiple_images(
            chat_id, animations, metadata, human_delay=human_delay,
        )

    if not photos:
        return

    from urllib.parse import unquote as _unquote
    _thread = self._metadata_thread_id(metadata)
    _thread_id = self._message_thread_id_for_send(_thread)

    # Chunk into groups of 10 (Telegram's album limit)
    CHUNK = 10
    chunks = [photos[i:i + CHUNK] for i in range(0, len(photos), CHUNK)]

    for chunk_idx, chunk in enumerate(chunks):
        if human_delay > 0 and chunk_idx > 0:
            await asyncio.sleep(human_delay)

        media: List[Any] = []
        opened_files: List[Any] = []
        send_individually = False
        try:
            for image_url, alt_text in chunk:
                caption = alt_text[:1024] if alt_text else None
                if image_url.startswith("file://"):
                    local_path = _unquote(image_url[7:])
                    if not os.path.exists(local_path):
                        logger.warning(
                            "[%s] Skipping missing image in media group: %s",
                            self.name, local_path,
                        )
                        continue
                    fh = open(local_path, "rb")
                    opened_files.append(fh)
                    media.append(InputMediaPhoto(media=fh, caption=caption))
                else:
                    media.append(InputMediaPhoto(media=image_url, caption=caption))

            if not media:
                continue

            if len(media) < 2:
                # sendMediaGroup needs at least two items; defer to the
                # per-image path (after our file handles are closed)
                # instead of issuing a request that is known to fail.
                send_individually = True
            else:
                logger.info(
                    "[%s] Sending media group of %d photo(s) (chunk %d/%d)",
                    self.name, len(media), chunk_idx + 1, len(chunks),
                )
                await self._bot.send_media_group(
                    chat_id=int(chat_id),
                    media=media,
                    message_thread_id=_thread_id,
                )
        except Exception as e:
            logger.warning(
                "[%s] send_media_group failed (chunk %d/%d), falling back to per-image: %s",
                self.name, chunk_idx + 1, len(chunks), e,
                exc_info=True,
            )
            # Fallback: send each photo in this chunk individually
            await super().send_multiple_images(
                chat_id, chunk, metadata, human_delay=human_delay,
            )
        finally:
            for fh in opened_files:
                try:
                    fh.close()
                except Exception:
                    pass

        if send_individually:
            # Kept outside the try block so a failure here is not
            # mistaken for a send_media_group failure and retried.
            await super().send_multiple_images(
                chat_id, chunk, metadata, human_delay=human_delay,
            )
|
||||
|
||||
async def send_image_file(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -1959,7 +2269,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
)
|
||||
# Final fallback: send URL as text
|
||||
return await super().send_image(chat_id, image_url, caption, reply_to)
|
||||
|
||||
|
||||
async def send_animation(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -2021,7 +2331,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
e,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
|
||||
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
|
||||
"""Get information about a Telegram chat."""
|
||||
if not self._bot:
|
||||
@@ -2055,7 +2365,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
exc_info=True,
|
||||
)
|
||||
return {"name": str(chat_id), "type": "dm", "error": str(e)}
|
||||
|
||||
|
||||
def format_message(self, content: str) -> str:
|
||||
"""
|
||||
Convert standard markdown to Telegram MarkdownV2 format.
|
||||
@@ -2080,10 +2390,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
text = content
|
||||
|
||||
# 0) Pre-wrap GFM-style pipe tables in ``` fences. Telegram can't
|
||||
# render tables natively, but fenced code blocks render as
|
||||
# monospace preformatted text with columns intact. The wrapped
|
||||
# tables then flow through step (1) below as protected regions.
|
||||
# 0) Rewrite GFM-style pipe tables into Telegram-friendly row groups
|
||||
# before the normal MarkdownV2 conversions run.
|
||||
text = _wrap_markdown_tables(text)
|
||||
|
||||
# 1) Protect fenced code blocks (``` ... ```)
|
||||
@@ -2229,7 +2537,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
text = ''.join(_safe_parts)
|
||||
|
||||
return text
|
||||
|
||||
|
||||
# ── Group mention gating ──────────────────────────────────────────────
|
||||
|
||||
def _telegram_require_mention(self) -> bool:
|
||||
@@ -2444,7 +2752,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
event = self._build_message_event(update.message, MessageType.TEXT, update_id=update.update_id)
|
||||
event.text = self._clean_bot_trigger_text(event.text)
|
||||
self._enqueue_text_event(event)
|
||||
|
||||
|
||||
async def _handle_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
"""Handle incoming command messages."""
|
||||
if not update.message or not update.message.text:
|
||||
@@ -2454,7 +2762,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
event = self._build_message_event(update.message, MessageType.COMMAND, update_id=update.update_id)
|
||||
await self.handle_message(event)
|
||||
|
||||
|
||||
async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
"""Handle incoming location/venue pin messages."""
|
||||
if not update.message:
|
||||
@@ -2812,7 +3120,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
return
|
||||
|
||||
await self.handle_message(event)
|
||||
|
||||
|
||||
async def _queue_media_group_event(self, media_group_id: str, event: MessageEvent) -> None:
|
||||
"""Buffer Telegram media-group items so albums arrive as one logical event.
|
||||
|
||||
|
||||
@@ -202,26 +202,22 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
if deliver_type == "github_comment":
|
||||
return await self._deliver_github_comment(content, delivery)
|
||||
|
||||
# Cross-platform delivery — any platform with a gateway adapter
|
||||
if self.gateway_runner and deliver_type in (
|
||||
"telegram",
|
||||
"discord",
|
||||
"slack",
|
||||
"signal",
|
||||
"sms",
|
||||
"whatsapp",
|
||||
"matrix",
|
||||
"mattermost",
|
||||
"homeassistant",
|
||||
"email",
|
||||
"dingtalk",
|
||||
"feishu",
|
||||
"wecom",
|
||||
"wecom_callback",
|
||||
"weixin",
|
||||
"bluebubbles",
|
||||
"qqbot",
|
||||
):
|
||||
# Cross-platform delivery — any platform with a gateway adapter.
|
||||
# Check both built-in names and plugin-registered platforms.
|
||||
_BUILTIN_DELIVER_PLATFORMS = {
|
||||
"telegram", "discord", "slack", "signal", "sms", "whatsapp",
|
||||
"matrix", "mattermost", "homeassistant", "email", "dingtalk",
|
||||
"feishu", "wecom", "wecom_callback", "weixin", "bluebubbles",
|
||||
"qqbot", "yuanbao",
|
||||
}
|
||||
_is_known_platform = deliver_type in _BUILTIN_DELIVER_PLATFORMS
|
||||
if not _is_known_platform:
|
||||
try:
|
||||
from gateway.platform_registry import platform_registry
|
||||
_is_known_platform = platform_registry.is_registered(deliver_type)
|
||||
except Exception:
|
||||
pass
|
||||
if self.gateway_runner and _is_known_platform:
|
||||
return await self._deliver_cross_platform(
|
||||
deliver_type, content, delivery
|
||||
)
|
||||
|
||||
@@ -89,8 +89,21 @@ MAX_CONSECUTIVE_FAILURES = 3
|
||||
RETRY_DELAY_SECONDS = 2
|
||||
BACKOFF_DELAY_SECONDS = 30
|
||||
SESSION_EXPIRED_ERRCODE = -14
|
||||
RATE_LIMIT_ERRCODE = -2 # iLink frequency limit — backoff and retry
|
||||
MESSAGE_DEDUP_TTL_SECONDS = 300
|
||||
|
||||
|
||||
def _is_stale_session_ret(
    ret: "Optional[int]", errcode: "Optional[int]", errmsg: "Optional[str]",
) -> bool:
    """Tell a stale-session reply apart from a genuine rate limit.

    iLink reports ``ret=-2`` / ``errcode=-2`` for rate limiting, but the
    same code combined with the message ``unknown error`` signals a stale
    session (equivalent to ``errcode=-14``).  Returns True only for that
    combination; the message comparison is case-insensitive.
    """
    if RATE_LIMIT_ERRCODE not in (ret, errcode):
        return False
    normalized = errmsg.lower() if errmsg else ""
    return normalized == "unknown error"
|
||||
|
||||
|
||||
MEDIA_IMAGE = 1
|
||||
MEDIA_VIDEO = 2
|
||||
MEDIA_FILE = 3
|
||||
@@ -1113,7 +1126,7 @@ async def qr_login(
|
||||
class WeixinAdapter(BasePlatformAdapter):
|
||||
"""Native Hermes adapter for Weixin personal accounts."""
|
||||
|
||||
MAX_MESSAGE_LENGTH = 4000
|
||||
MAX_MESSAGE_LENGTH = 2000
|
||||
|
||||
# WeChat does not support editing sent messages — streaming must use the
|
||||
# fallback "send-final-only" path so the cursor (▉) is never left visible.
|
||||
@@ -1138,10 +1151,10 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
extra.get("cdn_base_url") or os.getenv("WEIXIN_CDN_BASE_URL", WEIXIN_CDN_BASE_URL)
|
||||
).strip().rstrip("/")
|
||||
self._send_chunk_delay_seconds = float(
|
||||
extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "0.35")
|
||||
extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "1.5")
|
||||
)
|
||||
self._send_chunk_retries = int(
|
||||
extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "2")
|
||||
extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "4")
|
||||
)
|
||||
self._send_chunk_retry_delay_seconds = float(
|
||||
extra.get("send_chunk_retry_delay_seconds")
|
||||
@@ -1209,6 +1222,17 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
self._mark_connected()
|
||||
_LIVE_ADAPTERS[self._token] = self
|
||||
logger.info("[%s] Connected account=%s base=%s", self.name, _safe_id(self._account_id), self._base_url)
|
||||
if self._group_policy != "disabled":
|
||||
logger.warning(
|
||||
"[%s] WEIXIN_GROUP_POLICY=%s is set, but QR-login connects an iLink bot "
|
||||
"identity (e.g. ...@im.bot) which typically cannot be invited into ordinary "
|
||||
"WeChat groups. iLink usually does not deliver ordinary-group events for "
|
||||
"these accounts, so group messages may never reach Hermes regardless of this "
|
||||
"policy. If group delivery doesn't work, the limitation is on the iLink side, "
|
||||
"not in Hermes.",
|
||||
self.name,
|
||||
self._group_policy,
|
||||
)
|
||||
return True
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
@@ -1253,7 +1277,8 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
ret = response.get("ret", 0)
|
||||
errcode = response.get("errcode", 0)
|
||||
if ret not in (0, None) or errcode not in (0, None):
|
||||
if ret == SESSION_EXPIRED_ERRCODE or errcode == SESSION_EXPIRED_ERRCODE:
|
||||
if (ret == SESSION_EXPIRED_ERRCODE or errcode == SESSION_EXPIRED_ERRCODE
|
||||
or _is_stale_session_ret(ret, errcode, response.get("errmsg"))):
|
||||
logger.error("[%s] Session expired; pausing for 10 minutes", self.name)
|
||||
await asyncio.sleep(600)
|
||||
consecutive_failures = 0
|
||||
@@ -1518,6 +1543,7 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
is_session_expired = (
|
||||
ret == SESSION_EXPIRED_ERRCODE
|
||||
or errcode == SESSION_EXPIRED_ERRCODE
|
||||
or _is_stale_session_ret(ret, errcode, resp.get("errmsg"))
|
||||
)
|
||||
# Session expired — strip token and retry once
|
||||
if is_session_expired and not retried_without_token and context_token:
|
||||
@@ -1531,6 +1557,28 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
self.name, _safe_id(chat_id),
|
||||
)
|
||||
continue
|
||||
# Rate limit (-2) — backoff and retry
|
||||
is_rate_limited = (
|
||||
ret == RATE_LIMIT_ERRCODE
|
||||
or errcode == RATE_LIMIT_ERRCODE
|
||||
)
|
||||
if is_rate_limited:
|
||||
errmsg = resp.get("errmsg") or resp.get("msg") or "rate limited"
|
||||
# Record the error so we raise a descriptive
|
||||
# RuntimeError (instead of AssertionError) if the
|
||||
# loop exhausts with the server still rate-limiting.
|
||||
last_error = RuntimeError(
|
||||
f"iLink sendmessage rate limited: ret={ret} errcode={errcode} errmsg={errmsg}"
|
||||
)
|
||||
if attempt >= self._send_chunk_retries:
|
||||
break
|
||||
wait = self._send_chunk_retry_delay_seconds * 3 # 3x backoff for rate limit
|
||||
logger.warning(
|
||||
"[%s] rate limited for %s; backing off %.1fs before retry",
|
||||
self.name, _safe_id(chat_id), wait,
|
||||
)
|
||||
await asyncio.sleep(wait)
|
||||
continue
|
||||
errmsg = resp.get("errmsg") or resp.get("msg") or "unknown error"
|
||||
raise RuntimeError(
|
||||
f"iLink sendmessage error: ret={ret} errcode={errcode} errmsg={errmsg}"
|
||||
@@ -1572,7 +1620,7 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
_, image_cleaned = self.extract_images(cleaned_content)
|
||||
local_files, final_content = self.extract_local_files(image_cleaned)
|
||||
|
||||
_AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a"}
|
||||
_AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a", ".flac"}
|
||||
_VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"}
|
||||
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
|
||||
|
||||
|
||||
@@ -90,7 +90,7 @@ from gateway.platforms.yuanbao_proto import (
|
||||
encode_get_group_member_list,
|
||||
next_seq_no,
|
||||
)
|
||||
from gateway.session import SessionSource, build_session_key
|
||||
from gateway.session import build_session_key
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -1897,7 +1897,7 @@ class OwnerCommandMiddleware(InboundMiddleware):
|
||||
return None, None, False
|
||||
|
||||
# Sender identity check: bot owner <-> push.from_account == push.bot_owner_id
|
||||
owner_id = (push or {}).get("bot_owner_id") or ""
|
||||
# owner_id = (push or {}).get("bot_owner_id") or ""
|
||||
# is_owner = bool(owner_id) and owner_id == from_account
|
||||
is_owner = True
|
||||
return cmd, cmd_line, is_owner
|
||||
|
||||
@@ -21,12 +21,10 @@ import hashlib
|
||||
import hmac
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import secrets
|
||||
import struct
|
||||
import time
|
||||
import urllib.parse
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from typing import Optional, Any
|
||||
|
||||
import httpx
|
||||
|
||||
@@ -19,9 +19,8 @@ yuanbao_proto.py - Yuanbao WebSocket 协议编解码(纯 Python 实现)
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import struct
|
||||
import threading
|
||||
from typing import Optional, Union
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
+1237
-152
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,150 @@
|
||||
"""Gateway runtime-metadata footer.
|
||||
|
||||
Renders a compact footer showing runtime state (model, context %, cwd) and
|
||||
appends it to the FINAL message of an agent turn when enabled. Off by default
|
||||
to keep replies minimal.
|
||||
|
||||
Config (``~/.hermes/config.yaml``)::
|
||||
|
||||
display:
|
||||
runtime_footer:
|
||||
enabled: true # off by default
|
||||
fields: [model, context_pct, cwd] # order shown; drop any to hide
|
||||
|
||||
Per-platform overrides live under ``display.platforms.<platform>.runtime_footer``.
|
||||
Users can toggle the global setting with ``/footer on|off`` from both the CLI
|
||||
and any gateway platform.
|
||||
|
||||
The footer is appended to the final response text in ``gateway/run.py`` right
|
||||
before returning the response to the adapter send path — so it only lands on
|
||||
the final message a user sees, not on tool-progress updates or streaming
|
||||
partials. When streaming is on and the final text has already been delivered
|
||||
piecemeal, the footer is sent as a separate trailing message via
|
||||
``send_trailing_footer()``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable, Optional
|
||||
|
||||
_DEFAULT_FIELDS: tuple[str, ...] = ("model", "context_pct", "cwd")
|
||||
_SEP = " · "
|
||||
|
||||
|
||||
def _home_relative_cwd(cwd: str) -> str:
|
||||
"""Return *cwd* with ``$HOME`` collapsed to ``~``. Empty string if unset."""
|
||||
if not cwd:
|
||||
return ""
|
||||
try:
|
||||
home = os.path.expanduser("~")
|
||||
p = os.path.abspath(cwd)
|
||||
if home and (p == home or p.startswith(home + os.sep)):
|
||||
return "~" + p[len(home):]
|
||||
return p
|
||||
except Exception:
|
||||
return cwd
|
||||
|
||||
|
||||
def _model_short(model: Optional[str]) -> str:
|
||||
"""Drop ``vendor/`` prefix for readability (``openai/gpt-5.4`` → ``gpt-5.4``)."""
|
||||
if not model:
|
||||
return ""
|
||||
return model.rsplit("/", 1)[-1]
|
||||
|
||||
|
||||
def resolve_footer_config(
    user_config: dict[str, Any] | None,
    platform_key: str | None = None,
) -> dict[str, Any]:
    """Resolve effective runtime-footer config for *platform_key*.

    Merge order (later wins):
        1. Built-in defaults (enabled=False, default field order)
        2. ``display.runtime_footer``
        3. ``display.platforms.<platform_key>.runtime_footer``

    Returns a fresh dict with the keys ``enabled`` (bool) and ``fields``
    (list of str).  A ``fields`` override is applied only when it is a
    non-empty list.  Sections that are missing or are not mappings are
    ignored rather than raising, so a malformed config file degrades to
    the defaults.
    """
    resolved: dict[str, Any] = {"enabled": False, "fields": list(_DEFAULT_FIELDS)}

    def _overlay(section: Any) -> None:
        """Apply one ``runtime_footer`` mapping on top of ``resolved``."""
        if not isinstance(section, dict):
            return
        if "enabled" in section:
            resolved["enabled"] = bool(section.get("enabled"))
        fields = section.get("fields")
        if isinstance(fields, list) and fields:
            resolved["fields"] = [str(f) for f in fields]

    display = user_config.get("display") if isinstance(user_config, dict) else None
    if not isinstance(display, dict):
        return resolved

    _overlay(display.get("runtime_footer"))

    if platform_key:
        platforms = display.get("platforms")
        if isinstance(platforms, dict):
            plat_cfg = platforms.get(platform_key)
            if isinstance(plat_cfg, dict):
                _overlay(plat_cfg.get("runtime_footer"))

    return resolved
|
||||
|
||||
|
||||
def format_runtime_footer(
    *,
    model: Optional[str],
    context_tokens: int,
    context_length: Optional[int],
    cwd: Optional[str] = None,
    fields: Iterable[str] = _DEFAULT_FIELDS,
) -> str:
    """Build the footer line; return "" when no requested field has data.

    *fields* selects which segments appear and in what order.  A segment
    whose data is missing is dropped silently, since a shorter footer
    reads better than one with ``?%`` or empty slots.  Unrecognized field
    names are ignored.
    """
    segments: list[str] = []
    for name in fields:
        text = ""
        if name == "model":
            text = _model_short(model)
        elif name == "context_pct":
            if context_length and context_length > 0 and context_tokens >= 0:
                ratio = context_tokens / context_length
                # Whole-number percentage, clamped to 0..100.
                text = f"{max(0, min(100, round(ratio * 100)))}%"
        elif name == "cwd":
            # Fall back to TERMINAL_CWD when no explicit cwd is given.
            text = _home_relative_cwd(cwd or os.environ.get("TERMINAL_CWD", ""))
        if text:
            segments.append(text)
    # Joining an empty list already yields the empty string.
    return _SEP.join(segments)
|
||||
|
||||
|
||||
def build_footer_line(
    *,
    user_config: dict[str, Any] | None,
    platform_key: str | None,
    model: Optional[str],
    context_tokens: int,
    context_length: Optional[int],
    cwd: Optional[str] = None,
) -> str:
    """Top-level entry point used by gateway/run.py.

    Resolves the footer config for *platform_key* and renders the footer
    text.  Returns an empty string when the footer is disabled or no
    field has data.  Appending the result to the final response (with a
    single blank line of separation) is left to the caller.
    """
    footer_cfg = resolve_footer_config(user_config, platform_key)
    if footer_cfg.get("enabled"):
        selected_fields = footer_cfg.get("fields") or _DEFAULT_FIELDS
        return format_runtime_footer(
            model=model,
            context_tokens=context_tokens,
            context_length=context_length,
            cwd=cwd,
            fields=selected_fields,
        )
    return ""
|
||||
+29
-32
@@ -62,8 +62,9 @@ from .config import (
|
||||
)
|
||||
from .whatsapp_identity import (
|
||||
canonical_whatsapp_identifier,
|
||||
normalize_whatsapp_identifier,
|
||||
normalize_whatsapp_identifier, # noqa: F401 - re-exported for gateway.session callers
|
||||
)
|
||||
from utils import atomic_replace
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -234,7 +235,7 @@ def build_session_context_prompt(
|
||||
) -> str:
|
||||
"""
|
||||
Build the dynamic system prompt section that tells the agent about its context.
|
||||
|
||||
|
||||
This is injected into the system prompt so the agent knows:
|
||||
- Where messages are coming from
|
||||
- What platforms are connected
|
||||
@@ -246,13 +247,23 @@ def build_session_context_prompt(
|
||||
Platforms like Discord are excluded because mentions need real IDs.
|
||||
Routing still uses the original values (they stay in SessionSource).
|
||||
"""
|
||||
# Only apply redaction on platforms where IDs aren't needed for mentions
|
||||
redact_pii = redact_pii and context.source.platform in _PII_SAFE_PLATFORMS
|
||||
# Only apply redaction on platforms where IDs aren't needed for mentions.
|
||||
# Check both the hardcoded set (builtins) and the plugin registry.
|
||||
_is_pii_safe = context.source.platform in _PII_SAFE_PLATFORMS
|
||||
if not _is_pii_safe:
|
||||
try:
|
||||
from gateway.platform_registry import platform_registry
|
||||
entry = platform_registry.get(context.source.platform.value)
|
||||
if entry and entry.pii_safe:
|
||||
_is_pii_safe = True
|
||||
except Exception:
|
||||
pass
|
||||
redact_pii = redact_pii and _is_pii_safe
|
||||
lines = [
|
||||
"## Current Session Context",
|
||||
"",
|
||||
]
|
||||
|
||||
|
||||
# Source info
|
||||
platform_name = context.source.platform.value.title()
|
||||
if context.source.platform == Platform.LOCAL:
|
||||
@@ -277,7 +288,7 @@ def build_session_context_prompt(
|
||||
else:
|
||||
desc = src.description
|
||||
lines.append(f"**Source:** {platform_name} ({desc})")
|
||||
|
||||
|
||||
# Channel topic (if available - provides context about the channel's purpose)
|
||||
if context.source.chat_topic:
|
||||
lines.append(f"**Channel Topic:** {context.source.chat_topic}")
|
||||
@@ -302,7 +313,7 @@ def build_session_context_prompt(
|
||||
if redact_pii:
|
||||
uid = _hash_sender_id(uid)
|
||||
lines.append(f"**User ID:** {uid}")
|
||||
|
||||
|
||||
# Platform-specific behavioral notes
|
||||
if context.source.platform == Platform.SLACK:
|
||||
lines.append("")
|
||||
@@ -368,9 +379,9 @@ def build_session_context_prompt(
|
||||
for p in context.connected_platforms:
|
||||
if p != Platform.LOCAL:
|
||||
platforms_list.append(f"{p.value}: Connected ✓")
|
||||
|
||||
|
||||
lines.append(f"**Connected Platforms:** {', '.join(platforms_list)}")
|
||||
|
||||
|
||||
# Home channels
|
||||
if context.home_channels:
|
||||
lines.append("")
|
||||
@@ -378,11 +389,11 @@ def build_session_context_prompt(
|
||||
for platform, home in context.home_channels.items():
|
||||
hc_id = _hash_chat_id(home.chat_id) if redact_pii else home.chat_id
|
||||
lines.append(f" - {platform.value}: {home.name} (ID: {hc_id})")
|
||||
|
||||
|
||||
# Delivery options for scheduled tasks
|
||||
lines.append("")
|
||||
lines.append("**Delivery options for scheduled tasks:**")
|
||||
|
||||
|
||||
from hermes_constants import display_hermes_home
|
||||
|
||||
# Origin delivery
|
||||
@@ -398,15 +409,15 @@ def build_session_context_prompt(
|
||||
lines.append(
|
||||
f"- `\"local\"` → Save to local files only ({display_hermes_home()}/cron/output/)"
|
||||
)
|
||||
|
||||
|
||||
# Platform home channels
|
||||
for platform, home in context.home_channels.items():
|
||||
lines.append(f"- `\"{platform.value}\"` → Home channel ({home.name})")
|
||||
|
||||
|
||||
# Note about explicit targeting
|
||||
lines.append("")
|
||||
lines.append("*For explicit targeting, use `\"platform:chat_id\"` format if the user provides a specific chat ID.*")
|
||||
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
@@ -705,7 +716,7 @@ class SessionStore:
|
||||
json.dump(data, f, indent=2)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_path, sessions_file)
|
||||
atomic_replace(tmp_path, sessions_file)
|
||||
except BaseException:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
@@ -1257,25 +1268,11 @@ class SessionStore:
|
||||
Used by /retry, /undo, and /compress to persist modified conversation history.
|
||||
Rewrites both SQLite and legacy JSONL storage.
|
||||
"""
|
||||
# SQLite: clear old messages and re-insert
|
||||
# SQLite: replace atomically so a mid-rewrite failure doesn't leave
|
||||
# the session half-empty in the DB while JSONL still has history.
|
||||
if self._db:
|
||||
try:
|
||||
self._db.clear_messages(session_id)
|
||||
for msg in messages:
|
||||
role = msg.get("role", "unknown")
|
||||
self._db.append_message(
|
||||
session_id=session_id,
|
||||
role=role,
|
||||
content=msg.get("content"),
|
||||
tool_name=msg.get("tool_name"),
|
||||
tool_calls=msg.get("tool_calls"),
|
||||
tool_call_id=msg.get("tool_call_id"),
|
||||
reasoning=msg.get("reasoning") if role == "assistant" else None,
|
||||
reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
|
||||
reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
|
||||
codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
|
||||
codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
|
||||
)
|
||||
self._db.replace_messages(session_id, messages)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to rewrite transcript in DB: %s", e)
|
||||
|
||||
|
||||
@@ -91,11 +91,20 @@ class GatewayStreamConsumer:
|
||||
chat_id: str,
|
||||
config: Optional[StreamConsumerConfig] = None,
|
||||
metadata: Optional[dict] = None,
|
||||
on_new_message: Optional[callable] = None,
|
||||
):
|
||||
self.adapter = adapter
|
||||
self.chat_id = chat_id
|
||||
self.cfg = config or StreamConsumerConfig()
|
||||
self.metadata = metadata
|
||||
# Fired whenever a fresh content bubble is created on the platform
|
||||
# (first-send of a new message, commentary, overflow chunk, or
|
||||
# fallback continuation). The gateway uses this to linearize the
|
||||
# tool-progress bubble: when content resumes after a tool batch,
|
||||
# the next tool.started should open a NEW progress bubble below
|
||||
# the content, not edit the old bubble above it.
|
||||
# Called with no arguments. Exceptions are swallowed.
|
||||
self._on_new_message = on_new_message
|
||||
self._queue: queue.Queue = queue.Queue()
|
||||
self._accumulated = ""
|
||||
self._message_id: Optional[str] = None
|
||||
@@ -146,6 +155,16 @@ class GatewayStreamConsumer:
|
||||
if text:
|
||||
self._queue.put((_COMMENTARY, text))
|
||||
|
||||
def _notify_new_message(self) -> None:
    """Invoke the ``on_new_message`` hook, if one was supplied.

    The hook is called with no arguments.  Any exception it raises is
    logged at debug level and swallowed, so a misbehaving hook cannot
    interrupt message delivery.
    """
    callback = self._on_new_message
    if callback is not None:
        try:
            callback()
        except Exception:
            logger.debug("on_new_message callback error", exc_info=True)
|
||||
|
||||
def _reset_segment_state(self, *, preserve_no_edit: bool = False) -> None:
|
||||
if preserve_no_edit and self._message_id == "__no_edit__":
|
||||
return
|
||||
@@ -529,6 +548,9 @@ class GatewayStreamConsumer:
|
||||
self._message_id = str(result.message_id)
|
||||
self._already_sent = True
|
||||
self._last_sent_text = text
|
||||
# Fresh content bubble — close off any stale tool bubble
|
||||
# above so the next tool starts a new bubble below.
|
||||
self._notify_new_message()
|
||||
return str(result.message_id)
|
||||
else:
|
||||
self._edit_supported = False
|
||||
@@ -661,6 +683,9 @@ class GatewayStreamConsumer:
|
||||
sent_any_chunk = True
|
||||
last_successful_chunk = chunk
|
||||
last_message_id = result.message_id or last_message_id
|
||||
# Each fallback chunk is a fresh platform message — notify
|
||||
# so any stale tool-progress bubble gets closed off.
|
||||
self._notify_new_message()
|
||||
|
||||
self._message_id = last_message_id
|
||||
self._already_sent = True
|
||||
@@ -744,6 +769,11 @@ class GatewayStreamConsumer:
|
||||
# tool..."), not the final response. Setting already_sent would cause
|
||||
# the final response to be incorrectly suppressed when there are
|
||||
# multiple tool calls. See: https://github.com/NousResearch/hermes-agent/issues/10454
|
||||
if result.success:
|
||||
# Commentary counts as fresh content — close off any
|
||||
# stale tool bubble above it so the next tool starts a
|
||||
# new bubble below.
|
||||
self._notify_new_message()
|
||||
return result.success
|
||||
except Exception as e:
|
||||
logger.error("Commentary send error: %s", e)
|
||||
@@ -973,6 +1003,11 @@ class GatewayStreamConsumer:
|
||||
# every delta/tool boundary when platforms accept a
|
||||
# message but do not return an editable message id.
|
||||
self._message_id = "__no_edit__"
|
||||
# Notify the gateway that a fresh content bubble was
|
||||
# created so any accumulated tool-progress bubble above
|
||||
# gets closed off — the next tool fires into a new
|
||||
# bubble below, preserving chronological order.
|
||||
self._notify_new_message()
|
||||
return True
|
||||
else:
|
||||
# Initial send failed — disable streaming for this session
|
||||
|
||||
@@ -0,0 +1,373 @@
|
||||
"""
|
||||
Top-level argparse construction for the hermes CLI.
|
||||
|
||||
Lives in its own module so other modules (e.g. ``relaunch.py``) can
|
||||
introspect the parser to discover which flags exist without running the
|
||||
``main`` fn.
|
||||
|
||||
Only the top-level parser and the ``chat`` subparser live here. Every other
|
||||
subparser (model, gateway, sessions, …) is built inline in ``main.py``
|
||||
because its dispatch is tightly coupled to module-level ``cmd_*`` functions.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
|
||||
|
||||
# `--profile` / `-p` is consumed by ``main._apply_profile_override`` before
|
||||
# argparse runs (it sets ``HERMES_HOME`` and strips itself from ``sys.argv``),
|
||||
# so it isn't on the parser. Listed here so all "carry over on relaunch"
|
||||
# metadata lives in one file.
|
||||
PRE_ARGPARSE_INHERITED_FLAGS: list[tuple[str, bool]] = [
|
||||
("--profile", True),
|
||||
("-p", True),
|
||||
]
|
||||
|
||||
|
||||
def _inherited_flag(parser, *args, **kwargs):
    """Add an argument to ``parser`` and mark it as carried over on relaunch.

    All positional and keyword arguments are forwarded unchanged to
    ``parser.add_argument(...)``.  The Action object it returns is tagged
    with ``inherit_on_relaunch = True`` so that ``hermes_cli.relaunch`` can
    discover, by introspecting the parser, which flags to pass along when
    the CLI re-execs itself (e.g. after ``sessions browse`` picks a session,
    or after the setup wizard launches chat).

    Returns the tagged Action.
    """
    registered = parser.add_argument(*args, **kwargs)
    setattr(registered, "inherit_on_relaunch", True)
    return registered
|
||||
|
||||
|
||||
_EPILOGUE = """
|
||||
Examples:
|
||||
hermes Start interactive chat
|
||||
hermes chat -q "Hello" Single query mode
|
||||
hermes -c Resume the most recent session
|
||||
hermes -c "my project" Resume a session by name (latest in lineage)
|
||||
hermes --resume <session_id> Resume a specific session by ID
|
||||
hermes setup Run setup wizard
|
||||
hermes logout Clear stored authentication
|
||||
hermes auth add <provider> Add a pooled credential
|
||||
hermes auth list List pooled credentials
|
||||
hermes auth remove <p> <t> Remove pooled credential by index, id, or label
|
||||
hermes auth reset <provider> Clear exhaustion status for a provider
|
||||
hermes model Select default model
|
||||
hermes fallback [list] Show fallback provider chain
|
||||
hermes fallback add Add a fallback provider (same picker as `hermes model`)
|
||||
hermes fallback remove Remove a fallback provider from the chain
|
||||
hermes config View configuration
|
||||
hermes config edit Edit config in $EDITOR
|
||||
hermes config set model gpt-4 Set a config value
|
||||
hermes gateway Run messaging gateway
|
||||
hermes -s hermes-agent-dev,github-auth
|
||||
hermes -w Start in isolated git worktree
|
||||
hermes gateway install Install gateway background service
|
||||
hermes sessions list List past sessions
|
||||
hermes sessions browse Interactive session picker
|
||||
hermes sessions rename ID T Rename/title a session
|
||||
hermes logs View agent.log (last 50 lines)
|
||||
hermes logs -f Follow agent.log in real time
|
||||
hermes logs errors View errors.log
|
||||
hermes logs --since 1h Lines from the last hour
|
||||
hermes debug share Upload debug report for support
|
||||
hermes update Update to latest version
|
||||
|
||||
For more help on a command:
|
||||
hermes <command> --help
|
||||
"""
|
||||
|
||||
|
||||
def build_top_level_parser():
    """Build the top-level parser, the subparsers action, and the ``chat`` subparser.

    Returns ``(parser, subparsers, chat_parser)``.  The caller wires
    ``chat_parser.set_defaults(func=cmd_chat)`` and continues registering
    other subparsers via ``subparsers.add_parser(...)``.

    Several flags are registered on both the top-level parser and the
    ``chat`` subparser so that ``hermes --flag`` and ``hermes chat --flag``
    both work.  argparse parses a subcommand into a fresh namespace and then
    copies every attribute of that namespace onto the top-level one, so a
    concrete default on the subparser would overwrite a value the user gave
    *before* the subcommand (``hermes --tui chat``).  Every duplicated flag
    on ``chat_parser`` therefore uses ``default=argparse.SUPPRESS``; the
    top-level registration supplies the real default.
    """
    parser = argparse.ArgumentParser(
        prog="hermes",
        description="Hermes Agent - AI assistant with tool-calling capabilities",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_EPILOGUE,
    )

    parser.add_argument(
        "--version", "-V", action="store_true", help="Show version and exit"
    )
    parser.add_argument(
        "-z",
        "--oneshot",
        metavar="PROMPT",
        default=None,
        help=(
            "One-shot mode: send a single prompt and print ONLY the final "
            "response text to stdout. No banner, no spinner, no tool "
            "previews, no session_id line. Tools, memory, rules, and "
            "AGENTS.md in the CWD are loaded as normal; approvals are "
            "auto-bypassed. Intended for scripts / pipes."
        ),
    )
    # --model / --provider are accepted at the top level so they can pair
    # with -z without needing the `chat` subcommand. If neither -z nor a
    # subcommand consumes them, they fall through harmlessly as None.
    # Mirrors `hermes chat --model ... --provider ...` semantics.
    _inherited_flag(
        parser,
        "-m",
        "--model",
        default=None,
        help=(
            "Model override for this invocation (e.g. anthropic/claude-sonnet-4.6). "
            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_MODEL env var."
        ),
    )
    _inherited_flag(
        parser,
        "--provider",
        default=None,
        help=(
            "Provider override for this invocation (e.g. openrouter, anthropic). "
            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
        ),
    )
    parser.add_argument(
        "-t",
        "--toolsets",
        default=None,
        help="Comma-separated toolsets to enable for this invocation. Applies to -z/--oneshot and --tui.",
    )
    parser.add_argument(
        "--resume",
        "-r",
        metavar="SESSION",
        default=None,
        help="Resume a previous session by ID or title",
    )
    parser.add_argument(
        "--continue",
        "-c",
        dest="continue_last",
        nargs="?",
        const=True,
        default=None,
        metavar="SESSION_NAME",
        help="Resume a session by name, or the most recent if no name given",
    )
    parser.add_argument(
        "--worktree",
        "-w",
        action="store_true",
        default=False,
        help="Run in an isolated git worktree (for parallel agents)",
    )
    _inherited_flag(
        parser,
        "--accept-hooks",
        action="store_true",
        default=False,
        help=(
            "Auto-approve any unseen shell hooks declared in config.yaml "
            "without a TTY prompt. Equivalent to HERMES_ACCEPT_HOOKS=1 or "
            "hooks_auto_accept: true in config.yaml. Use on CI / headless "
            "runs that can't prompt."
        ),
    )
    _inherited_flag(
        parser,
        "--skills",
        "-s",
        action="append",
        default=None,
        help="Preload one or more skills for the session (repeat flag or comma-separate)",
    )
    _inherited_flag(
        parser,
        "--yolo",
        action="store_true",
        default=False,
        help="Bypass all dangerous command approval prompts (use at your own risk)",
    )
    _inherited_flag(
        parser,
        "--pass-session-id",
        action="store_true",
        default=False,
        help="Include the session ID in the agent's system prompt",
    )
    _inherited_flag(
        parser,
        "--ignore-user-config",
        action="store_true",
        default=False,
        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded)",
    )
    _inherited_flag(
        parser,
        "--ignore-rules",
        action="store_true",
        default=False,
        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills",
    )
    _inherited_flag(
        parser,
        "--tui",
        action="store_true",
        default=False,
        help="Launch the modern TUI instead of the classic REPL",
    )
    _inherited_flag(
        parser,
        "--dev",
        dest="tui_dev",
        action="store_true",
        default=False,
        help="With --tui: run TypeScript sources via tsx (skip dist build)",
    )

    subparsers = parser.add_subparsers(dest="command", help="Command to run")

    # =========================================================================
    # chat command
    # =========================================================================
    chat_parser = subparsers.add_parser(
        "chat",
        help="Interactive chat with the agent",
        description="Start an interactive chat session with Hermes Agent",
    )
    chat_parser.add_argument(
        "-q", "--query", help="Single query (non-interactive mode)"
    )
    chat_parser.add_argument(
        "--image", help="Optional local image path to attach to a single query"
    )
    _inherited_flag(
        chat_parser,
        "-m",
        "--model",
        # SUPPRESS: keep a top-level `-m` (e.g. `hermes -m X chat`) instead
        # of overwriting it with the subparser's implicit None default.
        default=argparse.SUPPRESS,
        help="Model to use (e.g., anthropic/claude-sonnet-4)",
    )
    chat_parser.add_argument(
        "-t",
        "--toolsets",
        # SUPPRESS: same reason as --model above.
        default=argparse.SUPPRESS,
        help="Comma-separated toolsets to enable",
    )
    _inherited_flag(
        chat_parser,
        "-s",
        "--skills",
        action="append",
        default=argparse.SUPPRESS,
        help="Preload one or more skills for the session (repeat flag or comma-separate)",
    )
    _inherited_flag(
        chat_parser,
        "--provider",
        # No `choices=` here: user-defined providers from config.yaml `providers:`
        # are also valid values, and runtime resolution (resolve_runtime_provider)
        # handles validation/error reporting consistently with the top-level
        # `--provider` flag.
        # SUPPRESS: the top-level `--provider` supplies the None default, and
        # a value given before `chat` must not be clobbered.
        default=argparse.SUPPRESS,
        help="Inference provider (default: auto). Built-in or a user-defined name from `providers:` in config.yaml.",
    )
    chat_parser.add_argument(
        "-v", "--verbose", action="store_true", help="Verbose output"
    )
    chat_parser.add_argument(
        "-Q",
        "--quiet",
        action="store_true",
        help="Quiet mode for programmatic use: suppress banner, spinner, and tool previews. Only output the final response and session info.",
    )
    chat_parser.add_argument(
        "--resume",
        "-r",
        metavar="SESSION_ID",
        default=argparse.SUPPRESS,
        help="Resume a previous session by ID (shown on exit)",
    )
    chat_parser.add_argument(
        "--continue",
        "-c",
        dest="continue_last",
        nargs="?",
        const=True,
        default=argparse.SUPPRESS,
        metavar="SESSION_NAME",
        help="Resume a session by name, or the most recent if no name given",
    )
    chat_parser.add_argument(
        "--worktree",
        "-w",
        action="store_true",
        default=argparse.SUPPRESS,
        help="Run in an isolated git worktree (for parallel agents on the same repo)",
    )
    _inherited_flag(
        chat_parser,
        "--accept-hooks",
        action="store_true",
        default=argparse.SUPPRESS,
        help=(
            "Auto-approve any unseen shell hooks declared in config.yaml "
            "without a TTY prompt (see also HERMES_ACCEPT_HOOKS env var and "
            "hooks_auto_accept: in config.yaml)."
        ),
    )
    chat_parser.add_argument(
        "--checkpoints",
        action="store_true",
        default=False,
        help="Enable filesystem checkpoints before destructive file operations (use /rollback to restore)",
    )
    chat_parser.add_argument(
        "--max-turns",
        type=int,
        default=None,
        metavar="N",
        help="Maximum tool-calling iterations per conversation turn (default: 90, or agent.max_turns in config)",
    )
    _inherited_flag(
        chat_parser,
        "--yolo",
        action="store_true",
        default=argparse.SUPPRESS,
        help="Bypass all dangerous command approval prompts (use at your own risk)",
    )
    _inherited_flag(
        chat_parser,
        "--pass-session-id",
        action="store_true",
        default=argparse.SUPPRESS,
        help="Include the session ID in the agent's system prompt",
    )
    _inherited_flag(
        chat_parser,
        "--ignore-user-config",
        action="store_true",
        default=argparse.SUPPRESS,
        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded). Useful for isolated CI runs, reproduction, and third-party integrations.",
    )
    _inherited_flag(
        chat_parser,
        "--ignore-rules",
        action="store_true",
        default=argparse.SUPPRESS,
        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills. Combine with --ignore-user-config for a fully isolated run.",
    )
    chat_parser.add_argument(
        "--source",
        default=None,
        help="Session source tag for filtering (default: cli). Use 'tool' for third-party integrations that should not appear in user session lists.",
    )
    _inherited_flag(
        chat_parser,
        "--tui",
        action="store_true",
        # SUPPRESS (was False): `hermes --tui chat` must keep tui=True.
        default=argparse.SUPPRESS,
        help="Launch the modern TUI instead of the classic REPL",
    )
    _inherited_flag(
        chat_parser,
        "--dev",
        dest="tui_dev",
        action="store_true",
        # SUPPRESS (was False): `hermes --dev chat` must keep tui_dev=True.
        default=argparse.SUPPRESS,
        help="With --tui: run TypeScript sources via tsx (skip dist build)",
    )

    return parser, subparsers, chat_parser
|
||||
+380
-5
@@ -43,6 +43,7 @@ import yaml
|
||||
|
||||
from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from utils import atomic_replace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -71,6 +72,14 @@ DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60 # 30 minutes
|
||||
ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 # refresh 2 min before expiry
|
||||
DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1 # poll at most every 1s
|
||||
DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
|
||||
MINIMAX_OAUTH_CLIENT_ID = "78257093-7e40-4613-99e0-527b14b39113"
|
||||
MINIMAX_OAUTH_SCOPE = "group_id profile model.completion"
|
||||
MINIMAX_OAUTH_GRANT_TYPE = "urn:ietf:params:oauth:grant-type:user_code"
|
||||
MINIMAX_OAUTH_GLOBAL_BASE = "https://api.minimax.io"
|
||||
MINIMAX_OAUTH_CN_BASE = "https://api.minimaxi.com"
|
||||
MINIMAX_OAUTH_GLOBAL_INFERENCE = "https://api.minimax.io/anthropic"
|
||||
MINIMAX_OAUTH_CN_INFERENCE = "https://api.minimaxi.com/anthropic"
|
||||
MINIMAX_OAUTH_REFRESH_SKEW_SECONDS = 60
|
||||
DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
|
||||
DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
|
||||
DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
|
||||
@@ -109,6 +118,12 @@ SERVICE_PROVIDER_NAMES: Dict[str, str] = {
|
||||
DEFAULT_GEMINI_CLOUDCODE_BASE_URL = "cloudcode-pa://google"
|
||||
GEMINI_OAUTH_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 60 # refresh 60s before expiry
|
||||
|
||||
# LM Studio's default no-auth mode still requires *some* non-empty bearer for
|
||||
# the API-key code paths (auxiliary_client, runtime resolver) to treat the
|
||||
# provider as configured. This sentinel is sent only to LM Studio, never to
|
||||
# any remote service.
|
||||
LMSTUDIO_NOAUTH_PLACEHOLDER = "dummy-lm-api-key"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Provider Registry
|
||||
@@ -119,7 +134,7 @@ class ProviderConfig:
|
||||
"""Describes a known inference provider."""
|
||||
id: str
|
||||
name: str
|
||||
auth_type: str # "oauth_device_code", "oauth_external", or "api_key"
|
||||
auth_type: str # "oauth_device_code", "oauth_external", "oauth_minimax", or "api_key"
|
||||
portal_base_url: str = ""
|
||||
inference_base_url: str = ""
|
||||
client_id: str = ""
|
||||
@@ -159,6 +174,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
auth_type="oauth_external",
|
||||
inference_base_url=DEFAULT_GEMINI_CLOUDCODE_BASE_URL,
|
||||
),
|
||||
"lmstudio": ProviderConfig(
|
||||
id="lmstudio",
|
||||
name="LM Studio",
|
||||
auth_type="api_key",
|
||||
inference_base_url="http://127.0.0.1:1234/v1",
|
||||
api_key_env_vars=("LM_API_KEY",),
|
||||
base_url_env_var="LM_BASE_URL",
|
||||
),
|
||||
"copilot": ProviderConfig(
|
||||
id="copilot",
|
||||
name="GitHub Copilot",
|
||||
@@ -240,6 +263,17 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
api_key_env_vars=("MINIMAX_API_KEY",),
|
||||
base_url_env_var="MINIMAX_BASE_URL",
|
||||
),
|
||||
"minimax-oauth": ProviderConfig(
|
||||
id="minimax-oauth",
|
||||
name="MiniMax (OAuth \u00b7 minimax.io)",
|
||||
auth_type="oauth_minimax",
|
||||
portal_base_url=MINIMAX_OAUTH_GLOBAL_BASE,
|
||||
inference_base_url=MINIMAX_OAUTH_GLOBAL_INFERENCE,
|
||||
client_id=MINIMAX_OAUTH_CLIENT_ID,
|
||||
scope=MINIMAX_OAUTH_SCOPE,
|
||||
extra={"region": "global", "cn_portal_base_url": MINIMAX_OAUTH_CN_BASE,
|
||||
"cn_inference_base_url": MINIMAX_OAUTH_CN_INFERENCE},
|
||||
),
|
||||
"anthropic": ProviderConfig(
|
||||
id="anthropic",
|
||||
name="Anthropic",
|
||||
@@ -348,6 +382,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
api_key_env_vars=("XIAOMI_API_KEY",),
|
||||
base_url_env_var="XIAOMI_BASE_URL",
|
||||
),
|
||||
"tencent-tokenhub": ProviderConfig(
|
||||
id="tencent-tokenhub",
|
||||
name="Tencent TokenHub",
|
||||
auth_type="api_key",
|
||||
inference_base_url="https://tokenhub.tencentmaas.com/v1",
|
||||
api_key_env_vars=("TOKENHUB_API_KEY",),
|
||||
base_url_env_var="TOKENHUB_BASE_URL",
|
||||
),
|
||||
"ollama-cloud": ProviderConfig(
|
||||
id="ollama-cloud",
|
||||
name="Ollama Cloud",
|
||||
@@ -820,7 +862,7 @@ def _save_auth_store(auth_store: Dict[str, Any]) -> Path:
|
||||
handle.write(payload)
|
||||
handle.flush()
|
||||
os.fsync(handle.fileno())
|
||||
os.replace(tmp_path, auth_file)
|
||||
atomic_replace(tmp_path, auth_file)
|
||||
try:
|
||||
dir_fd = os.open(str(auth_file.parent), os.O_RDONLY)
|
||||
except OSError:
|
||||
@@ -1130,6 +1172,7 @@ def resolve_provider(
|
||||
"arcee-ai": "arcee", "arceeai": "arcee",
|
||||
"gmi-cloud": "gmi", "gmicloud": "gmi",
|
||||
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
|
||||
"minimax-portal": "minimax-oauth", "minimax-global": "minimax-oauth", "minimax_oauth": "minimax-oauth",
|
||||
"alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
|
||||
"alibaba_coding_plan": "alibaba-coding-plan",
|
||||
"claude": "anthropic", "claude-code": "anthropic",
|
||||
@@ -1141,11 +1184,13 @@ def resolve_provider(
|
||||
"qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", "google-gemini-cli": "google-gemini-cli", "gemini-cli": "google-gemini-cli", "gemini-oauth": "google-gemini-cli",
|
||||
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
|
||||
"mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
|
||||
"tencent": "tencent-tokenhub", "tokenhub": "tencent-tokenhub",
|
||||
"tencent-cloud": "tencent-tokenhub", "tencentmaas": "tencent-tokenhub",
|
||||
"aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock",
|
||||
"go": "opencode-go", "opencode-go-sub": "opencode-go",
|
||||
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
|
||||
"lmstudio": "lmstudio", "lm-studio": "lmstudio", "lm_studio": "lmstudio",
|
||||
# Local server aliases — route through the generic custom provider
|
||||
"lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
|
||||
"ollama": "custom", "ollama_cloud": "ollama-cloud",
|
||||
"vllm": "custom", "llamacpp": "custom",
|
||||
"llama.cpp": "custom", "llama-cpp": "custom",
|
||||
@@ -1192,8 +1237,11 @@ def resolve_provider(
|
||||
continue
|
||||
# GitHub tokens are commonly present for repo/tool access but should not
|
||||
# hijack inference auto-selection unless the user explicitly chooses
|
||||
# Copilot/GitHub Models as the provider.
|
||||
if pid == "copilot":
|
||||
# Copilot/GitHub Models as the provider. LM Studio is a local server
|
||||
# whose availability isn't implied by LM_API_KEY presence (it may be
|
||||
# offline, and the no-auth setup uses a placeholder value), so it
|
||||
# also requires explicit selection.
|
||||
if pid in ("copilot", "lmstudio"):
|
||||
continue
|
||||
for env_var in pconfig.api_key_env_vars:
|
||||
if has_usable_secret(os.getenv(env_var, "")):
|
||||
@@ -3471,6 +3519,13 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
|
||||
key_source = ""
|
||||
api_key, key_source = _resolve_api_key_provider_secret(provider_id, pconfig)
|
||||
|
||||
# No-auth LM Studio: substitute a placeholder so runtime / auxiliary_client
|
||||
# see the local server as configured. doctor still reports unconfigured
|
||||
# because get_api_key_provider_status uses the raw secret resolver.
|
||||
if not api_key and provider_id == "lmstudio":
|
||||
api_key = LMSTUDIO_NOAUTH_PLACEHOLDER
|
||||
key_source = key_source or "default"
|
||||
|
||||
env_url = ""
|
||||
if pconfig.base_url_env_var:
|
||||
env_url = os.getenv(pconfig.base_url_env_var, "").strip()
|
||||
@@ -4081,6 +4136,326 @@ def _codex_device_code_login() -> Dict[str, Any]:
|
||||
}
|
||||
|
||||
|
||||
# ==================== MiniMax Portal OAuth ====================
|
||||
|
||||
def _minimax_pkce_pair() -> tuple:
    """Create the PKCE material for one MiniMax OAuth attempt.

    Returns a ``(code_verifier, code_challenge, state)`` triple:

    - ``code_verifier``: a random URL-safe token (capped at 96 characters).
    - ``code_challenge``: the S256 challenge, i.e. the URL-safe base64 of the
      verifier's SHA-256 digest with the ``=`` padding stripped.
    - ``state``: an independent random URL-safe token.
    """
    from secrets import token_urlsafe

    code_verifier = token_urlsafe(64)[:96]
    digest = hashlib.sha256(code_verifier.encode()).digest()
    code_challenge = base64.urlsafe_b64encode(digest).decode().rstrip("=")
    return code_verifier, code_challenge, token_urlsafe(16)
|
||||
|
||||
|
||||
def _minimax_request_user_code(
    client: httpx.Client, *, portal_base_url: str, client_id: str,
    code_challenge: str, state: str,
) -> Dict[str, Any]:
    """Request a user code from the MiniMax OAuth portal.

    POSTs the PKCE challenge and ``state`` as a form to
    ``{portal_base_url}/oauth/code`` and validates the reply.

    Args:
        client: HTTP client used to send the request.
        portal_base_url: Base URL of the MiniMax portal (no trailing slash).
        client_id: OAuth client identifier.
        code_challenge: S256 PKCE challenge derived from the code verifier.
        state: Opaque value the server must echo back unchanged.

    Returns:
        The decoded JSON object.  It contains ``user_code``,
        ``verification_uri`` and ``expired_in``, and its ``state`` equals
        the ``state`` argument.

    Raises:
        AuthError: with code ``authorization_failed`` on a non-200 reply,
            ``authorization_incomplete`` when the body is not a JSON object
            or lacks a required field, and ``state_mismatch`` when the
            echoed state differs from the one sent.
    """
    response = client.post(
        f"{portal_base_url}/oauth/code",
        data={
            "response_type": "code",
            "client_id": client_id,
            "scope": MINIMAX_OAUTH_SCOPE,
            "code_challenge": code_challenge,
            "code_challenge_method": "S256",
            "state": state,
        },
        headers={
            "Content-Type": "application/x-www-form-urlencoded",
            "Accept": "application/json",
            "x-request-id": str(uuid.uuid4()),
        },
    )
    if response.status_code != 200:
        raise AuthError(
            f"MiniMax OAuth authorization failed: {response.text or response.reason_phrase}",
            provider="minimax-oauth", code="authorization_failed",
        )

    # A 200 reply is not guaranteed to carry JSON (e.g. an HTML page from a
    # proxy).  Surface that as an AuthError rather than leaking a raw
    # decode error; JSON decode errors are ValueError subclasses.
    try:
        payload = response.json()
    except ValueError as exc:
        raise AuthError(
            "MiniMax OAuth response was not valid JSON.",
            provider="minimax-oauth", code="authorization_incomplete",
        ) from exc
    if not isinstance(payload, dict):
        raise AuthError(
            "MiniMax OAuth response was not a JSON object.",
            provider="minimax-oauth", code="authorization_incomplete",
        )

    for field in ("user_code", "verification_uri", "expired_in"):
        if field not in payload:
            raise AuthError(
                f"MiniMax OAuth response missing field: {field}",
                provider="minimax-oauth", code="authorization_incomplete",
            )
    # The server must echo our state verbatim; anything else means the reply
    # does not belong to this request.
    if payload.get("state") != state:
        raise AuthError(
            "MiniMax OAuth state mismatch (possible CSRF).",
            provider="minimax-oauth", code="state_mismatch",
        )
    return payload
|
||||
|
||||
|
||||
def _minimax_poll_token(
    client: httpx.Client, *, portal_base_url: str, client_id: str,
    user_code: str, code_verifier: str, expired_in: int, interval_ms: Optional[int],
) -> Dict[str, Any]:
    """Poll the MiniMax token endpoint until the user approves or time runs out.

    Args:
        client: HTTP client used for the requests.
        portal_base_url: Base URL of the MiniMax portal.
        client_id: OAuth client identifier.
        user_code: Code obtained from the authorization request.
        code_verifier: PKCE verifier matching the challenge sent earlier.
        expired_in: Either a unix-ms deadline or a duration in seconds (see
            the heuristic below).
        interval_ms: Poll interval in milliseconds; ``None``/0 means 2000.
            The effective interval is never shorter than two seconds.

    Returns:
        The ``status == "success"`` payload, containing ``access_token``,
        ``refresh_token`` and ``expired_in``.

    Raises:
        AuthError: ``token_exchange_failed`` on a non-200 response,
            ``authorization_denied`` when the portal reports ``error``,
            ``token_incomplete`` when a success payload lacks token fields,
            and ``timeout`` when the deadline passes.
    """
    import time as _time

    # OpenClaw treats expired_in as a unix-ms timestamp (Date.now() < expireTimeMs).
    # Defensive parsing: if it's small enough to be a duration, treat as seconds.
    now_ms = int(_time.time() * 1000)
    if expired_in > now_ms // 2:
        # Looks like a unix-ms timestamp.
        deadline = expired_in / 1000.0
    else:
        # Treat as duration in seconds from now.
        deadline = _time.time() + max(1, expired_in)
    interval = max(2.0, (interval_ms or 2000) / 1000.0)

    while _time.time() < deadline:
        response = client.post(
            f"{portal_base_url}/oauth/token",
            data={
                "grant_type": MINIMAX_OAUTH_GRANT_TYPE,
                "client_id": client_id,
                "user_code": user_code,
                "code_verifier": code_verifier,
            },
            headers={
                "Content-Type": "application/x-www-form-urlencoded",
                "Accept": "application/json",
            },
        )
        try:
            payload = response.json() if response.text else {}
        except Exception:
            payload = {}
        # A JSON array/string/number would break the .get() calls below.
        if not isinstance(payload, dict):
            payload = {}

        if response.status_code != 200:
            base_resp = payload.get("base_resp")
            status_msg = base_resp.get("status_msg") if isinstance(base_resp, dict) else None
            msg = status_msg or response.text
            raise AuthError(
                f"MiniMax OAuth error: {msg or 'unknown'}",
                provider="minimax-oauth", code="token_exchange_failed",
            )

        status = payload.get("status")
        if status == "error":
            raise AuthError(
                "MiniMax OAuth reported an error. Please try again later.",
                provider="minimax-oauth", code="authorization_denied",
            )
        if status == "success":
            if not all(payload.get(k) for k in ("access_token", "refresh_token", "expired_in")):
                raise AuthError(
                    "MiniMax OAuth success payload missing required token fields.",
                    provider="minimax-oauth", code="token_incomplete",
                )
            return payload

        # "pending" or any other status -> keep polling, but never sleep past
        # the deadline: that would only delay the timeout error.
        _time.sleep(min(interval, max(0.0, deadline - _time.time())))

    raise AuthError(
        "MiniMax OAuth timed out before authorization completed.",
        provider="minimax-oauth", code="timeout",
    )
|
||||
|
||||
|
||||
def _minimax_save_auth_state(auth_state: Dict[str, Any]) -> None:
    """Store ``auth_state`` under ``minimax-oauth`` in the Hermes auth store.

    The store (``~/.hermes/auth.json``) is loaded, updated and written back
    while the auth-store lock is held.
    """
    with _auth_store_lock():
        store = _load_auth_store()
        _save_provider_state(store, "minimax-oauth", auth_state)
        _save_auth_store(store)
|
||||
|
||||
|
||||
def _minimax_oauth_login(
    *, region: str = "global", open_browser: bool = True,
    timeout_seconds: float = 15.0,
) -> Dict[str, Any]:
    """Run MiniMax OAuth flow, persist tokens, return auth state dict.

    Args:
        region: ``"cn"`` selects the CN portal/inference URLs from the
            provider's ``extra`` config; any other value uses the defaults.
        open_browser: Try to open the verification URL automatically. Forced
            off when ``_is_remote_session()`` is true.
        timeout_seconds: Per-request HTTP timeout.

    Returns:
        The auth state that was persisted (tokens, URLs, expiry metadata).

    Raises:
        AuthError: propagated from the user-code request or the token poll.
    """
    pconfig = PROVIDER_REGISTRY["minimax-oauth"]
    if region == "cn":
        portal_base_url = pconfig.extra["cn_portal_base_url"]
        inference_base_url = pconfig.extra["cn_inference_base_url"]
    else:
        portal_base_url = pconfig.portal_base_url
        inference_base_url = pconfig.inference_base_url

    verifier, challenge, state = _minimax_pkce_pair()

    if _is_remote_session():
        open_browser = False

    print(f"Starting Hermes login via MiniMax ({region}) OAuth...")
    print(f"Portal: {portal_base_url}")

    with httpx.Client(timeout=httpx.Timeout(timeout_seconds),
                      headers={"Accept": "application/json"}) as client:
        code_data = _minimax_request_user_code(
            client, portal_base_url=portal_base_url,
            client_id=pconfig.client_id,
            code_challenge=challenge, state=state,
        )
        verification_url = str(code_data["verification_uri"])
        user_code = str(code_data["user_code"])

        print()
        print("To continue:")
        print(f"  1. Open: {verification_url}")
        print(f"  2. If prompted, enter code: {user_code}")
        if open_browser:
            if webbrowser.open(verification_url):
                print("  (Opened browser for verification)")
            else:
                print("  Could not open browser automatically -- use the URL above.")

        interval_raw = code_data.get("interval")
        interval_ms = int(interval_raw) if interval_raw is not None else None
        print("Waiting for approval...")

        token_data = _minimax_poll_token(
            client, portal_base_url=portal_base_url,
            client_id=pconfig.client_id,
            user_code=user_code, code_verifier=verifier,
            expired_in=int(code_data["expired_in"]),
            interval_ms=interval_ms,
        )

    now = datetime.now(timezone.utc)
    now_s = now.timestamp()
    raw_expiry = int(token_data["expired_in"])
    # Same defensive reading the token poller applies to ``expired_in``: a
    # value this large can only be a unix-ms timestamp, and adding it to "now"
    # as if it were seconds would overflow datetime.fromtimestamp().
    if raw_expiry > int(now_s * 1000) // 2:
        expires_at = raw_expiry / 1000.0
        expires_in_s = max(0, int(expires_at - now_s))
    else:
        expires_in_s = raw_expiry
        expires_at = now_s + expires_in_s

    auth_state = {
        "provider": "minimax-oauth",
        "region": region,
        "portal_base_url": portal_base_url,
        "inference_base_url": inference_base_url,
        "client_id": pconfig.client_id,
        "scope": MINIMAX_OAUTH_SCOPE,
        "token_type": token_data.get("token_type", "Bearer"),
        "access_token": token_data["access_token"],
        "refresh_token": token_data["refresh_token"],
        "resource_url": token_data.get("resource_url"),
        "obtained_at": now.isoformat(),
        "expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(),
        "expires_in": expires_in_s,
    }

    _minimax_save_auth_state(auth_state)
    print("\u2713 MiniMax OAuth login successful.")
    if msg := token_data.get("notification_message"):
        print(f"Note from MiniMax: {msg}")
    return auth_state
|
||||
|
||||
|
||||
def _refresh_minimax_oauth_state(
    state: Dict[str, Any], *, timeout_seconds: float = 15.0,
    force: bool = False,
) -> Dict[str, Any]:
    """Refresh MiniMax OAuth access token if close to expiry (or forced).

    Args:
        state: Persisted auth state; must hold ``refresh_token``,
            ``portal_base_url`` and ``client_id``.
        timeout_seconds: HTTP timeout for the refresh request.
        force: Refresh even when the token is not near expiry.

    Returns:
        ``state`` unchanged when no refresh is needed, otherwise a new dict
        with updated token and expiry fields (also persisted).

    Raises:
        AuthError: ``no_refresh_token`` when the state has no refresh token;
            ``refresh_failed`` when the portal rejects the request or returns
            an unusable payload.
    """
    if not state.get("refresh_token"):
        raise AuthError(
            "MiniMax OAuth state has no refresh_token; please re-login.",
            provider="minimax-oauth", code="no_refresh_token", relogin_required=True,
        )
    try:
        expires_at = datetime.fromisoformat(state.get("expires_at", "")).timestamp()
    except Exception:
        # Missing or unparsable expiry: treat the token as already expired.
        expires_at = 0.0
    now = time.time()
    if not force and (expires_at - now) > MINIMAX_OAUTH_REFRESH_SKEW_SECONDS:
        return state

    portal_base_url = state["portal_base_url"]
    with httpx.Client(timeout=httpx.Timeout(timeout_seconds)) as client:
        response = client.post(
            f"{portal_base_url}/oauth/token",
            data={
                "grant_type": "refresh_token",
                "client_id": state["client_id"],
                "refresh_token": state["refresh_token"],
            },
            headers={
                "Content-Type": "application/x-www-form-urlencoded",
                "Accept": "application/json",
            },
        )
    if response.status_code != 200:
        body = response.text.lower()
        relogin = any(m in body for m in
                      ("invalid_grant", "refresh_token_reused", "invalid_refresh_token"))
        raise AuthError(
            f"MiniMax OAuth refresh failed: {response.text or response.reason_phrase}",
            provider="minimax-oauth", code="refresh_failed",
            relogin_required=relogin,
        )

    try:
        payload = response.json()
    except ValueError as exc:
        # Malformed body: report it, but do not tell the user to re-login.
        raise AuthError(
            "MiniMax OAuth refresh returned an invalid response.",
            provider="minimax-oauth", code="refresh_failed",
            relogin_required=False,
        ) from exc
    if not isinstance(payload, dict) or payload.get("status") != "success":
        raise AuthError(
            "MiniMax OAuth refresh did not return success.",
            provider="minimax-oauth", code="refresh_failed",
            relogin_required=True,
        )

    access_token = payload.get("access_token")
    try:
        raw_expiry = int(payload.get("expired_in"))
    except (TypeError, ValueError):
        raw_expiry = None
    if not access_token or raw_expiry is None:
        raise AuthError(
            "MiniMax OAuth refresh response missing required token fields.",
            provider="minimax-oauth", code="refresh_failed",
            relogin_required=False,
        )

    now_dt = datetime.now(timezone.utc)
    now_s = now_dt.timestamp()
    # Same defensive reading the token poller applies to ``expired_in``: a
    # value this large can only be a unix-ms timestamp, not a duration.
    if raw_expiry > int(now_s * 1000) // 2:
        new_expires_at = raw_expiry / 1000.0
        expires_in_s = max(0, int(new_expires_at - now_s))
    else:
        expires_in_s = raw_expiry
        new_expires_at = now_s + expires_in_s

    new_state = dict(state)
    new_state.update({
        "access_token": access_token,
        # Keep the stored refresh token when the portal omits the field OR
        # sends it as null/empty; overwriting would lose the ability to refresh.
        "refresh_token": payload.get("refresh_token") or state["refresh_token"],
        "obtained_at": now_dt.isoformat(),
        "expires_at": datetime.fromtimestamp(new_expires_at, tz=timezone.utc).isoformat(),
        "expires_in": expires_in_s,
    })
    _minimax_save_auth_state(new_state)
    return new_state
|
||||
|
||||
|
||||
def resolve_minimax_oauth_runtime_credentials(
    *, min_token_ttl_seconds: int = MINIMAX_OAUTH_REFRESH_SKEW_SECONDS,
) -> Dict[str, Any]:
    """Return {provider, api_key, base_url, source} for minimax-oauth.

    Args:
        min_token_ttl_seconds: Minimum remaining lifetime required of the
            access token. A token with less time left (or an unreadable
            expiry) is refreshed before being returned. The refresh helper
            applies its own skew as well, so the effective threshold is the
            larger of the two.

    Raises:
        AuthError: ``not_logged_in`` when no access token is stored; any
            error raised by the refresh is propagated.
    """
    state = get_provider_auth_state("minimax-oauth")
    if not state or not state.get("access_token"):
        raise AuthError(
            "Not logged into MiniMax OAuth. Run `hermes model` and select "
            "MiniMax (OAuth).",
            provider="minimax-oauth", code="not_logged_in", relogin_required=True,
        )

    # Previously this parameter was ignored. Honour it by forcing a refresh
    # whenever the token has no more than the requested TTL left.
    try:
        remaining = datetime.fromisoformat(state.get("expires_at", "")).timestamp() - time.time()
    except Exception:
        remaining = 0.0
    state = _refresh_minimax_oauth_state(state, force=remaining <= min_token_ttl_seconds)

    return {
        "provider": "minimax-oauth",
        "api_key": state["access_token"],
        "base_url": state["inference_base_url"].rstrip("/"),
        "source": "oauth",
    }
|
||||
|
||||
|
||||
def get_minimax_oauth_auth_status() -> Dict[str, Any]:
    """Summarise the stored MiniMax OAuth login.

    Returns:
        ``{"logged_in": False, "provider": ...}`` when no access token is
        stored. Otherwise a dict with ``logged_in`` (whether the stored expiry
        is still in the future; a stored token with an unreadable expiry
        counts as logged in), ``provider``, ``region`` and ``expires_at``.
    """
    state = get_provider_auth_state("minimax-oauth")
    if not state or not state.get("access_token"):
        return {"logged_in": False, "provider": "minimax-oauth"}

    try:
        expiry_ts = datetime.fromisoformat(state.get("expires_at", "")).timestamp()
        still_valid = (expiry_ts - time.time()) > 0
    except Exception:
        # Expiry missing or unparsable: fall back to "do we hold a token".
        still_valid = bool(state.get("access_token"))

    status: Dict[str, Any] = {"logged_in": still_valid, "provider": "minimax-oauth"}
    status["region"] = state.get("region", "global")
    status["expires_at"] = state.get("expires_at")
    return status
|
||||
|
||||
|
||||
def _login_minimax_oauth(args, pconfig: ProviderConfig) -> None:
    """CLI entry point: run the MiniMax OAuth login with options from ``args``.

    Reads ``region`` (default ``"global"``), ``no_browser`` and ``timeout``
    (default 15 seconds) from ``args``. On ``AuthError`` the formatted error
    is printed and the process exits with status 1. ``pconfig`` is accepted
    but not read here.
    """
    login_options = {
        "region": getattr(args, "region", None) or "global",
        "open_browser": not getattr(args, "no_browser", False),
        "timeout_seconds": getattr(args, "timeout", None) or 15.0,
    }
    try:
        _minimax_oauth_login(**login_options)
    except AuthError as exc:
        print(format_auth_error(exc))
        raise SystemExit(1)
|
||||
|
||||
|
||||
def _nous_device_code_login(
|
||||
*,
|
||||
portal_base_url: Optional[str] = None,
|
||||
|
||||
@@ -33,7 +33,7 @@ from hermes_constants import OPENROUTER_BASE_URL
|
||||
|
||||
|
||||
# Providers that support OAuth login in addition to API keys.
|
||||
_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"}
|
||||
_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"}
|
||||
|
||||
|
||||
def _get_custom_provider_names() -> list:
|
||||
@@ -170,7 +170,7 @@ def auth_add_command(args) -> None:
|
||||
if provider.startswith(CUSTOM_POOL_PREFIX):
|
||||
requested_type = AUTH_TYPE_API_KEY
|
||||
else:
|
||||
requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"} else AUTH_TYPE_API_KEY
|
||||
requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"} else AUTH_TYPE_API_KEY
|
||||
|
||||
pool = load_pool(provider)
|
||||
|
||||
@@ -333,6 +333,27 @@ def auth_add_command(args) -> None:
|
||||
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
|
||||
return
|
||||
|
||||
if provider == "minimax-oauth":
|
||||
from hermes_cli.auth import resolve_minimax_oauth_runtime_credentials
|
||||
creds = resolve_minimax_oauth_runtime_credentials()
|
||||
label = (getattr(args, "label", None) or "").strip() or label_from_token(
|
||||
creds["api_key"],
|
||||
_oauth_default_label(provider, len(pool.entries()) + 1),
|
||||
)
|
||||
entry = PooledCredential(
|
||||
provider=provider,
|
||||
id=uuid.uuid4().hex[:6],
|
||||
label=label,
|
||||
auth_type=AUTH_TYPE_OAUTH,
|
||||
priority=0,
|
||||
source=f"{SOURCE_MANUAL}:minimax_oauth",
|
||||
access_token=creds["api_key"],
|
||||
base_url=creds.get("base_url"),
|
||||
)
|
||||
pool.add_entry(entry)
|
||||
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
|
||||
return
|
||||
|
||||
raise SystemExit(f"`hermes auth add {provider}` is not implemented for auth type {requested_type} yet.")
|
||||
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
from urllib import request as urllib_request
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
from urllib.parse import urlparse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
+152
-57
@@ -696,6 +696,78 @@ def run_quick_backup(args) -> None:
|
||||
print("No state files found to snapshot.")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shared full-zip backup helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _write_full_zip_backup(out_path: Path, hermes_root: Path) -> Optional[Path]:
    """Write a full zip snapshot of ``hermes_root`` to ``out_path``.

    Uses the same exclusion rules and SQLite safe-copy as :func:`run_backup`.

    Args:
        out_path: Destination zip file; skipped if it lies inside the tree.
        hermes_root: Directory tree to snapshot (symlinked dirs not followed).

    Returns:
        The output path on success, ``None`` on failure (nothing to back up,
        walk error, or write error — caller should surface the outcome but
        not raise). Individual unreadable files are skipped, not fatal.
    """
    # The destination is the same for every candidate file, so resolve it once
    # instead of once per file inside the walk.
    try:
        resolved_out: Optional[Path] = out_path.resolve()
    except (OSError, ValueError):
        resolved_out = None

    files_to_add: list[tuple[Path, Path]] = []
    try:
        for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
            dp = Path(dirpath)
            # Prune excluded directories in-place so os.walk doesn't descend
            dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]

            for fname in filenames:
                fpath = dp / fname
                try:
                    rel = fpath.relative_to(hermes_root)
                except ValueError:
                    continue

                if _should_exclude(rel):
                    continue

                # Skip the output zip itself if it already exists inside root.
                if resolved_out is not None:
                    try:
                        if fpath.resolve() == resolved_out:
                            continue
                    except (OSError, ValueError):
                        pass

                files_to_add.append((fpath, rel))
    except OSError as exc:
        logger.warning("Full-zip backup: walk failed: %s", exc)
        return None

    if not files_to_add:
        return None

    try:
        with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
            for abs_path, rel_path in files_to_add:
                try:
                    if abs_path.suffix == ".db":
                        # Archive a consistent copy of the database rather
                        # than the live file.
                        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
                            tmp_db = Path(tmp.name)
                        try:
                            if _safe_copy_db(abs_path, tmp_db):
                                zf.write(tmp_db, arcname=str(rel_path))
                            else:
                                # Leave a trace when a database is left out.
                                logger.debug(
                                    "Skipping %s in zip backup: safe copy failed", rel_path
                                )
                        finally:
                            tmp_db.unlink(missing_ok=True)
                    else:
                        zf.write(abs_path, arcname=str(rel_path))
                except (OSError, ValueError) as exc:  # PermissionError is an OSError
                    logger.debug("Skipping %s in zip backup: %s", rel_path, exc)
                    continue
    except OSError as exc:
        logger.warning("Full-zip backup: zip write failed: %s", exc)
        # Best-effort cleanup of partial file
        try:
            out_path.unlink(missing_ok=True)
        except OSError:
            pass
        return None

    return out_path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pre-update auto-backup
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -768,64 +840,87 @@ def create_pre_update_backup(
|
||||
stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
|
||||
out_path = backup_dir / f"{_PRE_UPDATE_PREFIX}{stamp}.zip"
|
||||
|
||||
# Collect files (same logic as run_backup, minus the chatty progress prints)
|
||||
files_to_add: list[tuple[Path, Path]] = []
|
||||
try:
|
||||
for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
|
||||
dp = Path(dirpath)
|
||||
# Prune excluded directories in-place so os.walk doesn't descend
|
||||
dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]
|
||||
|
||||
for fname in filenames:
|
||||
fpath = dp / fname
|
||||
try:
|
||||
rel = fpath.relative_to(hermes_root)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
if _should_exclude(rel):
|
||||
continue
|
||||
|
||||
# Skip the output zip itself if it already exists
|
||||
try:
|
||||
if fpath.resolve() == out_path.resolve():
|
||||
continue
|
||||
except (OSError, ValueError):
|
||||
pass
|
||||
|
||||
files_to_add.append((fpath, rel))
|
||||
except OSError as exc:
|
||||
logger.warning("Pre-update backup: walk failed: %s", exc)
|
||||
return None
|
||||
|
||||
if not files_to_add:
|
||||
return None
|
||||
|
||||
try:
|
||||
with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
|
||||
for abs_path, rel_path in files_to_add:
|
||||
try:
|
||||
if abs_path.suffix == ".db":
|
||||
with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
|
||||
tmp_db = Path(tmp.name)
|
||||
try:
|
||||
if _safe_copy_db(abs_path, tmp_db):
|
||||
zf.write(tmp_db, arcname=str(rel_path))
|
||||
finally:
|
||||
tmp_db.unlink(missing_ok=True)
|
||||
else:
|
||||
zf.write(abs_path, arcname=str(rel_path))
|
||||
except (PermissionError, OSError, ValueError) as exc:
|
||||
logger.debug("Skipping %s in pre-update backup: %s", rel_path, exc)
|
||||
continue
|
||||
except OSError as exc:
|
||||
logger.warning("Pre-update backup: zip write failed: %s", exc)
|
||||
# Best-effort cleanup of partial file
|
||||
try:
|
||||
out_path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
result = _write_full_zip_backup(out_path, hermes_root)
|
||||
if result is None:
|
||||
return None
|
||||
|
||||
_prune_pre_update_backups(backup_dir, keep=keep)
|
||||
return out_path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pre-migration auto-backup (used by `hermes claw migrate`)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_PRE_MIGRATION_PREFIX = "pre-migration-"
_PRE_MIGRATION_DEFAULT_KEEP = 5


def _prune_pre_migration_backups(backup_dir: Path, keep: int) -> int:
    """Delete the oldest pre-migration archives beyond the ``keep`` newest.

    Only regular files named ``pre-migration-*.zip`` are considered, so other
    backups in the same directory are never touched. Archives are ranked by
    file name, newest first. A negative ``keep`` is treated as 0.

    Returns:
        The number of archives actually deleted; deletion failures are logged
        and not counted.
    """
    keep = max(keep, 0)
    if not backup_dir.exists():
        return 0

    def _is_pre_migration_zip(candidate: Path) -> bool:
        return (
            candidate.is_file()
            and candidate.name.startswith(_PRE_MIGRATION_PREFIX)
            and candidate.suffix.lower() == ".zip"
        )

    newest_first = sorted(
        filter(_is_pre_migration_zip, backup_dir.iterdir()),
        key=lambda entry: entry.name,
        reverse=True,
    )

    removed = 0
    for stale in newest_first[keep:]:
        try:
            stale.unlink()
        except OSError as exc:
            logger.warning("Failed to prune pre-migration backup %s: %s", stale.name, exc)
        else:
            removed += 1
    return removed
|
||||
|
||||
|
||||
def create_pre_migration_backup(
    hermes_home: Optional[Path] = None,
    keep: int = _PRE_MIGRATION_DEFAULT_KEEP,
) -> Optional[Path]:
    """Snapshot HERMES_HOME into ``backups/`` ahead of a
    ``hermes claw migrate`` apply.

    The archive is produced by ``_write_full_zip_backup`` — the same helper
    :func:`create_pre_update_backup` uses — so exclusions and SQLite
    safe-copy match, and the result is restorable with
    ``hermes import <archive>``. It is written to
    ``<HERMES_HOME>/backups/pre-migration-<timestamp>.zip``, after which old
    pre-migration archives beyond ``keep`` are pruned.

    Returns:
        The path of the new zip, or ``None`` when the home directory does not
        exist, the backup directory cannot be created, nothing was found to
        back up, or the write failed. The caller decides whether to abort or
        proceed.
    """
    root = hermes_home or get_default_hermes_root()
    if not root.is_dir():
        return None

    # Reuses the shared backups/ directory so `hermes import` and the
    # update-backup listing pick up pre-migration archives too.
    target_dir = _pre_update_backup_dir(root)
    try:
        target_dir.mkdir(parents=True, exist_ok=True)
    except OSError as exc:
        logger.warning("Could not create pre-migration backup dir %s: %s", target_dir, exc)
        return None

    timestamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
    archive = target_dir / f"{_PRE_MIGRATION_PREFIX}{timestamp}.zip"

    if _write_full_zip_backup(archive, root) is None:
        return None

    _prune_pre_migration_backups(target_dir, keep=keep)
    return archive
|
||||
|
||||
+85
-38
@@ -5,6 +5,7 @@ Pure display functions with no HermesCLI state dependency.
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import threading
|
||||
@@ -122,35 +123,36 @@ def get_available_skills() -> Dict[str, List[str]]:
|
||||
# Cache update check results for 6 hours to avoid repeated git fetches
|
||||
_UPDATE_CHECK_CACHE_SECONDS = 6 * 3600
|
||||
|
||||
# Sentinel returned when we know an update exists but can't count commits
|
||||
# (e.g. nix-built hermes — no local git history to count against).
|
||||
UPDATE_AVAILABLE_NO_COUNT = -1
|
||||
|
||||
def check_for_updates() -> Optional[int]:
|
||||
"""Check how many commits behind origin/main the local repo is.
|
||||
_UPSTREAM_REPO_URL = "https://github.com/NousResearch/hermes-agent.git"
|
||||
|
||||
Does a ``git fetch`` at most once every 6 hours (cached to
|
||||
``~/.hermes/.update_check``). Returns the number of commits behind,
|
||||
or ``None`` if the check fails or isn't applicable.
|
||||
|
||||
def _check_via_rev(local_rev: str) -> Optional[int]:
|
||||
"""Compare an embedded git revision to upstream main via ls-remote.
|
||||
|
||||
Returns 0 if up-to-date, ``UPDATE_AVAILABLE_NO_COUNT`` if behind,
|
||||
or ``None`` on failure.
|
||||
"""
|
||||
hermes_home = get_hermes_home()
|
||||
repo_dir = hermes_home / "hermes-agent"
|
||||
cache_file = hermes_home / ".update_check"
|
||||
|
||||
# Must be a git repo — fall back to project root for dev installs
|
||||
if not (repo_dir / ".git").exists():
|
||||
repo_dir = Path(__file__).parent.parent.resolve()
|
||||
if not (repo_dir / ".git").exists():
|
||||
return None
|
||||
|
||||
# Read cache
|
||||
now = time.time()
|
||||
try:
|
||||
if cache_file.exists():
|
||||
cached = json.loads(cache_file.read_text())
|
||||
if now - cached.get("ts", 0) < _UPDATE_CHECK_CACHE_SECONDS:
|
||||
return cached.get("behind")
|
||||
result = subprocess.run(
|
||||
["git", "ls-remote", _UPSTREAM_REPO_URL, "refs/heads/main"],
|
||||
capture_output=True, text=True, timeout=10,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
if result.returncode != 0 or not result.stdout:
|
||||
return None
|
||||
upstream_rev = result.stdout.split()[0]
|
||||
if not upstream_rev:
|
||||
return None
|
||||
return 0 if upstream_rev == local_rev else UPDATE_AVAILABLE_NO_COUNT
|
||||
|
||||
# Fetch latest refs (fast — only downloads ref metadata, no files)
|
||||
|
||||
def _check_via_local_git(repo_dir: Path) -> Optional[int]:
|
||||
"""Count commits behind origin/main in a local checkout."""
|
||||
try:
|
||||
subprocess.run(
|
||||
["git", "fetch", "origin", "--quiet"],
|
||||
@@ -160,7 +162,6 @@ def check_for_updates() -> Optional[int]:
|
||||
except Exception:
|
||||
pass # Offline or timeout — use stale refs, that's fine
|
||||
|
||||
# Count commits behind
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["git", "rev-list", "--count", "HEAD..origin/main"],
|
||||
@@ -168,15 +169,52 @@ def check_for_updates() -> Optional[int]:
|
||||
cwd=str(repo_dir),
|
||||
)
|
||||
if result.returncode == 0:
|
||||
behind = int(result.stdout.strip())
|
||||
else:
|
||||
behind = None
|
||||
return int(result.stdout.strip())
|
||||
except Exception:
|
||||
behind = None
|
||||
pass
|
||||
return None
|
||||
|
||||
# Write cache
|
||||
|
||||
def check_for_updates() -> Optional[int]:
|
||||
"""Check whether a Hermes update is available.
|
||||
|
||||
Two paths: if ``HERMES_REVISION`` is set (nix builds embed it), compare
|
||||
it to upstream main via ``git ls-remote``. Otherwise look for a local
|
||||
git checkout and count commits behind ``origin/main``.
|
||||
|
||||
Returns the number of commits behind, ``UPDATE_AVAILABLE_NO_COUNT`` (-1)
|
||||
if behind but the count is unknown, ``0`` if up-to-date, or ``None`` if
|
||||
the check failed or doesn't apply. Cached for 6 hours.
|
||||
"""
|
||||
hermes_home = get_hermes_home()
|
||||
cache_file = hermes_home / ".update_check"
|
||||
embedded_rev = os.environ.get("HERMES_REVISION") or None
|
||||
|
||||
# Read cache — invalidate if the embedded rev has changed since last check
|
||||
now = time.time()
|
||||
try:
|
||||
cache_file.write_text(json.dumps({"ts": now, "behind": behind}))
|
||||
if cache_file.exists():
|
||||
cached = json.loads(cache_file.read_text())
|
||||
if (
|
||||
now - cached.get("ts", 0) < _UPDATE_CHECK_CACHE_SECONDS
|
||||
and cached.get("rev") == embedded_rev
|
||||
):
|
||||
return cached.get("behind")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if embedded_rev:
|
||||
behind = _check_via_rev(embedded_rev)
|
||||
else:
|
||||
repo_dir = hermes_home / "hermes-agent"
|
||||
if not (repo_dir / ".git").exists():
|
||||
repo_dir = Path(__file__).parent.parent.resolve()
|
||||
if not (repo_dir / ".git").exists():
|
||||
return None
|
||||
behind = _check_via_local_git(repo_dir)
|
||||
|
||||
try:
|
||||
cache_file.write_text(json.dumps({"ts": now, "behind": behind, "rev": embedded_rev}))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -549,20 +587,29 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
|
||||
# Update check — use prefetched result if available
|
||||
try:
|
||||
behind = get_update_result(timeout=0.5)
|
||||
if behind and behind > 0:
|
||||
from hermes_cli.config import recommended_update_command
|
||||
commits_word = "commit" if behind == 1 else "commits"
|
||||
right_lines.append(
|
||||
f"[bold yellow]⚠ {behind} {commits_word} behind[/]"
|
||||
f"[dim yellow] — run [bold]{recommended_update_command()}[/bold] to update[/]"
|
||||
)
|
||||
if behind is not None and behind != 0:
|
||||
from hermes_cli.config import get_managed_update_command, recommended_update_command
|
||||
if behind > 0:
|
||||
commits_word = "commit" if behind == 1 else "commits"
|
||||
right_lines.append(
|
||||
f"[bold yellow]⚠ {behind} {commits_word} behind[/]"
|
||||
f"[dim yellow] — run [bold]{recommended_update_command()}[/bold] to update[/]"
|
||||
)
|
||||
else:
|
||||
# UPDATE_AVAILABLE_NO_COUNT: nix-built hermes; we know an update
|
||||
# exists but not by how much, and we don't know how the user
|
||||
# installed it (nix run, profile, system flake, home-manager).
|
||||
managed_cmd = get_managed_update_command()
|
||||
line = "[bold yellow]⚠ update available[/]"
|
||||
if managed_cmd:
|
||||
line += f"[dim yellow] — run [bold]{managed_cmd}[/bold][/]"
|
||||
right_lines.append(line)
|
||||
except Exception:
|
||||
pass # Never break the banner over an update check
|
||||
|
||||
right_content = "\n".join(right_lines)
|
||||
layout_table.add_row(left_content, right_content)
|
||||
|
||||
agent_name = _skin_branding("agent_name", "Hermes Agent")
|
||||
title_color = _skin_color("banner_title", "#FFD700")
|
||||
border_color = _skin_color("banner_border", "#CD7F32")
|
||||
version_label = format_banner_version_label()
|
||||
|
||||
@@ -0,0 +1,138 @@
|
||||
"""Shared helpers for attaching Hermes to a local Chrome CDP port."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import platform
|
||||
import shlex
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
DEFAULT_BROWSER_CDP_PORT = 9222
|
||||
DEFAULT_BROWSER_CDP_URL = f"http://127.0.0.1:{DEFAULT_BROWSER_CDP_PORT}"
|
||||
|
||||
_DARWIN_APPS = (
|
||||
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
|
||||
"/Applications/Chromium.app/Contents/MacOS/Chromium",
|
||||
"/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
|
||||
"/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
|
||||
)
|
||||
|
||||
_WINDOWS_INSTALL_PARTS = (
|
||||
("Google", "Chrome", "Application", "chrome.exe"),
|
||||
("Chromium", "Application", "chrome.exe"),
|
||||
("Chromium", "Application", "chromium.exe"),
|
||||
("BraveSoftware", "Brave-Browser", "Application", "brave.exe"),
|
||||
("Microsoft", "Edge", "Application", "msedge.exe"),
|
||||
)
|
||||
|
||||
_LINUX_BIN_NAMES = (
|
||||
"google-chrome", "google-chrome-stable", "chromium-browser",
|
||||
"chromium", "brave-browser", "microsoft-edge",
|
||||
)
|
||||
|
||||
_WINDOWS_BIN_NAMES = (
|
||||
"chrome.exe", "msedge.exe", "brave.exe", "chromium.exe",
|
||||
"chrome", "msedge", "brave", "chromium",
|
||||
)
|
||||
|
||||
|
||||
def get_chrome_debug_candidates(system: str) -> list[str]:
    """List existing Chromium-family browser executables for ``system``.

    ``"Darwin"`` checks the known application bundle paths. ``"Windows"``
    checks binaries on ``PATH`` and then the standard install locations under
    ``ProgramFiles``, ``ProgramFiles(x86)`` and ``LOCALAPPDATA``. Any other
    value checks the Linux binary names on ``PATH`` and then the Windows
    install locations under ``/mnt/c``.

    Only paths that exist as files are returned, in discovery order, without
    duplicates (compared after path normalisation).
    """
    found: list[str] = []
    known: set[str] = set()

    def add(path: str | None) -> None:
        if not path:
            return
        key = os.path.normcase(os.path.normpath(path))
        if key in known or not os.path.isfile(path):
            return
        found.append(path)
        known.add(key)

    def add_under(bases: tuple[str | None, ...]) -> None:
        for base in bases:
            if not base:
                continue
            for parts in _WINDOWS_INSTALL_PARTS:
                add(os.path.join(base, *parts))

    if system == "Darwin":
        for app in _DARWIN_APPS:
            add(app)
    elif system == "Windows":
        for name in _WINDOWS_BIN_NAMES:
            add(shutil.which(name))
        add_under((
            os.environ.get("ProgramFiles"),
            os.environ.get("ProgramFiles(x86)"),
            os.environ.get("LOCALAPPDATA"),
        ))
    else:
        for name in _LINUX_BIN_NAMES:
            add(shutil.which(name))
        add_under(("/mnt/c/Program Files", "/mnt/c/Program Files (x86)"))

    return found
|
||||
|
||||
|
||||
def chrome_debug_data_dir() -> str:
    """Return the ``chrome-debug`` directory under the Hermes home, as a string."""
    debug_dir = get_hermes_home() / "chrome-debug"
    return str(debug_dir)
|
||||
|
||||
|
||||
def _chrome_debug_args(port: int) -> list[str]:
    """Build the browser flags that enable remote debugging on ``port``.

    The flags select the Hermes debug profile directory and suppress the
    first-run and default-browser prompts.
    """
    flags = [f"--remote-debugging-port={port}"]
    flags.append(f"--user-data-dir={chrome_debug_data_dir()}")
    flags.extend(("--no-first-run", "--no-default-browser-check"))
    return flags
|
||||
|
||||
|
||||
def manual_chrome_debug_command(port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None) -> str | None:
    """Return a shell command the user can run to start a debug browser.

    Args:
        port: Remote-debugging port to request.
        system: Platform name; defaults to ``platform.system()``.

    Returns:
        A command line for the first detected browser, quoted with
        ``subprocess.list2cmdline`` on Windows and ``shlex.join`` elsewhere.
        With no browser detected, macOS falls back to an
        ``open -a "Google Chrome"`` command; other platforms get ``None``.
    """
    if not system:
        system = platform.system()
    browsers = get_chrome_debug_candidates(system)

    if not browsers:
        if system != "Darwin":
            return None
        data_dir = chrome_debug_data_dir()
        return (
            f'open -a "Google Chrome" --args --remote-debugging-port={port} '
            f'--user-data-dir="{data_dir}" --no-first-run --no-default-browser-check'
        )

    argv = [browsers[0]] + _chrome_debug_args(port)
    if system == "Windows":
        return subprocess.list2cmdline(argv)
    return shlex.join(argv)
|
||||
|
||||
|
||||
def _detach_kwargs(system: str) -> dict:
    """Return the ``subprocess.Popen`` keyword arguments used to detach a child.

    Non-Windows systems get ``start_new_session=True``. Windows gets
    ``creationflags`` combining ``DETACHED_PROCESS`` and
    ``CREATE_NEW_PROCESS_GROUP`` when this Python exposes them, otherwise an
    empty dict.
    """
    if system == "Windows":
        detached = getattr(subprocess, "DETACHED_PROCESS", 0)
        new_group = getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0)
        flags = detached | new_group
        if not flags:
            return {}
        return {"creationflags": flags}
    return {"start_new_session": True}
|
||||
|
||||
|
||||
def try_launch_chrome_debug(port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None) -> bool:
    """Try to spawn the first candidate browser with remote debugging enabled.

    Args:
        port: Value for the ``--remote-debugging-port`` flag.
        system: Platform name in ``platform.system()`` form; detected
            automatically when omitted or empty.

    Returns:
        ``True`` if the browser process was spawned. ``False`` if no candidate
        executable was found, or if the profile directory or the process could
        not be created. ``True`` only means the spawn call succeeded; this
        function does not check that the debug port accepts connections.
    """
    system = system or platform.system()
    candidates = get_chrome_debug_candidates(system)
    if not candidates:
        return False

    try:
        # Creating the profile directory is part of the best-effort launch:
        # an unwritable Hermes home must yield False like any other launch
        # failure instead of raising out of a bool-returning helper.
        os.makedirs(chrome_debug_data_dir(), exist_ok=True)
        subprocess.Popen(
            [candidates[0], *_chrome_debug_args(port)],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            **_detach_kwargs(system),
        )
    except Exception:
        # Deliberately broad: any failure here just means "could not launch".
        return False
    return True
|
||||
+67
-6
@@ -4,7 +4,8 @@ Usage:
|
||||
hermes claw migrate # Preview then migrate (always shows preview first)
|
||||
hermes claw migrate --dry-run # Preview only, no changes
|
||||
hermes claw migrate --yes # Skip confirmation prompt
|
||||
hermes claw migrate --preset full --overwrite # Full migration, overwrite conflicts
|
||||
hermes claw migrate --preset full --overwrite --migrate-secrets # Full run w/ secrets
|
||||
hermes claw migrate --no-backup # Skip pre-migration snapshot
|
||||
hermes claw cleanup # Archive leftover OpenClaw directories
|
||||
hermes claw cleanup --dry-run # Preview what would be archived
|
||||
"""
|
||||
@@ -15,6 +16,7 @@ import subprocess
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from hermes_cli.config import get_hermes_home, get_config_path, load_config, save_config
|
||||
from hermes_constants import get_optional_skills_dir
|
||||
@@ -321,10 +323,13 @@ def _cmd_migrate(args):
|
||||
migrate_secrets = getattr(args, "migrate_secrets", False)
|
||||
workspace_target = getattr(args, "workspace_target", None)
|
||||
skill_conflict = getattr(args, "skill_conflict", "skip")
|
||||
no_backup = getattr(args, "no_backup", False)
|
||||
|
||||
# If using the "full" preset, secrets are included by default
|
||||
if preset == "full":
|
||||
migrate_secrets = True
|
||||
# Secrets are never included implicitly — they must be explicitly requested
|
||||
# via --migrate-secrets, even under --preset full. This mirrors OpenClaw's
|
||||
# migrate-hermes posture (two-phase: run once without secrets, rerun with
|
||||
# --include-secrets) and prevents a --preset full invocation from silently
|
||||
# importing API keys that the user may not have intended to copy.
|
||||
|
||||
print()
|
||||
print(
|
||||
@@ -431,15 +436,24 @@ def _cmd_migrate(args):
|
||||
|
||||
preview_summary = preview_report.get("summary", {})
|
||||
preview_count = preview_summary.get("migrated", 0)
|
||||
preview_conflicts = preview_summary.get("conflict", 0)
|
||||
|
||||
if preview_count == 0:
|
||||
# "Nothing to migrate" means nothing migrated AND nothing blocked by
|
||||
# conflicts. If there are conflicts, we still want to show the plan and
|
||||
# surface the refusal/--overwrite guidance instead of silently bailing.
|
||||
if preview_count == 0 and preview_conflicts == 0:
|
||||
print()
|
||||
print_info("Nothing to migrate from OpenClaw.")
|
||||
_print_migration_report(preview_report, dry_run=True)
|
||||
return
|
||||
|
||||
print()
|
||||
print_header(f"Migration Preview — {preview_count} item(s) would be imported")
|
||||
if preview_count > 0:
|
||||
print_header(f"Migration Preview — {preview_count} item(s) would be imported")
|
||||
else:
|
||||
print_header(
|
||||
f"Migration Preview — {preview_conflicts} conflict(s), nothing would be imported"
|
||||
)
|
||||
print_info("No changes have been made yet. Review the list below:")
|
||||
_print_migration_report(preview_report, dry_run=True)
|
||||
|
||||
@@ -447,6 +461,24 @@ def _cmd_migrate(args):
|
||||
if dry_run:
|
||||
return
|
||||
|
||||
# ── Phase 1b: Refuse if the plan has conflicts and --overwrite is not set ─
|
||||
# Modelled on OpenClaw's assertConflictFreePlan() — apply is a safe no-op
|
||||
# on conflicts unless the user explicitly opts in to overwriting. Without
|
||||
# this guard, the user would answer "yes, proceed" and silently end up
|
||||
# with a migration that skipped every conflicting item.
|
||||
if preview_conflicts > 0 and not overwrite:
|
||||
print()
|
||||
print_error(
|
||||
f"Plan has {preview_conflicts} conflict(s). Refusing to apply."
|
||||
)
|
||||
print_info(
|
||||
"Each conflict is an item whose target already exists in ~/.hermes/. "
|
||||
"Re-run with --overwrite to replace conflicting targets (item-level "
|
||||
"backups are written to the migration report directory)."
|
||||
)
|
||||
print_info("Or re-run with --dry-run to review the full plan.")
|
||||
return
|
||||
|
||||
# ── Phase 2: Confirm and execute ───────────────────────────
|
||||
print()
|
||||
if not auto_yes:
|
||||
@@ -458,6 +490,32 @@ def _cmd_migrate(args):
|
||||
print_info("Migration cancelled.")
|
||||
return
|
||||
|
||||
# ── Phase 2b: Pre-apply backup of the Hermes home ─────────
|
||||
# Delegates to hermes_cli.backup.create_pre_migration_backup(), which
|
||||
# shares implementation with the pre-update backup (same exclusion
|
||||
# rules, same SQLite safe-copy, zip format) so the archive is
|
||||
# restorable with `hermes import`. Mirrors OpenClaw's
|
||||
# createPreMigrationBackup posture — one atomic restore point before
|
||||
# any mutation, auto-pruned to the last 5 pre-migration zips.
|
||||
backup_archive: Optional[Path] = None
|
||||
if not no_backup:
|
||||
try:
|
||||
from hermes_cli.backup import create_pre_migration_backup, _format_size
|
||||
backup_archive = create_pre_migration_backup(hermes_home=hermes_home)
|
||||
if backup_archive:
|
||||
size_str = _format_size(backup_archive.stat().st_size)
|
||||
print()
|
||||
print_success(f"Pre-migration backup: {backup_archive} ({size_str})")
|
||||
print_info(f"Restore with: hermes import {backup_archive.name}")
|
||||
except Exception as e:
|
||||
print()
|
||||
print_error(f"Could not create pre-migration backup: {e}")
|
||||
print_info(
|
||||
"Re-run with --no-backup to skip, or free up disk space under the Hermes home."
|
||||
)
|
||||
logger.debug("Pre-migration backup error", exc_info=True)
|
||||
return
|
||||
|
||||
try:
|
||||
migrator = mod.Migrator(
|
||||
source_root=source_dir.resolve(),
|
||||
@@ -476,6 +534,9 @@ def _cmd_migrate(args):
|
||||
print()
|
||||
print_error(f"Migration failed: {e}")
|
||||
logger.debug("OpenClaw migration error", exc_info=True)
|
||||
if backup_archive:
|
||||
print_info(f"A pre-migration backup is available at: {backup_archive}")
|
||||
print_info(f"Restore with: hermes import {backup_archive.name}")
|
||||
return
|
||||
|
||||
# Print results
|
||||
|
||||
@@ -115,6 +115,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("verbose", "Cycle tool progress display: off -> new -> all -> verbose",
|
||||
"Configuration", cli_only=True,
|
||||
gateway_config_gate="display.tool_progress_command"),
|
||||
CommandDef("footer", "Toggle gateway runtime-metadata footer on final replies",
|
||||
"Configuration", args_hint="[on|off|status]",
|
||||
subcommands=("on", "off", "status")),
|
||||
CommandDef("yolo", "Toggle YOLO mode (skip all dangerous command approvals)",
|
||||
"Configuration"),
|
||||
CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
|
||||
@@ -125,6 +128,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
subcommands=("normal", "fast", "status", "on", "off")),
|
||||
CommandDef("skin", "Show or change the display skin/theme", "Configuration",
|
||||
cli_only=True, args_hint="[name]"),
|
||||
CommandDef("indicator", "Pick the TUI busy-indicator style", "Configuration",
|
||||
cli_only=True, args_hint="[kaomoji|emoji|unicode|ascii]",
|
||||
subcommands=("kaomoji", "emoji", "unicode", "ascii")),
|
||||
CommandDef("voice", "Toggle voice mode", "Configuration",
|
||||
args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
|
||||
CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
|
||||
@@ -142,10 +148,15 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
|
||||
cli_only=True, args_hint="[subcommand]",
|
||||
subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
|
||||
CommandDef("curator", "Background skill maintenance (status, run, pin, archive)",
|
||||
"Tools & Skills", args_hint="[subcommand]",
|
||||
subcommands=("status", "run", "pause", "resume", "pin", "unpin", "restore")),
|
||||
CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
|
||||
cli_only=True),
|
||||
CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
|
||||
aliases=("reload_mcp",)),
|
||||
CommandDef("reload-skills", "Re-scan ~/.hermes/skills/ for newly installed or removed skills",
|
||||
"Tools & Skills", aliases=("reload_skills",)),
|
||||
CommandDef("browser", "Connect browser tools to your live Chrome via CDP", "Tools & Skills",
|
||||
cli_only=True, args_hint="[connect|disconnect|status]",
|
||||
subcommands=("connect", "disconnect", "status")),
|
||||
@@ -943,6 +954,42 @@ def slack_subcommand_map() -> dict[str, str]:
|
||||
# Autocomplete
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# Per-process cache for /model<space> LM Studio autocomplete. Probing on
|
||||
# every keystroke would block the UI; a short TTL keeps it live without
|
||||
# hammering the server.
|
||||
_LMSTUDIO_COMPLETION_CACHE: tuple[float, list[str]] | None = None
|
||||
|
||||
|
||||
def _lmstudio_completion_models() -> list[str]:
|
||||
"""Locally-loaded LM Studio models for /model autocomplete (cached, gated)."""
|
||||
global _LMSTUDIO_COMPLETION_CACHE
|
||||
# Gate: don't probe 127.0.0.1 on every keystroke for users who don't use LM Studio.
|
||||
if not (os.environ.get("LM_API_KEY") or os.environ.get("LM_BASE_URL")):
|
||||
try:
|
||||
from hermes_cli.auth import _load_auth_store
|
||||
store = _load_auth_store() or {}
|
||||
if "lmstudio" not in (store.get("providers") or {}) \
|
||||
and "lmstudio" not in (store.get("credential_pool") or {}):
|
||||
return []
|
||||
except Exception:
|
||||
return []
|
||||
now = time.time()
|
||||
if _LMSTUDIO_COMPLETION_CACHE and (now - _LMSTUDIO_COMPLETION_CACHE[0]) < 30.0:
|
||||
return _LMSTUDIO_COMPLETION_CACHE[1]
|
||||
try:
|
||||
from hermes_cli.models import fetch_lmstudio_models
|
||||
models = fetch_lmstudio_models(
|
||||
api_key=os.environ.get("LM_API_KEY", ""),
|
||||
base_url=os.environ.get("LM_BASE_URL") or "http://127.0.0.1:1234/v1",
|
||||
timeout=0.8,
|
||||
)
|
||||
except Exception:
|
||||
models = []
|
||||
_LMSTUDIO_COMPLETION_CACHE = (now, models)
|
||||
return models
|
||||
|
||||
|
||||
class SlashCommandCompleter(Completer):
|
||||
"""Autocomplete for built-in slash commands, subcommands, and skill commands."""
|
||||
|
||||
@@ -1366,6 +1413,19 @@ class SlashCommandCompleter(Completer):
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
# LM Studio: surface locally-loaded models. Gated on the user actually
|
||||
# having LM Studio configured (env var or auth-store entry) so we
|
||||
# don't probe 127.0.0.1 on every keystroke for users who don't use it.
|
||||
for name in _lmstudio_completion_models():
|
||||
if name in seen:
|
||||
continue
|
||||
if name.startswith(sub_lower) and name != sub_lower:
|
||||
yield Completion(
|
||||
name,
|
||||
start_position=-len(sub_text),
|
||||
display=name,
|
||||
display_meta="LM Studio",
|
||||
)
|
||||
|
||||
def get_completions(self, document, complete_event):
|
||||
text = document.text_before_cursor
|
||||
|
||||
+552
-71
@@ -30,34 +30,69 @@ logger = logging.getLogger(__name__)
|
||||
_IS_WINDOWS = platform.system() == "Windows"
|
||||
_ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
|
||||
_LAST_EXPANDED_CONFIG_BY_PATH: Dict[str, Any] = {}
|
||||
# path -> (mtime_ns, size, cached expanded config dict).
|
||||
# load_config() returns a deepcopy of the cached value when the file
|
||||
# hasn't changed since the last load, skipping yaml.safe_load +
|
||||
# _deep_merge + _normalize_* + _expand_env_vars (~13 ms/call).
|
||||
# save_config() + migrate_config() write via atomic_yaml_write which
|
||||
# produces a fresh inode, so stat() sees a new mtime_ns and the next
|
||||
# load repopulates automatically — no explicit invalidation hook.
|
||||
_LOAD_CONFIG_CACHE: Dict[str, Tuple[int, int, Dict[str, Any]]] = {}
|
||||
# path -> (mtime_ns, size, cached raw yaml dict). Same pattern as
|
||||
# _LOAD_CONFIG_CACHE but for read_raw_config() — used when callers want
|
||||
# the user's on-disk values without defaults merged in.
|
||||
_RAW_CONFIG_CACHE: Dict[str, Tuple[int, int, Dict[str, Any]]] = {}
|
||||
# Env var names written to .env that aren't in OPTIONAL_ENV_VARS
|
||||
# (managed by setup/provider flows directly).
|
||||
_EXTRA_ENV_KEYS = frozenset({
|
||||
"OPENAI_API_KEY", "OPENAI_BASE_URL",
|
||||
"ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN",
|
||||
"DISCORD_HOME_CHANNEL", "TELEGRAM_HOME_CHANNEL",
|
||||
"DISCORD_HOME_CHANNEL", "DISCORD_HOME_CHANNEL_NAME",
|
||||
"TELEGRAM_HOME_CHANNEL", "TELEGRAM_HOME_CHANNEL_NAME",
|
||||
"SLACK_HOME_CHANNEL", "SLACK_HOME_CHANNEL_NAME",
|
||||
"SIGNAL_ACCOUNT", "SIGNAL_HTTP_URL",
|
||||
"SIGNAL_ALLOWED_USERS", "SIGNAL_GROUP_ALLOWED_USERS",
|
||||
"SIGNAL_HOME_CHANNEL", "SIGNAL_HOME_CHANNEL_NAME",
|
||||
"SMS_HOME_CHANNEL", "SMS_HOME_CHANNEL_NAME",
|
||||
"DINGTALK_CLIENT_ID", "DINGTALK_CLIENT_SECRET",
|
||||
"DINGTALK_HOME_CHANNEL", "DINGTALK_HOME_CHANNEL_NAME",
|
||||
"FEISHU_APP_ID", "FEISHU_APP_SECRET", "FEISHU_ENCRYPT_KEY", "FEISHU_VERIFICATION_TOKEN",
|
||||
"FEISHU_HOME_CHANNEL", "FEISHU_HOME_CHANNEL_NAME",
|
||||
"YUANBAO_HOME_CHANNEL", "YUANBAO_HOME_CHANNEL_NAME",
|
||||
"WECOM_BOT_ID", "WECOM_SECRET",
|
||||
"WECOM_CALLBACK_CORP_ID", "WECOM_CALLBACK_CORP_SECRET", "WECOM_CALLBACK_AGENT_ID",
|
||||
"WECOM_CALLBACK_TOKEN", "WECOM_CALLBACK_ENCODING_AES_KEY",
|
||||
"WECOM_CALLBACK_HOST", "WECOM_CALLBACK_PORT",
|
||||
"WECOM_HOME_CHANNEL", "WECOM_HOME_CHANNEL_NAME",
|
||||
"WEIXIN_ACCOUNT_ID", "WEIXIN_TOKEN", "WEIXIN_BASE_URL", "WEIXIN_CDN_BASE_URL",
|
||||
"WEIXIN_HOME_CHANNEL", "WEIXIN_HOME_CHANNEL_NAME", "WEIXIN_DM_POLICY", "WEIXIN_GROUP_POLICY",
|
||||
"WEIXIN_ALLOWED_USERS", "WEIXIN_GROUP_ALLOWED_USERS", "WEIXIN_ALLOW_ALL_USERS",
|
||||
"BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD",
|
||||
"BLUEBUBBLES_HOME_CHANNEL", "BLUEBUBBLES_HOME_CHANNEL_NAME",
|
||||
"QQ_APP_ID", "QQ_CLIENT_SECRET", "QQBOT_HOME_CHANNEL", "QQBOT_HOME_CHANNEL_NAME",
|
||||
"QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME", # legacy aliases (pre-rename, still read for back-compat)
|
||||
"QQ_ALLOWED_USERS", "QQ_GROUP_ALLOWED_USERS", "QQ_ALLOW_ALL_USERS", "QQ_MARKDOWN_SUPPORT",
|
||||
"QQ_STT_API_KEY", "QQ_STT_BASE_URL", "QQ_STT_MODEL",
|
||||
"IRC_SERVER", "IRC_PORT", "IRC_NICKNAME", "IRC_CHANNEL",
|
||||
"IRC_USE_TLS", "IRC_SERVER_PASSWORD", "IRC_NICKSERV_PASSWORD",
|
||||
"TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
|
||||
"WHATSAPP_MODE", "WHATSAPP_ENABLED",
|
||||
"MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE",
|
||||
"MATTERMOST_HOME_CHANNEL", "MATTERMOST_HOME_CHANNEL_NAME", "MATTERMOST_REPLY_MODE",
|
||||
"MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_DEVICE_ID", "MATRIX_HOME_ROOM",
|
||||
"MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD",
|
||||
"MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD", "MATRIX_DM_AUTO_THREAD",
|
||||
"MATRIX_RECOVERY_KEY",
|
||||
# Langfuse observability plugin — optional tuning keys + standard SDK vars.
|
||||
# Activation is via plugins.enabled (opt-in through `hermes plugins enable
|
||||
# observability/langfuse` or `hermes tools → Langfuse`); credentials gate
|
||||
# the plugin at runtime.
|
||||
"HERMES_LANGFUSE_ENV",
|
||||
"HERMES_LANGFUSE_RELEASE",
|
||||
"HERMES_LANGFUSE_SAMPLE_RATE",
|
||||
"HERMES_LANGFUSE_MAX_CHARS",
|
||||
"HERMES_LANGFUSE_DEBUG",
|
||||
"LANGFUSE_PUBLIC_KEY",
|
||||
"LANGFUSE_SECRET_KEY",
|
||||
"LANGFUSE_BASE_URL",
|
||||
})
|
||||
import yaml
|
||||
|
||||
@@ -206,6 +241,7 @@ def get_container_exec_info() -> Optional[dict]:
|
||||
|
||||
# Re-export from hermes_constants — canonical definition lives there.
|
||||
from hermes_constants import get_hermes_home # noqa: F811,E402
|
||||
from utils import atomic_replace
|
||||
|
||||
def get_config_path() -> Path:
|
||||
"""Get the main config file path."""
|
||||
@@ -314,7 +350,7 @@ def ensure_hermes_home():
|
||||
else:
|
||||
home.mkdir(parents=True, exist_ok=True)
|
||||
_secure_dir(home)
|
||||
for subdir in ("cron", "sessions", "logs", "memories"):
|
||||
for subdir in ("cron", "sessions", "logs", "logs/curator", "memories"):
|
||||
d = home / subdir
|
||||
d.mkdir(parents=True, exist_ok=True)
|
||||
_secure_dir(d)
|
||||
@@ -335,6 +371,10 @@ def _ensure_hermes_home_managed(home: Path):
|
||||
f"{d} does not exist. "
|
||||
"Run 'sudo nixos-rebuild switch' first."
|
||||
)
|
||||
# Curator reports dir is a sub-path of logs/; create it if missing.
|
||||
# In managed mode the activation script may not know about this subdir,
|
||||
# so we mkdir it ourselves (it's inside an already-secured logs/ dir).
|
||||
(home / "logs" / "curator").mkdir(parents=True, exist_ok=True)
|
||||
# Inside umask(0o007) scope — SOUL.md will be created as 0660
|
||||
_ensure_default_soul_md(home)
|
||||
|
||||
@@ -389,6 +429,20 @@ DEFAULT_CONFIG = {
|
||||
# (60+ tool iterations with tiny output) before users assume the
|
||||
# bot is dead and /restart.
|
||||
"gateway_notify_interval": 180,
|
||||
# Freshness window for the gateway auto-continue note (seconds).
|
||||
# After a gateway crash/restart/SIGTERM mid-run, the next user
|
||||
# message gets a "[System note: your previous turn was
|
||||
# interrupted — process the unfinished tool result(s) first]"
|
||||
# prepended so the model picks up where it left off. That's the
|
||||
# right behaviour while the interruption is fresh, but stale
|
||||
# markers (transcript last touched hours or days ago) can revive
|
||||
# an unrelated old task when the user's next message starts new
|
||||
# work. This window is the max age of the last persisted
|
||||
# transcript row for which we still inject the continue note.
|
||||
# Default 3600s comfortably covers a long turn (gateway_timeout
|
||||
# default is 1800s) plus runtime slack. Set to 0 to disable the
|
||||
# gate and restore pre-fix behaviour (always inject).
|
||||
"gateway_auto_continue_freshness": 3600,
|
||||
# How user-attached images are presented to the main model on each turn.
|
||||
# "auto" — attach natively when the active model reports
|
||||
# supports_vision=True AND the user hasn't explicitly
|
||||
@@ -451,7 +505,8 @@ DEFAULT_CONFIG = {
|
||||
"singularity_image": "docker://nikolaik/python-nodejs:python3.11-nodejs20",
|
||||
"modal_image": "nikolaik/python-nodejs:python3.11-nodejs20",
|
||||
"daytona_image": "nikolaik/python-nodejs:python3.11-nodejs20",
|
||||
# Container resource limits (docker, singularity, modal, daytona — ignored for local/ssh)
|
||||
"vercel_runtime": "node24",
|
||||
# Container resource limits (docker, singularity, modal, daytona, vercel_sandbox — ignored for local/ssh)
|
||||
"container_cpu": 1,
|
||||
"container_memory": 5120, # MB (default 5GB)
|
||||
"container_disk": 51200, # MB (default 50GB)
|
||||
@@ -467,6 +522,16 @@ DEFAULT_CONFIG = {
|
||||
# Explicit opt-in: mount the host cwd into /workspace for Docker sessions.
|
||||
# Default off because passing host directories into a sandbox weakens isolation.
|
||||
"docker_mount_cwd_to_workspace": False,
|
||||
# Explicit opt-in: run the Docker container as the host user's uid:gid
|
||||
# (via `--user`). When enabled, files written into bind-mounted dirs
|
||||
# (docker_volumes, the persistent workspace, or the auto-mounted cwd)
|
||||
# are owned by your host user instead of root, which avoids needing
|
||||
# `sudo chown` after container runs. Default off to preserve behavior
|
||||
# for images whose entrypoints expect to start as root (e.g. the
|
||||
# bundled Hermes image, which drops to the `hermes` user via gosu).
|
||||
# When on, SETUID/SETGID caps are omitted from the container since
|
||||
# no privilege drop is needed.
|
||||
"docker_run_as_host_user": False,
|
||||
# Persistent shell — keep a long-lived bash shell across execute() calls
|
||||
# so cwd/env vars/shell variables survive between commands.
|
||||
# Enabled by default for non-local backends (SSH); local is always opt-in
|
||||
@@ -546,7 +611,7 @@ DEFAULT_CONFIG = {
|
||||
"threshold": 0.50, # compress when context usage exceeds this ratio
|
||||
"target_ratio": 0.20, # fraction of threshold to preserve as recent tail
|
||||
"protect_last_n": 20, # minimum recent messages to keep uncompressed
|
||||
|
||||
"hygiene_hard_message_limit": 400, # gateway session-hygiene force-compress threshold by message count
|
||||
},
|
||||
|
||||
# Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
|
||||
@@ -648,6 +713,19 @@ DEFAULT_CONFIG = {
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
# Curator — skill-usage review fork. Timeout is generous because the
|
||||
# review pass can take several minutes on reasoning models (umbrella
|
||||
# building over hundreds of candidate skills). "auto" = use main chat
|
||||
# model; override via `hermes model` → auxiliary → Curator to route
|
||||
# to a cheaper aux model (e.g. openrouter google/gemini-3-flash-preview).
|
||||
"curator": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 600,
|
||||
"extra_body": {},
|
||||
},
|
||||
},
|
||||
|
||||
"display": {
|
||||
@@ -655,6 +733,11 @@ DEFAULT_CONFIG = {
|
||||
"personality": "kawaii",
|
||||
"resume_display": "full",
|
||||
"busy_input_mode": "interrupt", # interrupt | queue | steer
|
||||
# When true, `hermes --tui` auto-resumes the most recent human-
|
||||
# facing session on launch instead of forging a fresh one.
|
||||
# Mirrors `hermes -c` muscle memory. Default off so existing
|
||||
# users aren't surprised. HERMES_TUI_RESUME=<id> always wins.
|
||||
"tui_auto_resume_recent": False,
|
||||
"bell_on_complete": False,
|
||||
"show_reasoning": False,
|
||||
"streaming": False,
|
||||
@@ -662,6 +745,9 @@ DEFAULT_CONFIG = {
|
||||
"inline_diffs": True, # Show inline diff previews for write actions (write_file, patch, skill_manage)
|
||||
"show_cost": False, # Show $ cost in the status bar (off by default)
|
||||
"skin": "default",
|
||||
# TUI busy indicator style: kaomoji (default), emoji, unicode (braille
|
||||
# spinner), or ascii. Live-swappable via `/indicator <style>`.
|
||||
"tui_status_indicator": "kaomoji",
|
||||
"user_message_preview": { # CLI: how many submitted user-message lines to echo back in scrollback
|
||||
"first_lines": 2,
|
||||
"last_lines": 2,
|
||||
@@ -671,6 +757,14 @@ DEFAULT_CONFIG = {
|
||||
"tool_progress_overrides": {}, # DEPRECATED — use display.platforms instead
|
||||
"tool_preview_length": 0, # Max chars for tool call previews (0 = no limit, show full paths/commands)
|
||||
"platforms": {}, # Per-platform display overrides: {"telegram": {"tool_progress": "all"}, "slack": {"tool_progress": "off"}}
|
||||
# Gateway runtime-metadata footer appended to the FINAL message of a turn
|
||||
# (disabled by default to keep replies minimal). When enabled, renders
|
||||
# e.g. `model · 68% · ~/projects/hermes`. Per-platform overrides go under
|
||||
# display.platforms.<platform>.runtime_footer.
|
||||
"runtime_footer": {
|
||||
"enabled": False,
|
||||
"fields": ["model", "context_pct", "cwd"], # Order shown; drop any to hide
|
||||
},
|
||||
},
|
||||
|
||||
# Web dashboard settings
|
||||
@@ -689,7 +783,7 @@ DEFAULT_CONFIG = {
|
||||
# limit (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k model-aware,
|
||||
# Gemini 5000, Edge 5000, Mistral 4000, NeuTTS/KittenTTS 2000).
|
||||
"tts": {
|
||||
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
|
||||
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "gemini" | "neutts" (local) | "kittentts" (local) | "piper" (local)
|
||||
"edge": {
|
||||
"voice": "en-US-AriaNeural",
|
||||
# Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural
|
||||
@@ -719,6 +813,19 @@ DEFAULT_CONFIG = {
|
||||
"model": "neuphonic/neutts-air-q4-gguf", # HuggingFace model repo
|
||||
"device": "cpu", # cpu, cuda, or mps
|
||||
},
|
||||
"piper": {
|
||||
# Voice name (e.g. "en_US-lessac-medium") downloaded on first
|
||||
# use, OR an absolute path to a pre-downloaded .onnx file.
|
||||
# Full voice list: https://github.com/OHF-Voice/piper1-gpl/blob/main/docs/VOICES.md
|
||||
"voice": "en_US-lessac-medium",
|
||||
# "voices_dir": "", # Override voice cache dir; default = ~/.hermes/cache/piper-voices/
|
||||
# "use_cuda": False, # Requires onnxruntime-gpu
|
||||
# "length_scale": 1.0, # 2.0 = twice as slow
|
||||
# "noise_scale": 0.667,
|
||||
# "noise_w_scale": 0.8,
|
||||
# "volume": 1.0,
|
||||
# "normalize_audio": True,
|
||||
},
|
||||
},
|
||||
|
||||
"stt": {
|
||||
@@ -851,6 +958,29 @@ DEFAULT_CONFIG = {
|
||||
"guard_agent_created": False,
|
||||
},
|
||||
|
||||
# Curator — background skill maintenance.
|
||||
#
|
||||
# Periodically reviews AGENT-CREATED skills (never bundled or
|
||||
# hub-installed) and keeps the collection tidy: marks long-unused skills
|
||||
# as stale, archives genuinely obsolete ones (archive only, never
|
||||
# deletes), and spawns a forked aux-model agent to consolidate overlaps
|
||||
# and patch drift. Runs inactivity-triggered from session start — no
|
||||
# cron daemon.
|
||||
#
|
||||
# See `hermes curator status` for the last run summary.
|
||||
"curator": {
|
||||
"enabled": True,
|
||||
# How long to wait between curator runs (hours). Default: 7 days.
|
||||
"interval_hours": 24 * 7,
|
||||
# Only run when the agent has been idle at least this long (hours).
|
||||
"min_idle_hours": 2,
|
||||
# Mark a skill as "stale" after this many days without use.
|
||||
"stale_after_days": 30,
|
||||
# Archive a skill (move to skills/.archive/) after this many days
|
||||
# without use. Archived skills are recoverable — no auto-deletion.
|
||||
"archive_after_days": 90,
|
||||
},
|
||||
|
||||
# Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
|
||||
# This section is only needed for hermes-specific overrides; everything else
|
||||
# (apiKey, workspace, peerName, sessions, enabled) comes from the global config.
|
||||
@@ -888,6 +1018,7 @@ DEFAULT_CONFIG = {
|
||||
|
||||
# Telegram platform settings (gateway mode)
|
||||
"telegram": {
|
||||
"reactions": False, # Add 👀/✅/❌ reactions to messages during processing
|
||||
"channel_prompts": {}, # Per-chat/topic ephemeral system prompts (topics inherit from parent group)
|
||||
},
|
||||
|
||||
@@ -913,6 +1044,14 @@ DEFAULT_CONFIG = {
|
||||
"mode": "manual",
|
||||
"timeout": 60,
|
||||
"cron_mode": "deny",
|
||||
# When true, /reload-mcp asks the user to confirm before rebuilding
|
||||
# the MCP tool set for the active session. Reloading invalidates
|
||||
# the provider prompt cache (tool schemas are baked into the system
|
||||
# prompt), so the next message re-sends full input tokens — this can
|
||||
# be expensive on long-context or high-reasoning models. Users click
|
||||
# "Always Approve" to silence the prompt permanently; that flips
|
||||
# this key to false.
|
||||
"mcp_reload_confirm": True,
|
||||
},
|
||||
|
||||
# Permanently allowed dangerous command patterns (added via "always" approval)
|
||||
@@ -942,7 +1081,7 @@ DEFAULT_CONFIG = {
|
||||
# Pre-exec security scanning via tirith
|
||||
"security": {
|
||||
"allow_private_urls": False, # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs)
|
||||
"redact_secrets": True,
|
||||
"redact_secrets": False,
|
||||
"tirith_enabled": True,
|
||||
"tirith_path": "tirith",
|
||||
"tirith_timeout": 5,
|
||||
@@ -1066,7 +1205,7 @@ DEFAULT_CONFIG = {
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 22,
|
||||
"_config_version": 23,
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
@@ -1166,6 +1305,22 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"LM_API_KEY": {
|
||||
"description": "LM Studio bearer token for auth-enabled local servers",
|
||||
"prompt": "LM Studio API key / bearer token",
|
||||
"url": None,
|
||||
"password": True,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"LM_BASE_URL": {
|
||||
"description": "LM Studio base URL override",
|
||||
"prompt": "LM Studio base URL (leave empty for default)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"GLM_API_KEY": {
|
||||
"description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
|
||||
"prompt": "Z.AI / GLM API key",
|
||||
@@ -1692,6 +1847,30 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "tool",
|
||||
},
|
||||
|
||||
# ── Langfuse observability ──
|
||||
"HERMES_LANGFUSE_PUBLIC_KEY": {
|
||||
"description": "Langfuse project public key (pk-lf-...)",
|
||||
"prompt": "Langfuse public key",
|
||||
"url": "https://cloud.langfuse.com",
|
||||
"password": False,
|
||||
"category": "tool",
|
||||
},
|
||||
"HERMES_LANGFUSE_SECRET_KEY": {
|
||||
"description": "Langfuse project secret key (sk-lf-...)",
|
||||
"prompt": "Langfuse secret key",
|
||||
"url": "https://cloud.langfuse.com",
|
||||
"password": True,
|
||||
"category": "tool",
|
||||
},
|
||||
"HERMES_LANGFUSE_BASE_URL": {
|
||||
"description": "Langfuse server URL (default: https://cloud.langfuse.com)",
|
||||
"prompt": "Langfuse server URL (leave empty for cloud.langfuse.com)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "tool",
|
||||
"advanced": True,
|
||||
},
|
||||
|
||||
# ── Messaging platforms ──
|
||||
"TELEGRAM_BOT_TOKEN": {
|
||||
"description": "Telegram bot token from @BotFather",
|
||||
@@ -1839,6 +2018,14 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "messaging",
|
||||
"advanced": True,
|
||||
},
|
||||
"MATRIX_DM_AUTO_THREAD": {
|
||||
"description": "Auto-create threads for DM messages in Matrix (default: false)",
|
||||
"prompt": "Auto-create threads in DMs (true/false)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
"advanced": True,
|
||||
},
|
||||
"MATRIX_DEVICE_ID": {
|
||||
"description": "Stable Matrix device ID for E2EE persistence across restarts (e.g. HERMES_BOT)",
|
||||
"prompt": "Matrix device ID (stable across restarts)",
|
||||
@@ -1923,6 +2110,43 @@ OPTIONAL_ENV_VARS = {
|
||||
"prompt": "QQ Sandbox Mode",
|
||||
"category": "messaging",
|
||||
},
|
||||
"IRC_SERVER": {
|
||||
"description": "IRC server hostname (e.g. irc.libera.chat)",
|
||||
"prompt": "IRC server",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
},
|
||||
"IRC_CHANNEL": {
|
||||
"description": "IRC channel to join (e.g. #hermes)",
|
||||
"prompt": "IRC channel",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
},
|
||||
"IRC_NICKNAME": {
|
||||
"description": "Bot nickname on IRC (default: hermes-bot)",
|
||||
"prompt": "IRC nickname",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
},
|
||||
"IRC_SERVER_PASSWORD": {
|
||||
"description": "IRC server password (if required)",
|
||||
"prompt": "IRC server password",
|
||||
"url": None,
|
||||
"password": True,
|
||||
"category": "messaging",
|
||||
"advanced": True,
|
||||
},
|
||||
"IRC_NICKSERV_PASSWORD": {
|
||||
"description": "NickServ password for nick identification",
|
||||
"prompt": "NickServ password",
|
||||
"url": None,
|
||||
"password": True,
|
||||
"category": "messaging",
|
||||
"advanced": True,
|
||||
},
|
||||
"GATEWAY_ALLOW_ALL_USERS": {
|
||||
"description": "Allow all users to interact with messaging bots (true/false). Default: false.",
|
||||
"prompt": "Allow all users (true/false)",
|
||||
@@ -2085,19 +2309,55 @@ def get_missing_env_vars(required_only: bool = False) -> List[Dict[str, Any]]:
|
||||
return missing
|
||||
|
||||
|
||||
def _set_nested(config: dict, dotted_key: str, value):
|
||||
def _set_nested(config, dotted_key: str, value):
|
||||
"""Set a value at an arbitrarily nested dotted key path.
|
||||
|
||||
Creates intermediate dicts as needed, e.g. ``_set_nested(c, "a.b.c", 1)``
|
||||
ensures ``c["a"]["b"]["c"] == 1``.
|
||||
Supports both dict and list navigation:
|
||||
_set_nested(c, "a.b.c", 1) → c["a"]["b"]["c"] = 1
|
||||
_set_nested(c, "a.0.b", 1) → c["a"][0]["b"] = 1
|
||||
_set_nested(c, "providers.1", "x") → c["providers"][1] = "x"
|
||||
|
||||
Intermediate dicts are created on demand. List indices are parsed
|
||||
from numeric path segments; the referenced index must already exist
|
||||
(we do not grow lists — the user is navigating into structure they
|
||||
wrote themselves). If a segment targets a non-container leaf
|
||||
(scalar), the leaf is replaced with a fresh dict so the write can
|
||||
proceed — this preserves the pre-existing behavior for bare scalar
|
||||
overrides (e.g. setting ``a.b.c`` where ``a.b`` was previously a
|
||||
string).
|
||||
|
||||
Guards against #17876: before this fix the code unconditionally
|
||||
replaced any non-dict value (including lists) with ``{}``, silently
|
||||
destroying list-typed config like ``custom_providers`` whenever a
|
||||
caller used an indexed path.
|
||||
"""
|
||||
parts = dotted_key.split(".")
|
||||
current = config
|
||||
for part in parts[:-1]:
|
||||
if part not in current or not isinstance(current.get(part), dict):
|
||||
current[part] = {}
|
||||
current = current[part]
|
||||
current[parts[-1]] = value
|
||||
if isinstance(current, list):
|
||||
try:
|
||||
idx = int(part)
|
||||
except (TypeError, ValueError):
|
||||
raise TypeError(
|
||||
f"Cannot navigate into list at key {dotted_key!r}: "
|
||||
f"segment {part!r} is not a numeric index"
|
||||
)
|
||||
current = current[idx]
|
||||
elif isinstance(current, dict):
|
||||
existing = current.get(part)
|
||||
# Preserve dicts and lists; replace missing/scalar with a fresh dict.
|
||||
if part not in current or not isinstance(existing, (dict, list)):
|
||||
current[part] = {}
|
||||
current = current[part]
|
||||
else:
|
||||
raise TypeError(
|
||||
f"Cannot navigate into {type(current).__name__} at key {dotted_key!r}"
|
||||
)
|
||||
last = parts[-1]
|
||||
if isinstance(current, list):
|
||||
current[int(last)] = value
|
||||
else:
|
||||
current[last] = value
|
||||
|
||||
|
||||
def get_missing_config_fields() -> List[Dict[str, Any]]:
|
||||
@@ -2180,14 +2440,21 @@ def _normalize_custom_provider_entry(
|
||||
"baseUrl": "base_url",
|
||||
"apiMode": "api_mode",
|
||||
"keyEnv": "key_env",
|
||||
"apiKeyEnv": "key_env", # alias — OpenClaw-compatible + docs variant
|
||||
"defaultModel": "default_model",
|
||||
"contextLength": "context_length",
|
||||
"rateLimitDelay": "rate_limit_delay",
|
||||
}
|
||||
# api_key_env is a documented snake_case alias for key_env (see
|
||||
# website/docs/guides/azure-foundry.md). Normalize it up front so the
|
||||
# rest of the normalizer treats it as the canonical field.
|
||||
if "api_key_env" in entry and "key_env" not in entry:
|
||||
entry["key_env"] = entry["api_key_env"]
|
||||
_KNOWN_KEYS = {
|
||||
"name", "api", "url", "base_url", "api_key", "key_env",
|
||||
"name", "api", "url", "base_url", "api_key", "key_env", "api_key_env",
|
||||
"api_mode", "transport", "model", "default_model", "models",
|
||||
"context_length", "rate_limit_delay",
|
||||
"request_timeout_seconds", "stale_timeout_seconds",
|
||||
}
|
||||
for camel, snake in _CAMEL_ALIASES.items():
|
||||
if camel in entry and snake not in entry:
|
||||
@@ -2439,6 +2706,9 @@ _KNOWN_ROOT_KEYS = {
|
||||
_VALID_CUSTOM_PROVIDER_FIELDS = {
|
||||
"name", "base_url", "api_key", "api_mode", "model", "models",
|
||||
"context_length", "rate_limit_delay",
|
||||
# key_env is read at runtime by runtime_provider.py and auxiliary_client.py
|
||||
# — include it here so the set accurately describes the supported schema.
|
||||
"key_env",
|
||||
}
|
||||
|
||||
# Fields that look like they should be inside custom_providers, not at root
|
||||
@@ -2515,10 +2785,32 @@ def validate_config_structure(config: Optional[Dict[str, Any]] = None) -> List["
|
||||
"Add the API endpoint URL, e.g.: base_url: https://api.example.com/v1",
|
||||
))
|
||||
|
||||
# ── fallback_model must be a top-level dict with provider + model ────
|
||||
# ── fallback_model: single dict OR list of dicts (chain) ─────────────
|
||||
fb = config.get("fallback_model")
|
||||
if fb is not None:
|
||||
if not isinstance(fb, dict):
|
||||
if isinstance(fb, list):
|
||||
# Chain fallback — validate each entry
|
||||
for i, entry in enumerate(fb):
|
||||
if not isinstance(entry, dict):
|
||||
issues.append(ConfigIssue(
|
||||
"error",
|
||||
f"fallback_model[{i}] should be a dict, got {type(entry).__name__}",
|
||||
"Each entry needs provider + model",
|
||||
))
|
||||
else:
|
||||
if not entry.get("provider"):
|
||||
issues.append(ConfigIssue(
|
||||
"warning",
|
||||
f"fallback_model[{i}] is missing 'provider' field",
|
||||
"Add: provider: openrouter (or another provider)",
|
||||
))
|
||||
if not entry.get("model"):
|
||||
issues.append(ConfigIssue(
|
||||
"warning",
|
||||
f"fallback_model[{i}] is missing 'model' field",
|
||||
"Add: model: <model-name>",
|
||||
))
|
||||
elif not isinstance(fb, dict):
|
||||
issues.append(ConfigIssue(
|
||||
"error",
|
||||
f"fallback_model should be a dict with 'provider' and 'model', got {type(fb).__name__}",
|
||||
@@ -2986,6 +3278,90 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
|
||||
"Use `hermes plugins enable <name>` to activate."
|
||||
)
|
||||
|
||||
# ── Version 22 → 23: seed curator defaults + create logs/curator/ ──
|
||||
# The curator (background skill maintenance) was added in PR #16049, but
|
||||
# existing configs from before that PR (or before the April 2026
|
||||
# unification under `auxiliary.curator`) never wrote the curator section
|
||||
# to disk. The runtime deep-merge in `load_config()` fills defaults at
|
||||
# read time, so the curator *functions*; but users can't see/edit the
|
||||
# settings in their `config.yaml`, and `hermes curator status` has no
|
||||
# stable logs dir to point at until the first run mkdir's it.
|
||||
#
|
||||
# This migration:
|
||||
# 1. Writes the `curator` top-level section to config.yaml (enabled,
|
||||
# interval_hours, min_idle_hours, stale_after_days, archive_after_days)
|
||||
# — only keys the user hasn't already overridden.
|
||||
# 2. Writes the `auxiliary.curator` aux-task slot (provider, model,
|
||||
# base_url, api_key, timeout, extra_body) — canonical slot for
|
||||
# routing the curator fork to a cheaper aux model.
|
||||
# 3. Creates `~/.hermes/logs/curator/` if missing (belt-and-suspenders
|
||||
# on top of ensure_hermes_home() — old profiles that predate this
|
||||
# migration still benefit).
|
||||
if current_ver < 23:
|
||||
try:
|
||||
curator_dir = get_hermes_home() / "logs" / "curator"
|
||||
curator_dir.mkdir(parents=True, exist_ok=True)
|
||||
except Exception as e:
|
||||
results["warnings"].append(f"Could not create {curator_dir}: {e}")
|
||||
|
||||
config = read_raw_config()
|
||||
touched = False
|
||||
|
||||
# (1) Top-level curator section — only add missing keys
|
||||
_curator_defaults = DEFAULT_CONFIG.get("curator", {})
|
||||
raw_curator = config.get("curator")
|
||||
if not isinstance(raw_curator, dict):
|
||||
raw_curator = {}
|
||||
added_curator: List[str] = []
|
||||
for k, v in _curator_defaults.items():
|
||||
if k not in raw_curator:
|
||||
raw_curator[k] = copy.deepcopy(v)
|
||||
added_curator.append(k)
|
||||
if added_curator:
|
||||
config["curator"] = raw_curator
|
||||
touched = True
|
||||
|
||||
# (2) auxiliary.curator task slot
|
||||
_aux_curator_defaults = (
|
||||
DEFAULT_CONFIG.get("auxiliary", {}).get("curator", {})
|
||||
)
|
||||
raw_aux = config.get("auxiliary")
|
||||
if not isinstance(raw_aux, dict):
|
||||
raw_aux = {}
|
||||
raw_aux_curator = raw_aux.get("curator")
|
||||
if not isinstance(raw_aux_curator, dict):
|
||||
raw_aux_curator = {}
|
||||
added_aux: List[str] = []
|
||||
for k, v in _aux_curator_defaults.items():
|
||||
if k not in raw_aux_curator:
|
||||
raw_aux_curator[k] = copy.deepcopy(v)
|
||||
added_aux.append(k)
|
||||
if added_aux:
|
||||
raw_aux["curator"] = raw_aux_curator
|
||||
config["auxiliary"] = raw_aux
|
||||
touched = True
|
||||
|
||||
if touched:
|
||||
save_config(config)
|
||||
if added_curator:
|
||||
results["config_added"].append(
|
||||
f"curator ({len(added_curator)} default key(s))"
|
||||
)
|
||||
if not quiet:
|
||||
print(
|
||||
" ✓ Seeded curator defaults in config.yaml: "
|
||||
f"{', '.join(added_curator)}"
|
||||
)
|
||||
if added_aux:
|
||||
results["config_added"].append(
|
||||
f"auxiliary.curator ({len(added_aux)} default key(s))"
|
||||
)
|
||||
if not quiet:
|
||||
print(
|
||||
" ✓ Seeded auxiliary.curator defaults in config.yaml: "
|
||||
f"{', '.join(added_aux)}"
|
||||
)
|
||||
|
||||
if current_ver < latest_ver and not quiet:
|
||||
print(f"Config version: {current_ver} → {latest_ver}")
|
||||
|
||||
@@ -3258,17 +3634,17 @@ def _preserve_env_ref_templates(current, raw, loaded_expanded=None):
|
||||
|
||||
|
||||
def _normalize_root_model_keys(config: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Move stale root-level provider/base_url into model section.
|
||||
"""Move stale root-level provider/base_url/context_length into model section.
|
||||
|
||||
Some users (or older code) placed ``provider:`` and ``base_url:`` at the
|
||||
config root instead of inside ``model:``. These root-level keys are only
|
||||
used as a fallback when the corresponding ``model.*`` key is empty — they
|
||||
never override an existing ``model.provider`` or ``model.base_url``.
|
||||
Some users (or older code) placed ``provider:``, ``base_url:``, or
|
||||
``context_length:`` at the config root instead of inside ``model:``.
|
||||
These root-level keys are only used as a fallback when the corresponding
|
||||
``model.*`` key is empty — they never override an existing value.
|
||||
After migration the root-level keys are removed so they can't cause
|
||||
confusion on subsequent loads.
|
||||
"""
|
||||
# Only act if there are root-level keys to migrate
|
||||
has_root = any(config.get(k) for k in ("provider", "base_url"))
|
||||
has_root = any(config.get(k) for k in ("provider", "base_url", "context_length"))
|
||||
if not has_root:
|
||||
return config
|
||||
|
||||
@@ -3278,7 +3654,7 @@ def _normalize_root_model_keys(config: Dict[str, Any]) -> Dict[str, Any]:
|
||||
model = {"default": model} if model else {}
|
||||
config["model"] = model
|
||||
|
||||
for key in ("provider", "base_url"):
|
||||
for key in ("provider", "base_url", "context_length"):
|
||||
root_val = config.get(key)
|
||||
if root_val and not model.get(key):
|
||||
model[key] = root_val
|
||||
@@ -3303,6 +3679,52 @@ def _normalize_max_turns_config(config: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return config
|
||||
|
||||
|
||||
def cfg_get(cfg: Optional[Dict[str, Any]], *keys: str, default: Any = None) -> Any:
    """Walk ``keys`` down a nested config dict, falling back to ``default``.

    One safe replacement for chained ``cfg.get("X", {}).get("Y", default)``
    lookups. ``default`` is returned when:

    * ``cfg`` itself is not a dict (for example ``None``);
    * a key along the path is absent;
    * a value along the path is not a dict while more keys remain (for
      example a string where a section was expected).

    A key that is present is returned as-is, even when its value is
    ``None`` or otherwise falsy, mirroring ``dict.get(key, default)``.
    With no ``keys`` the dict ``cfg`` itself is returned.

    The helper is called ``cfg_get`` rather than ``cfg_path`` so it does not
    collide with the common ``cfg_path`` local variable that holds the path
    to ``config.yaml``.

    Examples:
        >>> cfg_get({"agent": {"reasoning_effort": "high"}}, "agent", "reasoning_effort")
        'high'
        >>> cfg_get({}, "agent", "reasoning_effort", default="medium")
        'medium'
        >>> cfg_get({"agent": "oops_a_string"}, "agent", "reasoning_effort", default="low")
        'low'
        >>> cfg_get(None, "anything", default=42)
        42
        >>> cfg_get({"a": {"b": False}}, "a", "b", default=True)
        False
    """
    if not isinstance(cfg, dict):
        return default

    cursor: Any = cfg
    for segment in keys:
        # Descend only through real dicts that actually contain the segment.
        can_descend = isinstance(cursor, dict) and segment in cursor
        if not can_descend:
            return default
        cursor = cursor[segment]
    return cursor
|
||||
|
||||
|
||||
|
||||
def read_raw_config() -> Dict[str, Any]:
|
||||
"""Read ~/.hermes/config.yaml as-is, without merging defaults or migrating.
|
||||
@@ -3311,25 +3733,62 @@ def read_raw_config() -> Dict[str, Any]:
|
||||
be parsed. Use this for lightweight config reads where you just need a
|
||||
single value and don't want the overhead of ``load_config()``'s deep-merge
|
||||
+ migration pipeline.
|
||||
|
||||
Cached on the config file's (mtime_ns, size) — same strategy as
|
||||
``load_config()``. Returns a deepcopy on every call since some callers
|
||||
mutate the result before passing to ``save_config()``.
|
||||
"""
|
||||
try:
|
||||
config_path = get_config_path()
|
||||
if config_path.exists():
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
return yaml.safe_load(f) or {}
|
||||
st = config_path.stat()
|
||||
cache_key = (st.st_mtime_ns, st.st_size)
|
||||
except (FileNotFoundError, OSError):
|
||||
return {}
|
||||
|
||||
path_key = str(config_path)
|
||||
cached = _RAW_CONFIG_CACHE.get(path_key)
|
||||
if cached is not None and cached[:2] == cache_key:
|
||||
return copy.deepcopy(cached[2])
|
||||
|
||||
try:
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
data = yaml.safe_load(f) or {}
|
||||
except Exception:
|
||||
pass
|
||||
return {}
|
||||
return {}
|
||||
|
||||
if not isinstance(data, dict):
|
||||
data = {}
|
||||
_RAW_CONFIG_CACHE[path_key] = (cache_key[0], cache_key[1], copy.deepcopy(data))
|
||||
return data
|
||||
|
||||
|
||||
def load_config() -> Dict[str, Any]:
|
||||
"""Load configuration from ~/.hermes/config.yaml."""
|
||||
"""Load configuration from ~/.hermes/config.yaml.
|
||||
|
||||
Cached on the config file's (mtime_ns, size). Returns a deepcopy of
|
||||
the cached value when unchanged, since most call sites mutate the
|
||||
result (e.g. ``cfg["model"]["default"] = ...`` before ``save_config``).
|
||||
The cache is keyed on ``str(config_path)`` so profile switches
|
||||
(which change ``HERMES_HOME`` and therefore ``get_config_path()``)
|
||||
don't collide.
|
||||
"""
|
||||
ensure_hermes_home()
|
||||
config_path = get_config_path()
|
||||
|
||||
path_key = str(config_path)
|
||||
|
||||
try:
|
||||
st = config_path.stat()
|
||||
cache_key: Optional[Tuple[int, int]] = (st.st_mtime_ns, st.st_size)
|
||||
except FileNotFoundError:
|
||||
cache_key = None
|
||||
|
||||
cached = _LOAD_CONFIG_CACHE.get(path_key)
|
||||
if cached is not None and cache_key is not None and cached[:2] == cache_key:
|
||||
return copy.deepcopy(cached[2])
|
||||
|
||||
config = copy.deepcopy(DEFAULT_CONFIG)
|
||||
|
||||
if config_path.exists():
|
||||
|
||||
if cache_key is not None:
|
||||
try:
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
user_config = yaml.safe_load(f) or {}
|
||||
@@ -3347,20 +3806,26 @@ def load_config() -> Dict[str, Any]:
|
||||
|
||||
normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))
|
||||
expanded = _expand_env_vars(normalized)
|
||||
_LAST_EXPANDED_CONFIG_BY_PATH[str(config_path)] = copy.deepcopy(expanded)
|
||||
_LAST_EXPANDED_CONFIG_BY_PATH[path_key] = copy.deepcopy(expanded)
|
||||
if cache_key is not None:
|
||||
_LOAD_CONFIG_CACHE[path_key] = (cache_key[0], cache_key[1], copy.deepcopy(expanded))
|
||||
else:
|
||||
_LOAD_CONFIG_CACHE.pop(path_key, None)
|
||||
return expanded
|
||||
|
||||
|
||||
_SECURITY_COMMENT = """
|
||||
# ── Security ──────────────────────────────────────────────────────────
|
||||
# API keys, tokens, and passwords are redacted from tool output by default.
|
||||
# Set to false to see full values (useful for debugging auth issues).
|
||||
# Secret redaction is OFF by default — tool output (terminal stdout,
|
||||
# read_file results, web content) passes through unmodified. Set
|
||||
# redact_secrets to true to mask strings that look like API keys, tokens,
|
||||
# and passwords before they enter the model context and logs.
|
||||
# tirith pre-exec scanning is enabled by default when the tirith binary
|
||||
# is available. Configure via security.tirith_* keys or env vars
|
||||
# (TIRITH_ENABLED, TIRITH_BIN, TIRITH_TIMEOUT, TIRITH_FAIL_OPEN).
|
||||
#
|
||||
# security:
|
||||
# redact_secrets: false
|
||||
# redact_secrets: true
|
||||
# tirith_enabled: true
|
||||
# tirith_path: "tirith"
|
||||
# tirith_timeout: 5
|
||||
@@ -3393,11 +3858,11 @@ _FALLBACK_COMMENT = """
|
||||
|
||||
_COMMENTED_SECTIONS = """
|
||||
# ── Security ──────────────────────────────────────────────────────────
|
||||
# API keys, tokens, and passwords are redacted from tool output by default.
|
||||
# Set to false to see full values (useful for debugging auth issues).
|
||||
# Secret redaction is OFF by default. Set to true to mask strings that
|
||||
# look like API keys, tokens, and passwords in tool output and logs.
|
||||
#
|
||||
# security:
|
||||
# redact_secrets: false
|
||||
# redact_secrets: true
|
||||
|
||||
# ── Fallback Model ────────────────────────────────────────────────────
|
||||
# Automatic provider failover when primary is unavailable.
|
||||
@@ -3448,7 +3913,12 @@ def save_config(config: Dict[str, Any]):
|
||||
if not sec or sec.get("redact_secrets") is None:
|
||||
parts.append(_SECURITY_COMMENT)
|
||||
fb = normalized.get("fallback_model", {})
|
||||
if not fb or not isinstance(fb, dict) or not (fb.get("provider") and fb.get("model")):
|
||||
fb_is_valid = False
|
||||
if isinstance(fb, list):
|
||||
fb_is_valid = any(isinstance(e, dict) and e.get("provider") and e.get("model") for e in fb)
|
||||
elif isinstance(fb, dict):
|
||||
fb_is_valid = bool(fb.get("provider") and fb.get("model"))
|
||||
if not fb_is_valid:
|
||||
parts.append(_FALLBACK_COMMENT)
|
||||
|
||||
atomic_yaml_write(
|
||||
@@ -3517,18 +3987,27 @@ def _sanitize_env_lines(lines: list) -> list:
|
||||
|
||||
# Detect concatenated KEY=VALUE pairs on one line.
|
||||
# Search for known KEY= patterns at any position in the line.
|
||||
split_positions = []
|
||||
# We collect full needle ranges so we can drop matches that are
|
||||
# fully contained within a longer overlapping needle. Without this,
|
||||
# suffix collisions corrupt the file: e.g. LM_API_KEY= inside
|
||||
# GLM_API_KEY= would otherwise split the line into "G\nLM_API_KEY=...".
|
||||
match_ranges: list[tuple[int, int]] = []
|
||||
for key_name in known_keys:
|
||||
needle = key_name + "="
|
||||
idx = stripped.find(needle)
|
||||
while idx >= 0:
|
||||
split_positions.append(idx)
|
||||
match_ranges.append((idx, idx + len(needle)))
|
||||
idx = stripped.find(needle, idx + len(needle))
|
||||
|
||||
split_positions = sorted({
|
||||
s for s, e in match_ranges
|
||||
if not any(
|
||||
s2 <= s and e2 >= e and (s2, e2) != (s, e)
|
||||
for s2, e2 in match_ranges
|
||||
)
|
||||
})
|
||||
|
||||
if len(split_positions) > 1:
|
||||
split_positions.sort()
|
||||
# Deduplicate (shouldn't happen, but be safe)
|
||||
split_positions = sorted(set(split_positions))
|
||||
for i, pos in enumerate(split_positions):
|
||||
end = split_positions[i + 1] if i + 1 < len(split_positions) else len(stripped)
|
||||
part = stripped[pos:end].strip()
|
||||
@@ -3574,7 +4053,7 @@ def sanitize_env_file() -> int:
|
||||
f.writelines(sanitized)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_path, env_path)
|
||||
atomic_replace(tmp_path, env_path)
|
||||
except BaseException:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
@@ -3637,7 +4116,7 @@ def save_env_value(key: str, value: str):
|
||||
value = _check_non_ascii_credential(key, value)
|
||||
ensure_hermes_home()
|
||||
env_path = get_env_path()
|
||||
|
||||
|
||||
# On Windows, open() defaults to the system locale (cp1252) which can
|
||||
# cause OSError errno 22 on UTF-8 .env files.
|
||||
read_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {}
|
||||
@@ -3649,7 +4128,7 @@ def save_env_value(key: str, value: str):
|
||||
lines = f.readlines()
|
||||
# Sanitize on every read: split concatenated keys, drop stale placeholders
|
||||
lines = _sanitize_env_lines(lines)
|
||||
|
||||
|
||||
# Find and update or append
|
||||
found = False
|
||||
for i, line in enumerate(lines):
|
||||
@@ -3657,7 +4136,7 @@ def save_env_value(key: str, value: str):
|
||||
lines[i] = f"{key}={value}\n"
|
||||
found = True
|
||||
break
|
||||
|
||||
|
||||
if not found:
|
||||
# Ensure there's a newline at the end of the file before appending
|
||||
if lines and not lines[-1].endswith("\n"):
|
||||
@@ -3677,7 +4156,7 @@ def save_env_value(key: str, value: str):
|
||||
f.writelines(lines)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_path, env_path)
|
||||
atomic_replace(tmp_path, env_path)
|
||||
# Restore original permissions before _secure_file may tighten them.
|
||||
if original_mode is not None:
|
||||
try:
|
||||
@@ -3733,7 +4212,7 @@ def remove_env_value(key: str) -> bool:
|
||||
f.writelines(new_lines)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_path, env_path)
|
||||
atomic_replace(tmp_path, env_path)
|
||||
if original_mode is not None:
|
||||
try:
|
||||
os.chmod(env_path, original_mode)
|
||||
@@ -3820,12 +4299,13 @@ def get_env_value(key: str) -> Optional[str]:
|
||||
# =============================================================================
|
||||
|
||||
def redact_key(key: str) -> str:
|
||||
"""Redact an API key for display."""
|
||||
if not key:
|
||||
return color("(not set)", Colors.DIM)
|
||||
if len(key) < 12:
|
||||
return "***"
|
||||
return key[:4] + "..." + key[-4:]
|
||||
"""Redact an API key for display.
|
||||
|
||||
Thin wrapper over :func:`agent.redact.mask_secret` — preserves the
|
||||
"(not set)" placeholder in dim color for the empty case.
|
||||
"""
|
||||
from agent.redact import mask_secret
|
||||
return mask_secret(key, empty=color("(not set)", Colors.DIM))
|
||||
|
||||
|
||||
def show_config():
|
||||
@@ -3905,6 +4385,9 @@ def show_config():
|
||||
print(f" Daytona image: {terminal.get('daytona_image', 'nikolaik/python-nodejs:python3.11-nodejs20')}")
|
||||
daytona_key = get_env_value('DAYTONA_API_KEY')
|
||||
print(f" API key: {'configured' if daytona_key else '(not set)'}")
|
||||
elif terminal.get('backend') == 'vercel_sandbox':
|
||||
print(f" Vercel runtime: {terminal.get('vercel_runtime', 'node24')}")
|
||||
print(f" Vercel auth: {'configured' if get_env_value('VERCEL_OIDC_TOKEN') or (get_env_value('VERCEL_TOKEN') and get_env_value('VERCEL_PROJECT_ID') and get_env_value('VERCEL_TEAM_ID')) else '(not set)'}")
|
||||
elif terminal.get('backend') == 'ssh':
|
||||
ssh_host = get_env_value('TERMINAL_SSH_HOST')
|
||||
ssh_user = get_env_value('TERMINAL_SSH_USER')
|
||||
@@ -4062,15 +4545,11 @@ def set_config_value(key: str, value: str):
|
||||
except Exception:
|
||||
user_config = {}
|
||||
|
||||
# Handle nested keys (e.g., "tts.provider")
|
||||
parts = key.split('.')
|
||||
current = user_config
|
||||
|
||||
for part in parts[:-1]:
|
||||
if part not in current or not isinstance(current.get(part), dict):
|
||||
current[part] = {}
|
||||
current = current[part]
|
||||
|
||||
# Handle nested keys (e.g., "tts.provider") including numeric list
|
||||
# indices (e.g., "custom_providers.0.api_key"). Delegates to
|
||||
# _set_nested which preserves list-typed nodes; before #17876 the
|
||||
# inline navigation here silently overwrote lists with dicts.
|
||||
|
||||
# Convert value to appropriate type
|
||||
if value.lower() in ('true', 'yes', 'on'):
|
||||
value = True
|
||||
@@ -4080,8 +4559,8 @@ def set_config_value(key: str, value: str):
|
||||
value = int(value)
|
||||
elif value.replace('.', '', 1).isdigit():
|
||||
value = float(value)
|
||||
|
||||
current[parts[-1]] = value
|
||||
|
||||
_set_nested(user_config, key, value)
|
||||
|
||||
# Write only user config back (not the full merged defaults)
|
||||
ensure_hermes_home()
|
||||
@@ -4097,7 +4576,9 @@ def set_config_value(key: str, value: str):
|
||||
"terminal.singularity_image": "TERMINAL_SINGULARITY_IMAGE",
|
||||
"terminal.modal_image": "TERMINAL_MODAL_IMAGE",
|
||||
"terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE",
|
||||
"terminal.vercel_runtime": "TERMINAL_VERCEL_RUNTIME",
|
||||
"terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
|
||||
"terminal.docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
|
||||
"terminal.cwd": "TERMINAL_CWD",
|
||||
"terminal.timeout": "TERMINAL_TIMEOUT",
|
||||
"terminal.sandbox_dir": "TERMINAL_SANDBOX_DIR",
|
||||
|
||||
@@ -0,0 +1,244 @@
|
||||
"""CLI subcommand: `hermes curator <subcommand>`.
|
||||
|
||||
Thin shell around agent/curator.py and tools/skill_usage.py. Renders a status
|
||||
table, triggers a run, pauses/resumes, and pins/unpins skills.
|
||||
|
||||
This module intentionally has no side effects at import time — main.py wires
|
||||
the argparse subparsers on demand.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def _fmt_ts(ts: Optional[str]) -> str:
|
||||
if not ts:
|
||||
return "never"
|
||||
try:
|
||||
dt = datetime.fromisoformat(ts)
|
||||
except (TypeError, ValueError):
|
||||
return str(ts)
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=timezone.utc)
|
||||
delta = datetime.now(timezone.utc) - dt
|
||||
secs = int(delta.total_seconds())
|
||||
if secs < 60:
|
||||
return f"{secs}s ago"
|
||||
if secs < 3600:
|
||||
return f"{secs // 60}m ago"
|
||||
if secs < 86400:
|
||||
return f"{secs // 3600}h ago"
|
||||
return f"{secs // 86400}d ago"
|
||||
|
||||
|
||||
def _cmd_status(args) -> int:
    """Print the curator's state and a summary of agent-created skills.

    Reads the persisted curator state and the configured interval and
    thresholds, then lists agent-created skills bucketed by state, the
    pinned ones, and the five active skills with the oldest activity.

    Args:
        args: Parsed argparse namespace (unused).

    Returns:
        Always ``0``.
    """
    from agent import curator
    from tools import skill_usage

    snapshot = curator.load_state()
    is_on = curator.is_enabled()
    is_paused = snapshot.get("paused", False)
    previous_run = snapshot.get("last_run_at")
    previous_summary = snapshot.get("last_run_summary") or "(none)"
    total_runs = snapshot.get("run_count", 0)

    # A paused curator reports PAUSED whether or not it is enabled.
    if is_paused:
        headline = "PAUSED"
    elif is_on:
        headline = "ENABLED"
    else:
        headline = "DISABLED"

    print(f"curator: {headline}")
    print(f" runs: {total_runs}")
    print(f" last run: {_fmt_ts(previous_run)}")
    print(f" last summary: {previous_summary}")
    report_path = snapshot.get("last_report_path")
    if report_path:
        print(f" last report: {report_path}")

    hours = curator.get_interval_hours()
    # Whole multiples of a day read better as days.
    if hours % 24 == 0 and hours >= 24:
        every = f"{hours // 24}d"
    else:
        every = f"{hours}h"
    print(f" interval: every {every}")
    print(f" stale after: {curator.get_stale_after_days()}d unused")
    print(f" archive after: {curator.get_archive_after_days()}d unused")

    rows = skill_usage.agent_created_report()
    if not rows:
        print("\nno agent-created skills")
        return 0

    groups = {"active": [], "stale": [], "archived": []}
    for row in rows:
        groups.setdefault(row.get("state", "active"), []).append(row)
    pinned_names = [row["name"] for row in rows if row.get("pinned")]

    print(f"\nagent-created skills: {len(rows)} total")
    for label in ("active", "stale", "archived"):
        members = groups.get(label, [])
        print(f" {label:10s} {len(members)}")

    if pinned_names:
        print(f"\npinned ({len(pinned_names)}): {', '.join(pinned_names)}")

    # Views and edits count as activity, so a skill that was just viewed or
    # patched is not reported as never used.
    def _activity_key(row):
        return row.get("last_activity_at") or row.get("created_at") or ""

    oldest = sorted(groups.get("active", []), key=_activity_key)[:5]
    if oldest:
        print("\nleast recently active (top 5):")
        for row in oldest:
            when = _fmt_ts(row.get("last_activity_at"))
            columns = (
                f" {row['name']:40s} ",
                f"activity={row.get('activity_count', 0):3d} ",
                f"use={row.get('use_count', 0):3d} ",
                f"view={row.get('view_count', 0):3d} ",
                f"patches={row.get('patch_count', 0):3d} ",
                f"last_activity={when}",
            )
            print("".join(columns))

    return 0
|
||||
|
||||
|
||||
def _cmd_run(args) -> int:
    """Trigger a curator review pass and report the automatic transitions.

    Args:
        args: Parsed argparse namespace; ``args.synchronous`` chooses
            between waiting for the review and returning immediately.

    Returns:
        ``1`` when the curator is disabled, otherwise ``0``.
    """
    from agent import curator

    if not curator.is_enabled():
        print("curator: disabled via config; enable with `curator.enabled: true`")
        return 1

    print("curator: running review pass...")

    def _echo(msg: str) -> None:
        # Relay each summary message from the review straight to stdout.
        print(msg)

    wait_for_llm = args.synchronous
    outcome = curator.run_curator_review(
        on_summary=_echo,
        synchronous=bool(wait_for_llm),
    )

    transitions = outcome.get("auto_transitions", {})
    if transitions:
        counters = (
            f"auto: checked={transitions.get('checked', 0)} ",
            f"stale={transitions.get('marked_stale', 0)} ",
            f"archived={transitions.get('archived', 0)} ",
            f"reactivated={transitions.get('reactivated', 0)}",
        )
        print("".join(counters))

    if not wait_for_llm:
        print("llm pass running in background — check `hermes curator status` later")
    return 0
|
||||
|
||||
|
||||
def _cmd_pause(args) -> int:
    """Mark the curator as paused and confirm on stdout.

    Args:
        args: Parsed argparse namespace (unused).

    Returns:
        Always ``0``.
    """
    from agent import curator

    pause_requested = True
    curator.set_paused(pause_requested)
    print("curator: paused")
    return 0
|
||||
|
||||
|
||||
def _cmd_resume(args) -> int:
    """Clear the curator's paused flag and confirm on stdout.

    Args:
        args: Parsed argparse namespace (unused).

    Returns:
        Always ``0``.
    """
    from agent import curator

    pause_requested = False
    curator.set_paused(pause_requested)
    print("curator: resumed")
    return 0
|
||||
|
||||
|
||||
def _cmd_pin(args) -> int:
    """Pin an agent-created skill.

    Args:
        args: Parsed argparse namespace; ``args.skill`` is the skill name.

    Returns:
        ``0`` once the skill is pinned, ``1`` when the skill is not
        agent-created and is therefore refused.
    """
    from tools import skill_usage

    skill = args.skill
    if skill_usage.is_agent_created(skill):
        skill_usage.set_pinned(skill, True)
        print(f"curator: pinned '{skill}' (will bypass auto-transitions)")
        return 0

    refusal = (
        f"curator: '{skill}' is bundled or hub-installed — cannot pin "
        "(only agent-created skills participate in curation)"
    )
    print(refusal)
    return 1
|
||||
|
||||
|
||||
def _cmd_unpin(args) -> int:
    """Remove the pin from an agent-created skill.

    Args:
        args: Parsed argparse namespace; ``args.skill`` is the skill name.

    Returns:
        ``0`` once the skill is unpinned, ``1`` when the skill is not
        agent-created and is therefore refused.
    """
    from tools import skill_usage

    skill = args.skill
    if skill_usage.is_agent_created(skill):
        skill_usage.set_pinned(skill, False)
        print(f"curator: unpinned '{skill}'")
        return 0

    refusal = (
        f"curator: '{skill}' is bundled or hub-installed — "
        "there's nothing to unpin (curator only tracks agent-created skills)"
    )
    print(refusal)
    return 1
|
||||
|
||||
|
||||
def _cmd_restore(args) -> int:
    """Ask ``skill_usage`` to restore a skill and relay its message.

    Args:
        args: Parsed argparse namespace; ``args.skill`` is the skill name.

    Returns:
        ``0`` when ``restore_skill`` reports success, otherwise ``1``.
    """
    from tools import skill_usage

    succeeded, detail = skill_usage.restore_skill(args.skill)
    print(f"curator: {detail}")
    if succeeded:
        return 0
    return 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# argparse wiring (called from hermes_cli.main)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def register_cli(parent: argparse.ArgumentParser) -> None:
    """Wire the ``curator`` subcommands onto *parent*.

    *parent* is the parser that main.py obtains from
    ``subparsers.add_parser("curator", ...)``. Each subcommand stores its
    handler under the ``func`` default. With no subcommand given, ``func``
    prints *parent*'s help and yields exit code 0.
    """

    def _show_help(_args) -> int:
        # Default handler used when no subcommand was selected.
        parent.print_help()
        return 0

    parent.set_defaults(func=_show_help)
    subs = parent.add_subparsers(dest="curator_command")

    def _add(name: str, summary: str, handler, *, takes_skill: bool = False):
        # Create one subparser, optionally with the positional skill name,
        # and bind its handler.
        sub = subs.add_parser(name, help=summary)
        if takes_skill:
            sub.add_argument("skill", help="Skill name")
        sub.set_defaults(func=handler)
        return sub

    # Registration order is the order shown in --help output.
    _add("status", "Show curator status and skill stats", _cmd_status)

    run_parser = _add("run", "Trigger a curator review now", _cmd_run)
    run_parser.add_argument(
        "--sync",
        "--synchronous",
        dest="synchronous",
        action="store_true",
        help="Wait for the LLM review pass to finish (default: background thread)",
    )

    _add("pause", "Pause the curator until resumed", _cmd_pause)
    _add("resume", "Resume a paused curator", _cmd_resume)
    _add(
        "pin",
        "Pin a skill so the curator never auto-transitions it",
        _cmd_pin,
        takes_skill=True,
    )
    _add("unpin", "Unpin a skill", _cmd_unpin, takes_skill=True)
    _add("restore", "Restore an archived skill", _cmd_restore, takes_skill=True)
|
||||
|
||||
|
||||
def cli_main(argv=None) -> int:
    """Run the curator CLI standalone (also usable by hermes_cli.main fallthrough).

    *argv* is passed straight to ``parse_args``; ``None`` lets argparse read
    ``sys.argv``.

    Returns the selected handler's result coerced to ``int``, with a falsy
    result mapped to 0.
    """
    parser = argparse.ArgumentParser(prog="hermes curator")
    register_cli(parser)

    namespace = parser.parse_args(argv)
    handler = getattr(namespace, "func", None)
    if handler is not None:
        return int(handler(namespace) or 0)

    # No handler bound on the namespace: show usage and exit cleanly.
    parser.print_help()
    return 0
|
||||
|
||||
|
||||
# Script entry point: exit with the status code returned by cli_main().
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(cli_main())
|
||||
+2
-2
@@ -7,7 +7,6 @@ Currently supports:
|
||||
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
@@ -18,6 +17,7 @@ from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from utils import atomic_replace
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -79,7 +79,7 @@ def _save_pending(entries: list[dict]) -> None:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp = path.with_suffix(".json.tmp")
|
||||
tmp.write_text(json.dumps(entries, indent=2), encoding="utf-8")
|
||||
os.replace(tmp, path)
|
||||
atomic_replace(tmp, path)
|
||||
except OSError:
|
||||
# Non-fatal — worst case the user has to run ``hermes debug delete``
|
||||
# manually.
|
||||
|
||||
@@ -13,7 +13,6 @@ automatically.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
+172
-22
@@ -8,6 +8,7 @@ import os
|
||||
import sys
|
||||
import subprocess
|
||||
import shutil
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_cli.config import get_project_root, get_hermes_home, get_env_path
|
||||
@@ -30,6 +31,7 @@ load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")
|
||||
|
||||
from hermes_cli.colors import Colors, color
|
||||
from hermes_cli.models import _HERMES_USER_AGENT
|
||||
from hermes_cli.vercel_auth import describe_vercel_auth
|
||||
from hermes_constants import OPENROUTER_MODELS_URL
|
||||
from utils import base_url_host_matches
|
||||
|
||||
@@ -57,6 +59,7 @@ _PROVIDER_ENV_HINTS = (
|
||||
"OPENCODE_ZEN_API_KEY",
|
||||
"OPENCODE_GO_API_KEY",
|
||||
"XIAOMI_API_KEY",
|
||||
"TOKENHUB_API_KEY",
|
||||
)
|
||||
|
||||
|
||||
@@ -75,6 +78,14 @@ def _system_package_install_cmd(pkg: str) -> str:
|
||||
return f"sudo apt install {pkg}"
|
||||
|
||||
|
||||
def _safe_which(cmd: str) -> str | None:
|
||||
"""shutil.which wrapper resilient to platform monkeypatching in tests."""
|
||||
try:
|
||||
return shutil.which(cmd)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _termux_browser_setup_steps(node_installed: bool) -> list[str]:
|
||||
steps: list[str] = []
|
||||
step = 1
|
||||
@@ -292,15 +303,23 @@ def run_doctor(args):
|
||||
|
||||
known_providers: set = set()
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
from hermes_cli.auth import (
|
||||
PROVIDER_REGISTRY,
|
||||
resolve_provider as _resolve_auth_provider,
|
||||
)
|
||||
known_providers = set(PROVIDER_REGISTRY.keys()) | {"openrouter", "custom", "auto"}
|
||||
except Exception:
|
||||
_resolve_auth_provider = None
|
||||
pass
|
||||
try:
|
||||
from hermes_cli.config import get_compatible_custom_providers as _compatible_custom_providers
|
||||
from hermes_cli.providers import resolve_provider_full as _resolve_provider_full
|
||||
from hermes_cli.providers import (
|
||||
normalize_provider as _normalize_catalog_provider,
|
||||
resolve_provider_full as _resolve_provider_full,
|
||||
)
|
||||
except Exception:
|
||||
_compatible_custom_providers = None
|
||||
_normalize_catalog_provider = None
|
||||
_resolve_provider_full = None
|
||||
|
||||
custom_providers = []
|
||||
@@ -320,17 +339,43 @@ def run_doctor(args):
|
||||
if name:
|
||||
known_providers.add("custom:" + name.lower().replace(" ", "-"))
|
||||
|
||||
canonical_provider = provider
|
||||
valid_provider_ids = set(known_providers)
|
||||
provider_ids_to_accept = {provider} if provider else set()
|
||||
if _normalize_catalog_provider is not None:
|
||||
for known_provider in known_providers:
|
||||
try:
|
||||
valid_provider_ids.add(_normalize_catalog_provider(known_provider))
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
runtime_provider = provider
|
||||
if (
|
||||
provider
|
||||
and _resolve_auth_provider is not None
|
||||
and provider not in ("auto", "custom")
|
||||
):
|
||||
try:
|
||||
runtime_provider = _resolve_auth_provider(provider)
|
||||
provider_ids_to_accept.add(runtime_provider)
|
||||
except Exception:
|
||||
runtime_provider = provider
|
||||
|
||||
catalog_provider = provider
|
||||
if (
|
||||
provider
|
||||
and _resolve_provider_full is not None
|
||||
and provider not in ("auto", "custom")
|
||||
):
|
||||
provider_def = _resolve_provider_full(provider, user_providers, custom_providers)
|
||||
canonical_provider = provider_def.id if provider_def is not None else None
|
||||
catalog_provider = provider_def.id if provider_def is not None else None
|
||||
if catalog_provider is not None:
|
||||
provider_ids_to_accept.add(catalog_provider)
|
||||
|
||||
if provider and provider != "auto":
|
||||
if canonical_provider is None or (known_providers and canonical_provider not in known_providers):
|
||||
if catalog_provider is None or (
|
||||
known_providers
|
||||
and not (provider_ids_to_accept & valid_provider_ids)
|
||||
):
|
||||
known_list = ", ".join(sorted(known_providers)) if known_providers else "(unavailable)"
|
||||
check_fail(
|
||||
f"model.provider '{provider_raw}' is not a recognised provider",
|
||||
@@ -343,7 +388,24 @@ def run_doctor(args):
|
||||
)
|
||||
|
||||
# Warn if model is set to a provider-prefixed name on a provider that doesn't use them
|
||||
if default_model and "/" in default_model and canonical_provider and canonical_provider not in ("openrouter", "custom", "auto", "ai-gateway", "kilocode", "opencode-zen", "huggingface", "nous"):
|
||||
provider_for_policy = runtime_provider or catalog_provider
|
||||
providers_accepting_vendor_slugs = {
|
||||
"openrouter",
|
||||
"custom",
|
||||
"auto",
|
||||
"ai-gateway",
|
||||
"kilocode",
|
||||
"opencode-zen",
|
||||
"huggingface",
|
||||
"lmstudio",
|
||||
"nous",
|
||||
}
|
||||
if (
|
||||
default_model
|
||||
and "/" in default_model
|
||||
and provider_for_policy
|
||||
and provider_for_policy not in providers_accepting_vendor_slugs
|
||||
):
|
||||
check_warn(
|
||||
f"model.default '{default_model}' uses a vendor/model slug but provider is '{provider_raw}'",
|
||||
"(vendor-prefixed slugs belong to aggregators like openrouter)",
|
||||
@@ -359,20 +421,24 @@ def run_doctor(args):
|
||||
# own env-var checks elsewhere in doctor, and get_auth_status()
|
||||
# returns a bare {logged_in: False} for anything it doesn't
|
||||
# explicitly dispatch, which would produce false positives.
|
||||
if canonical_provider and canonical_provider not in ("auto", "custom", "openrouter"):
|
||||
if runtime_provider and runtime_provider not in ("auto", "custom", "openrouter"):
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY, get_auth_status
|
||||
pconfig = PROVIDER_REGISTRY.get(canonical_provider)
|
||||
pconfig = PROVIDER_REGISTRY.get(runtime_provider)
|
||||
if pconfig and getattr(pconfig, "auth_type", "") == "api_key":
|
||||
status = get_auth_status(canonical_provider) or {}
|
||||
configured = bool(status.get("configured") or status.get("logged_in") or status.get("api_key"))
|
||||
status = get_auth_status(runtime_provider) or {}
|
||||
configured = bool(
|
||||
status.get("configured")
|
||||
or status.get("logged_in")
|
||||
or status.get("api_key")
|
||||
)
|
||||
if not configured:
|
||||
check_fail(
|
||||
f"model.provider '{canonical_provider}' is set but no API key is configured",
|
||||
f"model.provider '{runtime_provider}' is set but no API key is configured",
|
||||
"(check ~/.hermes/.env or run 'hermes setup')",
|
||||
)
|
||||
issues.append(
|
||||
f"No credentials found for provider '{canonical_provider}'. "
|
||||
f"No credentials found for provider '{runtime_provider}'. "
|
||||
f"Run 'hermes setup' or set the provider's API key in {_DHH}/.env, "
|
||||
f"or switch providers with 'hermes config set model.provider <name>'"
|
||||
)
|
||||
@@ -481,6 +547,7 @@ def run_doctor(args):
|
||||
get_nous_auth_status,
|
||||
get_codex_auth_status,
|
||||
get_gemini_oauth_auth_status,
|
||||
get_minimax_oauth_auth_status,
|
||||
)
|
||||
|
||||
nous_status = get_nous_auth_status()
|
||||
@@ -510,13 +577,27 @@ def run_doctor(args):
|
||||
check_ok("Google Gemini OAuth", f"(logged in{suffix})")
|
||||
else:
|
||||
check_warn("Google Gemini OAuth", "(not logged in)")
|
||||
|
||||
minimax_status = get_minimax_oauth_auth_status()
|
||||
if minimax_status.get("logged_in"):
|
||||
region = minimax_status.get("region", "global")
|
||||
check_ok("MiniMax OAuth", f"(logged in, region={region})")
|
||||
else:
|
||||
check_warn("MiniMax OAuth", "(not logged in)")
|
||||
except Exception as e:
|
||||
check_warn("Auth provider status", f"(could not check: {e})")
|
||||
|
||||
if shutil.which("codex"):
|
||||
if _safe_which("codex"):
|
||||
check_ok("codex CLI")
|
||||
else:
|
||||
check_warn("codex CLI not found", "(required for openai-codex login)")
|
||||
# Native OAuth uses Hermes' own device-code flow — the Codex CLI is
|
||||
# only needed if you want to import existing tokens from
|
||||
# ~/.codex/auth.json. Downgrade to info so users running
|
||||
# `hermes auth openai-codex` aren't told they're missing something.
|
||||
check_info(
|
||||
"codex CLI not installed "
|
||||
"(optional — only required to import tokens from an existing Codex CLI login)"
|
||||
)
|
||||
|
||||
# =========================================================================
|
||||
# Check: Directory structure
|
||||
@@ -724,13 +805,13 @@ def run_doctor(args):
|
||||
print(color("◆ External Tools", Colors.CYAN, Colors.BOLD))
|
||||
|
||||
# Git
|
||||
if shutil.which("git"):
|
||||
if _safe_which("git"):
|
||||
check_ok("git")
|
||||
else:
|
||||
check_warn("git not found", "(optional)")
|
||||
|
||||
# ripgrep (optional, for faster file search)
|
||||
if shutil.which("rg"):
|
||||
if _safe_which("rg"):
|
||||
check_ok("ripgrep (rg)", "(faster file search)")
|
||||
else:
|
||||
check_warn("ripgrep (rg) not found", "(file search uses grep fallback)")
|
||||
@@ -739,7 +820,7 @@ def run_doctor(args):
|
||||
# Docker (optional)
|
||||
terminal_env = os.getenv("TERMINAL_ENV", "local")
|
||||
if terminal_env == "docker":
|
||||
if shutil.which("docker"):
|
||||
if _safe_which("docker"):
|
||||
# Check if docker daemon is running
|
||||
try:
|
||||
result = subprocess.run(["docker", "info"], capture_output=True, timeout=10)
|
||||
@@ -754,7 +835,7 @@ def run_doctor(args):
|
||||
check_fail("docker not found", "(required for TERMINAL_ENV=docker)")
|
||||
issues.append("Install Docker or change TERMINAL_ENV")
|
||||
else:
|
||||
if shutil.which("docker"):
|
||||
if _safe_which("docker"):
|
||||
check_ok("docker", "(optional)")
|
||||
else:
|
||||
if _is_termux():
|
||||
@@ -800,8 +881,52 @@ def run_doctor(args):
|
||||
check_fail("daytona SDK not installed", "(pip install daytona)")
|
||||
issues.append("Install daytona SDK: pip install daytona")
|
||||
|
||||
# Vercel Sandbox (if using vercel_sandbox backend)
|
||||
if terminal_env == "vercel_sandbox":
|
||||
runtime = os.getenv("TERMINAL_VERCEL_RUNTIME", "node24").strip() or "node24"
|
||||
from tools.terminal_tool import _SUPPORTED_VERCEL_RUNTIMES
|
||||
if runtime in _SUPPORTED_VERCEL_RUNTIMES:
|
||||
check_ok("Vercel runtime", f"({runtime})")
|
||||
else:
|
||||
supported = ", ".join(_SUPPORTED_VERCEL_RUNTIMES)
|
||||
check_fail("Vercel runtime unsupported", f"({runtime}; use {supported})")
|
||||
issues.append(f"Set TERMINAL_VERCEL_RUNTIME to one of: {supported}")
|
||||
|
||||
disk = os.getenv("TERMINAL_CONTAINER_DISK", "51200").strip()
|
||||
if disk in ("", "0", "51200"):
|
||||
check_ok("Vercel disk setting", "(uses platform default)")
|
||||
else:
|
||||
check_fail("Vercel custom disk unsupported", "(reset terminal.container_disk to 51200)")
|
||||
issues.append("Vercel Sandbox does not support custom container_disk; use the shared default 51200")
|
||||
|
||||
if importlib.util.find_spec("vercel") is not None:
|
||||
check_ok("vercel SDK", "(installed)")
|
||||
else:
|
||||
check_fail("vercel SDK not installed", "(pip install 'hermes-agent[vercel]')")
|
||||
issues.append("Install the Vercel optional dependency: pip install 'hermes-agent[vercel]'")
|
||||
|
||||
auth_status = describe_vercel_auth()
|
||||
if auth_status.ok:
|
||||
check_ok("Vercel auth", f"({auth_status.label})")
|
||||
elif auth_status.label.startswith("partial"):
|
||||
check_fail("Vercel auth incomplete", f"({auth_status.label})")
|
||||
issues.append("Set VERCEL_TOKEN, VERCEL_PROJECT_ID, and VERCEL_TEAM_ID together")
|
||||
else:
|
||||
check_fail("Vercel auth not configured", f"({auth_status.label})")
|
||||
issues.append(
|
||||
"Configure Vercel Sandbox auth with VERCEL_TOKEN, VERCEL_PROJECT_ID, and VERCEL_TEAM_ID"
|
||||
)
|
||||
for line in auth_status.detail_lines:
|
||||
check_info(f"Vercel auth {line}")
|
||||
|
||||
persistent = os.getenv("TERMINAL_CONTAINER_PERSISTENT", "true").lower() in ("1", "true", "yes", "on")
|
||||
if persistent:
|
||||
check_info("Vercel persistence: snapshot filesystem only; live processes do not survive sandbox recreation")
|
||||
else:
|
||||
check_info("Vercel persistence: ephemeral filesystem")
|
||||
|
||||
# Node.js + agent-browser (for browser automation tools)
|
||||
if shutil.which("node"):
|
||||
if _safe_which("node"):
|
||||
check_ok("Node.js")
|
||||
# Check if agent-browser is installed
|
||||
agent_browser_path = PROJECT_ROOT / "node_modules" / "agent-browser"
|
||||
@@ -827,7 +952,7 @@ def run_doctor(args):
|
||||
check_warn("Node.js not found", "(optional, needed for browser tools)")
|
||||
|
||||
# npm audit for all Node.js packages
|
||||
if shutil.which("npm"):
|
||||
if _safe_which("npm"):
|
||||
npm_dirs = [
|
||||
(PROJECT_ROOT, "Browser tools (agent-browser)"),
|
||||
(PROJECT_ROOT / "scripts" / "whatsapp-bridge", "WhatsApp bridge"),
|
||||
@@ -906,10 +1031,16 @@ def run_doctor(args):
|
||||
print(" Checking Anthropic API...", end="", flush=True)
|
||||
try:
|
||||
import httpx
|
||||
from agent.anthropic_adapter import _is_oauth_token, _COMMON_BETAS, _OAUTH_ONLY_BETAS
|
||||
from agent.anthropic_adapter import (
|
||||
_is_oauth_token,
|
||||
_COMMON_BETAS,
|
||||
_OAUTH_ONLY_BETAS,
|
||||
_CONTEXT_1M_BETA,
|
||||
)
|
||||
|
||||
headers = {"anthropic-version": "2023-06-01"}
|
||||
if _is_oauth_token(anthropic_key):
|
||||
is_oauth = _is_oauth_token(anthropic_key)
|
||||
if is_oauth:
|
||||
headers["Authorization"] = f"Bearer {anthropic_key}"
|
||||
headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS)
|
||||
else:
|
||||
@@ -919,6 +1050,25 @@ def run_doctor(args):
|
||||
headers=headers,
|
||||
timeout=10
|
||||
)
|
||||
# Reactive recovery: OAuth subscriptions that don't include 1M
|
||||
# context reject the request with 400 "long context beta is not
|
||||
# yet available for this subscription". Retry once with that
|
||||
# beta stripped so the doctor check doesn't falsely report the
|
||||
# Anthropic API as unreachable for those users.
|
||||
if (
|
||||
is_oauth
|
||||
and response.status_code == 400
|
||||
and "long context beta" in response.text.lower()
|
||||
and "not yet available" in response.text.lower()
|
||||
):
|
||||
headers["anthropic-beta"] = ",".join(
|
||||
[b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA] + list(_OAUTH_ONLY_BETAS)
|
||||
)
|
||||
response = httpx.get(
|
||||
"https://api.anthropic.com/v1/models",
|
||||
headers=headers,
|
||||
timeout=10,
|
||||
)
|
||||
if response.status_code == 200:
|
||||
print(f"\r {color('✓', Colors.GREEN)} Anthropic API ")
|
||||
elif response.status_code == 401:
|
||||
|
||||
+8
-6
@@ -33,12 +33,14 @@ def _get_git_commit(project_root: Path) -> str:
|
||||
|
||||
|
||||
def _redact(value: str) -> str:
|
||||
"""Redact all but first 4 and last 4 chars."""
|
||||
if not value:
|
||||
return ""
|
||||
if len(value) < 12:
|
||||
return "***"
|
||||
return value[:4] + "..." + value[-4:]
|
||||
"""Redact all but first 4 and last 4 chars.
|
||||
|
||||
Thin wrapper over :func:`agent.redact.mask_secret`. Returns ``""`` for
|
||||
an empty value (matches the historical behavior of this helper —
|
||||
``hermes dump`` formats empty values as blank, not as ``"(not set)"``).
|
||||
"""
|
||||
from agent.redact import mask_secret
|
||||
return mask_secret(value)
|
||||
|
||||
|
||||
def _gateway_status() -> str:
|
||||
|
||||
@@ -7,6 +7,7 @@ import sys
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from utils import atomic_replace
|
||||
|
||||
|
||||
# Env var name suffixes that indicate credential values. These are the
|
||||
@@ -127,7 +128,7 @@ def _sanitize_env_file_if_needed(path: Path) -> None:
|
||||
f.writelines(sanitized)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp, path)
|
||||
atomic_replace(tmp, path)
|
||||
except BaseException:
|
||||
try:
|
||||
os.unlink(tmp)
|
||||
|
||||
+198
-52
@@ -279,9 +279,11 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
|
||||
["wmic", "process", "get", "ProcessId,CommandLine", "/FORMAT:LIST"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
encoding="utf-8",
|
||||
errors="ignore",
|
||||
timeout=10,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
if result.returncode != 0 or result.stdout is None:
|
||||
return []
|
||||
current_cmd = ""
|
||||
for line in result.stdout.split("\n"):
|
||||
@@ -830,6 +832,22 @@ def _user_dbus_socket_path() -> Path:
|
||||
return Path(xdg) / "bus"
|
||||
|
||||
|
||||
def _user_systemd_private_socket_path() -> Path:
    """Build the path of the per-user systemd private socket.

    The runtime directory comes from ``XDG_RUNTIME_DIR``; when that is unset
    or empty, ``/run/user/<uid>`` is used. The path is returned whether or
    not the socket exists.
    """
    runtime_dir = os.environ.get("XDG_RUNTIME_DIR")
    if not runtime_dir:
        runtime_dir = f"/run/user/{os.getuid()}"
    return Path(runtime_dir, "systemd", "private")
|
||||
|
||||
|
||||
def _user_systemd_socket_ready() -> bool:
    """Report whether a user-scope systemd control socket is present.

    Either socket is sufficient: on some distros only the per-user systemd
    private socket exists and the D-Bus session bus socket is absent, yet
    ``systemctl --user`` can still work. Preflight checks therefore accept
    whichever one is found.
    """
    if _user_dbus_socket_path().exists():
        return True
    # No session bus socket; fall back to the systemd private socket.
    return _user_systemd_private_socket_path().exists()
|
||||
|
||||
|
||||
def _ensure_user_systemd_env() -> None:
|
||||
"""Ensure DBUS_SESSION_BUS_ADDRESS and XDG_RUNTIME_DIR are set for systemctl --user.
|
||||
|
||||
@@ -853,28 +871,29 @@ def _ensure_user_systemd_env() -> None:
|
||||
|
||||
|
||||
def _wait_for_user_dbus_socket(timeout: float = 3.0) -> bool:
|
||||
"""Poll for the user D-Bus socket to appear, up to ``timeout`` seconds.
|
||||
"""Poll for the user systemd runtime socket(s), up to ``timeout`` seconds.
|
||||
|
||||
Linger-enabled user@.service can take a second or two to spawn the socket
|
||||
after ``loginctl enable-linger`` runs. Returns True once the socket exists.
|
||||
Linger-enabled user@.service can take a second or two to spawn its control
|
||||
socket(s) after ``loginctl enable-linger`` runs. Returns True once either
|
||||
the user D-Bus socket or the per-user systemd private socket exists.
|
||||
"""
|
||||
import time
|
||||
|
||||
deadline = time.monotonic() + timeout
|
||||
while time.monotonic() < deadline:
|
||||
if _user_dbus_socket_path().exists():
|
||||
if _user_systemd_socket_ready():
|
||||
_ensure_user_systemd_env()
|
||||
return True
|
||||
time.sleep(0.2)
|
||||
return _user_dbus_socket_path().exists()
|
||||
return _user_systemd_socket_ready()
|
||||
|
||||
|
||||
def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
|
||||
"""Ensure ``systemctl --user`` will reach the user D-Bus session bus.
|
||||
"""Ensure ``systemctl --user`` will reach the user-scope systemd instance.
|
||||
|
||||
No-op when the bus socket is already there (the common case on desktops
|
||||
and linger-enabled servers). On fresh SSH sessions where the socket is
|
||||
missing:
|
||||
No-op when the user D-Bus socket or per-user systemd private socket is
|
||||
already there (the common case on desktops and linger-enabled servers). On
|
||||
fresh SSH sessions where both are missing:
|
||||
|
||||
* If linger is already enabled, wait briefly for user@.service to spawn
|
||||
the socket.
|
||||
@@ -888,8 +907,7 @@ def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
|
||||
systemd operations and surface the message to the user.
|
||||
"""
|
||||
_ensure_user_systemd_env()
|
||||
bus_path = _user_dbus_socket_path()
|
||||
if bus_path.exists():
|
||||
if _user_systemd_socket_ready():
|
||||
return
|
||||
|
||||
import getpass
|
||||
@@ -903,7 +921,7 @@ def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
|
||||
# Linger is on but socket still missing — unusual; fall through to error.
|
||||
_raise_user_systemd_unavailable(
|
||||
username,
|
||||
reason="User D-Bus socket is missing even though linger is enabled.",
|
||||
reason="User systemd control sockets are missing even though linger is enabled.",
|
||||
fix_hint=(
|
||||
f" systemctl start user@{os.getuid()}.service\n"
|
||||
" (may require sudo; try again after the command succeeds)"
|
||||
@@ -2350,7 +2368,11 @@ def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False):
|
||||
# Exit with code 1 if gateway fails to connect any platform,
|
||||
# so systemd Restart=on-failure will retry on transient errors
|
||||
verbosity = None if quiet else verbose
|
||||
success = asyncio.run(start_gateway(replace=replace, verbosity=verbosity))
|
||||
try:
|
||||
success = asyncio.run(start_gateway(replace=replace, verbosity=verbosity))
|
||||
except KeyboardInterrupt:
|
||||
print("\nGateway stopped.")
|
||||
return
|
||||
if not success:
|
||||
sys.exit(1)
|
||||
|
||||
@@ -2743,15 +2765,77 @@ _PLATFORMS = [
|
||||
],
|
||||
},
|
||||
]
|
||||
def _all_platforms() -> list[dict]:
    """Build the platform list shown in the setup menus.

    The result starts with shallow copies of the built-in ``_PLATFORMS``
    dicts. Every ``platform_registry`` entry whose name is not already a
    built-in key is then appended, adapted to the same dict shape, with the
    registry entry itself kept under ``_registry_entry``.

    Plugin discovery runs first so that bundled platform plugins (such as
    IRC, loaded via ``kind: platform``) show up in ``hermes setup gateway``
    without the gateway running. If the registry cannot be imported, only
    the built-ins are returned.
    """
    # Discovery is idempotent. Bundled platform plugins auto-load
    # unconditionally; user-installed ones under ~/.hermes/plugins/ still
    # need opt-in via ``plugins.enabled`` because they are untrusted code.
    try:
        from hermes_cli.plugins import discover_plugins

        discover_plugins()
    except Exception as e:
        logger.debug("plugin discovery failed during platform enumeration: %s", e)

    merged = list(map(dict, _PLATFORMS))
    builtin_keys = {item["key"] for item in merged}

    try:
        from gateway.platform_registry import platform_registry
    except Exception:
        return merged

    # Skip registry entries that a built-in already covers.
    merged.extend(
        {
            "key": reg.name,
            "label": reg.label,
            "emoji": reg.emoji,
            "token_var": reg.required_env[0] if reg.required_env else "",
            "install_hint": reg.install_hint,
            "_registry_entry": reg,
        }
        for reg in platform_registry.all_entries()
        if reg.name not in builtin_keys
    )
    return merged
|
||||
|
||||
|
||||
def _platform_status(platform: dict) -> str:
|
||||
"""Return a plain-text status string for a platform.
|
||||
|
||||
Returns uncolored text so it can safely be embedded in
|
||||
simple_term_menu items (ANSI codes break width calculation).
|
||||
curses menu items (ANSI codes break width calculation).
|
||||
"""
|
||||
token_var = platform["token_var"]
|
||||
entry = platform.get("_registry_entry")
|
||||
if entry is not None:
|
||||
configured = False
|
||||
# Prefer is_connected (checks both env and config.yaml) over
|
||||
# check_fn (typically just dependency / env presence).
|
||||
if entry.is_connected is not None:
|
||||
try:
|
||||
from gateway.config import PlatformConfig
|
||||
synthetic = PlatformConfig(enabled=True)
|
||||
configured = bool(entry.is_connected(synthetic))
|
||||
except Exception:
|
||||
configured = False
|
||||
if not configured:
|
||||
try:
|
||||
configured = bool(entry.check_fn())
|
||||
except Exception:
|
||||
configured = False
|
||||
return "configured" if configured else "not configured"
|
||||
|
||||
token_var = platform.get("token_var", "")
|
||||
if not token_var:
|
||||
return "not configured"
|
||||
val = get_env_value(token_var)
|
||||
if token_var == "WHATSAPP_ENABLED":
|
||||
if val and val.lower() == "true":
|
||||
@@ -2953,7 +3037,7 @@ def _setup_sms():
|
||||
def _setup_dingtalk():
|
||||
"""Configure DingTalk — QR scan (recommended) or manual credential entry."""
|
||||
from hermes_cli.setup import (
|
||||
prompt_choice, prompt_yes_no, print_info, print_success, print_warning,
|
||||
prompt_choice, prompt_yes_no, print_success, print_warning,
|
||||
)
|
||||
|
||||
dingtalk_platform = next(p for p in _PLATFORMS if p["key"] == "dingtalk")
|
||||
@@ -3277,6 +3361,12 @@ def _setup_weixin():
|
||||
print_warning(" Direct messages disabled.")
|
||||
|
||||
print()
|
||||
print_info(" Note: QR login connects an iLink bot identity (e.g. ...@im.bot), not a")
|
||||
print_info(" scriptable personal WeChat account. Ordinary WeChat groups typically cannot")
|
||||
print_info(" invite an @im.bot identity, and iLink does not deliver ordinary-group events")
|
||||
print_info(" to most bot accounts. The settings below only apply when iLink actually")
|
||||
print_info(" delivers group events for your account type — otherwise DM remains the only")
|
||||
print_info(" working channel regardless of this choice.")
|
||||
group_choices = [
|
||||
"Disable group chats (recommended)",
|
||||
"Allow all group chats",
|
||||
@@ -3290,12 +3380,12 @@ def _setup_weixin():
|
||||
elif group_idx == 1:
|
||||
save_env_value("WEIXIN_GROUP_POLICY", "open")
|
||||
save_env_value("WEIXIN_GROUP_ALLOWED_USERS", "")
|
||||
print_warning(" All group chats enabled.")
|
||||
print_warning(" All group chats enabled (only takes effect if iLink delivers group events).")
|
||||
else:
|
||||
allow_groups = prompt(" Allowed group chat IDs (comma-separated)", "", password=False).replace(" ", "")
|
||||
allow_groups = prompt(" Allowed group chat IDs (comma-separated, not member user IDs)", "", password=False).replace(" ", "")
|
||||
save_env_value("WEIXIN_GROUP_POLICY", "allowlist")
|
||||
save_env_value("WEIXIN_GROUP_ALLOWED_USERS", allow_groups)
|
||||
print_success(" Group allowlist saved.")
|
||||
print_success(" Group allowlist saved (only takes effect if iLink delivers group events).")
|
||||
|
||||
if user_id:
|
||||
print()
|
||||
@@ -3504,7 +3594,6 @@ def _setup_qqbot():
|
||||
method_idx = prompt_choice(" How would you like to set up QQ Bot?", method_choices, 0)
|
||||
|
||||
credentials = None
|
||||
used_qr = False
|
||||
|
||||
if method_idx == 0:
|
||||
# ── QR scan-to-configure ──
|
||||
@@ -3515,8 +3604,6 @@ def _setup_qqbot():
|
||||
print()
|
||||
print_warning(" QQ Bot setup cancelled.")
|
||||
return
|
||||
if credentials:
|
||||
used_qr = True
|
||||
if not credentials:
|
||||
print_info(" QR setup did not complete. Continuing with manual input.")
|
||||
|
||||
@@ -3706,6 +3793,71 @@ def _setup_signal():
|
||||
print_info(f" Groups: {'enabled' if get_env_value('SIGNAL_GROUP_ALLOWED_USERS') else 'disabled'}")
|
||||
|
||||
|
||||
def _builtin_setup_fn(key: str):
    """Return the interactive setup callable for a built-in platform key.

    Returns ``None`` when *key* has no built-in setup function.
    """
    # Imported at call time: ``hermes_cli.setup`` itself imports from this
    # module for the remaining bespoke flows, so a top-level import would be
    # circular.
    from hermes_cli import setup as setup_module

    handlers = {
        # Flows that live in hermes_cli.setup.
        "telegram": setup_module._setup_telegram,
        "discord": setup_module._setup_discord,
        "slack": setup_module._setup_slack,
        "matrix": setup_module._setup_matrix,
        "mattermost": setup_module._setup_mattermost,
        "bluebubbles": setup_module._setup_bluebubbles,
        "webhooks": setup_module._setup_webhooks,
        # Flows defined in this module.
        "signal": _setup_signal,
        "whatsapp": _setup_whatsapp,
        "weixin": _setup_weixin,
        "dingtalk": _setup_dingtalk,
        "feishu": _setup_feishu,
        "wecom": _setup_wecom,
        "qqbot": _setup_qqbot,
    }
    return handlers.get(key)
|
||||
def _configure_platform(platform: dict) -> None:
    """Run the interactive setup flow for one platform menu entry.

    The first match wins:
      1. ``setup_fn`` supplied by the plugin's registry entry.
      2. The built-in setup function for the platform key.
      3. ``_setup_standard_platform`` when the entry has a ``vars`` schema.
      4. Printed env-var / config hints for plugins with no setup helper.

    No plugin enable step happens here. Bundled platform plugins (e.g. IRC)
    auto-load; user-installed ones under ~/.hermes/plugins/ must already be
    in ``plugins.enabled`` to reach this menu.
    """
    registry_entry = platform.get("_registry_entry")

    plugin_setup = None if registry_entry is None else registry_entry.setup_fn
    if plugin_setup is not None:
        plugin_setup()
        return

    builtin_setup = _builtin_setup_fn(platform["key"])
    if builtin_setup is not None:
        builtin_setup()
    elif platform.get("vars"):
        _setup_standard_platform(platform)
    else:
        # Plugin without any setup helper: tell the user what to set by hand.
        label = platform.get("label", platform["key"])
        emoji = platform.get("emoji", "🔌")
        print()
        print(color(f" ─── {emoji} {label} Setup ───", Colors.CYAN))

        required = registry_entry.required_env if registry_entry else []
        if not required:
            print_info(f" Configure {label} in config.yaml under gateway.platforms.{platform['key']}")
        else:
            print_info(f" Set these env vars in ~/.hermes/.env: {', '.join(required)}")

        if platform.get("install_hint"):
            print_info(f" {platform['install_hint']}")
|
||||
|
||||
|
||||
def gateway_setup():
|
||||
"""Interactive setup for messaging platforms + gateway service."""
|
||||
if is_managed():
|
||||
@@ -3758,42 +3910,36 @@ def gateway_setup():
|
||||
print()
|
||||
print_header("Messaging Platforms")
|
||||
|
||||
menu_items = []
|
||||
for plat in _PLATFORMS:
|
||||
status = _platform_status(plat)
|
||||
menu_items.append(f"{plat['label']} ({status})")
|
||||
platforms = _all_platforms()
|
||||
|
||||
menu_items = [
|
||||
f"{p['emoji']} {p['label']} ({_platform_status(p)})"
|
||||
for p in platforms
|
||||
]
|
||||
menu_items.append("Done")
|
||||
|
||||
choice = prompt_choice("Select a platform to configure:", menu_items, len(menu_items) - 1)
|
||||
|
||||
if choice == len(_PLATFORMS):
|
||||
if choice == len(platforms):
|
||||
break
|
||||
|
||||
platform = _PLATFORMS[choice]
|
||||
|
||||
if platform["key"] == "whatsapp":
|
||||
_setup_whatsapp()
|
||||
elif platform["key"] == "signal":
|
||||
_setup_signal()
|
||||
elif platform["key"] == "weixin":
|
||||
_setup_weixin()
|
||||
elif platform["key"] == "dingtalk":
|
||||
_setup_dingtalk()
|
||||
elif platform["key"] == "feishu":
|
||||
_setup_feishu()
|
||||
elif platform["key"] == "qqbot":
|
||||
_setup_qqbot()
|
||||
elif platform["key"] == "wecom":
|
||||
_setup_wecom()
|
||||
else:
|
||||
_setup_standard_platform(platform)
|
||||
_configure_platform(platforms[choice])
|
||||
|
||||
# ── Post-setup: offer to install/restart gateway ──
|
||||
# Consider any platform (built-in or plugin) where the user has made
|
||||
# meaningful progress. ``_platform_status`` already handles plugin
|
||||
# entries via their check_fn and per-platform dual-states like
|
||||
# WhatsApp's "enabled, not paired".
|
||||
def _is_progress(status: str) -> bool:
|
||||
s = status.lower()
|
||||
return not (
|
||||
s == "not configured"
|
||||
or s.startswith("partially")
|
||||
or s.startswith("plugin disabled")
|
||||
)
|
||||
|
||||
any_configured = any(
|
||||
bool(get_env_value(p["token_var"]))
|
||||
for p in _PLATFORMS
|
||||
if p["key"] != "whatsapp"
|
||||
) or (get_env_value("WHATSAPP_ENABLED") or "").lower() == "true"
|
||||
_is_progress(_platform_status(p)) for p in _all_platforms()
|
||||
)
|
||||
|
||||
if any_configured:
|
||||
print()
|
||||
@@ -4231,4 +4377,4 @@ def _gateway_command_inner(args):
|
||||
if not supports_systemd_services() and not is_macos():
|
||||
print("Legacy unit migration only applies to systemd-based Linux hosts.")
|
||||
return
|
||||
remove_legacy_hermes_units(interactive=not yes, dry_run=dry_run)
|
||||
remove_legacy_hermes_units(interactive=not yes, dry_run=dry_run)
|
||||
+1
-2
@@ -19,9 +19,8 @@ format) lives there.
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
def hooks_command(args) -> None:
|
||||
|
||||
+462
-337
@@ -114,6 +114,12 @@ def _apply_profile_override() -> None:
|
||||
consume = 1
|
||||
break
|
||||
|
||||
# 1.5 If HERMES_HOME is already set and no explicit flag was given, trust it.
|
||||
# This lets child processes (relaunch, subprocess) inherit the parent's
|
||||
# profile choice without having to pass --profile again.
|
||||
if profile_name is None and os.environ.get("HERMES_HOME"):
|
||||
return
|
||||
|
||||
# 2. If no flag, check active_profile in the hermes root
|
||||
if profile_name is None:
|
||||
try:
|
||||
@@ -830,6 +836,16 @@ def _print_tui_exit_summary(session_id: Optional[str], active_session_file: Opti
|
||||
|
||||
|
||||
_NPM_LOCK_RUNTIME_KEYS = frozenset({"ideallyInert"})
|
||||
_TUI_PREBUILT_MARKER = ".hermes-prebuilt-tui"
|
||||
|
||||
|
||||
def _tui_prebuilt_ready(root: Path) -> bool:
    """Return True when ``root`` contains every file of a prebuilt TUI.

    Four regular files must all exist under ``root``: the prebuilt marker
    (``_TUI_PREBUILT_MARKER``), ``dist/entry.js``, the installed
    ``@hermes/ink`` ``package.json``, and the ``hermes-ink`` bundle.
    Checking stops at the first missing file.
    """
    required_files = (
        root / _TUI_PREBUILT_MARKER,
        root / "dist" / "entry.js",
        root / "node_modules" / "@hermes" / "ink" / "package.json",
        root / "packages" / "hermes-ink" / "dist" / "ink-bundle.js",
    )
    return all(candidate.is_file() for candidate in required_files)
|
||||
|
||||
|
||||
def _tui_need_npm_install(root: Path) -> bool:
|
||||
@@ -852,6 +868,9 @@ def _tui_need_npm_install(root: Path) -> bool:
|
||||
we'd rather not force a reinstall for them. Falls back to mtime
|
||||
comparison if either lockfile is unparseable.
|
||||
"""
|
||||
if _tui_prebuilt_ready(root):
|
||||
return False
|
||||
|
||||
ink = root / "node_modules" / "@hermes" / "ink" / "package.json"
|
||||
if not ink.is_file():
|
||||
return True
|
||||
@@ -905,6 +924,8 @@ def _find_bundled_tui(tui_dir: Path) -> Optional[Path]:
|
||||
|
||||
|
||||
def _tui_build_needed(tui_dir: Path) -> bool:
|
||||
if _tui_prebuilt_ready(tui_dir):
|
||||
return False
|
||||
if _hermes_ink_bundle_stale(tui_dir):
|
||||
return True
|
||||
entry = tui_dir / "dist" / "entry.js"
|
||||
@@ -1094,11 +1115,36 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
|
||||
return [node, str(root / "dist" / "entry.js")], root
|
||||
|
||||
|
||||
def _normalize_tui_toolsets(toolsets: object) -> list[str]:
    """Turn argparse/Fire-style toolset input into a flat list of names.

    The shared normaliser in ``hermes_cli.oneshot`` is used when it can be
    imported and called; if that raises ``ImportError`` or
    ``AttributeError`` a local fallback is applied instead.

    Fallback rules: a falsy value yields ``[]``; a list/tuple is processed
    item by item; anything else is treated as a single item. String items
    are split on commas, non-string items are stringified, every piece is
    stripped, and empty pieces are dropped.

    Args:
        toolsets: ``None``, a comma-separated string, a list/tuple of such
            strings, or any other scalar.

    Returns:
        The non-empty, whitespace-stripped toolset names in input order.
    """
    try:
        from hermes_cli.oneshot import _normalize_toolsets

        return _normalize_toolsets(toolsets) or []
    except (AttributeError, ImportError):
        pass  # fall through to the local fallback below

    if not toolsets:
        return []

    # Only real lists/tuples are iterated; a bare string or any other value
    # becomes a one-element list.
    candidates = toolsets if isinstance(toolsets, (list, tuple)) else [toolsets]

    pieces: list[str] = []
    for entry in candidates:
        if isinstance(entry, str):
            pieces += [chunk.strip() for chunk in entry.split(",")]
        else:
            pieces.append(str(entry).strip())

    return [piece for piece in pieces if piece]
|
||||
|
||||
|
||||
def _launch_tui(
|
||||
resume_session_id: Optional[str] = None,
|
||||
tui_dev: bool = False,
|
||||
model: Optional[str] = None,
|
||||
provider: Optional[str] = None,
|
||||
toolsets: object = None,
|
||||
):
|
||||
"""Replace current process with the TUI."""
|
||||
tui_dir = PROJECT_ROOT / "ui-tui"
|
||||
@@ -1123,6 +1169,9 @@ def _launch_tui(
|
||||
if provider:
|
||||
env["HERMES_TUI_PROVIDER"] = provider
|
||||
env["HERMES_INFERENCE_PROVIDER"] = provider
|
||||
tui_toolsets = _normalize_tui_toolsets(toolsets)
|
||||
if tui_toolsets:
|
||||
env["HERMES_TUI_TOOLSETS"] = ",".join(tui_toolsets)
|
||||
# Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
|
||||
# ~1.5–4GB depending on version and can fatal-OOM on long sessions with
|
||||
# large transcripts / reasoning blobs. Token-level merge: respect any
|
||||
@@ -1270,6 +1319,7 @@ def cmd_chat(args):
|
||||
tui_dev=getattr(args, "tui_dev", False),
|
||||
model=getattr(args, "model", None),
|
||||
provider=getattr(args, "provider", None),
|
||||
toolsets=getattr(args, "toolsets", None),
|
||||
)
|
||||
|
||||
# Import and run the CLI
|
||||
@@ -1770,6 +1820,8 @@ def select_provider_and_model(args=None):
|
||||
_model_flow_openai_codex(config, current_model)
|
||||
elif selected_provider == "qwen-oauth":
|
||||
_model_flow_qwen_oauth(config, current_model)
|
||||
elif selected_provider == "minimax-oauth":
|
||||
_model_flow_minimax_oauth(config, current_model, args=args)
|
||||
elif selected_provider == "google-gemini-cli":
|
||||
_model_flow_google_gemini_cli(config, current_model)
|
||||
elif selected_provider == "copilot-acp":
|
||||
@@ -1820,6 +1872,8 @@ def select_provider_and_model(args=None):
|
||||
"gmi",
|
||||
"nvidia",
|
||||
"ollama-cloud",
|
||||
"tencent-tokenhub",
|
||||
"lmstudio",
|
||||
):
|
||||
_model_flow_api_key_provider(config, selected_provider, current_model)
|
||||
|
||||
@@ -1888,6 +1942,7 @@ _AUX_TASKS: list[tuple[str, str, str]] = [
|
||||
("mcp", "MCP", "MCP tool reasoning"),
|
||||
("title_generation", "Title generation", "session titles"),
|
||||
("skills_hub", "Skills hub", "skills search/install"),
|
||||
("curator", "Curator", "skill-usage review pass"),
|
||||
]
|
||||
|
||||
|
||||
@@ -2046,7 +2101,11 @@ def _aux_select_for_task(task: str) -> None:
|
||||
|
||||
# Gather authenticated providers (has credentials + curated model list)
|
||||
try:
|
||||
providers = list_authenticated_providers(current_provider=current_provider)
|
||||
providers = list_authenticated_providers(
|
||||
current_provider=current_provider,
|
||||
current_model=current_model,
|
||||
current_base_url=current_base_url,
|
||||
)
|
||||
except Exception as exc:
|
||||
print(f"Could not detect authenticated providers: {exc}")
|
||||
providers = []
|
||||
@@ -2652,6 +2711,53 @@ def _model_flow_qwen_oauth(_config, current_model=""):
|
||||
print("No change.")
|
||||
|
||||
|
||||
def _model_flow_minimax_oauth(config, current_model="", args=None):
    """MiniMax OAuth provider flow: ensure a login exists, then pick a model.

    Steps:
        1. If no stored auth state with an ``access_token`` exists, run the
           MiniMax OAuth login; abort quietly if it is cancelled or fails.
        2. Resolve runtime credentials; print the formatted error and abort
           on ``AuthError``.
        3. Prompt for one of the models listed for ``minimax-oauth``; on a
           selection, save it and update the provider config with the
           resolved ``base_url``.

    Args:
        config: Unused by this flow; accepted for signature parity with the
            other ``_model_flow_*`` functions.
        current_model: Passed to the model prompt as the current choice.
        args: Optional parsed CLI namespace. ``region``, ``no_browser`` and
            ``timeout`` are read from it when present; defaults are
            ``"global"``, ``False`` and ``15.0``.
    """
    from hermes_cli.auth import (
        get_provider_auth_state,
        _prompt_model_selection,
        _save_model_choice,
        _update_config_for_provider,
        resolve_minimax_oauth_runtime_credentials,
        AuthError,
        format_auth_error,
        _login_minimax_oauth,
        PROVIDER_REGISTRY,
    )

    provider_id = "minimax-oauth"

    auth_state = get_provider_auth_state(provider_id)
    has_token = bool(auth_state) and bool(auth_state.get("access_token"))
    if not has_token:
        print("Not logged into MiniMax. Starting OAuth login...")
        print()
        try:
            # Build the namespace the login helper expects from whatever the
            # caller's args provide, falling back to defaults.
            login_args = argparse.Namespace(
                region=getattr(args, "region", None) or "global",
                no_browser=bool(getattr(args, "no_browser", False)),
                timeout=getattr(args, "timeout", None) or 15.0,
            )
            _login_minimax_oauth(login_args, PROVIDER_REGISTRY[provider_id])
        except SystemExit:
            print("Login cancelled or failed.")
            return
        except Exception as exc:
            print(f"Login failed: {exc}")
            return

    try:
        runtime_creds = resolve_minimax_oauth_runtime_credentials()
    except AuthError as exc:
        print(format_auth_error(exc))
        return

    from hermes_cli.models import _PROVIDER_MODELS

    available_models = _PROVIDER_MODELS.get(provider_id, [])
    chosen = _prompt_model_selection(available_models, current_model)
    if not chosen:
        return

    _save_model_choice(chosen)
    _update_config_for_provider(provider_id, runtime_creds["base_url"])
    print(f"\u2713 Using MiniMax model: {chosen}")
|
||||
|
||||
|
||||
def _model_flow_google_gemini_cli(_config, current_model=""):
|
||||
"""Google Gemini OAuth (PKCE) via Cloud Code Assist — supports free AND paid tiers.
|
||||
|
||||
@@ -4376,6 +4482,7 @@ def _model_flow_bedrock(config, current_model=""):
|
||||
def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||
"""Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.)."""
|
||||
from hermes_cli.auth import (
|
||||
LMSTUDIO_NOAUTH_PLACEHOLDER,
|
||||
PROVIDER_REGISTRY,
|
||||
_prompt_model_selection,
|
||||
_save_model_choice,
|
||||
@@ -4410,13 +4517,20 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||
try:
|
||||
import getpass
|
||||
|
||||
new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
|
||||
if provider_id == "lmstudio":
|
||||
prompt = f"{key_env} (Enter for no-auth default {LMSTUDIO_NOAUTH_PLACEHOLDER!r}): "
|
||||
else:
|
||||
prompt = f"{key_env} (or Enter to cancel): "
|
||||
new_key = getpass.getpass(prompt).strip()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
print()
|
||||
return
|
||||
if not new_key:
|
||||
print("Cancelled.")
|
||||
return
|
||||
if provider_id == "lmstudio":
|
||||
new_key = LMSTUDIO_NOAUTH_PLACEHOLDER
|
||||
else:
|
||||
print("Cancelled.")
|
||||
return
|
||||
save_env_value(key_env, new_key)
|
||||
existing_key = new_key
|
||||
print("API key saved.")
|
||||
@@ -4483,10 +4597,21 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||
print(" Tier check: could not verify (proceeding anyway).")
|
||||
print()
|
||||
|
||||
# Optional base URL override
|
||||
# Optional base URL override.
|
||||
# Precedence: env var → config.yaml model.base_url → registry default.
|
||||
# Reading config.yaml prevents silently overwriting a saved remote URL
|
||||
# (e.g. a remote LM Studio endpoint) with localhost when the user just
|
||||
# presses Enter at the prompt below.
|
||||
current_base = ""
|
||||
if base_url_env:
|
||||
current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
|
||||
if not current_base:
|
||||
try:
|
||||
_m = load_config().get("model") or {}
|
||||
if str(_m.get("provider") or "").strip().lower() == provider_id:
|
||||
current_base = str(_m.get("base_url") or "").strip()
|
||||
except Exception:
|
||||
pass
|
||||
effective_base = current_base or pconfig.inference_base_url
|
||||
|
||||
try:
|
||||
@@ -4508,8 +4633,22 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||
# 2. Curated static fallback list (offline insurance)
|
||||
# 3. Live /models endpoint probe (small providers without models.dev data)
|
||||
#
|
||||
# Ollama Cloud: dedicated merged discovery (live API + models.dev + disk cache)
|
||||
if provider_id == "ollama-cloud":
|
||||
# LM Studio: live /api/v1/models probe (no models.dev catalog).
|
||||
# Ollama Cloud: merged discovery (live API + models.dev + disk cache).
|
||||
if provider_id == "lmstudio":
|
||||
from hermes_cli.auth import AuthError
|
||||
from hermes_cli.models import fetch_lmstudio_models
|
||||
|
||||
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
|
||||
try:
|
||||
model_list = fetch_lmstudio_models(api_key=api_key_for_probe, base_url=effective_base)
|
||||
except AuthError as exc:
|
||||
print(f" LM Studio rejected the request: {exc}")
|
||||
print(" Set LM_API_KEY (or update it) to match the server's bearer token.")
|
||||
model_list = []
|
||||
if model_list:
|
||||
print(f" Found {len(model_list)} model(s) from LM Studio")
|
||||
elif provider_id == "ollama-cloud":
|
||||
from hermes_cli.models import fetch_ollama_cloud_models
|
||||
|
||||
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
|
||||
@@ -4731,7 +4870,6 @@ def _model_flow_anthropic(config, current_model=""):
|
||||
read_claude_code_credentials,
|
||||
is_claude_code_token_valid,
|
||||
_is_oauth_token,
|
||||
_resolve_claude_code_token_from_credentials,
|
||||
)
|
||||
|
||||
cc_creds = read_claude_code_credentials()
|
||||
@@ -5213,6 +5351,197 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _find_stale_dashboard_pids() -> list[int]:
    """Return PIDs of ``hermes dashboard`` processes other than ourselves.

    ``hermes dashboard`` is a long-lived server process commonly started and
    forgotten. When ``hermes update`` replaces files on disk, the running
    process keeps the old Python backend in memory while the JS bundle on
    disk is updated, causing a silent frontend/backend mismatch (e.g. new
    auth headers the old backend doesn't recognise → every API call 401s).

    The dashboard has no service manager (systemd / launchd), no PID file,
    and we can't know the original launch args — so the only sane action
    after an update is to kill the stale process and let the user restart
    it. This helper is just the detection step; see
    ``_kill_stale_dashboard_processes`` for the kill.

    Detection is a substring match of the process command line against a
    fixed list of patterns, using ``wmic`` on Windows and ``ps`` elsewhere.

    Returns:
        PIDs whose command line matches, excluding the current process.
        An empty list on any scan error (missing ps/wmic, timeout, non-zero
        exit, unparsable output).
    """
    patterns = (
        "hermes dashboard",
        "hermes_cli.main dashboard",
        "hermes_cli/main.py dashboard",
    )
    self_pid = os.getpid()
    dashboard_pids: list[int] = []

    def _is_dashboard(cmdline: str) -> bool:
        return any(p in cmdline for p in patterns)

    try:
        if sys.platform == "win32":
            # wmic may emit text in the system code page (for example cp936
            # on zh-CN systems), not UTF-8. In text mode, subprocess output
            # decoding depends on Python's configuration (locale-dependent
            # by default, or UTF-8 in UTF-8 mode). The important protection
            # here is errors="ignore": it prevents a reader-thread
            # UnicodeDecodeError from leaving result.stdout=None and turning
            # the later .split() into an AttributeError (#17049).
            result = subprocess.run(
                ["wmic", "process", "get", "ProcessId,CommandLine",
                 "/FORMAT:LIST"],
                capture_output=True, text=True, timeout=10,
                encoding="utf-8", errors="ignore",
            )
            if result.returncode != 0 or result.stdout is None:
                return []
            current_cmd = ""
            for line in result.stdout.split("\n"):
                line = line.strip()
                if line.startswith("CommandLine="):
                    current_cmd = line[len("CommandLine="):]
                elif line.startswith("ProcessId="):
                    # Consume the pending command line so a record that
                    # lacks its own CommandLine= line cannot inherit the
                    # previous record's value.
                    cmd, current_cmd = current_cmd, ""
                    if not _is_dashboard(cmd):
                        continue
                    # Parse inside the try: a malformed ProcessId must be
                    # skipped, not escape as an uncaught ValueError.
                    try:
                        pid = int(line[len("ProcessId="):])
                    except ValueError:
                        continue
                    if pid != self_pid:
                        dashboard_pids.append(pid)
        else:
            # Linux / macOS: scan the process table via ps and match against
            # the same explicit patterns list used on Windows. Using ps
            # (rather than `pgrep -f "hermes.*dashboard"`) keeps us consistent
            # with `hermes_cli.gateway._scan_gateway_pids` and avoids the
            # greedy regex matching unrelated cmdlines that merely contain
            # both words (e.g. a chat session discussing "dashboard").
            result = subprocess.run(
                ["ps", "-A", "-o", "pid=,command="],
                capture_output=True, text=True, timeout=10,
            )
            if result.returncode == 0:
                # ``or ""`` covers stdout being None as well as missing.
                output = getattr(result, "stdout", "") or ""
                for line in output.split("\n"):
                    stripped = line.strip()
                    if not stripped or "grep" in stripped:
                        continue
                    parts = stripped.split(None, 1)
                    if len(parts) != 2:
                        continue
                    try:
                        pid = int(parts[0])
                    except ValueError:
                        continue
                    if pid != self_pid and _is_dashboard(parts[1]):
                        dashboard_pids.append(pid)
    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
        return []

    return dashboard_pids
|
||||
|
||||
|
||||
def _kill_stale_dashboard_processes(
    reason: str = "the running backend no longer matches the updated frontend",
) -> None:
    """Stop every running ``hermes dashboard`` process found by the scanner.

    Called at the end of ``hermes update`` (default ``reason``) and also
    from ``hermes dashboard --stop`` (which overrides ``reason``). The
    dashboard has no service manager, so after a code update the running
    process is serving stale Python against a freshly-updated JS bundle;
    leaving it alive produces silent frontend/backend mismatches.

    POSIX: SIGTERM, poll up to ~3s for exit, then SIGKILL any survivors.
    Windows: ``taskkill /PID <pid> /F`` since there's no clean SIGTERM
    equivalent for background console apps.

    The dashboard isn't auto-restarted because we don't know the original
    launch args (--host, --port, --insecure, --tui, --no-open). A restart
    hint is printed when at least one process was stopped.

    Args:
        reason: Short explanation shown in the "Stopping …" banner.
    """
    stale = _find_stale_dashboard_pids()
    if not stale:
        return

    print()
    print(f"⟲ Stopping {len(stale)} dashboard process(es) ({reason})")

    stopped: list[int] = []
    errors: list[tuple[int, str]] = []

    if sys.platform != "win32":
        import signal as _signal
        import time as _time

        # SIGTERM first — give each process a chance to shut down cleanly
        # (uvicorn closes its socket, flushes logs, etc.).
        for pid in stale:
            try:
                os.kill(pid, _signal.SIGTERM)
            except ProcessLookupError:
                # Already gone — count as stopped.
                stopped.append(pid)
            except (PermissionError, OSError) as exc:
                errors.append((pid, str(exc)))

        # Poll for exit up to ~3s total. PIDs that already succeeded or
        # failed above need no further waiting.
        deadline = _time.monotonic() + 3.0
        settled = set(stopped) | {pid for pid, _ in errors}
        waiting = [pid for pid in stale if pid not in settled]
        while waiting and _time.monotonic() < deadline:
            _time.sleep(0.1)
            survivors = []
            for pid in waiting:
                try:
                    os.kill(pid, 0)  # signal 0 only probes for existence
                except ProcessLookupError:
                    stopped.append(pid)
                    continue
                except (PermissionError, OSError):
                    pass  # can't probe — assume still there
                survivors.append(pid)
            waiting = survivors

        # SIGKILL any survivors.
        for pid in waiting:
            try:
                os.kill(pid, _signal.SIGKILL)
            except ProcessLookupError:
                pass  # exited between the last probe and now
            except (PermissionError, OSError) as exc:
                errors.append((pid, str(exc)))
                continue
            stopped.append(pid)
    else:
        for pid in stale:
            try:
                result = subprocess.run(
                    ["taskkill", "/PID", str(pid), "/F"],
                    capture_output=True, text=True, timeout=10,
                )
            except (FileNotFoundError, subprocess.TimeoutExpired, OSError) as exc:
                errors.append((pid, str(exc)))
                continue
            if result.returncode == 0:
                stopped.append(pid)
            else:
                errors.append((pid, (result.stderr or result.stdout or "").strip()))

    for pid in stopped:
        print(f" ✓ stopped PID {pid}")
    for pid, detail in errors:
        print(f" ✗ failed to stop PID {pid}: {detail}")

    if stopped:
        print(" Restart the dashboard when you're ready:")
        print(" hermes dashboard --port <port>")
|
||||
|
||||
|
||||
# Back-compat alias: some tests and any external callers may import the old
|
||||
# warn-only name. The new behaviour (kill stale processes) replaces it.
|
||||
_warn_stale_dashboard_processes = _kill_stale_dashboard_processes
|
||||
|
||||
|
||||
def _update_via_zip(args):
|
||||
"""Update Hermes Agent by downloading a ZIP archive.
|
||||
|
||||
@@ -5347,6 +5676,7 @@ def _update_via_zip(args):
|
||||
|
||||
print()
|
||||
print("✓ Update complete!")
|
||||
_kill_stale_dashboard_processes()
|
||||
|
||||
|
||||
def _stash_local_changes_if_needed(git_cmd: list[str], cwd: Path) -> Optional[str]:
|
||||
@@ -7048,7 +7378,7 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
print(
|
||||
f" ⚠ {svc_name} died after restart, retrying..."
|
||||
)
|
||||
retry = subprocess.run(
|
||||
subprocess.run(
|
||||
scope_cmd + ["restart", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
@@ -7163,6 +7493,13 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
except Exception as e:
|
||||
logger.debug("Legacy unit check during update failed: %s", e)
|
||||
|
||||
# Kill stale dashboard processes — the dashboard has no service
|
||||
# manager, so leaving it alive after a code update produces a
|
||||
# silent frontend/backend mismatch. We can't auto-restart it
|
||||
# (no saved launch args) but we can stop it, and a hint is
|
||||
# printed for the user to re-launch.
|
||||
_kill_stale_dashboard_processes()
|
||||
|
||||
print()
|
||||
print("Tip: You can now select a provider and model:")
|
||||
print(" hermes model # Select provider and model")
|
||||
@@ -7552,8 +7889,59 @@ def cmd_profile(args):
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _report_dashboard_status() -> int:
    """Print the running ``hermes dashboard`` PIDs and return how many exist.

    Detection is delegated to ``_find_stale_dashboard_pids``, which leaves
    the current process out of its result. On non-Windows platforms the
    full command line is read from ``/proc/<pid>/cmdline`` when that file
    exists, so users can tell profiles apart; otherwise only the PID is
    shown.

    Returns:
        The number of dashboard processes found (0 when none are running).
    """
    running = _find_stale_dashboard_pids()
    if not running:
        print("No hermes dashboard processes running.")
        return 0

    print(f"{len(running)} hermes dashboard process(es) running:")
    on_windows = sys.platform == "win32"
    for pid in running:
        # Best-effort: any read/decode problem just falls back to PID-only.
        cmdline = ""
        if not on_windows:
            try:
                proc_file = f"/proc/{pid}/cmdline"
                if os.path.exists(proc_file):
                    with open(proc_file, "rb") as fh:
                        raw = fh.read()
                    # Arguments are NUL-separated in /proc/<pid>/cmdline.
                    cmdline = raw.replace(b"\x00", b" ").decode(
                        "utf-8", errors="replace").strip()
            except (OSError, ValueError):
                pass
        print(f" PID {pid}: {cmdline}" if cmdline else f" PID {pid}")
    return len(running)
|
||||
|
||||
|
||||
def cmd_dashboard(args):
|
||||
"""Start the web UI server."""
|
||||
"""Start the web UI server, or (with --stop/--status) manage running ones."""
|
||||
# --status: report running dashboards and exit, no deps needed.
|
||||
if getattr(args, "status", False):
|
||||
count = _report_dashboard_status()
|
||||
sys.exit(0 if count == 0 else 0) # status is informational, always 0
|
||||
|
||||
# --stop: kill any running dashboards and exit, no deps needed.
|
||||
if getattr(args, "stop", False):
|
||||
pids = _find_stale_dashboard_pids()
|
||||
if not pids:
|
||||
print("No hermes dashboard processes running.")
|
||||
sys.exit(0)
|
||||
# Reuse the same SIGTERM-grace-SIGKILL path used after `hermes update`.
|
||||
_kill_stale_dashboard_processes(reason="requested via --stop")
|
||||
# _kill_stale_dashboard_processes prints outcomes itself. Exit 0 if
|
||||
# we killed at least one, 1 if they were all unkillable.
|
||||
remaining = _find_stale_dashboard_pids()
|
||||
sys.exit(1 if remaining else 0)
|
||||
|
||||
try:
|
||||
import fastapi # noqa: F401
|
||||
import uvicorn # noqa: F401
|
||||
@@ -7620,322 +8008,9 @@ def cmd_logs(args):
|
||||
|
||||
def main():
|
||||
"""Main entry point for hermes CLI."""
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="hermes",
|
||||
description="Hermes Agent - AI assistant with tool-calling capabilities",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
hermes Start interactive chat
|
||||
hermes chat -q "Hello" Single query mode
|
||||
hermes -c Resume the most recent session
|
||||
hermes -c "my project" Resume a session by name (latest in lineage)
|
||||
hermes --resume <session_id> Resume a specific session by ID
|
||||
hermes setup Run setup wizard
|
||||
hermes logout Clear stored authentication
|
||||
hermes auth add <provider> Add a pooled credential
|
||||
hermes auth list List pooled credentials
|
||||
hermes auth remove <p> <t> Remove pooled credential by index, id, or label
|
||||
hermes auth reset <provider> Clear exhaustion status for a provider
|
||||
hermes model Select default model
|
||||
hermes fallback [list] Show fallback provider chain
|
||||
hermes fallback add Add a fallback provider (same picker as `hermes model`)
|
||||
hermes fallback remove Remove a fallback provider from the chain
|
||||
hermes config View configuration
|
||||
hermes config edit Edit config in $EDITOR
|
||||
hermes config set model gpt-4 Set a config value
|
||||
hermes gateway Run messaging gateway
|
||||
hermes -s hermes-agent-dev,github-auth
|
||||
hermes -w Start in isolated git worktree
|
||||
hermes gateway install Install gateway background service
|
||||
hermes sessions list List past sessions
|
||||
hermes sessions browse Interactive session picker
|
||||
hermes sessions rename ID T Rename/title a session
|
||||
hermes logs View agent.log (last 50 lines)
|
||||
hermes logs -f Follow agent.log in real time
|
||||
hermes logs errors View errors.log
|
||||
hermes logs --since 1h Lines from the last hour
|
||||
hermes debug share Upload debug report for support
|
||||
hermes update Update to latest version
|
||||
from hermes_cli._parser import build_top_level_parser
|
||||
|
||||
For more help on a command:
|
||||
hermes <command> --help
|
||||
""",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--version", "-V", action="store_true", help="Show version and exit"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-z",
|
||||
"--oneshot",
|
||||
metavar="PROMPT",
|
||||
default=None,
|
||||
help=(
|
||||
"One-shot mode: send a single prompt and print ONLY the final "
|
||||
"response text to stdout. No banner, no spinner, no tool "
|
||||
"previews, no session_id line. Tools, memory, rules, and "
|
||||
"AGENTS.md in the CWD are loaded as normal; approvals are "
|
||||
"auto-bypassed. Intended for scripts / pipes."
|
||||
),
|
||||
)
|
||||
# --model / --provider are accepted at the top level so they can pair
|
||||
# with -z without needing the `chat` subcommand. If neither -z nor a
|
||||
# subcommand consumes them, they fall through harmlessly as None.
|
||||
# Mirrors `hermes chat --model ... --provider ...` semantics.
|
||||
parser.add_argument(
|
||||
"-m",
|
||||
"--model",
|
||||
default=None,
|
||||
help=(
|
||||
"Model override for this invocation (e.g. anthropic/claude-sonnet-4.6). "
|
||||
"Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_MODEL env var."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--provider",
|
||||
default=None,
|
||||
help=(
|
||||
"Provider override for this invocation (e.g. openrouter, anthropic). "
|
||||
"Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--resume",
|
||||
"-r",
|
||||
metavar="SESSION",
|
||||
default=None,
|
||||
help="Resume a previous session by ID or title",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--continue",
|
||||
"-c",
|
||||
dest="continue_last",
|
||||
nargs="?",
|
||||
const=True,
|
||||
default=None,
|
||||
metavar="SESSION_NAME",
|
||||
help="Resume a session by name, or the most recent if no name given",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--worktree",
|
||||
"-w",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Run in an isolated git worktree (for parallel agents)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--accept-hooks",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help=(
|
||||
"Auto-approve any unseen shell hooks declared in config.yaml "
|
||||
"without a TTY prompt. Equivalent to HERMES_ACCEPT_HOOKS=1 or "
|
||||
"hooks_auto_accept: true in config.yaml. Use on CI / headless "
|
||||
"runs that can't prompt."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--skills",
|
||||
"-s",
|
||||
action="append",
|
||||
default=None,
|
||||
help="Preload one or more skills for the session (repeat flag or comma-separate)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--yolo",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Bypass all dangerous command approval prompts (use at your own risk)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--pass-session-id",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Include the session ID in the agent's system prompt",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ignore-user-config",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ignore-rules",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tui",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Launch the modern TUI instead of the classic REPL",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dev",
|
||||
dest="tui_dev",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="With --tui: run TypeScript sources via tsx (skip dist build)",
|
||||
)
|
||||
|
||||
subparsers = parser.add_subparsers(dest="command", help="Command to run")
|
||||
|
||||
# =========================================================================
|
||||
# chat command
|
||||
# =========================================================================
|
||||
chat_parser = subparsers.add_parser(
|
||||
"chat",
|
||||
help="Interactive chat with the agent",
|
||||
description="Start an interactive chat session with Hermes Agent",
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"-q", "--query", help="Single query (non-interactive mode)"
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--image", help="Optional local image path to attach to a single query"
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"-m", "--model", help="Model to use (e.g., anthropic/claude-sonnet-4)"
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"-t", "--toolsets", help="Comma-separated toolsets to enable"
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"-s",
|
||||
"--skills",
|
||||
action="append",
|
||||
default=argparse.SUPPRESS,
|
||||
help="Preload one or more skills for the session (repeat flag or comma-separate)",
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--provider",
|
||||
choices=[
|
||||
"auto",
|
||||
"openrouter",
|
||||
"nous",
|
||||
"openai-codex",
|
||||
"copilot-acp",
|
||||
"copilot",
|
||||
"anthropic",
|
||||
"gemini",
|
||||
"xai",
|
||||
"ollama-cloud",
|
||||
"huggingface",
|
||||
"zai",
|
||||
"kimi-coding",
|
||||
"kimi-coding-cn",
|
||||
"stepfun",
|
||||
"minimax",
|
||||
"minimax-cn",
|
||||
"kilocode",
|
||||
"xiaomi",
|
||||
"arcee",
|
||||
"gmi",
|
||||
"nvidia",
|
||||
],
|
||||
default=None,
|
||||
help="Inference provider (default: auto)",
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"-v", "--verbose", action="store_true", help="Verbose output"
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"-Q",
|
||||
"--quiet",
|
||||
action="store_true",
|
||||
help="Quiet mode for programmatic use: suppress banner, spinner, and tool previews. Only output the final response and session info.",
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--resume",
|
||||
"-r",
|
||||
metavar="SESSION_ID",
|
||||
default=argparse.SUPPRESS,
|
||||
help="Resume a previous session by ID (shown on exit)",
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--continue",
|
||||
"-c",
|
||||
dest="continue_last",
|
||||
nargs="?",
|
||||
const=True,
|
||||
default=argparse.SUPPRESS,
|
||||
metavar="SESSION_NAME",
|
||||
help="Resume a session by name, or the most recent if no name given",
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--worktree",
|
||||
"-w",
|
||||
action="store_true",
|
||||
default=argparse.SUPPRESS,
|
||||
help="Run in an isolated git worktree (for parallel agents on the same repo)",
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--accept-hooks",
|
||||
action="store_true",
|
||||
default=argparse.SUPPRESS,
|
||||
help=(
|
||||
"Auto-approve any unseen shell hooks declared in config.yaml "
|
||||
"without a TTY prompt (see also HERMES_ACCEPT_HOOKS env var and "
|
||||
"hooks_auto_accept: in config.yaml)."
|
||||
),
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--checkpoints",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Enable filesystem checkpoints before destructive file operations (use /rollback to restore)",
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--max-turns",
|
||||
type=int,
|
||||
default=None,
|
||||
metavar="N",
|
||||
help="Maximum tool-calling iterations per conversation turn (default: 90, or agent.max_turns in config)",
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--yolo",
|
||||
action="store_true",
|
||||
default=argparse.SUPPRESS,
|
||||
help="Bypass all dangerous command approval prompts (use at your own risk)",
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--pass-session-id",
|
||||
action="store_true",
|
||||
default=argparse.SUPPRESS,
|
||||
help="Include the session ID in the agent's system prompt",
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--ignore-user-config",
|
||||
action="store_true",
|
||||
default=argparse.SUPPRESS,
|
||||
help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded). Useful for isolated CI runs, reproduction, and third-party integrations.",
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--ignore-rules",
|
||||
action="store_true",
|
||||
default=argparse.SUPPRESS,
|
||||
help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills. Combine with --ignore-user-config for a fully isolated run.",
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--source",
|
||||
default=None,
|
||||
help="Session source tag for filtering (default: cli). Use 'tool' for third-party integrations that should not appear in user session lists.",
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--tui",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Launch the modern TUI instead of the classic REPL",
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--dev",
|
||||
dest="tui_dev",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="With --tui: run TypeScript sources via tsx (skip dist build)",
|
||||
)
|
||||
parser, subparsers, chat_parser = build_top_level_parser()
|
||||
chat_parser.set_defaults(func=cmd_chat)
|
||||
|
||||
# =========================================================================
|
||||
@@ -9120,6 +9195,26 @@ Examples:
|
||||
except Exception as _exc:
|
||||
logging.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc)
|
||||
|
||||
# =========================================================================
|
||||
# curator command — background skill maintenance
|
||||
# =========================================================================
|
||||
curator_parser = subparsers.add_parser(
|
||||
"curator",
|
||||
help="Background skill maintenance (curator) — status, run, pause, pin",
|
||||
description=(
|
||||
"The curator is an auxiliary-model background task that "
|
||||
"periodically reviews agent-created skills, prunes stale ones, "
|
||||
"consolidates overlaps, and archives obsolete skills. "
|
||||
"Bundled and hub-installed skills are never touched. "
|
||||
"Archives are recoverable; auto-deletion never happens."
|
||||
),
|
||||
)
|
||||
try:
|
||||
from hermes_cli.curator import register_cli as _register_curator_cli
|
||||
_register_curator_cli(curator_parser)
|
||||
except Exception as _exc:
|
||||
logging.getLogger(__name__).debug("curator CLI wiring failed: %s", _exc)
|
||||
|
||||
# =========================================================================
|
||||
# memory command
|
||||
# =========================================================================
|
||||
@@ -9585,15 +9680,8 @@ Examples:
|
||||
|
||||
# Launch hermes --resume <id> by replacing the current process
|
||||
print(f"Resuming session: {selected_id}")
|
||||
hermes_bin = shutil.which("hermes")
|
||||
if hermes_bin:
|
||||
os.execvp(hermes_bin, ["hermes", "--resume", selected_id])
|
||||
else:
|
||||
# Fallback: re-invoke via python -m
|
||||
os.execvp(
|
||||
sys.executable,
|
||||
[sys.executable, "-m", "hermes_cli.main", "--resume", selected_id],
|
||||
)
|
||||
from hermes_cli.relaunch import relaunch
|
||||
relaunch(["--resume", selected_id])
|
||||
return # won't reach here after execvp
|
||||
|
||||
elif action == "stats":
|
||||
@@ -9676,17 +9764,26 @@ Examples:
|
||||
"--preset",
|
||||
choices=["user-data", "full"],
|
||||
default="full",
|
||||
help="Migration preset (default: full). 'user-data' excludes secrets",
|
||||
help="Migration preset (default: full). Neither preset imports secrets — "
|
||||
"pass --migrate-secrets to include API keys.",
|
||||
)
|
||||
claw_migrate.add_argument(
|
||||
"--overwrite",
|
||||
action="store_true",
|
||||
help="Overwrite existing files (default: skip conflicts)",
|
||||
help="Overwrite existing files (default: refuse to apply when the plan has conflicts)",
|
||||
)
|
||||
claw_migrate.add_argument(
|
||||
"--migrate-secrets",
|
||||
action="store_true",
|
||||
help="Include allowlisted secrets (TELEGRAM_BOT_TOKEN, API keys, etc.)",
|
||||
help="Include allowlisted secrets (TELEGRAM_BOT_TOKEN, API keys, etc.). "
|
||||
"Required even under --preset full.",
|
||||
)
|
||||
claw_migrate.add_argument(
|
||||
"--no-backup",
|
||||
action="store_true",
|
||||
help="Skip the pre-migration zip snapshot of ~/.hermes/ (by default a "
|
||||
"single restore-point archive is written to ~/.hermes/backups/ "
|
||||
"before apply; restorable with 'hermes import').",
|
||||
)
|
||||
claw_migrate.add_argument(
|
||||
"--workspace-target", help="Absolute path to copy workspace instructions into"
|
||||
@@ -9942,6 +10039,22 @@ Examples:
|
||||
"Alternatively set HERMES_DASHBOARD_TUI=1."
|
||||
),
|
||||
)
|
||||
# Lifecycle flags — mutually exclusive with each other and with the
|
||||
# start-a-server flags above (if both are passed, --stop / --status win
|
||||
# because they exit before the server is started). The dashboard has
|
||||
# no service manager and no PID file, so these scan the process table
|
||||
# for `hermes dashboard` cmdlines and SIGTERM them directly — the same
|
||||
# path `hermes update` uses to clean up stale dashboards.
|
||||
dashboard_parser.add_argument(
|
||||
"--stop",
|
||||
action="store_true",
|
||||
help="Stop all running hermes dashboard processes and exit",
|
||||
)
|
||||
dashboard_parser.add_argument(
|
||||
"--status",
|
||||
action="store_true",
|
||||
help="List running hermes dashboard processes and exit",
|
||||
)
|
||||
dashboard_parser.set_defaults(func=cmd_dashboard)
|
||||
|
||||
# =========================================================================
|
||||
@@ -10101,6 +10214,17 @@ Examples:
|
||||
logger.debug(
|
||||
"plugin discovery failed at CLI startup", exc_info=True,
|
||||
)
|
||||
try:
|
||||
# MCP tool discovery — no event loop running in CLI/TUI startup,
|
||||
# so inline is safe. Moved here from model_tools.py module scope
|
||||
# to avoid freezing the gateway's event loop on its first message
|
||||
# via the same lazy import path (#16856).
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"MCP tool discovery failed at CLI startup", exc_info=True,
|
||||
)
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from agent.shell_hooks import register_from_config
|
||||
@@ -10120,6 +10244,7 @@ Examples:
|
||||
args.oneshot,
|
||||
model=getattr(args, "model", None),
|
||||
provider=getattr(args, "provider", None),
|
||||
toolsets=getattr(args, "toolsets", None),
|
||||
))
|
||||
|
||||
# Handle top-level --resume / --continue as shortcut to chat
|
||||
|
||||
@@ -16,6 +16,7 @@ import time
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from hermes_cli.config import (
|
||||
cfg_get,
|
||||
load_config,
|
||||
save_config,
|
||||
get_env_value,
|
||||
@@ -716,7 +717,7 @@ def cmd_mcp_configure(args):
|
||||
|
||||
# Update config
|
||||
config = load_config()
|
||||
server_entry = config.get("mcp_servers", {}).get(name, {})
|
||||
server_entry = cfg_get(config, "mcp_servers", name, default={})
|
||||
|
||||
if len(chosen) == total:
|
||||
# All selected → remove include/exclude (register all)
|
||||
|
||||
@@ -46,7 +46,6 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
@@ -54,6 +53,7 @@ from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from hermes_cli import __version__ as _HERMES_VERSION
|
||||
from utils import atomic_replace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -190,7 +190,7 @@ def _write_disk_cache(data: dict[str, Any]) -> None:
|
||||
with open(tmp, "w") as fh:
|
||||
json.dump(data, fh, indent=2)
|
||||
fh.write("\n")
|
||||
os.replace(tmp, path)
|
||||
atomic_replace(tmp, path)
|
||||
except OSError as exc:
|
||||
logger.info("model catalog cache write failed: %s", exc)
|
||||
|
||||
|
||||
@@ -96,6 +96,7 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
|
||||
"kimi-coding",
|
||||
"kimi-coding-cn",
|
||||
"minimax",
|
||||
"minimax-oauth",
|
||||
"minimax-cn",
|
||||
"alibaba",
|
||||
"qwen-oauth",
|
||||
|
||||
+137
-7
@@ -213,10 +213,15 @@ def _load_direct_aliases() -> dict[str, DirectAlias]:
|
||||
|
||||
|
||||
def _ensure_direct_aliases() -> None:
    """Lazy-load direct aliases on first use.

    Mutates the existing DIRECT_ALIASES dict in place rather than rebinding
    the module attribute. This keeps `from hermes_cli.model_switch import
    DIRECT_ALIASES` references valid in callers — rebinding would leave them
    pointing at a stale empty dict.
    """
    # No `global` statement: the name is only read and mutated here, never
    # assigned, so the module-level dict object keeps its identity.
    if not DIRECT_ALIASES:
        DIRECT_ALIASES.update(_load_direct_aliases())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -534,6 +539,7 @@ def resolve_display_context_length(
|
||||
api_key: str = "",
|
||||
model_info: Optional[ModelInfo] = None,
|
||||
custom_providers: list | None = None,
|
||||
config_context_length: int | None = None,
|
||||
) -> Optional[int]:
|
||||
"""Resolve the context length to show in /model output.
|
||||
|
||||
@@ -560,6 +566,7 @@ def resolve_display_context_length(
|
||||
api_key=api_key or "",
|
||||
provider=provider or None,
|
||||
custom_providers=custom_providers,
|
||||
config_context_length=config_context_length,
|
||||
)
|
||||
if ctx:
|
||||
return int(ctx)
|
||||
@@ -979,6 +986,7 @@ def list_authenticated_providers(
|
||||
user_providers: dict = None,
|
||||
custom_providers: list | None = None,
|
||||
max_models: int = 8,
|
||||
current_model: str = "",
|
||||
) -> List[dict]:
|
||||
"""Detect which providers have credentials and list their curated models.
|
||||
|
||||
@@ -1012,6 +1020,37 @@ def list_authenticated_providers(
|
||||
results: List[dict] = []
|
||||
seen_slugs: set = set() # lowercase-normalized to catch case variants (#9545)
|
||||
seen_mdev_ids: set = set() # prevent duplicate entries for aliases (e.g. kimi-coding + kimi-coding-cn)
|
||||
# Effective base URLs of every built-in row we emit (normalized lower+rstrip).
|
||||
# Section 4 uses this to hide ``custom_providers`` entries that point at the
|
||||
# same endpoint as a built-in (e.g. a user-defined "my-dashscope" on
|
||||
# https://coding-intl.dashscope.aliyuncs.com/v1 collides with the built-in
|
||||
# alibaba-coding-plan row when DASHSCOPE_API_KEY is present). Fixes #16970.
|
||||
_builtin_endpoints: set = set()
|
||||
|
||||
def _norm_url(url: str) -> str:
|
||||
return str(url or "").strip().rstrip("/").lower()
|
||||
|
||||
def _record_builtin_endpoint(slug: str) -> None:
|
||||
"""Record the effective base URL for a built-in provider row.
|
||||
|
||||
Prefers the live env-override (e.g. DASHSCOPE_BASE_URL) over the
|
||||
static inference_base_url so the dedup matches what a user typing
|
||||
that URL into custom_providers would actually hit."""
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY as _reg
|
||||
except Exception:
|
||||
return
|
||||
pcfg = _reg.get(slug)
|
||||
if not pcfg:
|
||||
return
|
||||
url = ""
|
||||
if getattr(pcfg, "base_url_env_var", ""):
|
||||
url = os.environ.get(pcfg.base_url_env_var, "") or ""
|
||||
if not url:
|
||||
url = getattr(pcfg, "inference_base_url", "") or ""
|
||||
normed = _norm_url(url)
|
||||
if normed:
|
||||
_builtin_endpoints.add(normed)
|
||||
|
||||
data = fetch_models_dev()
|
||||
|
||||
@@ -1025,6 +1064,34 @@ def list_authenticated_providers(
|
||||
if "ollama-cloud" not in curated:
|
||||
from hermes_cli.models import fetch_ollama_cloud_models
|
||||
curated["ollama-cloud"] = fetch_ollama_cloud_models()
|
||||
# LM Studio has no static catalog — probe its native /api/v1/models
|
||||
# endpoint live so the picker reflects whatever the user has loaded.
|
||||
# Base URL precedence: LM_BASE_URL env var > active config's base_url
|
||||
# (when current provider is lmstudio) > 127.0.0.1 default.
|
||||
# On auth rejection or unreachable server, fall back to the caller-supplied
|
||||
# current model so the picker still shows something when offline / mis-keyed.
|
||||
if "lmstudio" not in curated and (
|
||||
os.environ.get("LM_API_KEY") or os.environ.get("LM_BASE_URL") or current_provider.strip().lower() == "lmstudio"
|
||||
):
|
||||
from hermes_cli.models import fetch_lmstudio_models
|
||||
from hermes_cli.auth import AuthError
|
||||
is_current_lmstudio = current_provider.strip().lower() == "lmstudio"
|
||||
lm_base = (
|
||||
os.environ.get("LM_BASE_URL")
|
||||
or (current_base_url if is_current_lmstudio and current_base_url else None)
|
||||
or "http://127.0.0.1:1234/v1"
|
||||
)
|
||||
try:
|
||||
live = fetch_lmstudio_models(
|
||||
api_key=os.environ.get("LM_API_KEY", ""),
|
||||
base_url=lm_base,
|
||||
timeout=1.5, # Smaller timeout for picker
|
||||
)
|
||||
except AuthError:
|
||||
live = []
|
||||
if not live and is_current_lmstudio and current_model:
|
||||
live = [current_model]
|
||||
curated["lmstudio"] = live
|
||||
|
||||
# --- 1. Check Hermes-mapped providers ---
|
||||
for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
|
||||
@@ -1090,6 +1157,7 @@ def list_authenticated_providers(
|
||||
})
|
||||
seen_slugs.add(slug.lower())
|
||||
seen_mdev_ids.add(mdev_id)
|
||||
_record_builtin_endpoint(slug)
|
||||
|
||||
# --- 2. Check Hermes-only providers (nous, openai-codex, copilot, opencode-go) ---
|
||||
from hermes_cli.providers import HERMES_OVERLAYS
|
||||
@@ -1175,6 +1243,15 @@ def list_authenticated_providers(
|
||||
|
||||
if hermes_slug in {"copilot", "copilot-acp"}:
|
||||
model_ids = provider_model_ids(hermes_slug)
|
||||
# For aws_sdk providers (bedrock), use live discovery so the list
|
||||
# reflects the active region (eu.*, ap.*) not the static us.* list.
|
||||
elif overlay.auth_type == "aws_sdk":
|
||||
try:
|
||||
from agent.bedrock_adapter import bedrock_model_ids_or_none
|
||||
_ids = bedrock_model_ids_or_none()
|
||||
model_ids = _ids if _ids is not None else (curated.get(hermes_slug, []) or curated.get(pid, []))
|
||||
except Exception:
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
else:
|
||||
# Use curated list — look up by Hermes slug, fall back to overlay key
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
@@ -1195,6 +1272,7 @@ def list_authenticated_providers(
|
||||
})
|
||||
seen_slugs.add(pid.lower())
|
||||
seen_slugs.add(hermes_slug.lower())
|
||||
_record_builtin_endpoint(hermes_slug)
|
||||
|
||||
# --- 2b. Cross-check canonical provider list ---
|
||||
# Catches providers that are in CANONICAL_PROVIDERS but weren't found
|
||||
@@ -1237,10 +1315,30 @@ def list_authenticated_providers(
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Special case: aws_sdk auth (bedrock) — no API key env vars,
|
||||
# credentials come from the boto3 credential chain (env vars,
|
||||
# ~/.aws/credentials, instance roles, etc.)
|
||||
if not _cp_has_creds and _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
|
||||
try:
|
||||
from agent.bedrock_adapter import has_aws_credentials
|
||||
_cp_has_creds = has_aws_credentials()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not _cp_has_creds:
|
||||
continue
|
||||
|
||||
_cp_model_ids = curated.get(_cp.slug, [])
|
||||
# For bedrock, use live discovery so the list reflects the active
|
||||
# region (eu.*, us.*, ap.*) instead of the hardcoded us.* static list.
|
||||
if _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
|
||||
try:
|
||||
from agent.bedrock_adapter import bedrock_model_ids_or_none
|
||||
_ids = bedrock_model_ids_or_none()
|
||||
_cp_model_ids = _ids if _ids is not None else curated.get(_cp.slug, [])
|
||||
except Exception:
|
||||
_cp_model_ids = curated.get(_cp.slug, [])
|
||||
else:
|
||||
_cp_model_ids = curated.get(_cp.slug, [])
|
||||
_cp_total = len(_cp_model_ids)
|
||||
_cp_top = _cp_model_ids[:max_models]
|
||||
|
||||
@@ -1254,6 +1352,7 @@ def list_authenticated_providers(
|
||||
"source": "canonical",
|
||||
})
|
||||
seen_slugs.add(_cp.slug.lower())
|
||||
_record_builtin_endpoint(_cp.slug)
|
||||
|
||||
# --- 3. User-defined endpoints from config ---
|
||||
# Track (name, base_url) of what section 3 emits so section 4 can skip
|
||||
@@ -1312,8 +1411,23 @@ def list_authenticated_providers(
|
||||
if fb:
|
||||
models_list = list(fb)
|
||||
|
||||
# Try to probe /v1/models if URL is set (but don't block on it)
|
||||
# For now just show what we know from config
|
||||
# Prefer the endpoint's live /models list when credentials are
|
||||
# available. This keeps OpenAI-compatible relays (for example CRS)
|
||||
# in sync when the server catalog changes without requiring the
|
||||
# user to mirror every model into config.yaml.
|
||||
api_key = str(ep_cfg.get("api_key", "") or "").strip()
|
||||
if not api_key:
|
||||
key_env = str(ep_cfg.get("key_env", "") or "").strip()
|
||||
api_key = os.environ.get(key_env, "").strip() if key_env else ""
|
||||
if api_url and api_key:
|
||||
try:
|
||||
from hermes_cli.models import fetch_api_models
|
||||
live_models = fetch_api_models(api_key, api_url)
|
||||
if live_models:
|
||||
models_list = live_models
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
results.append({
|
||||
"slug": ep_name,
|
||||
"name": display_name,
|
||||
@@ -1389,7 +1503,14 @@ def list_authenticated_providers(
|
||||
current_base_url
|
||||
and api_url == current_base_url.strip().rstrip("/")
|
||||
):
|
||||
slug = current_provider or custom_provider_slug(display_name)
|
||||
# Guard against bare "custom" slug left by a prior
|
||||
# failed switch — always resolve to the canonical
|
||||
# custom:<name> form. (GH #17478)
|
||||
slug = (
|
||||
current_provider
|
||||
if current_provider and current_provider != "custom"
|
||||
else custom_provider_slug(display_name)
|
||||
)
|
||||
else:
|
||||
slug = custom_provider_slug(display_name)
|
||||
groups[group_key] = {
|
||||
@@ -1448,6 +1569,15 @@ def list_authenticated_providers(
|
||||
)
|
||||
if _pair_key[0] and _pair_key[1] and _pair_key in _section3_emitted_pairs:
|
||||
continue
|
||||
# Skip if a built-in row (sections 1/2/2b) already represents this
|
||||
# endpoint. Fixes #16970: a user-defined "my-dashscope" pointing at
|
||||
# https://coding-intl.dashscope.aliyuncs.com/v1 duplicates the
|
||||
# built-in alibaba-coding-plan row whenever DASHSCOPE_API_KEY is
|
||||
# set. The built-in row carries the curated model list, correct
|
||||
# auth wiring, and canonical slug — keep it and hide the shadow.
|
||||
_grp_url_norm = _pair_key[1]
|
||||
if _grp_url_norm and _grp_url_norm in _builtin_endpoints:
|
||||
continue
|
||||
results.append({
|
||||
"slug": slug,
|
||||
"name": grp["name"],
|
||||
|
||||
+465
-53
@@ -44,6 +44,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("openai/gpt-5.4-mini", ""),
|
||||
("xiaomi/mimo-v2.5-pro", ""),
|
||||
("xiaomi/mimo-v2.5", ""),
|
||||
("tencent/hy3-preview:free", "free"),
|
||||
("openai/gpt-5.3-codex", ""),
|
||||
("google/gemini-3-pro-image-preview", ""),
|
||||
("google/gemini-3-flash-preview", ""),
|
||||
@@ -106,11 +107,57 @@ def _codex_curated_models() -> list[str]:
|
||||
return _add_forward_compat_models(list(DEFAULT_CODEX_MODELS))
|
||||
|
||||
|
||||
# Static fallback for xAI when the models.dev disk cache is empty (fresh
|
||||
# install, offline first run, etc.). Mirrors the xAI-direct model IDs from
|
||||
# $HERMES_HOME/models_dev_cache.json as of 2026-04-28. Whenever xAI renames
|
||||
# or retires a model, the disk cache picks it up on the next refresh and the
|
||||
# fallback here only matters until that refresh lands.
|
||||
_XAI_STATIC_FALLBACK: list[str] = [
|
||||
"grok-4.20-0309-reasoning",
|
||||
"grok-4.20-0309-non-reasoning",
|
||||
"grok-4.20-multi-agent-0309",
|
||||
"grok-4-1-fast",
|
||||
"grok-4-1-fast-non-reasoning",
|
||||
"grok-4-fast",
|
||||
"grok-4-fast-non-reasoning",
|
||||
"grok-4",
|
||||
"grok-code-fast-1",
|
||||
]
|
||||
|
||||
|
||||
def _xai_curated_models() -> list[str]:
|
||||
"""Derive the xAI-direct curated list from models.dev disk cache.
|
||||
|
||||
Reads $HERMES_HOME/models_dev_cache.json directly (no network) so this
|
||||
runs at import time without blocking. Falls back to ``_XAI_STATIC_FALLBACK``
|
||||
when the cache is empty or unreadable. Hermes refreshes the cache from
|
||||
https://models.dev/api.json on normal use, so this list self-heals as
|
||||
xAI renames models.
|
||||
|
||||
Mirrors ``_codex_curated_models()``'s role for openai-codex.
|
||||
"""
|
||||
try:
|
||||
from agent.models_dev import _load_disk_cache
|
||||
data = _load_disk_cache()
|
||||
xai = data.get("xai") if isinstance(data, dict) else None
|
||||
models = xai.get("models") if isinstance(xai, dict) else None
|
||||
if isinstance(models, dict) and models:
|
||||
ids = [mid for mid in models.keys() if isinstance(mid, str)]
|
||||
if ids:
|
||||
return sorted(ids)
|
||||
except Exception:
|
||||
# Any failure (missing file, malformed JSON, import error)
|
||||
# falls through to the static list.
|
||||
pass
|
||||
return list(_XAI_STATIC_FALLBACK)
|
||||
|
||||
|
||||
_PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"nous": [
|
||||
"moonshotai/kimi-k2.6",
|
||||
"xiaomi/mimo-v2.5-pro",
|
||||
"xiaomi/mimo-v2.5",
|
||||
"tencent/hy3-preview",
|
||||
"anthropic/claude-opus-4.7",
|
||||
"anthropic/claude-opus-4.6",
|
||||
"anthropic/claude-sonnet-4.6",
|
||||
@@ -193,10 +240,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"glm-4.5",
|
||||
"glm-4.5-flash",
|
||||
],
|
||||
"xai": [
|
||||
"grok-4.20-reasoning",
|
||||
"grok-4-1-fast-reasoning",
|
||||
],
|
||||
"xai": _xai_curated_models(),
|
||||
"nvidia": [
|
||||
# NVIDIA flagship reasoning models
|
||||
"nvidia/nemotron-3-super-120b-a12b",
|
||||
@@ -244,6 +288,10 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"MiniMax-M2.1",
|
||||
"MiniMax-M2",
|
||||
],
|
||||
"minimax-oauth": [
|
||||
"MiniMax-M2.7",
|
||||
"MiniMax-M2.7-highspeed",
|
||||
],
|
||||
"minimax-cn": [
|
||||
"MiniMax-M2.7",
|
||||
"MiniMax-M2.5",
|
||||
@@ -273,6 +321,9 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"mimo-v2-omni",
|
||||
"mimo-v2-flash",
|
||||
],
|
||||
"tencent-tokenhub": [
|
||||
"hy3-preview",
|
||||
],
|
||||
"arcee": [
|
||||
"trinity-large-thinking",
|
||||
"trinity-large-preview",
|
||||
@@ -350,6 +401,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
# to https://dashscope-intl.aliyuncs.com/compatible-mode/v1 (OpenAI-compat)
|
||||
# or https://dashscope-intl.aliyuncs.com/apps/anthropic (Anthropic-compat).
|
||||
"alibaba": [
|
||||
"qwen3.6-plus",
|
||||
"kimi-k2.5",
|
||||
"qwen3.5-plus",
|
||||
"qwen3-coder-plus",
|
||||
@@ -720,10 +772,12 @@ class ProviderEntry(NamedTuple):
|
||||
CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||
ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"),
|
||||
ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"),
|
||||
ProviderEntry("lmstudio", "LM Studio", "LM Studio (local desktop app with built-in model server)"),
|
||||
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
|
||||
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
|
||||
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
|
||||
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
|
||||
ProviderEntry("tencent-tokenhub", "Tencent TokenHub", "Tencent TokenHub (Hy3 Preview — direct API via tokenhub.tencentmaas.com)"),
|
||||
ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
|
||||
ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
|
||||
ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
|
||||
@@ -738,6 +792,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||
ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)", "Kimi / Moonshot China (Moonshot CN direct API)"),
|
||||
ProviderEntry("stepfun", "StepFun Step Plan", "StepFun Step Plan (agent/coding models via Step Plan API)"),
|
||||
ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"),
|
||||
ProviderEntry("minimax-oauth", "MiniMax (OAuth)", "MiniMax via OAuth browser login (Coding Plan, minimax.io)"),
|
||||
ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"),
|
||||
ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
|
||||
ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"),
|
||||
@@ -781,6 +836,9 @@ _PROVIDER_ALIASES = {
|
||||
"gmicloud": "gmi",
|
||||
"minimax-china": "minimax-cn",
|
||||
"minimax_cn": "minimax-cn",
|
||||
"minimax-portal": "minimax-oauth",
|
||||
"minimax-global": "minimax-oauth",
|
||||
"minimax_oauth": "minimax-oauth",
|
||||
"claude": "anthropic",
|
||||
"claude-code": "anthropic",
|
||||
"deep-seek": "deepseek",
|
||||
@@ -806,6 +864,10 @@ _PROVIDER_ALIASES = {
|
||||
"huggingface-hub": "huggingface",
|
||||
"mimo": "xiaomi",
|
||||
"xiaomi-mimo": "xiaomi",
|
||||
"tencent": "tencent-tokenhub",
|
||||
"tokenhub": "tencent-tokenhub",
|
||||
"tencent-cloud": "tencent-tokenhub",
|
||||
"tencentmaas": "tencent-tokenhub",
|
||||
"aws": "bedrock",
|
||||
"aws-bedrock": "bedrock",
|
||||
"amazon-bedrock": "bedrock",
|
||||
@@ -817,6 +879,9 @@ _PROVIDER_ALIASES = {
|
||||
"nvidia-nim": "nvidia",
|
||||
"build-nvidia": "nvidia",
|
||||
"nemotron": "nvidia",
|
||||
"lmstudio": "lmstudio",
|
||||
"lm-studio": "lmstudio",
|
||||
"lm_studio": "lmstudio",
|
||||
"ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud
|
||||
"ollama_cloud": "ollama-cloud",
|
||||
}
|
||||
@@ -1623,31 +1688,41 @@ def provider_label(provider: Optional[str]) -> str:
|
||||
|
||||
# Models that support OpenAI Priority Processing (service_tier="priority").
|
||||
# See https://openai.com/api-priority-processing/ for the canonical list.
|
||||
# Only the bare model slug is stored (no vendor prefix).
|
||||
_PRIORITY_PROCESSING_MODELS: frozenset[str] = frozenset({
|
||||
"gpt-5.4",
|
||||
"gpt-5.4-mini",
|
||||
"gpt-5.2",
|
||||
"gpt-5.1",
|
||||
"gpt-5",
|
||||
"gpt-5-mini",
|
||||
"gpt-4.1",
|
||||
"gpt-4.1-mini",
|
||||
"gpt-4.1-nano",
|
||||
"gpt-4o",
|
||||
"gpt-4o-mini",
|
||||
#
|
||||
# Pattern-based matching — any OpenAI flagship model (gpt-*, o1*, o3*, o4*)
|
||||
# is assumed to support Priority Processing. service_tier=priority is silently
|
||||
# ignored by non-OpenAI endpoints (OpenRouter/Copilot/opencode-zen proxies
|
||||
# strip the field), so false positives are harmless. Codex-series models
|
||||
# (gpt-5-codex, gpt-5.3-codex, etc.) are excluded — they don't expose the
|
||||
# service_tier parameter through the Codex Responses API.
|
||||
_OPENAI_FAST_MODE_PREFIXES: tuple[str, ...] = (
|
||||
"gpt-",
|
||||
"o1",
|
||||
"o3",
|
||||
"o4-mini",
|
||||
})
|
||||
"o4",
|
||||
)
|
||||
|
||||
|
||||
def _is_openai_fast_model(model_id: Optional[str]) -> bool:
|
||||
"""Return True if the model is an OpenAI flagship eligible for Priority Processing."""
|
||||
raw = _strip_vendor_prefix(str(model_id or ""))
|
||||
base = raw.split(":")[0]
|
||||
if not base:
|
||||
return False
|
||||
# Exclude Codex-series — they route through the Codex Responses API
|
||||
# which doesn't accept service_tier.
|
||||
if "codex" in base:
|
||||
return False
|
||||
return any(base.startswith(prefix) for prefix in _OPENAI_FAST_MODE_PREFIXES)
|
||||
|
||||
|
||||
# Models that support Anthropic Fast Mode (speed="fast").
|
||||
# See https://platform.claude.com/docs/en/build-with-claude/fast-mode
|
||||
# Currently only Claude Opus 4.6. Both hyphen and dot variants are stored
|
||||
# to handle native Anthropic (claude-opus-4-6) and OpenRouter (claude-opus-4.6).
|
||||
_ANTHROPIC_FAST_MODE_MODELS: frozenset[str] = frozenset({
|
||||
"claude-opus-4-6",
|
||||
"claude-opus-4.6",
|
||||
})
|
||||
#
|
||||
# Pattern-based matching — any claude-* model is eligible. The anthropic
|
||||
# adapter gates speed=fast on native Anthropic endpoints only (see
|
||||
# _is_third_party_anthropic_endpoint in agent/anthropic_adapter.py), so
|
||||
# third-party proxies that would reject the beta header are protected.
|
||||
|
||||
|
||||
def _strip_vendor_prefix(model_id: str) -> str:
|
||||
@@ -1660,20 +1735,14 @@ def _strip_vendor_prefix(model_id: str) -> str:
|
||||
|
||||
def model_supports_fast_mode(model_id: Optional[str]) -> bool:
|
||||
"""Return whether Hermes should expose the /fast toggle for this model."""
|
||||
raw = _strip_vendor_prefix(str(model_id or ""))
|
||||
if raw in _PRIORITY_PROCESSING_MODELS:
|
||||
return True
|
||||
# Anthropic fast mode — strip date suffixes (e.g. claude-opus-4-6-20260401)
|
||||
# and OpenRouter variant tags (:fast, :beta) for matching.
|
||||
base = raw.split(":")[0]
|
||||
return base in _ANTHROPIC_FAST_MODE_MODELS
|
||||
return _is_anthropic_fast_model(model_id) or _is_openai_fast_model(model_id)
|
||||
|
||||
|
||||
def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
|
||||
"""Return True if the model supports Anthropic's fast mode (speed='fast')."""
|
||||
"""Return True if the model is a Claude model eligible for Anthropic Fast Mode."""
|
||||
raw = _strip_vendor_prefix(str(model_id or ""))
|
||||
base = raw.split(":")[0]
|
||||
return base in _ANTHROPIC_FAST_MODE_MODELS
|
||||
return base.startswith("claude-")
|
||||
|
||||
|
||||
def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
|
||||
@@ -1695,14 +1764,61 @@ def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | Non
|
||||
|
||||
|
||||
def _resolve_copilot_catalog_api_key() -> str:
|
||||
"""Best-effort GitHub token for fetching the Copilot model catalog."""
|
||||
"""Best-effort GitHub token for fetching the Copilot model catalog.
|
||||
|
||||
Resolution order:
|
||||
1. ``resolve_api_key_provider_credentials("copilot")`` — env vars
|
||||
(``COPILOT_GITHUB_TOKEN`` / ``GH_TOKEN`` / ``GITHUB_TOKEN``) plus
|
||||
the ``gh auth token`` CLI fallback.
|
||||
2. ``read_credential_pool("copilot")`` — a token (typically a
|
||||
``gho_*`` from device-code login, or a fine-grained PAT) stored in
|
||||
``auth.json`` under ``credential_pool.copilot[]``. The pool is
|
||||
populated by ``hermes auth add copilot`` and by ``_seed_from_env``
|
||||
when the env var is set in ``~/.hermes/.env``.
|
||||
|
||||
Without (2), users whose only Copilot credential is in the pool see
|
||||
the ``/model`` picker fall back to a stale hardcoded list because the
|
||||
live catalog fetch silently 401s. To avoid wedging on a malformed pool
|
||||
entry, each candidate is exchanged via ``exchange_copilot_token`` —
|
||||
only entries that actually exchange successfully are returned, so a
|
||||
later valid entry is reachable when an earlier one is unsupported.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.auth import resolve_api_key_provider_credentials
|
||||
|
||||
creds = resolve_api_key_provider_credentials("copilot")
|
||||
return str(creds.get("api_key") or "").strip()
|
||||
api_key = str(creds.get("api_key") or "").strip()
|
||||
if api_key:
|
||||
return api_key
|
||||
except Exception:
|
||||
return ""
|
||||
pass
|
||||
|
||||
try:
|
||||
from hermes_cli.auth import read_credential_pool
|
||||
from hermes_cli.copilot_auth import (
|
||||
exchange_copilot_token,
|
||||
validate_copilot_token,
|
||||
)
|
||||
|
||||
for entry in read_credential_pool("copilot"):
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
raw = str(entry.get("access_token") or "").strip()
|
||||
if not raw:
|
||||
continue
|
||||
valid, _ = validate_copilot_token(raw)
|
||||
if not valid:
|
||||
continue
|
||||
try:
|
||||
api_token, _expires_at = exchange_copilot_token(raw)
|
||||
except Exception:
|
||||
continue
|
||||
if api_token:
|
||||
return api_token
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
# Providers where models.dev is treated as authoritative: curated static
|
||||
@@ -1884,6 +2000,18 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
|
||||
live = fetch_api_models(api_key, base_url)
|
||||
if live:
|
||||
return live
|
||||
# Bedrock uses live discovery keyed by the resolved AWS region so that
|
||||
# EU/AP users see eu.*/ap.* model IDs instead of the static us.* list.
|
||||
# Note: early return intentionally skips _MODELS_DEV_PREFERRED merge
|
||||
# below — bedrock is not expected to appear in that table.
|
||||
if normalized == "bedrock":
|
||||
try:
|
||||
from agent.bedrock_adapter import bedrock_model_ids_or_none
|
||||
ids = bedrock_model_ids_or_none()
|
||||
if ids is not None:
|
||||
return ids
|
||||
except Exception:
|
||||
pass
|
||||
curated_static = list(_PROVIDER_MODELS.get(normalized, []))
|
||||
if normalized in _MODELS_DEV_PREFERRED:
|
||||
return _merge_with_models_dev(normalized, curated_static)
|
||||
@@ -1906,28 +2034,56 @@ def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
|
||||
return None
|
||||
|
||||
headers: dict[str, str] = {"anthropic-version": "2023-06-01"}
|
||||
if _is_oauth_token(token):
|
||||
is_oauth = _is_oauth_token(token)
|
||||
if is_oauth:
|
||||
headers["Authorization"] = f"Bearer {token}"
|
||||
from agent.anthropic_adapter import _COMMON_BETAS, _OAUTH_ONLY_BETAS
|
||||
from agent.anthropic_adapter import _COMMON_BETAS, _OAUTH_ONLY_BETAS, _CONTEXT_1M_BETA
|
||||
headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS)
|
||||
else:
|
||||
headers["x-api-key"] = token
|
||||
|
||||
req = urllib.request.Request(
|
||||
"https://api.anthropic.com/v1/models",
|
||||
headers=headers,
|
||||
)
|
||||
try:
|
||||
def _do_request(h: dict[str, str]):
|
||||
req = urllib.request.Request(
|
||||
"https://api.anthropic.com/v1/models",
|
||||
headers=h,
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
data = json.loads(resp.read().decode())
|
||||
models = [m["id"] for m in data.get("data", []) if m.get("id")]
|
||||
# Sort: latest/largest first (opus > sonnet > haiku, higher version first)
|
||||
return sorted(models, key=lambda m: (
|
||||
"opus" not in m, # opus first
|
||||
"sonnet" not in m, # then sonnet
|
||||
"haiku" not in m, # then haiku
|
||||
m, # alphabetical within tier
|
||||
))
|
||||
return json.loads(resp.read().decode())
|
||||
|
||||
try:
|
||||
try:
|
||||
data = _do_request(headers)
|
||||
except urllib.error.HTTPError as http_err:
|
||||
# Reactive recovery for OAuth subscriptions that reject the 1M
|
||||
# context beta with 400 "long context beta is not yet available
|
||||
# for this subscription". Retry once without the beta; re-raise
|
||||
# anything else so the outer except logs it.
|
||||
if (
|
||||
is_oauth
|
||||
and http_err.code == 400
|
||||
):
|
||||
try:
|
||||
body_text = http_err.read().decode(errors="ignore").lower()
|
||||
except Exception:
|
||||
body_text = ""
|
||||
if "long context beta" in body_text and "not yet available" in body_text:
|
||||
headers["anthropic-beta"] = ",".join(
|
||||
[b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
|
||||
+ list(_OAUTH_ONLY_BETAS)
|
||||
)
|
||||
data = _do_request(headers)
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
raise
|
||||
models = [m["id"] for m in data.get("data", []) if m.get("id")]
|
||||
# Sort: latest/largest first (opus > sonnet > haiku, higher version first)
|
||||
return sorted(models, key=lambda m: (
|
||||
"opus" not in m, # opus first
|
||||
"sonnet" not in m, # then sonnet
|
||||
"haiku" not in m, # then haiku
|
||||
m, # alphabetical within tier
|
||||
))
|
||||
except Exception as e:
|
||||
import logging
|
||||
logging.getLogger(__name__).debug("Failed to fetch Anthropic models: %s", e)
|
||||
@@ -2079,6 +2235,228 @@ def _is_github_models_base_url(base_url: Optional[str]) -> bool:
|
||||
)
|
||||
|
||||
|
||||
def _lmstudio_server_root(base_url: Optional[str]) -> Optional[str]:
|
||||
"""Strip ``/v1`` suffix from an LM Studio base URL to get the native API root.
|
||||
|
||||
Returns ``None`` when the base URL is empty/invalid.
|
||||
"""
|
||||
root = (base_url or "").strip().rstrip("/")
|
||||
if root.endswith("/v1"):
|
||||
root = root[:-3].rstrip("/")
|
||||
return root or None
|
||||
|
||||
|
||||
def _lmstudio_request_headers(api_key: Optional[str] = None) -> dict:
|
||||
"""Build HTTP headers for LM Studio native API requests."""
|
||||
headers = {"User-Agent": _HERMES_USER_AGENT}
|
||||
token = str(api_key or "").strip()
|
||||
if token:
|
||||
headers["Authorization"] = f"Bearer {token}"
|
||||
return headers
|
||||
|
||||
|
||||
def _lmstudio_fetch_raw_models(
|
||||
api_key: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
timeout: float = 5.0,
|
||||
) -> Optional[list[dict]]:
|
||||
"""Fetch the raw model list from LM Studio's ``/api/v1/models``.
|
||||
|
||||
Returns the ``models`` list of dicts on success, ``None`` on network
|
||||
errors or malformed responses. Raises ``AuthError`` on HTTP 401/403.
|
||||
"""
|
||||
server_root = _lmstudio_server_root(base_url)
|
||||
if not server_root:
|
||||
return None
|
||||
|
||||
headers = _lmstudio_request_headers(api_key)
|
||||
request = urllib.request.Request(server_root + "/api/v1/models", headers=headers)
|
||||
try:
|
||||
with urllib.request.urlopen(request, timeout=timeout) as resp:
|
||||
payload = json.loads(resp.read().decode())
|
||||
except urllib.error.HTTPError as exc:
|
||||
if exc.code in (401, 403):
|
||||
from hermes_cli.auth import AuthError
|
||||
raise AuthError(
|
||||
f"LM Studio rejected the request with HTTP {exc.code}.",
|
||||
provider="lmstudio",
|
||||
code="auth_rejected",
|
||||
) from exc
|
||||
import logging
|
||||
logging.getLogger(__name__).debug(
|
||||
"LM Studio probe at %s failed with HTTP %s", server_root, exc.code,
|
||||
)
|
||||
return None
|
||||
except Exception as exc:
|
||||
import logging
|
||||
logging.getLogger(__name__).debug(
|
||||
"LM Studio probe at %s failed: %s", server_root, exc,
|
||||
)
|
||||
return None
|
||||
|
||||
raw_models = payload.get("models") if isinstance(payload, dict) else None
|
||||
if not isinstance(raw_models, list):
|
||||
import logging
|
||||
logging.getLogger(__name__).debug(
|
||||
"LM Studio probe at %s returned malformed payload (no `models` list)",
|
||||
server_root,
|
||||
)
|
||||
return None
|
||||
return raw_models
|
||||
|
||||
|
||||
def probe_lmstudio_models(
|
||||
api_key: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
timeout: float = 5.0,
|
||||
) -> Optional[list[str]]:
|
||||
"""Probe LM Studio's model listing.
|
||||
|
||||
Returns chat-capable model keys on success, including the valid empty-list
|
||||
case when the server is reachable but has no non-embedding models.
|
||||
Returns ``None`` on network errors, malformed responses, or empty/invalid
|
||||
base URLs.
|
||||
|
||||
Raises ``AuthError`` on HTTP 401/403 so callers can surface token issues
|
||||
separately from reachability problems.
|
||||
"""
|
||||
raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=timeout)
|
||||
if raw_models is None:
|
||||
return None
|
||||
|
||||
keys: list[str] = []
|
||||
for raw in raw_models:
|
||||
if not isinstance(raw, dict):
|
||||
continue
|
||||
if str(raw.get("type") or "").strip().lower() == "embedding":
|
||||
continue
|
||||
key = str(raw.get("key") or raw.get("id") or "").strip()
|
||||
if key and key not in keys:
|
||||
keys.append(key)
|
||||
return keys
|
||||
|
||||
|
||||
def fetch_lmstudio_models(
|
||||
api_key: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
timeout: float = 5.0,
|
||||
) -> list[str]:
|
||||
"""Fetch LM Studio chat-capable model keys from native ``/api/v1/models``.
|
||||
|
||||
Returns a list of model keys (e.g. ``publisher/model-name``) with embedding
|
||||
models filtered out. Returns an empty list on network errors, malformed
|
||||
responses, or empty/invalid base URLs.
|
||||
|
||||
Raises ``AuthError`` on HTTP 401/403 so callers can distinguish a missing
|
||||
or wrong ``LM_API_KEY`` from an unreachable server — the most common
|
||||
LM Studio support case once auth-enabled mode is turned on.
|
||||
"""
|
||||
models = probe_lmstudio_models(api_key=api_key, base_url=base_url, timeout=timeout)
|
||||
return models or []
|
||||
|
||||
|
||||
def ensure_lmstudio_model_loaded(
|
||||
model: str,
|
||||
base_url: Optional[str],
|
||||
api_key: Optional[str],
|
||||
target_context_length: int,
|
||||
timeout: float = 120.0,
|
||||
) -> Optional[int]:
|
||||
"""Ensure LM Studio has ``model`` loaded with at least ``target_context_length``.
|
||||
|
||||
No-op when an instance is already loaded with sufficient context. Otherwise
|
||||
POSTs ``/api/v1/models/load`` to (re)load with the target context, capped
|
||||
at the model's ``max_context_length``. Returns the resolved loaded context
|
||||
length, or ``None`` when the probe / load failed.
|
||||
"""
|
||||
server_root = _lmstudio_server_root(base_url)
|
||||
if not server_root:
|
||||
return None
|
||||
|
||||
headers = _lmstudio_request_headers(api_key)
|
||||
|
||||
try:
|
||||
raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=10)
|
||||
except Exception:
|
||||
raw_models = None
|
||||
if raw_models is None:
|
||||
return None
|
||||
|
||||
target_entry = None
|
||||
for raw in raw_models:
|
||||
if not isinstance(raw, dict):
|
||||
continue
|
||||
if raw.get("key") == model or raw.get("id") == model:
|
||||
target_entry = raw
|
||||
break
|
||||
if target_entry is None:
|
||||
return None
|
||||
|
||||
max_ctx = target_entry.get("max_context_length")
|
||||
if isinstance(max_ctx, int) and max_ctx > 0:
|
||||
target_context_length = min(target_context_length, max_ctx)
|
||||
|
||||
for inst in target_entry.get("loaded_instances") or []:
|
||||
cfg = inst.get("config") if isinstance(inst, dict) else None
|
||||
loaded_ctx = cfg.get("context_length") if isinstance(cfg, dict) else None
|
||||
if isinstance(loaded_ctx, int) and loaded_ctx >= target_context_length:
|
||||
return loaded_ctx
|
||||
|
||||
body = json.dumps({
|
||||
"model": model,
|
||||
"context_length": target_context_length,
|
||||
}).encode()
|
||||
load_headers = dict(headers)
|
||||
load_headers["Content-Type"] = "application/json"
|
||||
try:
|
||||
with urllib.request.urlopen(
|
||||
urllib.request.Request(
|
||||
server_root + "/api/v1/models/load",
|
||||
data=body,
|
||||
headers=load_headers,
|
||||
method="POST",
|
||||
),
|
||||
timeout=timeout,
|
||||
) as resp:
|
||||
resp.read()
|
||||
except Exception:
|
||||
return None
|
||||
return target_context_length
|
||||
|
||||
|
||||
def lmstudio_model_reasoning_options(
|
||||
model: str,
|
||||
base_url: Optional[str],
|
||||
api_key: Optional[str] = None,
|
||||
timeout: float = 5.0,
|
||||
) -> list[str]:
|
||||
"""Return the reasoning ``allowed_options`` LM Studio publishes for ``model``.
|
||||
|
||||
Pulls ``capabilities.reasoning.allowed_options`` from ``/api/v1/models``.
|
||||
Returns ``[]`` when the model is unknown, the endpoint is unreachable,
|
||||
or the model does not declare a reasoning capability.
|
||||
"""
|
||||
try:
|
||||
raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=timeout)
|
||||
except Exception:
|
||||
raw_models = None
|
||||
if not raw_models:
|
||||
return []
|
||||
|
||||
for raw in raw_models:
|
||||
if not isinstance(raw, dict):
|
||||
continue
|
||||
if raw.get("key") != model and raw.get("id") != model:
|
||||
continue
|
||||
caps = raw.get("capabilities")
|
||||
reasoning = caps.get("reasoning") if isinstance(caps, dict) else None
|
||||
opts = reasoning.get("allowed_options") if isinstance(reasoning, dict) else None
|
||||
if isinstance(opts, list):
|
||||
return [str(o).strip().lower() for o in opts if isinstance(o, str)]
|
||||
return []
|
||||
return []
|
||||
|
||||
|
||||
def _fetch_github_models(api_key: Optional[str] = None, timeout: float = 5.0) -> Optional[list[str]]:
|
||||
catalog = fetch_github_model_catalog(api_key=api_key, timeout=timeout)
|
||||
if not catalog:
|
||||
@@ -2674,6 +3052,40 @@ def validate_requested_model(
|
||||
"message": "Model names cannot contain spaces.",
|
||||
}
|
||||
|
||||
if normalized == "lmstudio":
|
||||
from hermes_cli.auth import AuthError
|
||||
# Use probe_lmstudio_models so we can distinguish None (unreachable
|
||||
# / malformed response) from [] (reachable, but no chat-capable models
|
||||
# are loaded). fetch_lmstudio_models collapses both to [].
|
||||
try:
|
||||
models = probe_lmstudio_models(api_key=api_key, base_url=base_url)
|
||||
except AuthError as exc:
|
||||
return {
|
||||
"accepted": False, "persist": False, "recognized": False,
|
||||
"message": (
|
||||
f"{exc} Set `LM_API_KEY` (or update it) to match the server's bearer token."
|
||||
),
|
||||
}
|
||||
if models is None:
|
||||
return {
|
||||
"accepted": False, "persist": False, "recognized": False,
|
||||
"message": f"Could not reach LM Studio's `/api/v1/models` to validate `{requested}`.",
|
||||
}
|
||||
if not models:
|
||||
return {
|
||||
"accepted": False, "persist": False, "recognized": False,
|
||||
"message": (
|
||||
f"LM Studio is reachable but no chat-capable models are loaded. "
|
||||
f"Load `{requested}` in LM Studio (Developer tab → Load Model) and try again."
|
||||
),
|
||||
}
|
||||
if requested_for_lookup in set(models):
|
||||
return {"accepted": True, "persist": True, "recognized": True, "message": None}
|
||||
return {
|
||||
"accepted": False, "persist": False, "recognized": False,
|
||||
"message": f"Model `{requested}` was not found in LM Studio's model listing.",
|
||||
}
|
||||
|
||||
if normalized == "custom":
|
||||
# Try probing with correct auth for the api_mode.
|
||||
if api_mode == "anthropic_messages":
|
||||
|
||||
+145
-16
@@ -3,7 +3,8 @@
|
||||
Bypasses cli.py entirely. No banner, no spinner, no session_id line,
|
||||
no stderr chatter. Just the agent's final text to stdout.
|
||||
|
||||
Toolsets = whatever the user has configured for "cli" in `hermes tools`.
|
||||
Toolsets = explicit --toolsets when provided, otherwise whatever the user has
|
||||
configured for "cli" in `hermes tools`.
|
||||
Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn.
|
||||
Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call).
|
||||
Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual).
|
||||
@@ -28,10 +29,103 @@ from contextlib import redirect_stderr, redirect_stdout
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def _normalize_toolsets(toolsets: object = None) -> list[str] | None:
|
||||
if not toolsets:
|
||||
return None
|
||||
|
||||
raw_items = [toolsets] if isinstance(toolsets, str) else toolsets
|
||||
if not isinstance(raw_items, (list, tuple)):
|
||||
raw_items = [raw_items]
|
||||
|
||||
normalized: list[str] = []
|
||||
for item in raw_items:
|
||||
if isinstance(item, str):
|
||||
normalized.extend(part.strip() for part in item.split(","))
|
||||
else:
|
||||
normalized.append(str(item).strip())
|
||||
|
||||
return [item for item in normalized if item] or None
|
||||
|
||||
|
||||
def _validate_explicit_toolsets(toolsets: object = None) -> tuple[list[str] | None, str | None]:
|
||||
normalized = _normalize_toolsets(toolsets)
|
||||
if normalized is None:
|
||||
return None, None
|
||||
|
||||
try:
|
||||
from toolsets import validate_toolset
|
||||
except Exception as exc:
|
||||
return None, f"hermes -z: failed to validate --toolsets: {exc}\n"
|
||||
|
||||
built_in = [name for name in normalized if validate_toolset(name)]
|
||||
unresolved = [name for name in normalized if name not in built_in]
|
||||
|
||||
if unresolved:
|
||||
try:
|
||||
from hermes_cli.plugins import discover_plugins
|
||||
|
||||
discover_plugins()
|
||||
plugin_valid = [name for name in unresolved if validate_toolset(name)]
|
||||
except Exception:
|
||||
plugin_valid = []
|
||||
|
||||
if plugin_valid:
|
||||
built_in.extend(plugin_valid)
|
||||
unresolved = [name for name in unresolved if name not in plugin_valid]
|
||||
|
||||
if any(name in {"all", "*"} for name in built_in):
|
||||
ignored = [name for name in normalized if name not in {"all", "*"}]
|
||||
if ignored:
|
||||
sys.stderr.write(
|
||||
"hermes -z: --toolsets all enables every toolset; "
|
||||
f"ignoring additional entries: {', '.join(ignored)}\n"
|
||||
)
|
||||
return None, None
|
||||
|
||||
mcp_names: set[str] = set()
|
||||
mcp_disabled: set[str] = set()
|
||||
if unresolved:
|
||||
try:
|
||||
from hermes_cli.config import read_raw_config
|
||||
from hermes_cli.tools_config import _parse_enabled_flag
|
||||
|
||||
cfg = read_raw_config()
|
||||
mcp_servers = cfg.get("mcp_servers") if isinstance(cfg.get("mcp_servers"), dict) else {}
|
||||
for name, server_cfg in mcp_servers.items():
|
||||
if not isinstance(server_cfg, dict):
|
||||
continue
|
||||
if _parse_enabled_flag(server_cfg.get("enabled", True), default=True):
|
||||
mcp_names.add(str(name))
|
||||
else:
|
||||
mcp_disabled.add(str(name))
|
||||
except Exception:
|
||||
mcp_names = set()
|
||||
mcp_disabled = set()
|
||||
|
||||
mcp_valid = [name for name in unresolved if name in mcp_names]
|
||||
disabled = [name for name in unresolved if name in mcp_disabled]
|
||||
unknown = [name for name in unresolved if name not in mcp_names and name not in mcp_disabled]
|
||||
valid = built_in + mcp_valid
|
||||
|
||||
if unknown:
|
||||
sys.stderr.write(f"hermes -z: ignoring unknown --toolsets entries: {', '.join(unknown)}\n")
|
||||
if disabled:
|
||||
sys.stderr.write(
|
||||
"hermes -z: ignoring disabled MCP servers (set enabled: true in config.yaml to use): "
|
||||
f"{', '.join(disabled)}\n"
|
||||
)
|
||||
|
||||
if not valid:
|
||||
return None, "hermes -z: --toolsets did not contain any valid toolsets.\n"
|
||||
|
||||
return valid, None
|
||||
|
||||
|
||||
def run_oneshot(
|
||||
prompt: str,
|
||||
model: Optional[str] = None,
|
||||
provider: Optional[str] = None,
|
||||
toolsets: object = None,
|
||||
) -> int:
|
||||
"""Execute a single prompt and print only the final content block.
|
||||
|
||||
@@ -42,6 +136,7 @@ def run_oneshot(
|
||||
provider: Optional provider override. Falls back to
|
||||
HERMES_INFERENCE_PROVIDER env var, then config.yaml's model.provider,
|
||||
then "auto".
|
||||
toolsets: Optional comma-separated string or iterable of toolsets.
|
||||
|
||||
Returns the exit code. Caller should sys.exit() with the return.
|
||||
"""
|
||||
@@ -65,6 +160,12 @@ def run_oneshot(
|
||||
)
|
||||
return 2
|
||||
|
||||
explicit_toolsets, toolsets_error = _validate_explicit_toolsets(toolsets)
|
||||
if toolsets_error:
|
||||
sys.stderr.write(toolsets_error)
|
||||
return 2
|
||||
use_config_toolsets = _normalize_toolsets(toolsets) is None
|
||||
|
||||
# Auto-approve any shell / tool approvals. Non-interactive by
|
||||
# definition — a prompt would hang forever.
|
||||
os.environ["HERMES_YOLO_MODE"] = "1"
|
||||
@@ -77,7 +178,13 @@ def run_oneshot(
|
||||
|
||||
try:
|
||||
with redirect_stdout(devnull), redirect_stderr(devnull):
|
||||
response = _run_agent(prompt, model=model, provider=provider)
|
||||
response = _run_agent(
|
||||
prompt,
|
||||
model=model,
|
||||
provider=provider,
|
||||
toolsets=explicit_toolsets,
|
||||
use_config_toolsets=use_config_toolsets,
|
||||
)
|
||||
finally:
|
||||
try:
|
||||
devnull.close()
|
||||
@@ -96,6 +203,8 @@ def _run_agent(
|
||||
prompt: str,
|
||||
model: Optional[str] = None,
|
||||
provider: Optional[str] = None,
|
||||
toolsets: object = None,
|
||||
use_config_toolsets: bool = True,
|
||||
) -> str:
|
||||
"""Build an AIAgent exactly like a normal CLI chat turn would, then
|
||||
run a single conversation. Returns the final response string."""
|
||||
@@ -128,32 +237,52 @@ def _run_agent(
|
||||
# the user's configured default provider, which may not host the model
|
||||
# the caller just asked for.
|
||||
effective_provider = (provider or "").strip() or None
|
||||
explicit_base_url_from_alias: Optional[str] = None
|
||||
if effective_provider is None and (model or env_model):
|
||||
# Only auto-detect when the model was explicitly requested via arg or
|
||||
# env var (not when it came from config — that's the "use my defaults"
|
||||
# path and the configured provider is already correct).
|
||||
explicit_model = (model or "").strip() or env_model
|
||||
if explicit_model:
|
||||
cfg_provider = ""
|
||||
if isinstance(model_cfg, dict):
|
||||
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
current_provider = (
|
||||
cfg_provider
|
||||
or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
|
||||
or "auto"
|
||||
)
|
||||
detected = detect_provider_for_model(explicit_model, current_provider)
|
||||
if detected:
|
||||
effective_provider, effective_model = detected
|
||||
# First check DIRECT_ALIASES populated from config.yaml `model_aliases:`.
|
||||
# These map a user-defined alias to (model, provider, base_url) for
|
||||
# endpoints not in any catalog (local servers, custom proxies, etc.).
|
||||
try:
|
||||
from hermes_cli import model_switch as _ms
|
||||
_ms._ensure_direct_aliases()
|
||||
direct = _ms.DIRECT_ALIASES.get(explicit_model.strip().lower())
|
||||
except Exception:
|
||||
direct = None
|
||||
if direct is not None:
|
||||
effective_model = direct.model
|
||||
effective_provider = direct.provider
|
||||
if direct.base_url:
|
||||
explicit_base_url_from_alias = direct.base_url.rstrip("/")
|
||||
else:
|
||||
cfg_provider = ""
|
||||
if isinstance(model_cfg, dict):
|
||||
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
current_provider = (
|
||||
cfg_provider
|
||||
or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
|
||||
or "auto"
|
||||
)
|
||||
detected = detect_provider_for_model(explicit_model, current_provider)
|
||||
if detected:
|
||||
effective_provider, effective_model = detected
|
||||
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=effective_provider,
|
||||
target_model=effective_model or None,
|
||||
explicit_base_url=explicit_base_url_from_alias,
|
||||
)
|
||||
|
||||
# Pull in whatever toolsets the user has enabled for "cli".
|
||||
# sorted() gives stable ordering; set→list for AIAgent's signature.
|
||||
toolsets_list = sorted(_get_platform_tools(cfg, "cli"))
|
||||
# Pull in explicit toolsets when provided; otherwise use whatever the user
|
||||
# has enabled for "cli". sorted() gives stable ordering for config-derived
|
||||
# sets; explicit values preserve user order.
|
||||
toolsets_list = _normalize_toolsets(toolsets)
|
||||
if toolsets_list is None and use_config_toolsets:
|
||||
toolsets_list = sorted(_get_platform_tools(cfg, "cli"))
|
||||
|
||||
agent = AIAgent(
|
||||
api_key=runtime.get("api_key"),
|
||||
|
||||
+36
-2
@@ -44,6 +44,40 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
|
||||
|
||||
|
||||
def platform_label(key: str, default: str = "") -> str:
|
||||
"""Return the display label for a platform key, or *default*."""
|
||||
"""Return the display label for a platform key, or *default*.
|
||||
|
||||
Checks the static PLATFORMS dict first, then the plugin platform
|
||||
registry for dynamically registered platforms.
|
||||
"""
|
||||
info = PLATFORMS.get(key)
|
||||
return info.label if info is not None else default
|
||||
if info is not None:
|
||||
return info.label
|
||||
# Check plugin registry
|
||||
try:
|
||||
from gateway.platform_registry import platform_registry
|
||||
entry = platform_registry.get(key)
|
||||
if entry:
|
||||
return f"{entry.emoji} {entry.label}" if entry.emoji else entry.label
|
||||
except Exception:
|
||||
pass
|
||||
return default
|
||||
|
||||
|
||||
def get_all_platforms() -> "OrderedDict[str, PlatformInfo]":
|
||||
"""Return PLATFORMS merged with any plugin-registered platforms.
|
||||
|
||||
Plugin platforms are appended after builtins. This is the function
|
||||
that tools_config and skills_config should use for platform menus.
|
||||
"""
|
||||
merged = OrderedDict(PLATFORMS)
|
||||
try:
|
||||
from gateway.platform_registry import platform_registry
|
||||
for entry in platform_registry.plugin_entries():
|
||||
if entry.name not in merged:
|
||||
merged[entry.name] = PlatformInfo(
|
||||
label=f"{entry.emoji} {entry.label}" if entry.emoji else entry.label,
|
||||
default_toolset=f"hermes-{entry.name}",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return merged
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user